Skip to content

Commit e00387b

Browse files
committed
enh: write a base data class that is modality-agnostic
1 parent a78af6c commit e00387b

File tree

2 files changed

+429
-0
lines changed

2 files changed

+429
-0
lines changed

src/nifreeze/data/base.py

+280
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
# vi: set ft=python sts=4 ts=4 sw=4 et:
3+
#
4+
# Copyright The NiPreps Developers <[email protected]>
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
# We support and encourage derived works from this project, please read
19+
# about our expectations at
20+
#
21+
# https://www.nipreps.org/community/licensing/
22+
#
23+
"""Representing data in hard-disk and memory."""
24+
25+
from __future__ import annotations
26+
27+
from collections import namedtuple
28+
from pathlib import Path
29+
from tempfile import mkdtemp
30+
from typing import Any
31+
32+
import attr
33+
import h5py
34+
import nibabel as nb
35+
import numpy as np
36+
from nitransforms.linear import Affine
37+
38+
NFDH5_EXT = ".h5"
39+
40+
41+
def _data_repr(value: np.ndarray | None) -> str:
42+
if value is None:
43+
return "None"
44+
return f"<{'x'.join(str(v) for v in value.shape)} ({value.dtype})>"
45+
46+
47+
def _cmp(lh: Any, rh: Any) -> bool:
48+
if isinstance(lh, np.ndarray) and isinstance(rh, np.ndarray):
49+
return np.allclose(lh, rh)
50+
51+
return lh == rh
52+
53+
54+
@attr.s(slots=True)
55+
class BaseDataset:
56+
"""
57+
Base dataset representation structure.
58+
59+
A general data structure to represent 4D images and the necessary metadata
60+
for head-motion estimation (that is, potentially a brain mask and the head-motion
61+
estimates).
62+
63+
The data structure has a direct HDF5 mapping to facilitate memory efficiency.
64+
For modalities requiring additional metadata such as DWI (which requires the gradient table
65+
and potentially a b=0 reference), this class may be derived to override certain behaviors
66+
(in the case of DWIs, the indexed access should also return the corresponding gradient
67+
specification).
68+
69+
"""
70+
71+
dataobj = attr.ib(default=None, repr=_data_repr, eq=attr.cmp_using(eq=_cmp))
72+
"""A :obj:`~numpy.ndarray` object for the data array."""
73+
affine = attr.ib(default=None, repr=_data_repr, eq=attr.cmp_using(eq=_cmp))
74+
"""Best affine for RAS-to-voxel conversion of coordinates (NIfTI header)."""
75+
brainmask = attr.ib(default=None, repr=_data_repr, eq=attr.cmp_using(eq=_cmp))
76+
"""A boolean ndarray object containing a corresponding brainmask."""
77+
motion_affines = attr.ib(default=None, eq=attr.cmp_using(eq=_cmp))
78+
"""List of :obj:`~nitransforms.linear.Affine` realigning the dataset."""
79+
datahdr = attr.ib(default=None)
80+
"""A :obj:`~nibabel.spatialimages.SpatialHeader` header corresponding to the data."""
81+
82+
_filepath = attr.ib(
83+
factory=lambda: Path(mkdtemp()) / "hmxfms_cache.h5",
84+
repr=False,
85+
eq=False,
86+
)
87+
"""A path to an HDF5 file to store the whole dataset."""
88+
89+
def __len__(self) -> int:
90+
"""Obtain the number of volumes/frames in the dataset."""
91+
return self.dataobj.shape[-1]
92+
93+
def __getitem__(
94+
self, idx: int | slice | tuple | np.ndarray
95+
) -> tuple[np.ndarray, np.ndarray | None]:
96+
"""
97+
Returns volume(s) and corresponding affine(s) through fancy indexing.
98+
99+
Parameters
100+
----------
101+
idx : :obj:`int` or :obj:`slice` or :obj:`tuple` or :obj:`~numpy.ndarray`
102+
Indexer for the last dimension (or possibly other dimensions if extended).
103+
104+
Returns
105+
-------
106+
:obj:`tuple`
107+
The selected volume(s) and corresponding affine(s).
108+
109+
"""
110+
if self.dataobj is None:
111+
raise ValueError("No data available (dataobj is None).")
112+
113+
affine = self.motion_affines[idx] if self.motion_affines is not None else None
114+
return self.dataobj[..., idx], affine
115+
116+
def get_filename(self) -> Path:
117+
"""Get the filepath of the HDF5 file."""
118+
return self._filepath
119+
120+
def set_transform(self, index: int, affine: np.ndarray, order: int = 3) -> None:
121+
"""
122+
Set an affine transform for a particular index and update the data object.
123+
124+
Parameters
125+
----------
126+
index : :obj:`int`
127+
The volume index to transform.
128+
affine : :obj:`numpy.ndarray`
129+
The 4x4 affine matrix to be applied.
130+
order : :obj:`int`, optional
131+
The order of the spline interpolation. Default is 3.
132+
133+
"""
134+
reference = namedtuple("ImageGrid", ("shape", "affine"))(
135+
shape=self.dataobj.shape[:3], affine=self.affine
136+
)
137+
138+
xform = Affine(matrix=affine, reference=reference)
139+
140+
if not Path(self._filepath).exists():
141+
self.to_filename(self._filepath)
142+
143+
# read original DWI data & b-vector
144+
with h5py.File(self._filepath, "r") as in_file:
145+
root = in_file["/0"]
146+
dataframe = np.asanyarray(root["dataobj"][..., index])
147+
148+
datamoving = nb.Nifti1Image(dataframe, self.affine, None)
149+
150+
# resample and update orientation at index
151+
self.dataobj[..., index] = np.asanyarray(
152+
xform.apply(datamoving, order=order).dataobj,
153+
dtype=self.dataobj.dtype,
154+
)
155+
156+
# if head motion affines are to be used, initialized to identities
157+
if self.motion_affines is None:
158+
self.motion_affines = np.repeat(np.eye(4)[None, ...], len(self), axis=0)
159+
160+
self.motion_affines[index] = xform.matrix
161+
162+
def to_filename(
163+
self, filename: Path | str, compression: str | None = None, compression_opts: Any = None
164+
) -> None:
165+
"""
166+
Write an HDF5 file to disk.
167+
168+
Parameters
169+
----------
170+
filename : :obj:`os.pathlike`
171+
The HDF5 file path to write to.
172+
compression : :obj:`str`, optional
173+
Compression filter ('gzip', etc.). Default is None (no compression).
174+
compression_opts : :obj:`typing.Any`, optional
175+
Compression level or other compression parameters.
176+
177+
"""
178+
filename = Path(filename)
179+
if not filename.name.endswith(NFDH5_EXT):
180+
filename = filename.parent / f"{filename.name}.h5"
181+
182+
with h5py.File(filename, "w") as out_file:
183+
out_file.attrs["Format"] = "NFDH5" # NiFreeze Data HDF5
184+
out_file.attrs["Version"] = np.uint16(1)
185+
root = out_file.create_group("/0")
186+
root.attrs["Type"] = "base dataset"
187+
for f in attr.fields(self.__class__):
188+
if f.name.startswith("_"):
189+
continue
190+
191+
value = getattr(self, f.name)
192+
if value is not None:
193+
root.create_dataset(
194+
f.name,
195+
data=value,
196+
compression=compression,
197+
compression_opts=compression_opts,
198+
)
199+
200+
def to_nifti(self, filename: Path) -> None:
201+
"""
202+
Write a NIfTI 1.0 file to disk.
203+
204+
Parameters
205+
----------
206+
filename : Path or str
207+
The output NIfTI file path.
208+
209+
"""
210+
nii = nb.Nifti1Image(self.dataobj, self.affine, self.datahdr)
211+
if self.datahdr is None:
212+
nii.header.set_xyzt_units("mm")
213+
nii.to_filename(filename)
214+
215+
@classmethod
216+
def from_filename(cls, filename: Path | str) -> BaseDataset:
217+
"""
218+
Read an HDF5 file from disk and create a BaseDataset.
219+
220+
Parameters
221+
----------
222+
filename : :obj:`os.pathlike`
223+
The HDF5 file path to read.
224+
225+
Returns
226+
-------
227+
BaseDataset
228+
The constructed dataset with data loaded from the file.
229+
230+
"""
231+
with h5py.File(filename, "r") as in_file:
232+
root = in_file["/0"]
233+
data = {k: np.asanyarray(v) for k, v in root.items() if not k.startswith("_")}
234+
return cls(**data)
235+
236+
237+
def load(
238+
filename: Path | str,
239+
brainmask_file: Path | str | None = None,
240+
motion_file: Path | str | None = None,
241+
) -> BaseDataset:
242+
"""
243+
Load 4D data from a filename or an HDF5 file.
244+
245+
Parameters
246+
----------
247+
filename : :obj:`os.pathlike`
248+
The NIfTI or HDF5 file.
249+
brainmask_file : :obj:`os.pathlike`, optional
250+
A brainmask NIfTI file. If provided, will be loaded and
251+
stored in the returned dataset.
252+
motion_file : :obj:`os.pathlike`
253+
A file containing head-motion affine matrices (linear)
254+
255+
Returns
256+
-------
257+
BaseDataset
258+
The loaded dataset.
259+
260+
Raises
261+
------
262+
ValueError
263+
If the file extension is not supported or the file cannot be loaded.
264+
265+
"""
266+
filename = Path(filename)
267+
if filename.name.endswith(NFDH5_EXT):
268+
return BaseDataset.from_filename(filename)
269+
270+
img = nb.load(filename)
271+
retval = BaseDataset(dataobj=img.dataobj, affine=img.affine)
272+
273+
if brainmask_file:
274+
mask = nb.load(brainmask_file)
275+
retval.brainmask = np.asanyarray(mask.dataobj)
276+
277+
if motion_file:
278+
raise NotImplementedError
279+
280+
return retval

0 commit comments

Comments
 (0)