from typing import Generator, Iterable, Any
import copy
from mirp._data_import.generic_file import ImageFile
from mirp.settings.generic import SettingsClass
from mirp.utilities.parallel import ray_remote, ray_init, ray_is_initialized, ray_get, ray_shutdown
from mirp._workflows.standardWorkflow import StandardWorkflow
def deep_learning_preprocessing(
output_slices: bool = False,
crop_size: None | list[float] | list[int] = None,
image_export_format: str = "dict",
write_file_format: str = "numpy",
export_images: None | bool = None,
write_images: None | bool = None,
write_dir: None | str = None,
num_cpus: None | int = None,
**kwargs
) -> None | list[Any]:
"""
Pre-processes images for deep learning.
Parameters
----------
output_slices: bool, optional, default: False
Determines whether separate slices should be extracted.
crop_size: list of float or list of int, optional, default: None
Size to which the images and masks should be cropped. Images and masks are cropped around the center of the
mask(s).
.. note::
MIRP follows the numpy convention for indexing (*z*, *y*, *x*). The final element always corresponds to the
*x* dimension.
image_export_format: {"dict", "native", "numpy"}, default: "dict"
Return format for processed images and masks. ``"dict"`` returns dictionaries of images and masks as numpy
arrays and associated characteristics. ``"native"`` returns images and masks in their internal format.
``"numpy"`` returns images and masks in numpy format. This argument is only used if ``export_images=True``.
write_file_format: {"nifti", "numpy"}, default: "numpy"
File format for processed images and masks. ``"nifti"`` writes images and masks in the NIfTI file format,
and ``"numpy"`` writes images and masks as numpy files. This argument is only used if ``write_images=True``.
export_images: bool, optional
Determines whether processed images and masks should be returned by the function.
write_images: bool, optional
Determines whether processed images and masks should be written to the directory indicated by the
``write_dir`` keyword argument.
write_dir: str, optional
Path to directory where processed images and masks should be written. If not set, processed images and masks
are returned by this function. Required if ``write_images=True``.
num_cpus: int, optional, default: None
Number of CPU nodes that should be used for parallel processing. Image and mask processing can be
parallelized using the ``ray`` package. If a ray cluster is defined by the user, this cluster will be used
        instead. By default, images and masks are processed sequentially.
**kwargs:
Keyword arguments passed for importing images and masks (
:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`) and configuring settings (notably
:class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass`,
:class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`), among others.
Returns
-------
None | list[Any]
List of images and masks in the format indicated by ``image_export_format``, if ``export_images=True``.
See Also
--------
Keyword arguments can be provided to configure the following:
* image and mask import (:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`)
* image post-processing (:class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass`)
* image perturbation / augmentation (:class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`)
* image interpolation / resampling (:class:`~mirp.settings.interpolation_parameters.ImageInterpolationSettingsClass` and
:class:`~mirp.settings.interpolation_parameters.MaskInterpolationSettingsClass`)
* mask resegmentation (:class:`~mirp.settings.resegmentation_parameters.ResegmentationSettingsClass`)
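
    Examples
    --------
    A minimal usage sketch, assuming the function is importable from the top-level ``mirp``
    namespace; the file paths are placeholders and ``crop_size`` follows the (*z*, *y*, *x*)
    convention:

    >>> from mirp import deep_learning_preprocessing
    >>> processed_data = deep_learning_preprocessing(
    ...     image="path/to/image.nii.gz",
    ...     mask="path/to/mask.nii.gz",
    ...     crop_size=[50, 224, 224],
    ...     image_export_format="dict"
    ... )

    To write processed images and masks to disk instead of returning them, provide
    ``write_dir`` (and optionally ``num_cpus`` to process multiple images in parallel):

    >>> deep_learning_preprocessing(
    ...     image="path/to/image_directory",
    ...     mask="path/to/mask_directory",
    ...     crop_size=[50, 224, 224],
    ...     write_dir="path/to/output_directory",
    ...     write_file_format="numpy",
    ...     num_cpus=4
    ... )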
"""
# Conditionally start a ray cluster.
external_ray = ray_is_initialized()
if not external_ray and num_cpus is not None and num_cpus > 1:
ray_init(num_cpus=num_cpus)
if ray_is_initialized():
# Parallel processing.
results = [
_ray_extractor.remote(
workflow=workflow,
output_slices=output_slices,
crop_size=crop_size,
image_export_format=image_export_format,
write_file_format=write_file_format
)
for workflow in _base_deep_learning_preprocessing(
export_images=export_images,
write_images=write_images,
write_dir=write_dir,
**kwargs
)
]
results = ray_get(results)
if not external_ray:
ray_shutdown()
else:
workflows = list(_base_deep_learning_preprocessing(
export_images=export_images,
write_images=write_images,
write_dir=write_dir,
**kwargs)
)
results = [
workflow.deep_learning_conversion(
output_slices=output_slices,
crop_size=crop_size,
image_export_format=image_export_format,
write_file_format=write_file_format
)
for workflow in workflows
]
return results
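

# Remote wrapper around the per-workflow deep learning conversion, so that each workflow can be
# processed as a separate ray task when parallel processing is enabled.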
@ray_remote
def _ray_extractor(
workflow: StandardWorkflow,
output_slices: bool = False,
crop_size: None | list[float] | list[int] = None,
image_export_format: str = "numpy",
write_file_format: str = "numpy"
):
# Limit internal threading by third-party libraries.
from mirp.utilities.parallel import limit_inner_threads
limit_inner_threads()
return workflow.deep_learning_conversion(
output_slices=output_slices,
crop_size=crop_size,
image_export_format=image_export_format,
write_file_format=write_file_format
)
def deep_learning_preprocessing_generator(
output_slices: bool = False,
crop_size: None | list[float] | list[int] = None,
image_export_format: str = "dict",
write_file_format: str = "numpy",
export_images: None | bool = None,
write_images: None | bool = None,
write_dir: None | str = None,
**kwargs
) -> Generator[Any, None, None]:
"""
Generator for pre-processing images for deep learning.
Parameters
----------
output_slices: bool, optional, default: False
Determines whether separate slices should be extracted.
crop_size: list of float or list of int, optional, default: None
Size to which the images and masks should be cropped. Images and masks are cropped around the center of the
mask(s).
.. note::
MIRP follows the numpy convention for indexing (*z*, *y*, *x*). The final element always corresponds to the
*x* dimension.
image_export_format: {"dict", "native", "numpy"}, default: "dict"
Return format for processed images and masks. ``"dict"`` returns dictionaries of images and masks as numpy
arrays and associated characteristics. ``"native"`` returns images and masks in their internal format.
``"numpy"`` returns images and masks in numpy format. This argument is only used if ``export_images=True``.
write_file_format: {"nifti", "numpy"}, default: "numpy"
File format for processed images and masks. ``"nifti"`` writes images and masks in the NIfTI file format,
and ``"numpy"`` writes images and masks as numpy files. This argument is only used if ``write_images=True``.
export_images: bool, optional
Determines whether processed images and masks should be returned by the function.
write_images: bool, optional
Determines whether processed images and masks should be written to the directory indicated by the
``write_dir`` keyword argument.
write_dir: str, optional
Path to directory where processed images and masks should be written. If not set, processed images and masks
are returned by this function. Required if ``write_images=True``.
**kwargs:
Keyword arguments passed for importing images and masks (
:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`) and configuring settings (notably
:class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass`,
        :class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`), among others.
Yields
    ------
None | list[Any]
List of images and masks in the format indicated by ``image_export_format``, if ``export_images=True``.
See Also
--------
Keyword arguments can be provided to configure the following:
* image and mask import (:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`)
* image post-processing (:class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass`)
* image perturbation / augmentation (:class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`)
* image interpolation / resampling (:class:`~mirp.settings.interpolation_parameters.ImageInterpolationSettingsClass` and
:class:`~mirp.settings.interpolation_parameters.MaskInterpolationSettingsClass`)
* mask resegmentation (:class:`~mirp.settings.resegmentation_parameters.ResegmentationSettingsClass`)
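
    Examples
    --------
    A minimal usage sketch, assuming the generator is importable from the top-level ``mirp``
    namespace; the file paths are placeholders and ``crop_size`` follows the (*z*, *y*, *x*)
    convention:

    >>> from mirp import deep_learning_preprocessing_generator
    >>> generator = deep_learning_preprocessing_generator(
    ...     image="path/to/image_directory",
    ...     mask="path/to/mask_directory",
    ...     crop_size=[50, 224, 224],
    ...     image_export_format="dict"
    ... )
    >>> for processed_data in generator:
    ...     ...  # pass the processed image and mask to a deep learning pipeline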
"""
workflows = list(_base_deep_learning_preprocessing(
export_images=export_images,
write_images=write_images,
write_dir=write_dir,
**kwargs))
for workflow in workflows:
yield workflow.deep_learning_conversion(
output_slices=output_slices,
crop_size=crop_size,
image_export_format=image_export_format,
write_file_format=write_file_format
)
def _base_deep_learning_preprocessing(
image,
mask=None,
sample_name: None | str | list[str] = None,
image_name: None | str | list[str] = None,
image_file_type: None | str = None,
image_modality: None | str | list[str] = None,
image_sub_folder: None | str = None,
mask_name: None | str | list[str] = None,
mask_file_type: None | str = None,
mask_modality: None | str | list[str] = None,
mask_sub_folder: None | str = None,
roi_name: None | str | list[str] | dict[str, str] = None,
association_strategy: None | str | list[str] = None,
settings: None | str | SettingsClass | list[SettingsClass] = None,
stack_masks: str = "auto",
stack_images: str = "auto",
write_images: None | bool = None,
export_images: None | bool = None,
write_dir: None | str = None,
**kwargs
):
from mirp.data_import.import_image_and_mask import import_image_and_mask
from mirp.settings.import_config_parameters import import_configuration_settings
# Infer write_images, export_images based on write_dir.
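    # For example, providing only write_dir implies write_images=True and export_images=False,
    # whereas providing neither write_dir nor export_images implies export_images=True and
    # write_images=False, i.e. processed data are returned in memory.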
if write_images is None:
write_images = write_dir is not None
if export_images is None:
export_images = write_dir is None
if not write_images:
write_dir = None
    if write_images and write_dir is None:
        raise ValueError("The write_dir argument is required when write_images=True.")
    if not write_images and not export_images:
        raise ValueError("The write_images and export_images arguments cannot both be False.")
# Import settings (to provide immediate feedback if something is amiss).
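    # The settings argument may be a path to a configuration xml file, a single SettingsClass
    # instance, an iterable of SettingsClass instances, or None (in which case settings are
    # created from the keyword arguments).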
if isinstance(settings, str):
settings = import_configuration_settings(
compute_features=False,
path=settings
)
elif isinstance(settings, SettingsClass):
settings = [settings]
elif isinstance(settings, Iterable) and all(isinstance(x, SettingsClass) for x in settings):
settings = list(settings)
elif settings is None:
settings = import_configuration_settings(
compute_features=False,
**kwargs
)
else:
raise TypeError(
f"The 'settings' argument is expected to be a path to a configuration xml file, "
f"a SettingsClass object, or a list thereof. Found: {type(settings)}."
)
image_list = import_image_and_mask(
image=image,
mask=mask,
sample_name=sample_name,
image_name=image_name,
image_file_type=image_file_type,
image_modality=image_modality,
image_sub_folder=image_sub_folder,
mask_name=mask_name,
mask_file_type=mask_file_type,
mask_modality=mask_modality,
mask_sub_folder=mask_sub_folder,
roi_name=roi_name,
association_strategy=association_strategy,
stack_images=stack_images,
stack_masks=stack_masks
)
yield from _generate_dl_preprocessing_workflows(
image_list=image_list,
settings=settings,
write_dir=write_dir,
write_images=write_images,
export_images=export_images
)
def _generate_dl_preprocessing_workflows(
image_list: list[ImageFile],
settings: list[SettingsClass],
write_dir: None | str,
write_images: bool,
export_images: bool
) -> Generator[StandardWorkflow, None, None]:
for image_file in image_list:
for current_settings in settings:
            # Update settings to disable steps that may cause problems.
current_settings.feature_extr.families = "none"
current_settings.img_transform.feature_settings.families = "none"
current_settings.perturbation.crop_around_roi = False
current_settings.roi_resegment.resegmentation_method = "none"
if current_settings.perturbation.noise_repetitions is None or \
current_settings.perturbation.noise_repetitions == 0:
noise_repetition_ids = [None]
else:
noise_repetition_ids = list(range(current_settings.perturbation.noise_repetitions))
if current_settings.perturbation.rotation_angles is None or len(
current_settings.perturbation.rotation_angles) == 0 or all(
x == 0.0 for x in current_settings.perturbation.rotation_angles
):
rotation_angles = [None]
else:
rotation_angles = copy.deepcopy(current_settings.perturbation.rotation_angles)
if current_settings.perturbation.translation_fraction is None or len(
current_settings.perturbation.translation_fraction) == 0 or all(
x == 0.0 for x in current_settings.perturbation.translation_fraction
):
translations = [None]
else:
config_translation = copy.deepcopy(current_settings.perturbation.translation_fraction)
translations = []
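                # Build a grid of (z, y, x) translation fractions from all combinations of the
                # configured values; in by-slice mode the z-translation is kept at 0.0.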
for translation_x in config_translation:
for translation_y in config_translation:
if not current_settings.general.by_slice:
for translation_z in config_translation:
translations += [(translation_z, translation_y, translation_x)]
else:
translations += [(0.0, translation_y, translation_x)]
if current_settings.img_interpolate.new_spacing is None or len(
current_settings.img_interpolate.new_spacing) == 0 or all(
x == 0.0 for x in current_settings.img_interpolate.new_spacing
):
spacings = [None]
else:
spacings = copy.deepcopy(current_settings.img_interpolate.new_spacing)
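            # Yield one workflow per combination of noise repetition, rotation angle,
            # translation and new voxel spacing (the Cartesian product of all perturbations).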
for noise_repetition_id in noise_repetition_ids:
for rotation_angle in rotation_angles:
for translation in translations:
for spacing in spacings:
yield StandardWorkflow(
image_file=copy.deepcopy(image_file),
write_dir=write_dir,
settings=current_settings,
settings_name=current_settings.general.config_str,
write_features=False,
export_features=False,
write_images=write_images,
export_images=export_images,
noise_iteration_id=noise_repetition_id,
rotation=rotation_angle,
translation=translation,
new_image_spacing=spacing
)