Source code for mirp.deep_learning_preprocessing

from typing import Generator, Iterable, Any
import copy

from mirp._data_import.generic_file import ImageFile
from mirp.settings.generic import SettingsClass
from mirp.utilities.parallel import ray_remote, ray_init, ray_is_initialized, ray_get, ray_shutdown
from mirp._workflows.standardWorkflow import StandardWorkflow


def deep_learning_preprocessing(
        output_slices: bool = False,
        crop_size: None | list[float] | list[int] = None,
        image_export_format: str = "dict",
        write_file_format: str = "numpy",
        export_images: None | bool = None,
        write_images: None | bool = None,
        write_dir: None | str = None,
        num_cpus: None | int = None,
        **kwargs
) -> None | list[Any]:
    """
    Pre-processes images for deep learning.

    Parameters
    ----------
    output_slices: bool, optional, default: False
        Determines whether separate slices should be extracted.

    crop_size: list of float or list of int, optional, default: None
        Size to which the images and masks should be cropped. Images and masks are cropped around the center of
        the mask(s).

        .. note::
            MIRP follows the numpy convention for indexing (*z*, *y*, *x*). The final element always corresponds
            to the *x* dimension.

    image_export_format: {"dict", "native", "numpy"}, default: "dict"
        Return format for processed images and masks. ``"dict"`` returns dictionaries of images and masks as numpy
        arrays and associated characteristics. ``"native"`` returns images and masks in their internal format.
        ``"numpy"`` returns images and masks in numpy format. This argument is only used if ``export_images=True``.

    write_file_format: {"nifti", "numpy"}, default: "numpy"
        File format for processed images and masks. ``"nifti"`` writes images and masks in the NIfTI file format,
        and ``"numpy"`` writes images and masks as numpy files. This argument is only used if ``write_images=True``.

    export_images: bool, optional
        Determines whether processed images and masks should be returned by the function.

    write_images: bool, optional
        Determines whether processed images and masks should be written to the directory indicated by the
        ``write_dir`` keyword argument.

    write_dir: str, optional
        Path to the directory where processed images and masks should be written. If not set, processed images and
        masks are returned by this function. Required if ``write_images=True``.

    num_cpus: int, optional, default: None
        Number of CPU nodes that should be used for parallel processing. Image and mask processing can be
        parallelized using the ``ray`` package. If a ray cluster is defined by the user, this cluster will be used
        instead. By default, images and masks are processed sequentially.

    **kwargs:
        Keyword arguments passed for importing images and masks
        (:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`) and configuring settings (notably
        :class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass` and
        :class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`), among others.

    Returns
    -------
    None | list[Any]
        List of images and masks in the format indicated by ``image_export_format``, if ``export_images=True``.

    See Also
    --------
    Keyword arguments can be provided to configure the following:

    * image and mask import (:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`)
    * image post-processing (:class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass`)
    * image perturbation / augmentation (:class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`)
    * image interpolation / resampling (:class:`~mirp.settings.interpolation_parameters.ImageInterpolationSettingsClass`
      and :class:`~mirp.settings.interpolation_parameters.MaskInterpolationSettingsClass`)
    * mask resegmentation (:class:`~mirp.settings.resegmentation_parameters.ResegmentationSettingsClass`)
    """
    # Conditionally start a ray cluster.
    external_ray = ray_is_initialized()
    if not external_ray and num_cpus is not None and num_cpus > 1:
        ray_init(num_cpus=num_cpus)

    if ray_is_initialized():
        # Parallel processing.
        results = [
            _ray_extractor.remote(
                workflow=workflow,
                output_slices=output_slices,
                crop_size=crop_size,
                image_export_format=image_export_format,
                write_file_format=write_file_format
            )
            for workflow in _base_deep_learning_preprocessing(
                export_images=export_images,
                write_images=write_images,
                write_dir=write_dir,
                **kwargs
            )
        ]

        results = ray_get(results)
        if not external_ray:
            ray_shutdown()
    else:
        workflows = list(_base_deep_learning_preprocessing(
            export_images=export_images,
            write_images=write_images,
            write_dir=write_dir,
            **kwargs
        ))

        results = [
            workflow.deep_learning_conversion(
                output_slices=output_slices,
                crop_size=crop_size,
                image_export_format=image_export_format,
                write_file_format=write_file_format
            )
            for workflow in workflows
        ]

    return results
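
# A minimal usage sketch (not part of the MIRP source): the directory paths, crop size, and the helper name
# `_example_preprocess` below are placeholders chosen for illustration. It shows how the keyword arguments
# documented above combine into a single call that returns processed images and masks.
def _example_preprocess():
    # Returns a list with one entry per processed workflow; with the default image_export_format="dict", entries
    # hold images and masks as numpy arrays together with associated characteristics.
    return deep_learning_preprocessing(
        image="path/to/image_dir",   # placeholder: directory (or file) containing the images
        mask="path/to/mask_dir",     # placeholder: directory (or file) containing the masks
        crop_size=[50, 224, 224],    # numpy (z, y, x) convention: 50 slices of 224 x 224 voxels
        export_images=True,          # return the processed images and masks from the function
        num_cpus=None                # process sequentially; set an integer > 1 to parallelize with ray
    )
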
@ray_remote
def _ray_extractor(
        workflow: StandardWorkflow,
        output_slices: bool = False,
        crop_size: None | list[float] | list[int] = None,
        image_export_format: str = "numpy",
        write_file_format: str = "numpy"
):
    # Limit internal threading by third-party libraries.
    from mirp.utilities.parallel import limit_inner_threads
    limit_inner_threads()

    return workflow.deep_learning_conversion(
        output_slices=output_slices,
        crop_size=crop_size,
        image_export_format=image_export_format,
        write_file_format=write_file_format
    )

def deep_learning_preprocessing_generator(
        output_slices: bool = False,
        crop_size: None | list[float] | list[int] = None,
        image_export_format: str = "dict",
        write_file_format: str = "numpy",
        export_images: None | bool = None,
        write_images: None | bool = None,
        write_dir: None | str = None,
        **kwargs
) -> Generator[Any, None, None]:
    """
    Generator for pre-processing images for deep learning.

    Parameters
    ----------
    output_slices: bool, optional, default: False
        Determines whether separate slices should be extracted.

    crop_size: list of float or list of int, optional, default: None
        Size to which the images and masks should be cropped. Images and masks are cropped around the center of
        the mask(s).

        .. note::
            MIRP follows the numpy convention for indexing (*z*, *y*, *x*). The final element always corresponds
            to the *x* dimension.

    image_export_format: {"dict", "native", "numpy"}, default: "dict"
        Return format for processed images and masks. ``"dict"`` returns dictionaries of images and masks as numpy
        arrays and associated characteristics. ``"native"`` returns images and masks in their internal format.
        ``"numpy"`` returns images and masks in numpy format. This argument is only used if ``export_images=True``.

    write_file_format: {"nifti", "numpy"}, default: "numpy"
        File format for processed images and masks. ``"nifti"`` writes images and masks in the NIfTI file format,
        and ``"numpy"`` writes images and masks as numpy files. This argument is only used if ``write_images=True``.

    export_images: bool, optional
        Determines whether processed images and masks should be returned by the function.

    write_images: bool, optional
        Determines whether processed images and masks should be written to the directory indicated by the
        ``write_dir`` keyword argument.

    write_dir: str, optional
        Path to the directory where processed images and masks should be written. If not set, processed images and
        masks are returned by this function. Required if ``write_images=True``.

    **kwargs:
        Keyword arguments passed for importing images and masks
        (:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`) and configuring settings (notably
        :class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass` and
        :class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`), among others.

    Yields
    ------
    None | list[Any]
        List of images and masks in the format indicated by ``image_export_format``, if ``export_images=True``.

    See Also
    --------
    Keyword arguments can be provided to configure the following:

    * image and mask import (:func:`~mirp.data_import.import_image_and_mask.import_image_and_mask`)
    * image post-processing (:class:`~mirp.settings.image_processing_parameters.ImagePostProcessingClass`)
    * image perturbation / augmentation (:class:`~mirp.settings.perturbation_parameters.ImagePerturbationSettingsClass`)
    * image interpolation / resampling (:class:`~mirp.settings.interpolation_parameters.ImageInterpolationSettingsClass`
      and :class:`~mirp.settings.interpolation_parameters.MaskInterpolationSettingsClass`)
    * mask resegmentation (:class:`~mirp.settings.resegmentation_parameters.ResegmentationSettingsClass`)
    """
    workflows = list(_base_deep_learning_preprocessing(
        export_images=export_images,
        write_images=write_images,
        write_dir=write_dir,
        **kwargs
    ))

    for workflow in workflows:
        yield workflow.deep_learning_conversion(
            output_slices=output_slices,
            crop_size=crop_size,
            image_export_format=image_export_format,
            write_file_format=write_file_format
        )
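
# A minimal usage sketch (not part of the MIRP source): the directory paths, crop size, and the helper name
# `_example_stream` below are placeholders. It shows how the generator variant yields one processed sample at a
# time instead of materializing the full list, which keeps memory use low for large datasets.
def _example_stream():
    for converted in deep_learning_preprocessing_generator(
        image="path/to/image_dir",   # placeholder: directory (or file) containing the images
        mask="path/to/mask_dir",     # placeholder: directory (or file) containing the masks
        crop_size=[64, 64, 64],      # placeholder crop size in numpy (z, y, x) order
        export_images=True           # yield the processed images and masks in image_export_format
    ):
        # Each yielded item corresponds to one workflow (one image-mask combination, possibly perturbed).
        print(converted)
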
def _base_deep_learning_preprocessing(
        image,
        mask=None,
        sample_name: None | str | list[str] = None,
        image_name: None | str | list[str] = None,
        image_file_type: None | str = None,
        image_modality: None | str | list[str] = None,
        image_sub_folder: None | str = None,
        mask_name: None | str | list[str] = None,
        mask_file_type: None | str = None,
        mask_modality: None | str | list[str] = None,
        mask_sub_folder: None | str = None,
        roi_name: None | str | list[str] | dict[str, str] = None,
        association_strategy: None | str | list[str] = None,
        settings: None | str | SettingsClass | list[SettingsClass] = None,
        stack_masks: str = "auto",
        stack_images: str = "auto",
        write_images: None | bool = None,
        export_images: None | bool = None,
        write_dir: None | str = None,
        **kwargs
):
    from mirp.data_import.import_image_and_mask import import_image_and_mask
    from mirp.settings.import_config_parameters import import_configuration_settings

    # Infer write_images and export_images based on write_dir.
    if write_images is None:
        write_images = write_dir is not None
    if export_images is None:
        export_images = write_dir is None

    if not write_images:
        write_dir = None

    if write_images and write_dir is None:
        raise ValueError("The write_dir argument should be provided for writing images and masks to.")
    if not write_images and not export_images:
        raise ValueError("The write_images and export_images arguments cannot both be False.")

    # Import settings (to provide immediate feedback if something is amiss).
    if isinstance(settings, str):
        settings = import_configuration_settings(
            compute_features=False,
            path=settings
        )
    elif isinstance(settings, SettingsClass):
        settings = [settings]
    elif isinstance(settings, Iterable) and all(isinstance(x, SettingsClass) for x in settings):
        settings = list(settings)
    elif settings is None:
        settings = import_configuration_settings(
            compute_features=False,
            **kwargs
        )
    else:
        raise TypeError(
            f"The 'settings' argument is expected to be a path to a configuration xml file, "
            f"a SettingsClass object, or a list thereof. Found: {type(settings)}."
        )

    image_list = import_image_and_mask(
        image=image,
        mask=mask,
        sample_name=sample_name,
        image_name=image_name,
        image_file_type=image_file_type,
        image_modality=image_modality,
        image_sub_folder=image_sub_folder,
        mask_name=mask_name,
        mask_file_type=mask_file_type,
        mask_modality=mask_modality,
        mask_sub_folder=mask_sub_folder,
        roi_name=roi_name,
        association_strategy=association_strategy,
        stack_images=stack_images,
        stack_masks=stack_masks
    )

    yield from _generate_dl_preprocessing_workflows(
        image_list=image_list,
        settings=settings,
        write_dir=write_dir,
        write_images=write_images,
        export_images=export_images
    )


def _generate_dl_preprocessing_workflows(
        image_list: list[ImageFile],
        settings: list[SettingsClass],
        write_dir: None | str,
        write_images: bool,
        export_images: bool
) -> Generator[StandardWorkflow, None, None]:
    for image_file in image_list:
        for current_settings in settings:
            # Update settings to remove options that may cause problems.
            current_settings.feature_extr.families = "none"
            current_settings.img_transform.feature_settings.families = "none"
            current_settings.perturbation.crop_around_roi = False
            current_settings.roi_resegment.resegmentation_method = "none"

            if current_settings.perturbation.noise_repetitions is None or \
                    current_settings.perturbation.noise_repetitions == 0:
                noise_repetition_ids = [None]
            else:
                noise_repetition_ids = list(range(current_settings.perturbation.noise_repetitions))

            if current_settings.perturbation.rotation_angles is None or len(
                    current_settings.perturbation.rotation_angles) == 0 or all(
                x == 0.0 for x in current_settings.perturbation.rotation_angles
            ):
                rotation_angles = [None]
            else:
                rotation_angles = copy.deepcopy(current_settings.perturbation.rotation_angles)

            if current_settings.perturbation.translation_fraction is None or len(
                    current_settings.perturbation.translation_fraction) == 0 or all(
                x == 0.0 for x in current_settings.perturbation.translation_fraction
            ):
                translations = [None]
            else:
                config_translation = copy.deepcopy(current_settings.perturbation.translation_fraction)
                translations = []
                for translation_x in config_translation:
                    for translation_y in config_translation:
                        if not current_settings.general.by_slice:
                            for translation_z in config_translation:
                                translations += [(translation_z, translation_y, translation_x)]
                        else:
                            translations += [(0.0, translation_y, translation_x)]

            if current_settings.img_interpolate.new_spacing is None or len(
                    current_settings.img_interpolate.new_spacing) == 0 or all(
                x == 0.0 for x in current_settings.img_interpolate.new_spacing
            ):
                spacings = [None]
            else:
                spacings = copy.deepcopy(current_settings.img_interpolate.new_spacing)

            for noise_repetition_id in noise_repetition_ids:
                for rotation_angle in rotation_angles:
                    for translation in translations:
                        for spacing in spacings:
                            yield StandardWorkflow(
                                image_file=copy.deepcopy(image_file),
                                write_dir=write_dir,
                                settings=current_settings,
                                settings_name=current_settings.general.config_str,
                                write_features=False,
                                export_features=False,
                                write_images=write_images,
                                export_images=export_images,
                                noise_iteration_id=noise_repetition_id,
                                rotation=rotation_angle,
                                translation=translation,
                                new_image_spacing=spacing
                            )
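
# Illustrative note (not part of the MIRP source): the nested loops above yield one StandardWorkflow per
# combination of noise repetition, rotation angle, translation, and new spacing. For example, with no noise
# repetitions, rotation_angles=[-5.0, 5.0], translation_fraction=[0.0, 0.5] in by-slice mode, and a single new
# spacing, this amounts to 1 x 2 x (2 x 2) x 1 = 8 workflows per image file.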