Source code for ducho.multimodal.visual.VisualDataset

from abc import ABC

from PIL import Image
from ducho.internal.father_classes.DatasetFather import DatasetFather
from torchvision import transforms
import tensorflow
import numpy as np
import os
import torch


class MinMaxNormalize(object):
    """
    Callable transform that performs MinMax normalization, rescaling the values of an image to [0, 1].
    """
    def __call__(self, img):
        """
        Return the image preprocessed through MinMax normalization.

        Args:
            img: The input image. It must expose ``min()`` and ``max()`` and support element-wise
                arithmetic (e.g. a torch.Tensor or a numpy array).

        Returns:
            The normalized image, computed as ``(img - min) / (max - min)``. When every value of the
            input is identical (``max == min``) the result is all zeros instead of NaN.
        """
        min_value = img.min()
        max_value = img.max()

        value_range = max_value - min_value
        # A constant image has a zero range: dividing by it would fill the output with NaN/inf.
        # Dividing by 1 instead keeps the same output type and yields all zeros.
        if value_range == 0:
            value_range = 1

        return (img - min_value) / value_range



[docs]
class VisualDataset(DatasetFather, ABC):
    """
    This class represents the Visual Dataset used for the data loading process.

    Each item is an image read from the input directory, converted to RGB when needed, optionally resized,
    and then normalized according to the backend listed in ``self._backend_libraries_list``
    (tensorflow, torch or transformers).
    """
    def __init__(self, input_directory_path, output_directory_path, model_name='VGG19', reshape=(224, 224)):
        """
        It manages the Image Dataset, which consists of a folder containing input data and another folder for output data.
        It handles the preprocessing of input data and manages the output data.

        Args:
            input_directory_path: A string representing the path to the folder containing the input data to be processed.
            output_directory_path: A string representing the path to the folder where the output data will be stored. If the folder does not exist, it will be created.
            model_name: A string specifying the model to be used. This can be reset later.
            reshape: A tuple (int, int) representing the width and height for resizing the input images. This can be reset later.

        Returns:
            None
        """
        super().__init__(input_directory_path, output_directory_path, model_name)
        self._reshape = reshape
        # None means "no normalization beyond ToTensor" on the torch path (see _pre_processing).
        self._preprocessing_type = None
        # Single source of truth for the default (ImageNet) mean and std.
        self._reset_mean_std()

    def __getitem__(self, idx):
        """
        It retrieves a preprocessed sample given its id (the id indexes ``self._filenames``).

        Args:
            idx: Integer, the index of the file to elaborate.

        Returns:
             The preprocessed image data. With the tensorflow backend this is a numpy array with an extra
             leading axis of size 1; otherwise it is whatever ``_pre_processing`` returned.
        """
        image_path = os.path.join(self._input_directory_path, self._filenames[idx])

        # Image.open is lazy and keeps the file open; the context manager guarantees the handle is
        # released once the pixels have been converted into an independent array/tensor.
        with Image.open(image_path) as raw_image:
            if raw_image.mode != 'RGB':
                sample = raw_image.convert(mode='RGB')
            else:
                sample = raw_image

            norm_sample = self._pre_processing(sample)

        # _pre_processing may have narrowed the backend list, so it is checked afterwards.
        if 'tensorflow' in self._backend_libraries_list:
            # np for tensorflow
            return np.expand_dims(norm_sample, axis=0)

        # torch
        return norm_sample

    def _pre_processing(self, sample):
        """
        It pre-processes the data for the feature extraction.

        The image is resized to ``self._reshape`` (when set) and then normalized by the first matching backend:
        the model-specific ``preprocess_input`` of tensorflow.keras.applications, the torch transform selected
        by ``self._preprocessing_type``, or ``PILToTensor`` for transformers. As a side effect,
        ``self._backend_libraries_list`` is replaced by a one-element list naming the backend that was used.

        Args:
            sample: the read image.

        Returns:
             the processed image.

        Raises:
            ValueError: if none of the supported backends applies to the current model.
        """
        # resize (a falsy reshape value disables resizing)
        if self._reshape:
            res_sample = sample.resize(self._reshape, resample=Image.BICUBIC)
        else:
            res_sample = sample

        model_key = self._model_name.lower()

        # normalize
        if model_key in tensorflow.keras.applications.__dict__ \
                and 'tensorflow' in self._backend_libraries_list:
            # if the model is a tensorflow model, each one executes its own preprocess_input
            command = getattr(tensorflow.keras.applications, model_key)
            norm_sample = command.preprocess_input(np.array(res_sample))
            # update the framework list
            self._backend_libraries_list = ['tensorflow']

        elif 'torch' in self._backend_libraries_list:
            # if the model is a torch model, the normalization is the same for everyone
            if self._preprocessing_type is None:
                transform = transforms.ToTensor()
            elif self._preprocessing_type == 'zscore':
                transform = transforms.Compose([transforms.ToTensor(),
                                                transforms.Normalize(mean=self._mean,
                                                                     std=self._std)
                                                ])
            else:
                # any other non-None value selects MinMax normalization
                transform = transforms.Compose([transforms.ToTensor(),
                                                MinMaxNormalize()
                                                ])

            norm_sample = transform(res_sample)
            # update the framework list
            self._backend_libraries_list = ['torch']

        elif 'transformers' in self._backend_libraries_list:
            transform = transforms.PILToTensor()
            norm_sample = transform(res_sample)
            # update the framework list
            self._backend_libraries_list = ['transformers']

        else:
            # Without this branch the function would fail with an opaque UnboundLocalError on return.
            raise ValueError(
                f"Cannot pre-process the image: no supported backend for model '{self._model_name}' "
                f"in {self._backend_libraries_list!r}"
            )

        return norm_sample

    def set_reshape(self, reshape):
        """
        Set the reshape variable according to the desired value.

        Args:
             reshape: Tuple (int, int) representing the width and height for resizing the input.

        Returns:
            None
        """
        self._reshape = reshape

    def set_preprocessing_flag(self, preprocessing_flag):
        """
        Store the given value in the reshape variable.

        NOTE(review): despite its name, this setter writes to ``self._reshape``, i.e. it behaves exactly like
        ``set_reshape``. A falsy value therefore disables resizing. This looks like a copy-paste slip, but the
        behavior is kept unchanged until the intended semantics are confirmed against the callers.

        Args:
             preprocessing_flag: the value assigned to ``self._reshape``.

        Returns:
            None
        """
        self._reshape = preprocessing_flag

    def set_preprocessing_type(self,
                               preprocessing_type: str
                               ) -> None:
        """
        Set the desired pre-processing type used on the torch path.

        ``'zscore'`` selects z-score normalization with the current mean and std; any other non-None value
        selects MinMax normalization; ``None`` applies no normalization beyond the tensor conversion.

        Args:
             preprocessing_type: the desired pre-processing.

        Returns:
            None
        """
        self._preprocessing_type = preprocessing_type

    def set_mean_std(self,
                     mean: torch.Tensor,
                     std: torch.Tensor
                     ) -> None:
        """
        Set custom values of mean and std for z-score normalization.

        Args:
             mean: torch.Tensor containing the desired mean along the three channels.
             std: torch.Tensor containing the desired standard deviation along the three channels.

        Returns:
            None
        """
        self._mean = mean
        self._std = std

    def _reset_mean_std(self) -> None:
        """
        Reset mean and std values to ImageNet ones.

        Returns:
            None
        """
        self._mean = [0.485, 0.456, 0.406]
        self._std = [0.229, 0.224, 0.225]