Source code for ducho.multimodal.visual.VisualFeatureExtractor

import tensorflow as tf
import numpy as np
import torchvision
import torch
from torchvision.models.feature_extraction import get_graph_node_names, create_feature_extractor
from ducho.internal.father_classes.FeatureExtractorFather import FeatureExtractorFather
from transformers import pipeline


class VisualFeatureExtractor(FeatureExtractorFather):
    """
    Feature extractor for the visual modality.

    Depending on the configured backend libraries, the wrapped model is a
    ``tf.keras.applications`` model, a ``torchvision.models`` model, a custom
    model deserialized with ``torch.load``, or a HuggingFace ``transformers``
    feature-extraction model.
    """

    # Key under which the torch feature extractor returns the requested layer.
    _TORCH_OUTPUT_KEY = 'output'

    def __init__(self, gpu='-1'):
        """
        Create an extractor with no model configured yet.

        Args:
            gpu: A string indicating the GPU to be used. '-1' specifies the CPU.

        Returns:
            None
        """
        self._pipeline = None
        self._image_processor = None
        # (model, output_layer, extractor) of the last traced torch model, so
        # that the expensive torch.fx tracing is not repeated for every image.
        self._torch_extractor_cache = None
        super().__init__(gpu)

    def set_model(self, model):
        """
        Configure the model of the Visual Feature Extractor from the YAML
        specifications.

        Args:
            model: The row of the YAML file containing the user's
                specifications. 'model_name' is required, 'image_processor'
                is optional (only used by the transformers backend).

        Returns:
            None

        Raises:
            NotImplementedError: If no configured backend can provide the
                requested model.
        """
        model_name = model['model_name']
        image_processor = model['image_processor'] if 'image_processor' in model else None

        self._model_name = model_name
        # A new model makes any previously traced extractor obsolete.
        self._torch_extractor_cache = None
        backends = self._backend_libraries_list

        if 'tensorflow' in backends and model_name in vars(tf.keras.applications):
            self._model = getattr(tf.keras.applications, model_name)()
        elif 'torch' in backends and model_name.lower() in vars(torchvision.models):
            self._model = getattr(torchvision.models, model_name.lower())(weights='DEFAULT')
            self._model.to(self._device)
            self._model.eval()
        elif 'torch' in backends:
            # Custom model loading: 'model_name' is treated as a path.
            # SECURITY: torch.load unpickles arbitrary Python objects; only
            # load model files that come from a trusted source.
            # NOTE(review): recent PyTorch releases appear to restrict
            # torch.load to weights-only by default - confirm that full-model
            # checkpoints still load on the targeted version.
            self._model = torch.load(model_name, map_location=self._device)
        elif 'transformers' in backends:
            built_pipeline = pipeline(
                task='feature-extraction',
                model=model_name,
                image_processor=image_processor,
                framework='pt',
                device=self._device,
            )
            self._model = built_pipeline.model
            self._image_processor = built_pipeline.image_processor
        else:
            raise NotImplementedError('This feature extractor has not been added yet!')

    def _get_torch_feature_model(self):
        """
        Return a torchvision feature extractor that stops at the configured
        output layer, building it only when the model or the layer changed.

        Returns:
            The module produced by ``create_feature_extractor``; calling it
            returns a dict whose ``_TORCH_OUTPUT_KEY`` entry is the requested
            layer's output.

        Raises:
            ValueError: If the configured output layer is not one of the
                model's eval-mode graph nodes.
        """
        # getattr keeps this working for instances built without __init__.
        cached = getattr(self, '_torch_extractor_cache', None)
        if cached is not None:
            cached_model, cached_layer, cached_extractor = cached
            if cached_model is self._model and cached_layer == self._output_layer:
                return cached_extractor

        _, eval_nodes = get_graph_node_names(self._model)
        if self._output_layer not in eval_nodes:
            raise ValueError(f"The specified output layer {self._output_layer} does not exist. Please carefully check its name!")

        # Only the requested node is registered: the graph is cut right after
        # it, and no other intermediate activation has to be returned.
        extractor = create_feature_extractor(
            self._model, {self._output_layer: self._TORCH_OUTPUT_KEY}
        )
        self._torch_extractor_cache = (self._model, self._output_layer, extractor)
        return extractor

    def extract_feature(self, image):
        """
        Extract features from the input image data.

        Prior to calling this function, the framework, model, and layer have
        to be configured using their respective set methods.

        Args:
            image: The preprocessed image data.

        Returns:
            The extracted features: a numpy array for the torch and
            transformers backends, and the output of the Keras sub-model call
            for the tensorflow backend.

        Raises:
            ValueError: If the torch backend is used and the configured
                output layer does not exist in the model.
            NotImplementedError: If no configured backend can handle the
                current model.
        """
        backends = self._backend_libraries_list

        if 'torch' in backends:
            feature_model = self._get_torch_feature_model()
            # Re-assert eval mode on every call: the extractor may be a cached
            # one, and the original code did this for each image as well.
            feature_model.eval()
            with torch.no_grad():
                features = feature_model(image.to(self._device))[self._TORCH_OUTPUT_KEY]
            output = np.squeeze(features.data.cpu().numpy())
            # update the framework list
            self._backend_libraries_list = ['torch']
            return output

        if 'tensorflow' in backends and self._model_name in vars(tf.keras.applications):
            # Sub-model going from the original input to the requested layer.
            input_model = self._model.input
            output_layer = self._model.get_layer(self._output_layer).output
            output = tf.keras.Model(input_model, output_layer)(image, training=False)
            # update the framework list
            self._backend_libraries_list = ['tensorflow']
            return output

        if 'transformers' in backends:
            model_input = self._image_processor(image, return_tensors="pt")
            # as_tensor passes existing tensors through unchanged (no copy and
            # no copy-construct warning) and still converts anything else.
            model_input = {
                key: torch.as_tensor(value).to(self._device)
                for key, value in model_input.items()
            }
            # Inference only: do not record the autograd graph.
            with torch.no_grad():
                model_output = getattr(self._model(**model_input), self._output_layer.lower())
            return model_output.detach().cpu().numpy()

        # Mirror set_model instead of silently returning None.
        raise NotImplementedError('This feature extractor has not been added yet!')