Skip to content

Model class


Parent class for the models

Source code in template_vision/models_training/
class ModelClass:
    '''Parent class for the models

    Holds what is shared by every model: the model name, the output folder, the save level,
    the training state and the configuration written on save.
    The methods fit, predict, predict_proba, inverse_transform and get_and_save_metrics
    are placeholders that raise NotImplementedError and must be overridden.
    '''

    _default_name = 'none'
    # NOTE(review): the name of the upload instructions file was blanked out in the copy of the
    # source this was restored from; 'model_upload_instructions.md' is presumed - confirm.
    _upload_instructions_filename = 'model_upload_instructions.md'
    # Variable annotation :
    # Solves lots of typing errors, cf mypy
    list_classes: list
    dict_classes: dict

    # Not implemented :
    # -> fit
    # -> predict
    # -> predict_proba
    # -> inverse_transform
    # -> get_and_save_metrics

    def __init__(self, model_dir: Union[str, None] = None, model_name: Union[str, None] = None,
                 level_save: str = 'HIGH', **kwargs) -> None:
        '''Initialization of the parent class.

        Kwargs:
            model_dir (str): Folder where to save the model
                If None, creates a directory based on the model's name and the date (most common usage)
            model_name (str): The name of the model
            level_save (str): Level of saving
                LOW: stats + configurations + logger keras - /!\\ The model can't be reused /!\\ -
                MEDIUM: LOW + hdf5 + pkl + plots
                HIGH: MEDIUM + predictions
        Raises:
            ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])
            NotADirectoryError: If a provided model directory is not a directory (i.e. it's a file)
        '''
        if level_save not in ['LOW', 'MEDIUM', 'HIGH']:
            raise ValueError(f"The object level_save ({level_save}) is not a valid option (['LOW', 'MEDIUM', 'HIGH'])")

        # Get logger
        self.logger = logging.getLogger(__name__)

        # Model type -> 'classifier' or 'object_detector' depending on the model
        self.model_type = None

        # Model name
        self.model_name = self._default_name if model_name is None else model_name

        # Model folder
        if model_dir is None:
            self.model_dir = self._get_new_model_dir()
        else:
            # NOTE(review): this branch was truncated in the copy of the source; creating the
            # missing folder is the reconstruction implied by the two checks - confirm.
            if not os.path.exists(model_dir):
                os.makedirs(model_dir)
            # The path exists at this point: reject it if it is a file
            if not os.path.isdir(model_dir):
                raise NotADirectoryError(f"{model_dir} is not a valid directory")
            self.model_dir = os.path.abspath(model_dir)

        # Other options
        self.level_save = level_save

        # is trained ?
        self.trained = False
        self.nb_fit = 0

        # Configuration dict. to be logged. Set on save.
        self.json_dict: Dict[Any, Any] = {}

    def fit(self, df_train, **kwargs) -> dict:
        '''Trains the model

        Args:
            df_train (pd.DataFrame): Train dataset
                Must contain file_path & file_class columns if classifier
                Must contain file_path & bboxes columns if object detector
        Returns:
            dict: Fit arguments, to be used with transfer learning fine-tuning
        Raises:
            NotImplementedError: Always, this method must be overridden
        '''
        raise NotImplementedError("'fit' needs to be overridden")

    def predict(self, df_test: pd.DataFrame, **kwargs) -> Union[np.ndarray, list]:
        '''Predictions on test set

        Args:
            df_test (pd.DataFrame): DataFrame to be predicted, with column file_path
        Returns:
            (np.ndarray | list): Array, shape = [n_samples, n_classes] or List of n_samples elements
        Raises:
            NotImplementedError: Always, this method must be overridden
        '''
        raise NotImplementedError("'predict' needs to be overridden")

    def predict_proba(self, df_test: pd.DataFrame, **kwargs) -> np.ndarray:
        '''Predicts probabilities on the test dataset

        Args:
            df_test (pd.DataFrame): DataFrame to be predicted, with column file_path
        Returns:
            (np.ndarray): Array, shape = [n_samples, n_classes]
        Raises:
            NotImplementedError: Always, this method must be overridden
        '''
        raise NotImplementedError("'predict_proba' needs to be overridden")

    def inverse_transform(self, y: Union[list, np.ndarray]) -> Union[list, tuple]:
        '''Gets the final format of prediction
            - Classification : classes from predictions
            - Object detections : list of bboxes per image

        Args:
            y (list | np.ndarray): Array-like
        Returns:
            List of classes if classifier
            List of bboxes if object detector
        Raises:
            NotImplementedError: Always, this method must be overridden
        '''
        raise NotImplementedError("'inverse_transform' needs to be overridden")

    def get_and_save_metrics(self, y_true, y_pred, list_files_x: Union[list, None] = None,
                             type_data: str = '') -> pd.DataFrame:
        '''Gets and saves the metrics of a model

        Args:
            y_true (?): Array-like [n_samples, 1] if classifier
                # If classifier, class of each image
                # If object detector, list of list of bboxes per image
                    bbox format : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
            y_pred (?): Array-like [n_samples, 1] if classifier
                # If classifier, class of each image
                # If object detector, list of list of bboxes per image
                    bbox format : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
        Kwargs:
            list_files_x (list): Input images file paths
            type_data (str): Type of dataset (validation, test, ...)
        Returns:
            pd.DataFrame: The dataframe containing statistics
        Raises:
            NotImplementedError: Always, this method must be overridden
        '''
        raise NotImplementedError("'get_and_save_metrics' needs to be overridden")

    def save(self, json_data: Union[dict, None] = None) -> None:
        '''Saves the model

        Writes "<model_name>.pkl" (only if level_save is MEDIUM or HIGH) and "configurations.json"
        in the model folder, then writes the upload properties file.

        Kwargs:
            json_data (dict): Additional configurations to be saved
        '''
        # Manage paths
        pkl_path = os.path.join(self.model_dir, f"{self.model_name}.pkl")
        conf_path = os.path.join(self.model_dir, "configurations.json")

        # Save the pickled model only if level_save > 'LOW'
        if self.level_save in ['MEDIUM', 'HIGH']:
            with open(pkl_path, 'wb') as f:
                pickle.dump(self, f)

        # Save configuration JSON
        json_dict = {
            'maintainers': 'Agence DataServices',
            'gabarit_version': '1.3.4.dev0+local',
            'date': time.strftime("%d/%m/%Y - %H:%M:%S"),  # Not the same as the folder's name
            'package_version': utils.get_package_version(),
            'model_name': self.model_name,
            'model_dir': self.model_dir,
            'model_type': self.model_type,
            'trained': self.trained,
            'nb_fit': self.nb_fit,
            'level_save': self.level_save,
            'librairie': None,
        }
        # Merge json_data if not None
        if json_data is not None:
            # Priority given to json_data !
            json_dict = {**json_dict, **json_data}

        # Add conf to attributes
        self.json_dict = json_dict

        # Save conf
        with open(conf_path, 'w', encoding='utf-8') as json_file:
            json.dump(json_dict, json_file, indent=4, cls=utils.NpEncoder)

        # Now, save a properties file for the model upload
        self._save_upload_properties(json_dict)

    def _save_upload_properties(self, json_dict: Union[dict, None] = None) -> None:
        '''Prepares a configuration file for a future export (e.g on an artifactory)

        Writes "properties.json" (a filtered view of json_dict) in the model folder, and a copy of
        the upload instructions where 'model_dir_path_identifier' is replaced by the model folder.

        Kwargs:
            json_dict: Configurations to save
        '''
        if json_dict is None:
            json_dict = {}

        # Manage paths
        properties_path = os.path.join(self.model_dir, "properties.json")
        vanilla_model_upload_instructions = os.path.join(utils.get_ressources_path(),
                                                         self._upload_instructions_filename)
        specific_model_upload_instructions = os.path.join(self.model_dir, self._upload_instructions_filename)

        # First, we define a list of "allowed" properties
        allowed_properties = ["maintainers", "gabarit_version", "date", "package_version", "model_name", "list_classes",
                              "librairie", "fit_time"]
        # Now we filter these properties
        final_dict = {k: v for k, v in json_dict.items() if k in allowed_properties}
        # Save
        with open(properties_path, 'w', encoding='utf-8') as f:
            json.dump(final_dict, f, indent=4, cls=utils.NpEncoder)

        # Add instructions to upload a model to a storage solution (e.g. Artifactory)
        with open(vanilla_model_upload_instructions, 'r', encoding='utf-8') as f:
            content = f.read()
        # TODO: to be improved
        new_content = content.replace('model_dir_path_identifier', os.path.abspath(self.model_dir))
        with open(specific_model_upload_instructions, 'w', encoding='utf-8') as f:
            f.write(new_content)

    def _get_new_model_dir(self) -> str:
        '''Gets a folder where to save the model

        The folder is "<models path>/<model_name>/<model_name>_<date>". It is not created here.

        Returns:
            str: Path to the folder
        '''
        models_dir = utils.get_models_path()
        subfolder = os.path.join(models_dir, self.model_name)
        # The timestamp is formatted on its own so that a '%' in the model name is kept as is
        timestamp = time.strftime("%Y_%m_%d-%H_%M_%S")
        folder_name = f"{self.model_name}_{timestamp}"
        model_dir = os.path.join(subfolder, folder_name)
        if os.path.isdir(model_dir):
            time.sleep(1)  # Wait 1 second so that the 'date' changes...
            return self._get_new_model_dir()  # Get new directory name
        return model_dir

    def display_if_gpu_activated(self) -> None:
        '''Displays if a GPU is being used'''
        if self._is_gpu_activated():
            # NOTE(review): the logging call was truncated in the copy of the source;
            # the 'info' level is presumed - confirm.
            self.logger.info("GPU activated")

    def _is_gpu_activated(self) -> bool:
        '''Checks if we use a GPU

        Returns:
            bool: whether GPU is available or not
        '''
        # By default, no GPU
        return False

__init__(model_dir=None, model_name=None, level_save='HIGH', **kwargs)

Initialization of the parent class.


Parameters:

- model_dir (str): Folder where to save the model. If None, creates a directory based on the model's name and the date (most common usage).
- model_name (str): The name of the model.
- level_save (str): Level of saving.
    - LOW: stats + configurations + logger keras - /!\ The model can't be reused /!\ -
    - MEDIUM: LOW + hdf5 + pkl + plots
    - HIGH: MEDIUM + predictions

Raises:

- ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH']).
- NotADirectoryError: If a provided model directory is not a directory (i.e. it's a file).

Source code in template_vision/models_training/
def __init__(self, model_dir: Union[str, None] = None, model_name: Union[str, None] = None,
             level_save: str = 'HIGH', **kwargs) -> None:
    '''Initialization of the parent class.

    Kwargs:
        model_dir (str): Folder where to save the model
            If None, creates a directory based on the model's name and the date (most common usage)
        model_name (str): The name of the model
        level_save (str): Level of saving
            LOW: stats + configurations + logger keras - /!\\ The model can't be reused /!\\ -
            MEDIUM: LOW + hdf5 + pkl + plots
            HIGH: MEDIUM + predictions
    Raises:
        ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])
        NotADirectoryError: If a provided model directory is not a directory (i.e. it's a file)
    '''
    if level_save not in ['LOW', 'MEDIUM', 'HIGH']:
        raise ValueError(f"The object level_save ({level_save}) is not a valid option (['LOW', 'MEDIUM', 'HIGH'])")

    # Get logger
    self.logger = logging.getLogger(__name__)

    # Model type -> 'classifier' or 'object_detector' depending on the model
    self.model_type = None

    # Model name
    self.model_name = self._default_name if model_name is None else model_name

    # Model folder
    if model_dir is None:
        self.model_dir = self._get_new_model_dir()
    else:
        # NOTE(review): this branch was truncated in the copy of the source; creating the
        # missing folder is the reconstruction implied by the two checks - confirm.
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        # The path exists at this point: reject it if it is a file
        if not os.path.isdir(model_dir):
            raise NotADirectoryError(f"{model_dir} is not a valid directory")
        self.model_dir = os.path.abspath(model_dir)

    # Other options
    self.level_save = level_save

    # is trained ?
    self.trained = False
    self.nb_fit = 0

    # Configuration dict. to be logged. Set on save.
    self.json_dict: Dict[Any, Any] = {}


Displays if a GPU is being used

Source code in template_vision/models_training/
def display_if_gpu_activated(self) -> None:
    '''Displays if a GPU is being used'''
    if self._is_gpu_activated():
        # NOTE(review): the logging call was truncated in the copy of the source;
        # the 'info' level is presumed - confirm.
        self.logger.info("GPU activated")

fit(df_train, **kwargs)

Trains the model


Name Type Description Default
df_train DataFrame

Train dataset Must contain file_path & file_class columns if classifier Must contain file_path & bboxes columns if object detector


Returns: dict: Fit arguments, to be used with transfer learning fine-tuning

Source code in template_vision/models_training/
def fit(self, df_train, **kwargs) -> dict:
    '''Trains the model

    Args:
        df_train (pd.DataFrame): Train dataset
            Must contain file_path & file_class columns if classifier
            Must contain file_path & bboxes columns if object detector
    Returns:
        dict: Fit arguments, to be used with transfer learning fine-tuning
    Raises:
        NotImplementedError: Always, this method must be overridden
    '''
    raise NotImplementedError("'fit' needs to be overridden")

get_and_save_metrics(y_true, y_pred, list_files_x=None, type_data='')

Gets and saves the metrics of a model


Name Type Description Default
y_true ?

Array-like [n_samples, 1] if classifier

If classifier, class of each image

If object detector, list of list of bboxes per image

bbox format : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
y_pred ?

Array-like [n_samples, 1] if classifier

If classifier, class of each image

If object detector, list of list of bboxes per image

bbox format : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}

Kwargs:

- list_files_x (list): Input images file paths
- type_data (str): Type of dataset (validation, test, ...)

Returns: pd.DataFrame: The dataframe containing statistics

Source code in template_vision/models_training/
def get_and_save_metrics(self, y_true, y_pred, list_files_x: Union[list, None] = None,
                         type_data: str = '') -> pd.DataFrame:
    '''Gets and saves the metrics of a model

    Args:
        y_true (?): Array-like [n_samples, 1] if classifier
            # If classifier, class of each image
            # If object detector, list of list of bboxes per image
                bbox format : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
        y_pred (?): Array-like [n_samples, 1] if classifier
            # If classifier, class of each image
            # If object detector, list of list of bboxes per image
                bbox format : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
    Kwargs:
        list_files_x (list): Input images file paths
        type_data (str): Type of dataset (validation, test, ...)
    Returns:
        pd.DataFrame: The dataframe containing statistics
    Raises:
        NotImplementedError: Always, this method must be overridden
    '''
    raise NotImplementedError("'get_and_save_metrics' needs to be overridden")


Gets the final format of prediction:

- Classification: classes from predictions
- Object detection: list of bboxes per image


Name Type Description Default
y list | ndarray



Returns: List of classes if classifier; list of bboxes if object detector.

Source code in template_vision/models_training/
def inverse_transform(self, y: Union[list, np.ndarray]) -> Union[list, tuple]:
    '''Gets the final format of prediction
        - Classification : classes from predictions
        - Object detections : list of bboxes per image

    Args:
        y (list | np.ndarray): Array-like
    Returns:
        List of classes if classifier
        List of bboxes if object detector
    Raises:
        NotImplementedError: Always, this method must be overridden
    '''
    raise NotImplementedError("'inverse_transform' needs to be overridden")

predict(df_test, **kwargs)

Predictions on test set


Name Type Description Default
df_test DataFrame

DataFrame to be predicted, with column file_path


Returns: (np.ndarray | list): Array, shape = [n_samples, n_classes] or List of n_samples elements

Source code in template_vision/models_training/
def predict(self, df_test: pd.DataFrame, **kwargs) -> Union[np.ndarray, list]:
    '''Predictions on test set

    Args:
        df_test (pd.DataFrame): DataFrame to be predicted, with column file_path
    Returns:
        (np.ndarray | list): Array, shape = [n_samples, n_classes] or List of n_samples elements
    Raises:
        NotImplementedError: Always, this method must be overridden
    '''
    raise NotImplementedError("'predict' needs to be overridden")

predict_proba(df_test, **kwargs)

Predicts probabilities on the test dataset


Name Type Description Default
df_test DataFrame

DataFrame to be predicted, with column file_path


Returns: (np.ndarray): Array, shape = [n_samples, n_classes]

Source code in template_vision/models_training/
def predict_proba(self, df_test: pd.DataFrame, **kwargs) -> np.ndarray:
    '''Predicts probabilities on the test dataset

    Args:
        df_test (pd.DataFrame): DataFrame to be predicted, with column file_path
    Returns:
        (np.ndarray): Array, shape = [n_samples, n_classes]
    Raises:
        NotImplementedError: Always, this method must be overridden
    '''
    raise NotImplementedError("'predict_proba' needs to be overridden")


Saves the model


json_data (dict): Additional configurations to be saved

Source code in template_vision/models_training/
def save(self, json_data: Union[dict, None] = None) -> None:
    '''Saves the model

        json_data (dict): Additional configurations to be saved

    # Manage paths
    pkl_path = os.path.join(self.model_dir, f"{self.model_name}.pkl")
    conf_path = os.path.join(self.model_dir, "configurations.json")

    # Save model & pipeline preprocessing si level_save > 'LOW'
    if self.level_save in ['MEDIUM', 'HIGH']:
        with open(pkl_path, 'wb') as f:
            pickle.dump(self, f)

    # Save configuration JSON
    json_dict = {
        'maintainers': 'Agence DataServices',
        'gabarit_version': '1.3.4.dev0+local',
        'date':"%d/%m/%Y - %H:%M:%S"),  # Not the same as the folder's name
        'package_version': utils.get_package_version(),
        'model_name': self.model_name,
        'model_dir': self.model_dir,
        'model_type': self.model_type,
        'trained': self.trained,
        'nb_fit': self.nb_fit,
        'level_save': self.level_save,
        'librairie': None,
    # Merge json_data if not None
    if json_data is not None:
        # Priority given to json_data !
        json_dict = {**json_dict, **json_data}

    # Add conf to attributes
    self.json_dict = json_dict

    # Save conf
    with open(conf_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_dict, json_file, indent=4, cls=utils.NpEncoder)

    # Now, save a properties file for the model upload