
Model object detector

ModelObjectDetectorMixin

Parent class (Mixin) for object detector models

Source code in template_vision/models_training/object_detectors/model_object_detector.py
class ModelObjectDetectorMixin:
    '''Parent class (Mixin) for object detector models'''

    # Not implemented:
    # -> predict : to be implemented by the concrete class using this mixin

    def __init__(self, level_save: str = 'HIGH', **kwargs) -> None:
        '''Initialization of the parent class - Object detector

        Kwargs:
            level_save (str): Level of saving
                LOW: stats + configurations + Keras logger - /!\\ The model can't be reused /!\\ -
                MEDIUM: LOW + hdf5 + pkl + plots
                HIGH: MEDIUM + predictions
        Raises:
            ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])
        '''
        super().__init__(level_save=level_save, **kwargs)  # forwards level_save & all unused arguments

        if level_save not in ['LOW', 'MEDIUM', 'HIGH']:
            raise ValueError(f"The object level_save ({level_save}) is not a valid option (['LOW', 'MEDIUM', 'HIGH'])")

        # Get logger
        self.logger = logging.getLogger(__name__)

        # Model type
        self.model_type = 'object_detector'

        # List of classes to consider (set on fit)
        self.list_classes = None
        self.dict_classes = None

        # Other options
        self.level_save = level_save

    def inverse_transform(self, y) -> list:
        '''Gets a list of classes from predictions.
        Useless here, used solely for compatibility.

        Args:
            y (?): Array-like, shape = [n_samples, n_features], arrays of 0s and 1s
        Returns:
            (list)
        '''
        return list(y) if isinstance(y, np.ndarray) else y

    def get_and_save_metrics(self, y_true: list, y_pred: list, list_files_x: Union[list, None] = None,
                             type_data: str = '', **kwargs) -> pd.DataFrame:
        '''Gets and saves the metrics of a model

        Args:
            y_true (list): Bboxes list, one entry corresponds to the bboxes of one file - truth
                format bbox : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
            y_pred (list): Bboxes list, one entry corresponds to the bboxes of one file - predicted
                format bbox : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ..., 'proba': ...}
        Kwargs:
            list_files_x (?): List of input files for the prediction
            type_data (str): Type of the dataset (validation, test, ...)
        Returns:
            pd.DataFrame: The dataframe containing statistics
        '''
        # Manage errors
        if len(y_true) != len(y_pred):
            raise ValueError(f"The two lists (y_true and y_pred) must have the same length ({len(y_true)} != {len(y_pred)})")
        if list_files_x is not None and len(y_true) != len(list_files_x):
            raise ValueError(f"The two lists (y_true and list_files_x) must have the same length ({len(y_true)} != {len(list_files_x)})")

        # Construct the dataframe
        if list_files_x is None:
            df = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
        else:
            df = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred, 'file_path': list_files_x})

        # Save a prediction file if requested
        if self.level_save == 'HIGH':
            file_path = os.path.join(self.model_dir, f"predictions{'_' + type_data if len(type_data) > 0 else ''}.csv")
            if 'file_path' in df.columns:
                df = df.sort_values('file_path')
            df.to_csv(file_path, sep=';', index=False, encoding='utf-8')

        # Log info on missing classes and the impact on metrics
        gt_classes = set([bbox['class'] for bboxes in y_true for bbox in bboxes])
        gt_classes_not_in_model = gt_classes.difference(set(self.list_classes))
        model_classes_not_in_gt = set(self.list_classes).difference(gt_classes)
        # Logs
        if len(gt_classes_not_in_model):
            self.logger.info(f"Classes {gt_classes_not_in_model} are not predicted by the model.")
            self.logger.info("We won't take them into account in the calculation of the metrics.")
        if len(model_classes_not_in_gt):
            self.logger.info(f"Classes {model_classes_not_in_gt} are not present in the dataset used to calculate the metrics.")
            self.logger.info("Metrics on these classes won't be accurate.")

        # Get the classes support
        total_bbox = sum([1 for image in y_true for bbox in image if bbox['class'] in self.list_classes])
        classes_support = {}
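        # Guard against division by zero when no ground truth bbox belongs to a model class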
        if total_bbox == 0:
            total_bbox = 1
        for cl in self.list_classes:
            classes_support[cl] = sum([bbox['class'] == cl for image in y_true for bbox in image]) / total_bbox

        # Get metrics
        # We use the COCO method to get the Average Precision (AP)
        dict_ap_coco = self._get_coco_ap(y_true, y_pred)

        # Calculate the mean Average Precision (mAP) (weighted or not)
        coco_map = np.mean([value for value in list(dict_ap_coco.values()) if not np.isnan(value)])
        coco_wap = sum([dict_ap_coco[cl] * classes_support[cl] for cl in self.list_classes if classes_support[cl] > 0])

        # Global statistics
        self.logger.info('-- * * * * * * * * * * * * * * --')
        self.logger.info(f"Statistics mAP{' ' + type_data if len(type_data) > 0 else ''}")
        self.logger.info('--------------------------------')
        self.logger.info(f"mean Average Precision (mAP) - COCO method : {round(coco_map, 4)}")
        self.logger.info('--------------------------------')
        self.logger.info(f"weighted Average Precision (wAP) - COCO method : {round(coco_wap, 4)}")
        self.logger.info('--------------------------------')

        # Statistics per class
        for cl in self.list_classes:
            self.logger.info(f"Class {cl}: AP COCO = {round(dict_ap_coco[cl], 4)} /// Support = {round(classes_support[cl], 4)}")
        self.logger.info('--------------------------------')

        # Build df_stats
        dict_df_stats = {}
        dict_df_stats[0] = {'Label': 'All', 'AP COCO': coco_map, 'Support': 1.0}
        for i, cl in enumerate(self.list_classes):
            dict_df_stats[i+1] = {'Label': cl, 'AP COCO': dict_ap_coco[cl], 'Support': classes_support[cl]}

        df_stats = pd.DataFrame.from_dict(dict_df_stats, orient='index')
        # Save csv
        file_path = os.path.join(self.model_dir, f"map_coco{'_' + type_data if len(type_data) > 0 else ''}@{round(coco_map, 4)}.csv")
        df_stats.to_csv(file_path, sep=';', index=False, encoding='utf-8')

        # Return df_stats
        return df_stats

    def _get_coco_ap(self, y_true: list, y_pred: list) -> dict:
        '''Calculates the COCO AP for each class and returns the result in a dictionary
        whose keys are the classes and whose values are the corresponding AP values

        Args:
            y_true (list): Bboxes list, one entry corresponds to the bboxes of one file - truth
                format bbox : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
            y_pred (list): Bboxes list, one entry corresponds to the bboxes of one file - predicted
                format bbox : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ..., 'proba': ...}
        Returns:
            The dictionary containing AP for each class
        '''
        inv_dict_classes = {value: key for key, value in self.dict_classes.items()}
        # Put the bboxes in COCO format
        coco_true = self._put_bboxes_in_coco_format(y_true, inv_dict_classes)
        coco_pred = self._put_bboxes_in_coco_format(y_pred, inv_dict_classes)
        images = [{'id': i + 1} for i in range(len(y_true))]
        categories = [{'id': class_id, 'name': class_name, 'supercategory': 'none'}
                      for class_id, class_name in self.dict_classes.items()]
        dataset_coco_true = {'type': 'instances',
                             'images': images.copy(),
                             'categories': categories.copy(),
                             'annotations': coco_true}
        dataset_coco_pred = {'images': images.copy(),
                             'categories': categories.copy(),
                             'annotations': coco_pred}
        # Call pycocotools API to calculate the AP
        coco_eval = self._get_coco_evaluations(dataset_coco_true, dataset_coco_pred)
        dict_ap = self._get_ap_for_classes(coco_eval)
        return dict_ap

    @classmethod
    def _put_bboxes_in_coco_format(cls, bboxes: List[List[dict]], inv_dict_classes: dict) -> List[dict]:
        '''Puts a list of list of bboxes (for example from a prediction) in the right format for pycocotools API.

        Args:
            bboxes (list<list<dict>>): A list of lists of bboxes. The first level corresponds to the images and the second level to the
            bboxes of each image.
            inv_dict_classes (dict) : The dictionary of classes in the format {class_name: class_id}
        Returns:
            A list of annotations in COCO format
        '''
        annotations = []
        idx_bbox = 1  # WARNING: index begins at 1
        for idx_img, list_bboxes in enumerate(bboxes):
            for bbox in list_bboxes:
                dict_bbox = {'id': idx_bbox,
                             'image_id': idx_img + 1,  # WARNING: index begins at 1
                             'category_id': inv_dict_classes[bbox['class']],
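                             # COCO expects bboxes as [x_min, y_min, width, height], hence the corner-to-size conversion below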
                             'bbox': np.array([bbox['x1'], bbox['y1'], bbox['x2'] - bbox['x1'], bbox['y2'] - bbox['y1']]),
                             'area': (bbox['y2'] - bbox['y1']) * (bbox['x2'] - bbox['x1']),
                             'iscrowd': 0,
                             'score': bbox.get('proba', 1)}
                idx_bbox += 1
                annotations.append(dict_bbox.copy())
        return annotations

    @classmethod
    def _get_coco_evaluations(cls, dataset_coco_true: dict, dataset_coco_pred: dict) -> COCOeval:
        '''Calculates the AP from ground truth and predicted datasets in COCO format, then returns the COCOeval
        object (pycocotools API) containing all the results.

        Args:
            dataset_coco_true (dict) : Ground truth bboxes in COCO format
            dataset_coco_pred (dict) : Predicted bboxes in COCO format
        Returns:
            A COCOeval (pycocotools API) containing the AP
        '''
        # Mute everything! The pycocotools library prints too many logs and there are no level settings
        with utils.HiddenPrints():
            # Put the ground truth bboxes in the pycocotools API
            coco_ds = COCO()
            coco_ds.dataset = dataset_coco_true.copy()
            coco_ds.createIndex()

            # Put the predicted bboxes in the pycocotools API
            coco_dt = COCO()
            coco_dt.dataset = dataset_coco_pred.copy()
            coco_dt.createIndex()

            # Get image IDs
            imgIds = sorted(coco_ds.getImgIds())

            # Set evaluator
            cocoEval = COCOeval(coco_ds, coco_dt, 'bbox')
            cocoEval.params.imgIds = imgIds
            cocoEval.params.useCats = True
            cocoEval.params.iouType = "bbox"

            # Evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()

        # Return evaluator
        return cocoEval

    def _get_ap_for_classes(self, coco_eval: COCOeval) -> dict:
        '''Gets the AP per class from cocoEval, the pycocotools API.

        Args:
            coco_eval (COCOeval) : A pycocotools COCOeval which calculated the AP.
                In this function, we just get them, we do not calculate them
        Raises:
            ValueError: The precision array does not have the right shape (iou, recall, cls, area range, max dets)
        Returns:
            The dictionary containing the AP for each class
        '''
        # Compute per-category AP
        # from https://detectron2.readthedocs.io/en/latest/_modules/detectron2/evaluation/coco_evaluation.html
        precisions = coco_eval.eval["precision"]
        # precision has dims (iou, recall, cls, area range, max dets)
        if len(self.dict_classes) != precisions.shape[2]:
            raise ValueError(f"The precision has not the right shape (iou, recall, cls, area range, max dets): {precisions.shape}")

        # Retrieve APs
        dict_ap = {}
        for idx, name in self.dict_classes.items():
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            precision = precisions[:, :, idx, 0, -1]
            precision = precision[precision > -1]
            ap = np.mean(precision) if precision.size else float("nan")
            dict_ap[name] = ap
        return dict_ap

    def save(self, json_data: Union[dict, None] = None) -> None:
        '''Saves the model

        Kwargs:
            json_data (dict): Additional configurations to be saved
        '''
        # Save model
        if json_data is None:
            json_data = {}

        json_data['list_classes'] = self.list_classes
        json_data['dict_classes'] = self.dict_classes

        # Save
        super().save(json_data=json_data)
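
A minimal sketch of the conversion performed by _put_bboxes_in_coco_format (the bbox values and class mapping below are made up for illustration): corner coordinates x1/y1/x2/y2 become COCO's [x_min, y_min, width, height].

from template_vision.models_training.object_detectors.model_object_detector import ModelObjectDetectorMixin

# One image containing a single ground truth bbox in corner format
bboxes = [[{'class': 'cat', 'x1': 10, 'y1': 20, 'x2': 50, 'y2': 80}]]
inv_dict_classes = {'cat': 0}  # {class_name: class_id}

annotations = ModelObjectDetectorMixin._put_bboxes_in_coco_format(bboxes, inv_dict_classes)
# -> [{'id': 1, 'image_id': 1, 'category_id': 0,
#      'bbox': array([10, 20, 40, 60]),  # [x_min, y_min, width, height]
#      'area': 2400, 'iscrowd': 0, 'score': 1}]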

__init__(level_save='HIGH', **kwargs)

Initialization of the parent class - Object detector

Kwargs

level_save (str): Level of saving
    LOW: stats + configurations + Keras logger - /!\ The model can't be reused /!\ -
    MEDIUM: LOW + hdf5 + pkl + plots
    HIGH: MEDIUM + predictions

Raises:

    ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])

Source code in template_vision/models_training/object_detectors/model_object_detector.py
def __init__(self, level_save: str = 'HIGH', **kwargs) -> None:
    '''Initialization of the parent class - Object detector

    Kwargs:
        level_save (str): Level of saving
            LOW: stats + configurations + Keras logger - /!\\ The model can't be reused /!\\ -
            MEDIUM: LOW + hdf5 + pkl + plots
            HIGH: MEDIUM + predictions
    Raises:
        ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])
    '''
    super().__init__(level_save=level_save, **kwargs)  # forwards level_save & all unused arguments

    if level_save not in ['LOW', 'MEDIUM', 'HIGH']:
        raise ValueError(f"The object level_save ({level_save}) is not a valid option (['LOW', 'MEDIUM', 'HIGH'])")

    # Get logger
    self.logger = logging.getLogger(__name__)

    # Model type
    self.model_type = 'object_detector'

    # List of classes to consider (set on fit)
    self.list_classes = None
    self.dict_classes = None

    # Other options
    self.level_save = level_save

get_and_save_metrics(y_true, y_pred, list_files_x=None, type_data='', **kwargs)

Gets and saves the metrics of a model

Parameters:

    y_true (list, required): Bboxes list, one entry corresponds to the bboxes of one file - truth.
        Format bbox: {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
    y_pred (list, required): Bboxes list, one entry corresponds to the bboxes of one file - predicted.
        Format bbox: {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ..., 'proba': ...}

Kwargs:

    list_files_x (?): List of input files for the prediction
    type_data (str): Type of the dataset (validation, test, ...)

Returns:

    pd.DataFrame: The dataframe containing statistics
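
To make the returned statistics concrete, here is a toy illustration of how the reported mAP and wAP relate to per-class AP and support (all values below are assumed, not produced by the library):

# Assumed per-class APs and supports for two classes
dict_ap_coco = {'cat': 0.8, 'dog': 0.4}
classes_support = {'cat': 0.75, 'dog': 0.25}  # 75% of ground truth bboxes are 'cat'

coco_map = (0.8 + 0.4) / 2              # 0.6 - unweighted mean over classes
coco_wap = 0.8 * 0.75 + 0.4 * 0.25      # 0.7 - weighted by class support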

Source code in template_vision/models_training/object_detectors/model_object_detector.py
def get_and_save_metrics(self, y_true: list, y_pred: list, list_files_x: Union[list, None] = None,
                         type_data: str = '', **kwargs) -> pd.DataFrame:
    '''Gets and saves the metrics of a model

    Args:
        y_true (list): Bboxes list, one entry corresponds to the bboxes of one file - truth
            format bbox : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ...}
        y_pred (list): Bboxes list, one entry corresponds to the bboxes of one file - predicted
            format bbox : {'class': ..., 'x1': ..., 'y1': ..., 'x2': ..., 'y2': ..., 'proba': ...}
    Kwargs:
        list_files_x (?): List of input files for the prediction
        type_data (str): Type of the dataset (validation, test, ...)
    Returns:
        pd.DataFrame: The dataframe containing statistics
    '''
    # Manage errors
    if len(y_true) != len(y_pred):
        raise ValueError(f"The two lists (y_true and y_pred) must have the same length ({len(y_true)} != {len(y_pred)})")
    if list_files_x is not None and len(y_true) != len(list_files_x):
        raise ValueError(f"The two lists (y_true and list_files_x) must have the same length ({len(y_true)} != {len(list_files_x)})")

    # Construct the dataframe
    if list_files_x is None:
        df = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred})
    else:
        df = pd.DataFrame({'y_true': y_true, 'y_pred': y_pred, 'file_path': list_files_x})

    # Save a prediction file if requested
    if self.level_save == 'HIGH':
        file_path = os.path.join(self.model_dir, f"predictions{'_' + type_data if len(type_data) > 0 else ''}.csv")
        if 'file_path' in df.columns:
            df = df.sort_values('file_path')
        df.to_csv(file_path, sep=';', index=False, encoding='utf-8')

    # Log info on missing classes and the impact on metrics
    gt_classes = set([bbox['class'] for bboxes in y_true for bbox in bboxes])
    gt_classes_not_in_model = gt_classes.difference(set(self.list_classes))
    model_classes_not_in_gt = set(self.list_classes).difference(gt_classes)
    # Logs
    if len(gt_classes_not_in_model):
        self.logger.info(f"Classes {gt_classes_not_in_model} are not predicted by the model.")
        self.logger.info("We won't take them into account in the calculation of the metrics.")
    if len(model_classes_not_in_gt):
        self.logger.info(f"Classes {model_classes_not_in_gt} are not present in the dataset used to calculate the metrics.")
        self.logger.info("Metrics on these classes won't be accurate.")

    # Get the classes support
    total_bbox = sum([1 for image in y_true for bbox in image if bbox['class'] in self.list_classes])
    classes_support = {}
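    # Guard against division by zero when no ground truth bbox belongs to a model class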
    if total_bbox == 0:
        total_bbox = 1
    for cl in self.list_classes:
        classes_support[cl] = sum([bbox['class'] == cl for image in y_true for bbox in image]) / total_bbox

    # Get metrics
    # We use the COCO method to get the Average Precision (AP)
    dict_ap_coco = self._get_coco_ap(y_true, y_pred)

    # Calculate the mean Average Precision (mAP) (weighted or not)
    coco_map = np.mean([value for value in list(dict_ap_coco.values()) if not np.isnan(value)])
    coco_wap = sum([dict_ap_coco[cl] * classes_support[cl] for cl in self.list_classes if classes_support[cl] > 0])

    # Global statistics
    self.logger.info('-- * * * * * * * * * * * * * * --')
    self.logger.info(f"Statistics mAP{' ' + type_data if len(type_data) > 0 else ''}")
    self.logger.info('--------------------------------')
    self.logger.info(f"mean Average Precision (mAP) - COCO method : {round(coco_map, 4)}")
    self.logger.info('--------------------------------')
    self.logger.info(f"weighted Average Precision (wAP) - COCO method : {round(coco_wap, 4)}")
    self.logger.info('--------------------------------')

    # Statistics per class
    for cl in self.list_classes:
        self.logger.info(f"Class {cl}: AP COCO = {round(dict_ap_coco[cl], 4)} /// Support = {round(classes_support[cl], 4)}")
    self.logger.info('--------------------------------')

    # Build df_stats
    dict_df_stats = {}
    dict_df_stats[0] = {'Label': 'All', 'AP COCO': coco_map, 'Support': 1.0}
    for i, cl in enumerate(self.list_classes):
        dict_df_stats[i+1] = {'Label': cl, 'AP COCO': dict_ap_coco[cl], 'Support': classes_support[cl]}

    df_stats = pd.DataFrame.from_dict(dict_df_stats, orient='index')
    # Save csv
    file_path = os.path.join(self.model_dir, f"map_coco{'_' + type_data if len(type_data) > 0 else ''}@{round(coco_map, 4)}.csv")
    df_stats.to_csv(file_path, sep=';', index=False, encoding='utf-8')

    # Return df_stats
    return df_stats
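
A minimal usage sketch, assuming a fitted detector model (hence model.list_classes, model.dict_classes and model.model_dir are set); all bbox values are made up:

# Ground truth and predictions for a single image
y_true = [[{'class': 'cat', 'x1': 10, 'y1': 20, 'x2': 50, 'y2': 80}]]
y_pred = [[{'class': 'cat', 'x1': 12, 'y1': 22, 'x2': 48, 'y2': 78, 'proba': 0.9}]]

df_stats = model.get_and_save_metrics(y_true, y_pred, type_data='test')
# df_stats holds one 'All' row (mAP) plus one row per class with its 'AP COCO' and 'Support';
# with level_save='HIGH', a predictions_test.csv file is also written to model.model_dir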

inverse_transform(y)

Gets a list of classes from predictions. Useless here, used solely for compatibility.

Parameters:

    y (?, required): Array-like, shape = [n_samples, n_features], arrays of 0s and 1s

Returns:

    (list)

Source code in template_vision/models_training/object_detectors/model_object_detector.py
def inverse_transform(self, y) -> list:
    '''Gets a list of classes from predictions.
    Useless here, used solely for compatibility.

    Args:
        y (?): Array-like, shape = [n_samples, n_features], arrays of 0s and 1s
    Returns:
        (list)
    '''
    return list(y) if isinstance(y, np.ndarray) else y

save(json_data=None)

Saves the model

Kwargs

json_data (dict): Additional configurations to be saved

Source code in template_vision/models_training/object_detectors/model_object_detector.py
def save(self, json_data: Union[dict, None] = None) -> None:
    '''Saves the model

    Kwargs:
        json_data (dict): Additional configurations to be saved
    '''
    # Save model
    if json_data is None:
        json_data = {}

    json_data['list_classes'] = self.list_classes
    json_data['dict_classes'] = self.dict_classes

    # Save
    super().save(json_data=json_data)
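
A short sketch of what save adds to the configuration, assuming a fitted detector model whose classes are 'cat' and 'dog' (hypothetical values):

model.save(json_data={'trained_on': 'my_dataset'})  # 'trained_on' is an arbitrary extra key
# The json_data forwarded to the parent save() now also contains:
#   'list_classes': ['cat', 'dog']
#   'dict_classes': {0: 'cat', 1: 'dog'}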