Model regressor

ModelRegressorMixin

Parent class (Mixin) for regressor models

Source code in template_num/models_training/regressors/model_regressor.py
class ModelRegressorMixin:
    '''Parent class (Mixin) for regressor models'''

    def __init__(self, level_save: str = 'HIGH', **kwargs) -> None:
        '''Initialization of the class

        Kwargs:
            level_save (str): Level of saving
                LOW: stats + configurations + logger keras - /!\\ The model can't be reused /!\\ -
                MEDIUM: LOW + hdf5 + pkl + plots
                HIGH: MEDIUM + predictions
        Raises:
             ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])
        '''
        super().__init__(level_save=level_save, **kwargs)  # forwards level_save & all unused arguments

        if level_save not in ['LOW', 'MEDIUM', 'HIGH']:
            raise ValueError(f"The object level_save ({level_save}) is not a valid option (['LOW', 'MEDIUM', 'HIGH'])")

        # Get logger
        self.logger = logging.getLogger(__name__)

        # Model type
        self.model_type = 'regressor'

        # TODO: add multi-outputs !

        # Other options
        self.level_save = level_save

    def inverse_transform(self, y: Union[list, np.ndarray]) -> Union[list, tuple]:
        '''Identity function - Manages compatibility with classifiers

        Args:
            y (list | np.ndarray): Array-like, shape = [n_samples, 1]
        Returns:
            (list): List, shape = [n_samples, 1]
        '''
        return list(y) if isinstance(y, np.ndarray) else y

    def get_and_save_metrics(self, y_true, y_pred, df_x: Union[pd.DataFrame, None] = None,
                             series_to_add: Union[List[pd.Series], None] = None,
                             type_data: str = '') -> pd.DataFrame:
        '''Gets and saves the metrics of a model

        Args:
            y_true (?): Array-like, shape = [n_samples,]
            y_pred (?): Array-like, shape = [n_samples,]
        Kwargs:
            df_x (pd.DataFrame or None): Input dataFrame used for the prediction
            series_to_add (list<pd.Series>): List of pd.Series to add to the dataframe
            type_data (str): Type of dataset (validation, test, ...)
        Returns:
            pd.DataFrame: The dataframe containing the statistics
        '''

        # Cast to np.array
        y_true = np.array(y_true)
        y_pred = np.array(y_pred)

        # Save a prediction file if wanted
        if self.level_save == 'HIGH':
            # Inverse transform
            y_true_df = list(self.inverse_transform(y_true))
            y_pred_df = list(self.inverse_transform(y_pred))

            # Concat in a dataframe
            if df_x is not None:
                df = df_x.copy()
                df['y_true'] = y_true_df
                df['y_pred'] = y_pred_df
            else:
                df = pd.DataFrame({'y_true': y_true_df, 'y_pred': y_pred_df})
            # Add column abs_err
            df.loc[:, 'abs_err'] = df[['y_true', 'y_pred']].apply(lambda x: x.y_true - x.y_pred, axis=1)
            # Add column rel_err
            df.loc[:, 'rel_err'] = df[['y_true', 'y_pred']].apply(lambda x: (x.y_true - x.y_pred) / abs(x.y_true), axis=1)
            # Add some more columns
            if series_to_add is not None:
                for ser in series_to_add:
                    df[ser.name] = ser.reset_index(drop=True).reindex(index=df.index)  # Reindex correctly

            # Save predictions
            file_path = os.path.join(self.model_dir, f"predictions{'_' + type_data if len(type_data) > 0 else ''}.csv")
            df.sort_values('abs_err', ascending=True).to_csv(file_path, sep=';', index=None, encoding='utf-8')

        # Get global metrics
        metric_mae = mean_absolute_error(y_true, y_pred)
        metric_mse = mean_squared_error(y_true, y_pred)
        metric_rmse = mean_squared_error(y_true, y_pred, squared=False)
        metric_explained_variance_score = explained_variance_score(y_true, y_pred)
        metric_r2 = r2_score(y_true, y_pred)

        # Global statistics
        self.logger.info('-- * * * * * * * * * * * * * * --')
        self.logger.info(f"Statistics{' ' + type_data if len(type_data) > 0 else ''}")
        self.logger.info('--------------------------------')
        self.logger.info(f"MAE : {round(metric_mae, 5)}")
        self.logger.info(f"MSE : {round(metric_mse, 5)}")
        self.logger.info(f"RMSE : {round(metric_rmse, 5)}")
        self.logger.info(f"Explained variance : {round(metric_explained_variance_score, 5)}")
        self.logger.info(f"R² (coefficient of determination) : {round(metric_r2, 5)}")
        self.logger.info('--------------------------------')

        # Metrics file
        # TODO : add multi-outputs and stats for each output

        # Add global statistics
        dict_df_stats = {0: {
            'Label': 'All',
            'MAE': metric_mae,
            'MSE': metric_mse,
            'RMSE': metric_rmse,
            'Explained variance': metric_explained_variance_score,
            'Coefficient of determination': metric_r2,
        }}
        df_stats = pd.DataFrame.from_dict(dict_df_stats, orient='index')

        # Save .csv
        file_path = os.path.join(self.model_dir, f"mae{'_' + type_data if len(type_data) > 0 else ''}@{metric_mae}.csv")
        df_stats.to_csv(file_path, sep=';', index=False, encoding='utf-8')

        # Save some metrics
        mae_path = os.path.join(self.model_dir, f"mae{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_mae, 5)}")
        with open(mae_path, 'w'):
            pass
        mse_path = os.path.join(self.model_dir, f"mse{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_mse, 5)}")
        with open(mse_path, 'w'):
            pass
        rmse_path = os.path.join(self.model_dir, f"rmse{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_rmse, 5)}")
        with open(rmse_path, 'w'):
            pass
        explained_variance_path = os.path.join(self.model_dir, f"explained_variance{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_explained_variance_score, 5)}")
        with open(explained_variance_path, 'w'):
            pass
        r2_path = os.path.join(self.model_dir, f"r2{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_r2, 5)}")
        with open(r2_path, 'w'):
            pass

        # Plots
        if self.level_save in ['MEDIUM', 'HIGH']:
            # TODO: put a condition on the maximum number of points ?
            is_train = True if type_data == 'train' else False
            if is_train:
                self.plot_prediction_errors(y_true_train=y_true, y_pred_train=y_pred,
                                            y_true_test=None, y_pred_test=None,
                                            type_data=type_data)
                self.plot_residuals(y_true_train=y_true, y_pred_train=y_pred,
                                    y_true_test=None, y_pred_test=None,
                                    type_data=type_data)
            else:
                self.plot_prediction_errors(y_true_train=None, y_pred_train=None,
                                            y_true_test=y_true, y_pred_test=y_pred,
                                            type_data=type_data)
                self.plot_residuals(y_true_train=None, y_pred_train=None,
                                    y_true_test=y_true, y_pred_test=y_pred,
                                    type_data=type_data)

        # Return metrics
        return df_stats

    def get_metrics_simple(self, y_true, y_pred) -> pd.DataFrame:
        '''Gets metrics on predictions (single-output for now)
        Same as the method get_and_save_metrics but without all the fluff (save, etc.)

        Args:
            y_true (?): Array-like, shape = [n_samples]
            y_pred (?): Array-like, shape = [n_samples]
        Returns:
            pd.DataFrame: The dataframe containing statistics
        '''
        # Cast to np.array
        y_true = np.array(y_true)
        y_pred = np.array(y_pred)

        # Get global metrics:
        metric_mae = mean_absolute_error(y_true, y_pred)
        metric_mse = mean_squared_error(y_true, y_pred)
        metric_rmse = mean_squared_error(y_true, y_pred, squared=False)
        metric_explained_variance_score = explained_variance_score(y_true, y_pred)
        metric_r2 = r2_score(y_true, y_pred)

        # Metrics file
        # TODO : add multi-outputs and stats for each output

        # Add global statistics
        dict_df_stats = {0: {
            'Label': 'All',
            'MAE': metric_mae,
            'MSE': metric_mse,
            'RMSE': metric_rmse,
            'Explained variance': metric_explained_variance_score,
            'Coefficient of determination': metric_r2,
        }}
        df_stats = pd.DataFrame.from_dict(dict_df_stats, orient='index')

        # Return dataframe
        return df_stats

    def plot_prediction_errors(self, y_true_train: Union[np.ndarray, None] = None, y_pred_train: Union[np.ndarray, None] = None,
                               y_true_test: Union[np.ndarray, None] = None, y_pred_test: Union[np.ndarray, None] = None,
                               type_data: str = '') -> None:
        '''Plots prediction errors

        We use yellowbrick for the plots + a trick to be model agnostic

        Kwargs:
            y_true_train (np.ndarray): Array-like, shape = [n_samples]
            y_pred_train (np.ndarray): Array-like, shape = [n_samples]
            y_true_test (np.ndarray): Array-like, shape = [n_samples]
            y_pred_test (np.ndarray): Array-like, shape = [n_samples]
            type_data (str): Type of the dataset (validation, test, ...)
        Raises:
            ValueError: If a "true" is given, but not the corresponding "pred" (or vice-versa)
        '''
        if (y_true_train is not None and y_pred_train is None) or (y_true_train is None and y_pred_train is not None):
            raise ValueError('"true" and "pred" must both be given, or not at all - train')
        if (y_true_test is not None and y_pred_test is None) or (y_true_test is None and y_pred_test is not None):
            raise ValueError('"true" and "pred" must both be given, or not at all - test')

        # Get figure & ax
        fig, ax = plt.subplots(figsize=(12, 10))

        # Set visualizer
        visualizer = PredictionError(LinearRegression(), ax=ax, bestfit=False, is_fitted=True)  # Trick model not used
        visualizer.name = self.model_name

        # PredictionError does not support train and test at the same time :'(

        # Train
        if y_true_train is not None:
            visualizer.score_ = r2_score(y_true_train, y_pred_train)
            visualizer.draw(y_true_train, y_pred_train)

        # Test
        if y_true_test is not None:
            visualizer.score_ = r2_score(y_true_test, y_pred_test)
            visualizer.draw(y_true_test, y_pred_test)

        # Save
        plots_path = os.path.join(self.model_dir, 'plots')
        if not os.path.exists(plots_path):
            os.makedirs(plots_path)
        file_name = f"{type_data + '_' if len(type_data) > 0 else ''}errors.png"
        visualizer.show(outpath=os.path.join(plots_path, file_name))

        # Close figures
        plt.close('all')

    def plot_residuals(self, y_true_train: Union[np.ndarray, None] = None, y_pred_train: Union[np.ndarray, None] = None,
                       y_true_test: Union[np.ndarray, None] = None, y_pred_test: Union[np.ndarray, None] = None,
                       type_data: str = '') -> None:
        '''Plots the "residuals" from the predictions

        Uses yellowbrick for the plots plus a trick in order to be model agnostic

        Kwargs:
            y_true_train (np.ndarray): Array-like, shape = [n_samples]
            y_pred_train (np.ndarray): Array-like, shape = [n_samples]
            y_true_test (np.ndarray): Array-like, shape = [n_samples]
            y_pred_test (np.ndarray): Array-like, shape = [n_samples]
            type_data (str): Type of the dataset (validation, test, ...)
        Raises:
            ValueError: If a "true" is given, but not the corresponding "pred" (or vice-versa)
        '''
        if (y_true_train is not None and y_pred_train is None) or (y_true_train is None and y_pred_train is not None):
            raise ValueError('"true" and "pred" must both be given, or not at all - train')
        if (y_true_test is not None and y_pred_test is None) or (y_true_test is None and y_pred_test is not None):
            raise ValueError('"true" and "pred" must both be given, or not at all - test')

        # Get figure & ax
        fig, ax = plt.subplots(figsize=(12, 10))

        # Set visualizer
        visualizer = ResidualsPlot(LinearRegression(), ax=ax, is_fitted=True)  # Trick model not used
        visualizer.name = self.model_name

        # Train
        if y_true_train is not None:
            visualizer.train_score_ = r2_score(y_true_train, y_pred_train)
            residuals = y_pred_train - y_true_train
            visualizer.draw(y_pred_train, residuals, train=True)

        # Test
        if y_true_test is not None:
            visualizer.test_score_ = r2_score(y_true_test, y_pred_test)
            residuals = y_pred_test - y_true_test
            visualizer.draw(y_pred_test, residuals, train=False)

        # Save
        plots_path = os.path.join(self.model_dir, 'plots')
        if not os.path.exists(plots_path):
            os.makedirs(plots_path)
        file_name = f"{type_data + '_' if len(type_data) > 0 else ''}residuals.png"
        visualizer.show(outpath=os.path.join(plots_path, file_name))
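
Usage sketch (hypothetical): the mixin is not meant to be instantiated on its own. Its __init__ forwards level_save to the next class in the MRO, and its methods read model_dir and model_name from the host class. In the sketch below, _DummyBase is only a stand-in for the template's real parent class so that the example is self-contained.

import os
import tempfile

import numpy as np

from template_num.models_training.regressors.model_regressor import ModelRegressorMixin


class _DummyBase:
    '''Hypothetical stand-in for the template's real parent class: it accepts
    the forwarded level_save and provides model_name / model_dir.'''
    def __init__(self, level_save: str = 'HIGH', **kwargs) -> None:
        self.model_name = 'dummy_regressor'
        self.model_dir = tempfile.mkdtemp()  # metrics, predictions & plots are written here


class DummyRegressor(ModelRegressorMixin, _DummyBase):
    pass


model = DummyRegressor(level_save='HIGH')
y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
df_stats = model.get_and_save_metrics(y_true, y_pred, type_data='test')
print(df_stats)                     # one 'All' row with MAE, MSE, RMSE, explained variance, R²
print(os.listdir(model.model_dir))  # predictions_test.csv, mae_test@..., plots/, ...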

__init__(level_save='HIGH', **kwargs)

Initialization of the class

Kwargs

    level_save (str): Level of saving
        LOW: stats + configurations + logger keras - /!\ The model can't be reused /!\ -
        MEDIUM: LOW + hdf5 + pkl + plots
        HIGH: MEDIUM + predictions

Raises:

    ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])

Source code in template_num/models_training/regressors/model_regressor.py
def __init__(self, level_save: str = 'HIGH', **kwargs) -> None:
    '''Initialization of the class

    Kwargs:
        level_save (str): Level of saving
            LOW: stats + configurations + logger keras - /!\\ The model can't be reused /!\\ -
            MEDIUM: LOW + hdf5 + pkl + plots
            HIGH: MEDIUM + predictions
    Raises:
         ValueError: If the object level_save is not a valid option (['LOW', 'MEDIUM', 'HIGH'])
    '''
    super().__init__(level_save=level_save, **kwargs)  # forwards level_save & all unused arguments

    if level_save not in ['LOW', 'MEDIUM', 'HIGH']:
        raise ValueError(f"The object level_save ({level_save}) is not a valid option (['LOW', 'MEDIUM', 'HIGH'])")

    # Get logger
    self.logger = logging.getLogger(__name__)

    # Model type
    self.model_type = 'regressor'

    # TODO: add multi-outputs !

    # Other options
    self.level_save = level_save
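
With the hypothetical DummyRegressor from the sketch above, an invalid level_save is rejected at construction time:

try:
    DummyRegressor(level_save='VERY_HIGH')
except ValueError as err:
    print(err)  # The object level_save (VERY_HIGH) is not a valid option (['LOW', 'MEDIUM', 'HIGH'])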

get_and_save_metrics(y_true, y_pred, df_x=None, series_to_add=None, type_data='')

Gets and saves the metrics of a model

Parameters:

    Name    Type  Description                       Default
    y_true  ?     Array-like, shape = [n_samples,]  required
    y_pred  ?     Array-like, shape = [n_samples,]  required

Kwargs

    df_x (pd.DataFrame or None): Input dataFrame used for the prediction
    series_to_add (list<pd.Series>): List of pd.Series to add to the dataframe
    type_data (str): Type of dataset (validation, test, ...)

Returns:

    pd.DataFrame: The dataframe containing the statistics

Source code in template_num/models_training/regressors/model_regressor.py
def get_and_save_metrics(self, y_true, y_pred, df_x: Union[pd.DataFrame, None] = None,
                         series_to_add: Union[List[pd.Series], None] = None,
                         type_data: str = '') -> pd.DataFrame:
    '''Gets and saves the metrics of a model

    Args:
        y_true (?): Array-like, shape = [n_samples,]
        y_pred (?): Array-like, shape = [n_samples,]
    Kwargs:
        df_x (pd.DataFrame or None): Input dataFrame used for the prediction
        series_to_add (list<pd.Series>): List of pd.Series to add to the dataframe
        type_data (str): Type of dataset (validation, test, ...)
    Returns:
        pd.DataFrame: The dataframe containing the statistics
    '''

    # Cast to np.array
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Save a prediction file if wanted
    if self.level_save == 'HIGH':
        # Inverse transform
        y_true_df = list(self.inverse_transform(y_true))
        y_pred_df = list(self.inverse_transform(y_pred))

        # Concat in a dataframe
        if df_x is not None:
            df = df_x.copy()
            df['y_true'] = y_true_df
            df['y_pred'] = y_pred_df
        else:
            df = pd.DataFrame({'y_true': y_true_df, 'y_pred': y_pred_df})
        # Add column abs_err
        df.loc[:, 'abs_err'] = df[['y_true', 'y_pred']].apply(lambda x: x.y_true - x.y_pred, axis=1)
        # Add column rel_err
        df.loc[:, 'rel_err'] = df[['y_true', 'y_pred']].apply(lambda x: (x.y_true - x.y_pred) / abs(x.y_true), axis=1)
        # Add some more columns
        if series_to_add is not None:
            for ser in series_to_add:
                df[ser.name] = ser.reset_index(drop=True).reindex(index=df.index)  # Reindex correctly

        # Save predictions
        file_path = os.path.join(self.model_dir, f"predictions{'_' + type_data if len(type_data) > 0 else ''}.csv")
        df.sort_values('abs_err', ascending=True).to_csv(file_path, sep=';', index=None, encoding='utf-8')

    # Get global metrics
    metric_mae = mean_absolute_error(y_true, y_pred)
    metric_mse = mean_squared_error(y_true, y_pred)
    metric_rmse = mean_squared_error(y_true, y_pred, squared=False)
    metric_explained_variance_score = explained_variance_score(y_true, y_pred)
    metric_r2 = r2_score(y_true, y_pred)

    # Global statistics
    self.logger.info('-- * * * * * * * * * * * * * * --')
    self.logger.info(f"Statistics{' ' + type_data if len(type_data) > 0 else ''}")
    self.logger.info('--------------------------------')
    self.logger.info(f"MAE : {round(metric_mae, 5)}")
    self.logger.info(f"MSE : {round(metric_mse, 5)}")
    self.logger.info(f"RMSE : {round(metric_rmse, 5)}")
    self.logger.info(f"Explained variance : {round(metric_explained_variance_score, 5)}")
    self.logger.info(f"R² (coefficient of determination) : {round(metric_r2, 5)}")
    self.logger.info('--------------------------------')

    # Metrics file
    # TODO : add multi-outputs and stats for each output

    # Add global statistics
    dict_df_stats = {0: {
        'Label': 'All',
        'MAE': metric_mae,
        'MSE': metric_mse,
        'RMSE': metric_rmse,
        'Explained variance': metric_explained_variance_score,
        'Coefficient of determination': metric_r2,
    }}
    df_stats = pd.DataFrame.from_dict(dict_df_stats, orient='index')

    # Save .csv
    file_path = os.path.join(self.model_dir, f"mae{'_' + type_data if len(type_data) > 0 else ''}@{metric_mae}.csv")
    df_stats.to_csv(file_path, sep=';', index=False, encoding='utf-8')

    # Save some metrics
    mae_path = os.path.join(self.model_dir, f"mae{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_mae, 5)}")
    with open(mae_path, 'w'):
        pass
    mse_path = os.path.join(self.model_dir, f"mse{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_mse, 5)}")
    with open(mse_path, 'w'):
        pass
    rmse_path = os.path.join(self.model_dir, f"rmse{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_rmse, 5)}")
    with open(rmse_path, 'w'):
        pass
    explained_variance_path = os.path.join(self.model_dir, f"explained_variance{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_explained_variance_score, 5)}")
    with open(explained_variance_path, 'w'):
        pass
    r2_path = os.path.join(self.model_dir, f"r2{'_' + type_data if len(type_data) > 0 else ''}@{round(metric_r2, 5)}")
    with open(r2_path, 'w'):
        pass

    # Plots
    if self.level_save in ['MEDIUM', 'HIGH']:
        # TODO: put a condition on the maximum number of points ?
        is_train = True if type_data == 'train' else False
        if is_train:
            self.plot_prediction_errors(y_true_train=y_true, y_pred_train=y_pred,
                                        y_true_test=None, y_pred_test=None,
                                        type_data=type_data)
            self.plot_residuals(y_true_train=y_true, y_pred_train=y_pred,
                                y_true_test=None, y_pred_test=None,
                                type_data=type_data)
        else:
            self.plot_prediction_errors(y_true_train=None, y_pred_train=None,
                                        y_true_test=y_true, y_pred_test=y_pred,
                                        type_data=type_data)
            self.plot_residuals(y_true_train=None, y_pred_train=None,
                                y_true_test=y_true, y_pred_test=y_pred,
                                type_data=type_data)

    # Return metrics
    return df_stats
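
What ends up on disk depends on level_save: 'HIGH' writes the predictions CSV, the stats CSV, the metric marker files and the plots; 'MEDIUM' drops the predictions CSV; 'LOW' keeps only the stats CSV and the marker files. A quick check with the hypothetical DummyRegressor and the toy arrays from the first sketch:

model_low = DummyRegressor(level_save='LOW')
model_low.get_and_save_metrics(y_true, y_pred, type_data='test')
files = os.listdir(model_low.model_dir)
assert not any(f.startswith('predictions') for f in files)  # no predictions CSV with 'LOW'
assert 'plots' not in files                                 # and no plots directory either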

get_metrics_simple(y_true, y_pred)

Gets metrics on predictions (single-output for now). Same as the method get_and_save_metrics but without all the fluff (save, etc.).

Parameters:

    Name    Type  Description                      Default
    y_true  ?     Array-like, shape = [n_samples]  required
    y_pred  ?     Array-like, shape = [n_samples]  required

Returns:

    pd.DataFrame: The dataframe containing statistics

Source code in template_num/models_training/regressors/model_regressor.py
def get_metrics_simple(self, y_true, y_pred) -> pd.DataFrame:
    '''Gets metrics on predictions (single-output for now)
    Same as the method get_and_save_metrics but without all the fluff (save, etc.)

    Args:
        y_true (?): Array-like, shape = [n_samples]
        y_pred (?): Array-like, shape = [n_samples]
    Returns:
        pd.DataFrame: The dataframe containing statistics
    '''
    # Cast to np.array
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)

    # Get global metrics:
    metric_mae = mean_absolute_error(y_true, y_pred)
    metric_mse = mean_squared_error(y_true, y_pred)
    metric_rmse = mean_squared_error(y_true, y_pred, squared=False)
    metric_explained_variance_score = explained_variance_score(y_true, y_pred)
    metric_r2 = r2_score(y_true, y_pred)

    # Metrics file
    # TODO : add multi-outputs and stats for each output

    # Add global statistics
    dict_df_stats = {0: {
        'Label': 'All',
        'MAE': metric_mae,
        'MSE': metric_mse,
        'RMSE': metric_rmse,
        'Explained variance': metric_explained_variance_score,
        'Coefficient of determination': metric_r2,
    }}
    df_stats = pd.DataFrame.from_dict(dict_df_stats, orient='index')

    # Return dataframe
    return df_stats
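
The single 'All' row simply wraps the corresponding scikit-learn metrics, which is easy to verify (again with the hypothetical DummyRegressor from the first sketch):

import numpy as np
from sklearn.metrics import mean_absolute_error, r2_score

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

df_stats = DummyRegressor(level_save='LOW').get_metrics_simple(y_true, y_pred)
assert np.isclose(df_stats.loc[0, 'MAE'], mean_absolute_error(y_true, y_pred))
assert np.isclose(df_stats.loc[0, 'Coefficient of determination'], r2_score(y_true, y_pred))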

inverse_transform(y)

Identity function - Manages compatibility with classifiers

Parameters:

    Name  Type               Description                         Default
    y     list | np.ndarray  Array-like, shape = [n_samples, 1]  required

Returns:

    (list): List, shape = [n_samples, 1]

Source code in template_num/models_training/regressors/model_regressor.py
def inverse_transform(self, y: Union[list, np.ndarray]) -> Union[list, tuple]:
    '''Identity function - Manages compatibility with classifiers

    Args:
        y (list | np.ndarray): Array-like, shape = [n_samples, 1]
    Returns:
        (list): List, shape = [n_samples, 1]
    '''
    return list(y) if isinstance(y, np.ndarray) else y
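
For a regressor this is effectively the identity: a numpy array comes back as a plain Python list, anything else is returned unchanged. With the model from the first sketch:

model.inverse_transform(np.array([1.2, 3.4]))  # -> a Python list with the same values
model.inverse_transform([1.2, 3.4])            # -> [1.2, 3.4], returned as-is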

plot_prediction_errors(y_true_train=None, y_pred_train=None, y_true_test=None, y_pred_test=None, type_data='')

Plots prediction errors

We use yellowbrick for the plots + a trick to be model agnostic

Kwargs

    y_true_train (np.ndarray): Array-like, shape = [n_samples]
    y_pred_train (np.ndarray): Array-like, shape = [n_samples]
    y_true_test (np.ndarray): Array-like, shape = [n_samples]
    y_pred_test (np.ndarray): Array-like, shape = [n_samples]
    type_data (str): Type of the dataset (validation, test, ...)

Raises:

    ValueError: If a "true" is given, but not the corresponding "pred" (or vice-versa)

Source code in template_num/models_training/regressors/model_regressor.py
def plot_prediction_errors(self, y_true_train: Union[np.ndarray, None] = None, y_pred_train: Union[np.ndarray, None] = None,
                           y_true_test: Union[np.ndarray, None] = None, y_pred_test: Union[np.ndarray, None] = None,
                           type_data: str = '') -> None:
    '''Plots prediction errors

    We use yellowbrick for the plots + a trick to be model agnostic

    Kwargs:
        y_true_train (np.ndarray): Array-like, shape = [n_samples]
        y_pred_train (np.ndarray): Array-like, shape = [n_samples]
        y_true_test (np.ndarray): Array-like, shape = [n_samples]
        y_pred_test (np.ndarray): Array-like, shape = [n_samples]
        type_data (str): Type of the dataset (validation, test, ...)
    Raises:
        ValueError: If a "true" is given, but not the corresponding "pred" (or vice-versa)
    '''
    if (y_true_train is not None and y_pred_train is None) or (y_true_train is None and y_pred_train is not None):
        raise ValueError('"true" and "pred" must both be given, or not at all - train')
    if (y_true_test is not None and y_pred_test is None) or (y_true_test is None and y_pred_test is not None):
        raise ValueError('"true" and "pred" must both be given, or not at all - test')

    # Get figure & ax
    fig, ax = plt.subplots(figsize=(12, 10))

    # Set visualizer
    visualizer = PredictionError(LinearRegression(), ax=ax, bestfit=False, is_fitted=True)  # Trick model not used
    visualizer.name = self.model_name

    # PredictionError does not support train and test at the same time :'(

    # Train
    if y_true_train is not None:
        visualizer.score_ = r2_score(y_true_train, y_pred_train)
        visualizer.draw(y_true_train, y_pred_train)

    # Test
    if y_true_test is not None:
        visualizer.score_ = r2_score(y_true_test, y_pred_test)
        visualizer.draw(y_true_test, y_pred_test)

    # Save
    plots_path = os.path.join(self.model_dir, 'plots')
    if not os.path.exists(plots_path):
        os.makedirs(plots_path)
    file_name = f"{type_data + '_' if len(type_data) > 0 else ''}errors.png"
    visualizer.show(outpath=os.path.join(plots_path, file_name))

    # Close figures
    plt.close('all')
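
The "trick" can be reproduced outside the class: the LinearRegression instance only gives yellowbrick an estimator to attach to, it is never fitted nor used, and both the R² and the points are injected by hand. A minimal standalone sketch (the model name and output file below are arbitrary):

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from yellowbrick.regressor import PredictionError

y_true = np.array([3.0, -0.5, 2.0, 7.0, 4.2])
y_pred = np.array([2.5, 0.0, 2.0, 8.0, 4.0])

fig, ax = plt.subplots(figsize=(12, 10))
visualizer = PredictionError(LinearRegression(), ax=ax, bestfit=False, is_fitted=True)
visualizer.name = 'my_model'                  # label shown in the plot title
visualizer.score_ = r2_score(y_true, y_pred)  # R² displayed in the legend
visualizer.draw(y_true, y_pred)
visualizer.show(outpath='prediction_errors.png')
plt.close('all')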

plot_residuals(y_true_train=None, y_pred_train=None, y_true_test=None, y_pred_test=None, type_data='')

Plots the "residuals" from the predictions

Uses yellowbrick for the plots plus a trick in order to be model agnostic

Kwargs

    y_true_train (np.ndarray): Array-like, shape = [n_samples]
    y_pred_train (np.ndarray): Array-like, shape = [n_samples]
    y_true_test (np.ndarray): Array-like, shape = [n_samples]
    y_pred_test (np.ndarray): Array-like, shape = [n_samples]
    type_data (str): Type of the dataset (validation, test, ...)

Raises:

    ValueError: If a "true" is given, but not the corresponding "pred" (or vice-versa)

Source code in template_num/models_training/regressors/model_regressor.py
def plot_residuals(self, y_true_train: Union[np.ndarray, None] = None, y_pred_train: Union[np.ndarray, None] = None,
                   y_true_test: Union[np.ndarray, None] = None, y_pred_test: Union[np.ndarray, None] = None,
                   type_data: str = '') -> None:
    '''Plots the "residuals" from the predictions

    Uses yellowbrick for the plots plus a trick in order to be model agnostic

    Kwargs:
        y_true_train (np.ndarray): Array-like, shape = [n_samples]
        y_pred_train (np.ndarray): Array-like, shape = [n_samples]
        y_true_test (np.ndarray): Array-like, shape = [n_samples]
        y_pred_test (np.ndarray): Array-like, shape = [n_samples]
        type_data (str): Type of the dataset (validation, test, ...)
    Raises:
        ValueError: If a "true" is given, but not the corresponding "pred" (or vice-versa)
    '''
    if (y_true_train is not None and y_pred_train is None) or (y_true_train is None and y_pred_train is not None):
        raise ValueError('"true" and "pred" must both be given, or not at all - train')
    if (y_true_test is not None and y_pred_test is None) or (y_true_test is None and y_pred_test is not None):
        raise ValueError('"true" and "pred" must both be given, or not at all - test')

    # Get figure & ax
    fig, ax = plt.subplots(figsize=(12, 10))

    # Set visualizer
    visualizer = ResidualsPlot(LinearRegression(), ax=ax, is_fitted=True)  # Trick model not used
    visualizer.name = self.model_name

    # Train
    if y_true_train is not None:
        visualizer.train_score_ = r2_score(y_true_train, y_pred_train)
        residuals = y_pred_train - y_true_train
        visualizer.draw(y_pred_train, residuals, train=True)

    # Test
    if y_true_test is not None:
        visualizer.test_score_ = r2_score(y_true_test, y_pred_test)
        residuals = y_pred_test - y_true_test
        visualizer.draw(y_pred_test, residuals, train=False)

    # Save
    plots_path = os.path.join(self.model_dir, 'plots')
    if not os.path.exists(plots_path):
        os.makedirs(plots_path)
    file_name = f"{type_data + '_' if len(type_data) > 0 else ''}residuals.png"
    visualizer.show(outpath=os.path.join(plots_path, file_name))
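
Unlike plot_prediction_errors, this helper can overlay train and test residuals in the same figure. With the hypothetical DummyRegressor from the first sketch and some synthetic arrays:

rng = np.random.default_rng(42)
y_true_train = rng.normal(size=200)
y_pred_train = y_true_train + rng.normal(scale=0.3, size=200)
y_true_test = rng.normal(size=50)
y_pred_test = y_true_test + rng.normal(scale=0.5, size=50)

model.plot_residuals(y_true_train=y_true_train, y_pred_train=y_pred_train,
                     y_true_test=y_true_test, y_pred_test=y_pred_test,
                     type_data='test')
# -> <model_dir>/plots/test_residuals.png, train and test residuals overlaid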