Model logistic regression classifier

`ModelLogisticRegressionClassifier`

Bases: ModelClassifierMixin, ModelPipeline

Logistic Regression model for classification

Source code in template_num/models_training/classifiers/models_sklearn/model_logistic_regression_classifier.py

class ModelLogisticRegressionClassifier(ModelClassifierMixin, ModelPipeline):
    '''Logistic Regression model for classification'''

    _default_name = 'model_lr_classifier'

    def __init__(self, lr_params: Union[dict, None] = None, multiclass_strategy: Union[str, None] = None, **kwargs) -> None:
        '''Initialization of the class (see ModelPipeline, ModelClass & ModelClassifierMixin for more arguments)

        Kwargs:
            lr_params (dict): Parameters for the Logistic Regression
            multiclass_strategy (str): Multi-classes strategy, 'ovr' (OneVsRest), or 'ovo' (OneVsOne). If None, use the default of the algorithm.
        Raises:
            ValueError: If multiclass_strategy is neither None, 'ovo' nor 'ovr'
        '''
        # Validate before calling the parent initializer so an invalid value fails fast
        if multiclass_strategy is not None and multiclass_strategy not in ['ovo', 'ovr']:
            raise ValueError(f"The value of 'multiclass_strategy' must be 'ovo' or 'ovr' (not {multiclass_strategy})")
        # Init.
        super().__init__(**kwargs)

        # Get logger (must be done after super init)
        self.logger = logging.getLogger(__name__)

        # Manage model
        if lr_params is None:
            lr_params = {}
        self.lr = LogisticRegression(**lr_params)
        self.multiclass_strategy = multiclass_strategy

        # Build the pipeline: its single step is always named 'lr'
        if self.multi_label:
            # LogisticRegression does not natively support multi-labels
            # (multiclass_strategy is ignored in this case)
            self.pipeline = Pipeline([('lr', MultiOutputClassifier(self.lr))])
        elif multiclass_strategy == 'ovr':
            self.pipeline = Pipeline([('lr', OneVsRestClassifier(self.lr))])
        elif multiclass_strategy == 'ovo':
            self.pipeline = Pipeline([('lr', OneVsOneClassifier(self.lr))])
        else:
            # No strategy requested: use the estimator as is
            self.pipeline = Pipeline([('lr', self.lr)])

    @utils.trained_needed
    def predict_proba(self, x_test: pd.DataFrame, **kwargs) -> np.ndarray:
        '''Predicts the probabilities on the test set
            'ovo' can't predict probabilities : by default, return 1 for the predicted class, 0 otherwise.

        Args:
            x_test (pd.DataFrame): DataFrame with the test data to be predicted
        Returns:
            (np.ndarray): Array, shape = [n_samples, n_classes]
        '''
        # Uses super() of the ModelPipeline class if != 'ovo' or multi-labels
        if self.multi_label or self.multiclass_strategy != 'ovo':
            return super().predict_proba(x_test=x_test, **kwargs)

        # 'ovo' case : we build pseudo-probabilities from the predicted classes
        # We check input format
        x_test, _ = self._check_input_format(x_test)
        # Get preds - format ['a', 'b', 'c', 'a', ..., 'b']
        preds = self.pipeline.predict(x_test)
        # One-hot row per class, ordered as self.list_classes
        nb_classes = len(self.list_classes)
        one_hot_rows = {
            cl: [1. if position == index else 0. for position in range(nb_classes)]
            for index, cl in enumerate(self.list_classes)
        }
        # Transform to "proba"
        return np.array([one_hot_rows[pred] for pred in preds])

    def save(self, json_data: Union[dict, None] = None) -> None:
        '''Saves the model

        The 'multiclass_strategy' entry is added to json_data (in place when a dict is provided)
        before delegating to the parent save.

        Kwargs:
            json_data (dict): Additional configurations to be saved
        '''
        # Save model
        if json_data is None:
            json_data = {}

        # Needed by reload_from_standalone to know how the 'lr' step is wrapped
        json_data['multiclass_strategy'] = self.multiclass_strategy

        # Save
        super().save(json_data=json_data)

    def reload_from_standalone(self, **kwargs) -> None:
        '''Reloads a model from its configuration and "standalones" files
        - /!\\ Experimental /!\\ -

        Kwargs:
            configuration_path (str): Path to configuration file
            sklearn_pipeline_path (str): Path to standalone pipeline
            preprocess_pipeline_path (str): Path to preprocess pipeline
        Raises:
            ValueError: If configuration_path is None
            ValueError: If sklearn_pipeline_path is None
            ValueError: If preprocess_pipeline_path is None
            FileNotFoundError: If the object configuration_path is not an existing file
            FileNotFoundError: If the object sklearn_pipeline_path is not an existing file
            FileNotFoundError: If the object preprocess_pipeline_path is not an existing file
        '''
        # Retrieve args
        configuration_path = kwargs.get('configuration_path', None)
        sklearn_pipeline_path = kwargs.get('sklearn_pipeline_path', None)
        preprocess_pipeline_path = kwargs.get('preprocess_pipeline_path', None)

        # Checks
        if configuration_path is None:
            raise ValueError("The argument configuration_path can't be None")
        if sklearn_pipeline_path is None:
            raise ValueError("The argument sklearn_pipeline_path can't be None")
        if preprocess_pipeline_path is None:
            raise ValueError("The argument preprocess_pipeline_path can't be None")
        # isfile (and not exists) : a directory must be rejected here with the documented
        # FileNotFoundError instead of failing later inside open()
        if not os.path.isfile(configuration_path):
            raise FileNotFoundError(f"The file {configuration_path} does not exist")
        if not os.path.isfile(sklearn_pipeline_path):
            raise FileNotFoundError(f"The file {sklearn_pipeline_path} does not exist")
        if not os.path.isfile(preprocess_pipeline_path):
            raise FileNotFoundError(f"The file {preprocess_pipeline_path} does not exist")

        # Load confs
        with open(configuration_path, 'r', encoding='utf-8') as f:
            configs = json.load(f)
        # Can't set int as keys in json, so need to cast it after reloading
        # dict_classes keys are always ints
        if 'dict_classes' in configs:
            configs['dict_classes'] = {int(k): v for k, v in configs['dict_classes'].items()}
        elif 'list_classes' in configs:
            # No dict_classes saved : rebuild it from the position of each class
            configs['dict_classes'] = dict(enumerate(configs['list_classes']))

        # Set class vars
        # self.model_name = # Keep the created name
        # self.model_dir = # Keep the created folder
        self.nb_fit = configs.get('nb_fit', 1)  # Consider one unique fit by default
        self.trained = configs.get('trained', True)  # Consider trained by default
        # Try to read the following attributes from configs and, if absent, keep the current one
        for attribute in ['model_type', 'x_col', 'y_col', 'columns_in', 'mandatory_columns',
                          'list_classes', 'dict_classes', 'multi_label', 'level_save',
                          'multiclass_strategy']:
            setattr(self, attribute, configs.get(attribute, getattr(self, attribute)))

        # Reload pipeline model
        # NOTE: pickle.load can execute arbitrary code - only reload files coming from a trusted source
        with open(sklearn_pipeline_path, 'rb') as f:
            self.pipeline = pickle.load(f)

        # Manage multi-labels or multi-classes
        if not self.multi_label and self.multiclass_strategy is None:
            # The 'lr' step is the estimator itself
            self.lr = self.pipeline['lr']
        else:
            # The 'lr' step is a wrapper : the base estimator is read from its `estimator` attribute
            self.lr = self.pipeline['lr'].estimator

        # Reload pipeline preprocessing
        # NOTE: same trust requirement as above (pickle)
        with open(preprocess_pipeline_path, 'rb') as f:
            self.preprocess_pipeline = pickle.load(f)

`__init__(lr_params=None, multiclass_strategy=None, **kwargs)`

Initialization of the class (see ModelPipeline, ModelClass & ModelClassifierMixin for more arguments)

Kwargs

- lr_params (dict): Parameters for the Logistic Regression.
- multiclass_strategy (str): Multi-classes strategy, 'ovr' (OneVsRest), or 'ovo' (OneVsOne). If None, use the default of the algorithm.

Raises: ValueError: If multiclass_strategy is neither None, 'ovo' nor 'ovr'.

Source code in template_num/models_training/classifiers/models_sklearn/model_logistic_regression_classifier.py

def __init__(self, lr_params: Union[dict, None] = None, multiclass_strategy: Union[str, None] = None, **kwargs) -> None:
    '''Builds the Logistic Regression estimator and the sklearn pipeline wrapping it
    (see ModelPipeline, ModelClass & ModelClassifierMixin for more arguments)

    Kwargs:
        lr_params (dict): Parameters forwarded to LogisticRegression (none if None)
        multiclass_strategy (str): Multi-classes strategy, 'ovr' (OneVsRest), or 'ovo' (OneVsOne). If None, use the default of the algorithm.
    Raises:
        ValueError: If multiclass_strategy is neither None, 'ovo' nor 'ovr'
    '''
    # Reject unknown strategies before doing anything else
    if multiclass_strategy is not None and multiclass_strategy not in ('ovo', 'ovr'):
        raise ValueError(f"The value of 'multiclass_strategy' must be 'ovo' or 'ovr' (not {multiclass_strategy})")

    # Parent initialization
    super().__init__(**kwargs)

    # The logger must be retrieved once the parent initialization is done
    self.logger = logging.getLogger(__name__)

    # Base estimator
    self.lr = LogisticRegression(**({} if lr_params is None else lr_params))
    self.multiclass_strategy = multiclass_strategy

    # Pick what goes in the single 'lr' step of the pipeline
    if self.multi_label:
        # LogisticRegression does not natively support multi-labels
        final_step = MultiOutputClassifier(self.lr)
    else:
        # Mono-label : optionally wrap the estimator with the requested multi-classes strategy
        wrappers = {'ovr': OneVsRestClassifier, 'ovo': OneVsOneClassifier}
        wrapper = wrappers.get(multiclass_strategy)
        final_step = self.lr if wrapper is None else wrapper(self.lr)
    self.pipeline = Pipeline([('lr', final_step)])

`predict_proba(x_test, **kwargs)`

Predicts the probabilities on the test set. 'ovo' can't predict probabilities: by default, return 1 for the predicted class, 0 otherwise.

Parameters:

Name	Type	Description	Default
`x_test`	`DataFrame`	DataFrame with the test data to be predicted	required

Returns: (np.ndarray): Array, shape = [n_samples, n_classes]

Source code in template_num/models_training/classifiers/models_sklearn/model_logistic_regression_classifier.py

@utils.trained_needed
def predict_proba(self, x_test: pd.DataFrame, **kwargs) -> np.ndarray:
    '''Returns the class probabilities for the test set
        With the 'ovo' strategy no probability is available : the predicted class gets 1, every other class 0.

    Args:
        x_test (pd.DataFrame): DataFrame with the test data to be predicted
    Returns:
        (np.ndarray): Array, shape = [n_samples, n_classes]
    '''
    # Everything but mono-label 'ovo' is delegated to the parent implementation
    if self.multi_label or self.multiclass_strategy != 'ovo':
        return super().predict_proba(x_test=x_test, **kwargs)

    # Input format check (only the first returned element is used here)
    x_test, _ = self._check_input_format(x_test)
    # Predicted classes, e.g. ['a', 'b', 'c', 'a', ..., 'b']
    predicted_classes = self.pipeline.predict(x_test)

    # One-hot row for each class, columns ordered as self.list_classes
    nb_classes = len(self.list_classes)
    one_hot_by_class = {}
    for position, label in enumerate(self.list_classes):
        row = [0.] * nb_classes
        row[position] = 1.
        one_hot_by_class[label] = row

    # Pseudo "probabilities" : the one-hot row of each predicted class
    probas = np.array([one_hot_by_class[label] for label in predicted_classes])
    return probas

`reload_from_standalone(**kwargs)`

Reloads a model from its configuration and "standalones" files - /!\ Experimental /!\ -

Kwargs

- configuration_path (str): Path to configuration file
- sklearn_pipeline_path (str): Path to standalone pipeline
- preprocess_pipeline_path (str): Path to preprocess pipeline

Raises:

- ValueError: If configuration_path is None
- ValueError: If sklearn_pipeline_path is None
- ValueError: If preprocess_pipeline_path is None
- FileNotFoundError: If the object configuration_path is not an existing file
- FileNotFoundError: If the object sklearn_pipeline_path is not an existing file
- FileNotFoundError: If the object preprocess_pipeline_path is not an existing file

Source code in template_num/models_training/classifiers/models_sklearn/model_logistic_regression_classifier.py

def reload_from_standalone(self, **kwargs) -> None:
    '''Reloads a model from its configuration and "standalones" files
    - /!\\ Experimental /!\\ -

    Kwargs:
        configuration_path (str): Path to configuration file
        sklearn_pipeline_path (str): Path to standalone pipeline
        preprocess_pipeline_path (str): Path to preprocess pipeline
    Raises:
        ValueError: If configuration_path is None
        ValueError: If sklearn_pipeline_path is None
        ValueError: If preprocess_pipeline_path is None
        FileNotFoundError: If the object configuration_path is not an existing file
        FileNotFoundError: If the object sklearn_pipeline_path is not an existing file
        FileNotFoundError: If the object preprocess_pipeline_path is not an existing file
    '''
    # Retrieve args (the insertion order drives the order of the checks below)
    paths = {
        name: kwargs.get(name, None)
        for name in ('configuration_path', 'sklearn_pipeline_path', 'preprocess_pipeline_path')
    }

    # Checks - first that every path is provided ...
    for name, path in paths.items():
        if path is None:
            raise ValueError(f"The argument {name} can't be None")
    # ... then that every path is an existing *file*
    # isfile (and not exists) : a directory must be rejected here with the documented
    # FileNotFoundError instead of failing later inside open()
    for path in paths.values():
        if not os.path.isfile(path):
            raise FileNotFoundError(f"The file {path} does not exist")

    configuration_path = paths['configuration_path']
    sklearn_pipeline_path = paths['sklearn_pipeline_path']
    preprocess_pipeline_path = paths['preprocess_pipeline_path']

    # Load confs
    with open(configuration_path, 'r', encoding='utf-8') as f:
        configs = json.load(f)
    # Can't set int as keys in json, so need to cast it after reloading
    # dict_classes keys are always ints
    if 'dict_classes' in configs:
        configs['dict_classes'] = {int(k): v for k, v in configs['dict_classes'].items()}
    elif 'list_classes' in configs:
        # No dict_classes saved : rebuild it from the position of each class
        configs['dict_classes'] = dict(enumerate(configs['list_classes']))

    # Set class vars
    # self.model_name = # Keep the created name
    # self.model_dir = # Keep the created folder
    self.nb_fit = configs.get('nb_fit', 1)  # Consider one unique fit by default
    self.trained = configs.get('trained', True)  # Consider trained by default
    # Try to read the following attributes from configs and, if absent, keep the current one
    for attribute in ['model_type', 'x_col', 'y_col', 'columns_in', 'mandatory_columns',
                      'list_classes', 'dict_classes', 'multi_label', 'level_save',
                      'multiclass_strategy']:
        setattr(self, attribute, configs.get(attribute, getattr(self, attribute)))

    # Reload pipeline model
    # NOTE: pickle.load can execute arbitrary code - only reload files coming from a trusted source
    with open(sklearn_pipeline_path, 'rb') as f:
        self.pipeline = pickle.load(f)

    # Manage multi-labels or multi-classes
    if not self.multi_label and self.multiclass_strategy is None:
        # The 'lr' step is the estimator itself
        self.lr = self.pipeline['lr']
    else:
        # The 'lr' step is a wrapper : the base estimator is read from its `estimator` attribute
        self.lr = self.pipeline['lr'].estimator

    # Reload pipeline preprocessing
    # NOTE: same trust requirement as above (pickle)
    with open(preprocess_pipeline_path, 'rb') as f:
        self.preprocess_pipeline = pickle.load(f)

`save(json_data=None)`

Saves the model

Kwargs

json_data (dict): Additional configurations to be saved

Source code in template_num/models_training/classifiers/models_sklearn/model_logistic_regression_classifier.py

def save(self, json_data: Union[dict, None] = None) -> None:
    '''Saves the model, recording the multi-classes strategy in the saved configurations

    Kwargs:
        json_data (dict): Additional configurations to be saved (updated in place when provided)
    '''
    # Start from the provided configurations, or from an empty dict
    configurations = json_data if json_data is not None else {}

    # Record the strategy used by this model
    configurations['multiclass_strategy'] = self.multiclass_strategy

    # Delegate the actual save to the parent class
    super().save(json_data=configurations)

Model logistic regression classifier