Skip to content

Model gabarit

This module contains a ModelGabarit class you can use for your gabarit generated projects

ModelGabarit overrides some methods of the base Model class:
  • download_model method to download a model from a JFrog Artifactory repository ;
  • _load_model method to use the gabarit_package.models_training.utils_models.load_model function from a typical gabarit project ;
  • predict method to use the gabarit_package.models_training.utils_models.predict function from a typical gabarit project.

ModelGabarit

Bases: Model

Model class for a Gabarit generated project

  • download_model has been redefined to download a model from artifactory based on the settings : ARTIFACTORY_MODEL_URL, ARTIFACTORY_USER, ARTIFACTORY_PASSWORD
  • _load_model has been redefined to use utils_models.load_model
  • predict has been redefined to use utils_models.predict
Source code in template_api/model/model_gabarit.py
class ModelGabarit(Model):
    """Model class for a Gabarit generated project.

    Compared to the base ``Model`` class:

    - ``download_model`` has been redefined to download a model from Artifactory
      based on the settings ARTIFACTORY_MODEL_URL, ARTIFACTORY_USER and
      ARTIFACTORY_PASSWORD
    - ``_load_model`` has been redefined to use ``utils_models.load_model``
    - ``predict`` has been redefined to use ``utils_models.predict``
    """

    def __init__(self, *args, **kwargs):
        """Initialize the object by delegating to the parent initializer.

        All arguments are forwarded unchanged; see the parent ``__init__``
        method in template_api.model.model_base.Model for the attributes
        that get initialized.
        """
        super().__init__(*args, **kwargs)

    def predict(self, content: Any, *args, **kwargs) -> Any:
        """Make a prediction by calling utils_models.predict with the loaded model.

        Args:
            content: Input to predict on. A list or a dict is converted to a
                pandas DataFrame first; any other type is passed through as is.
            *args: Accepted for interface compatibility; not forwarded.
            **kwargs: Extra keyword arguments forwarded to utils_models.predict.

        Returns:
            Whatever utils_models.predict returns.
        """
        if isinstance(content, (list, dict)):
            content = pd.DataFrame(content)

        # For APIs, we default to alternative_version = True
        # It uses `tf.function` and `model.__call__` which is way faster for low number of inputs
        # It also prevents some memory issues with newest version of TensorFlow
        # https://github.com/tensorflow/tensorflow/issues/58676
        # You can change the inference batch size if it doesn't suit your model/project
        return utils_models.predict(content, model=self._model, model_conf=self._model_conf,
                                    inference_batch_size=128, alternative_version=True, **kwargs)

    def explain_as_json(self, content: Any, *args, **kwargs) -> Union[dict, list]:
        """Compute explanations about a prediction and return a JSON serializable object.

        A list or a dict ``content`` is converted to a pandas DataFrame before
        being handed, with all extra arguments, to the model explainer.
        """
        if isinstance(content, (list, dict)):
            content = pd.DataFrame(content)

        return self._model_explainer.explain_instance_as_json(content, *args, **kwargs)

    def explain_as_html(self, content: Any, *args, **kwargs) -> str:
        """Compute explanations about a prediction and return an HTML report.

        A list or a dict ``content`` is converted to a pandas DataFrame before
        being handed, with all extra arguments, to the model explainer.
        """
        if isinstance(content, (list, dict)):
            content = pd.DataFrame(content)

        return self._model_explainer.explain_instance_as_html(content, *args, **kwargs)

    def _load_model(self, **kwargs) -> None:
        """Load a model in a gabarit fashion.

        Builds a ModelSettings from ``kwargs``, loads the model found at
        ``settings.model_path`` and stores the model, its configuration and
        an Explainer built from them on the instance.
        """
        settings = ModelSettings(**kwargs)

        # Replace get_data_path method from gabarit.utils to use template_api data directory
        if hasattr(utils_gabarit, "get_data_path"):
            utils_gabarit.get_data_path = lambda: str(settings.data_dir.resolve())

        # Using is_path=True allow to specify a path instead of a folder relative
        # to gabarit_package.utils.DIR_PATH
        model, model_conf = utils_models.load_model(model_dir=settings.model_path, is_path=True)

        # Set attributes
        self._model = model
        self._model_conf = model_conf

        # Create a model explainer
        self._model_explainer = Explainer(model=model, model_conf=model_conf)

    @staticmethod
    def download_model(**kwargs) -> bool:
        """Download the model from a JFrog Artifactory repository.

        The archive is downloaded into a temporary directory created inside
        ``settings.models_dir`` and unpacked into ``settings.model_path``.

        Args:
            **kwargs: Values used to build the ModelSettings.

        Returns:
            True, both when the download is skipped and when it completes.

        Raises:
            ImportError: If the ``artifactory`` module is not installed.
        """
        settings = ModelSettings(**kwargs)

        model_path = settings.model_path

        # If the model already exists there is no need to download it.
        # "Exists" means a directory that actually contains something: an empty
        # directory holds no model and must not short-circuit the download.
        if not settings.redownload and model_path.is_dir() and any(model_path.iterdir()):
            logger.info(f"The model is already dowloaded : {model_path} already exists")
            return True

        # Create models directory if it does not exist
        models_dir = settings.models_dir
        models_dir.mkdir(parents=True, exist_ok=True)

        # Download model from artifactory
        try:
            from artifactory import ArtifactoryPath
        except ImportError as err:
            raise ImportError(
                "Module artifactory not found. Please install it : `pip install dohq-artifactory`"
            ) from err

        # NOTE(review): verify=False is kept from the original code; it looks like
        # it turns off TLS certificate verification - confirm this is intended.
        model_artifactory_path = ArtifactoryPath(
            settings.artifactory_model_url,
            auth=(settings.artifactory_user, settings.artifactory_password),
            verify=False,
        )

        # The temporary directory (and the archive inside it) is removed when the
        # `with` block exits.
        with tempfile.TemporaryDirectory(dir=models_dir) as tmpdir:
            model_archive_path = Path(tmpdir) / model_artifactory_path.name

            # Download model
            logger.info(f"Downloading the model to : {model_path}")
            with model_archive_path.open("wb") as out:
                model_artifactory_path.writeto(out)

            # Unzip model
            shutil.unpack_archive(model_archive_path, model_path)
            logger.info("Model downloaded")

        logger.info("Model archive removed")
        return True

__init__(*args, **kwargs)

Object initialization. By default, it initializes the attributes _model, _model_config and _loaded.

See the parent __init__ method in template_api.model.model_base.Model.

Source code in template_api/model/model_gabarit.py
def __init__(self, *args, **kwargs):
    """Initialize the object by delegating to the parent initializer.

    All positional and keyword arguments are forwarded unchanged to the
    parent __init__ method (see template_api.model.model_base.Model), which,
    per the original note, initializes the attributes _model, _model_config
    and _loaded.

    NOTE(review): the other methods of this class read and write
    ``_model_conf``, not ``_model_config`` - confirm the attribute name
    against the parent class.
    """
    super().__init__(*args, **kwargs)

download_model(**kwargs) staticmethod

Download the model from a JFrog Artifactory repository

Source code in template_api/model/model_gabarit.py
@staticmethod
def download_model(**kwargs) -> bool:
    """Download the model from a JFrog Artifactory repository.

    The archive is downloaded into a temporary directory created inside
    ``settings.models_dir`` and unpacked into ``settings.model_path``.

    Args:
        **kwargs: Values used to build the ModelSettings.

    Returns:
        True, both when the download is skipped and when it completes.

    Raises:
        ImportError: If the ``artifactory`` module is not installed.
    """
    settings = ModelSettings(**kwargs)

    model_path = settings.model_path

    # If the model already exists there is no need to download it.
    # "Exists" means a directory that actually contains something: an empty
    # directory holds no model and must not short-circuit the download.
    if not settings.redownload and model_path.is_dir() and any(model_path.iterdir()):
        logger.info(f"The model is already dowloaded : {model_path} already exists")
        return True

    # Create models directory if it does not exist
    models_dir = settings.models_dir
    models_dir.mkdir(parents=True, exist_ok=True)

    # Download model from artifactory
    try:
        from artifactory import ArtifactoryPath
    except ImportError as err:
        raise ImportError(
            "Module artifactory not found. Please install it : `pip install dohq-artifactory`"
        ) from err

    # NOTE(review): verify=False is kept from the original code; it looks like
    # it turns off TLS certificate verification - confirm this is intended.
    model_artifactory_path = ArtifactoryPath(
        settings.artifactory_model_url,
        auth=(settings.artifactory_user, settings.artifactory_password),
        verify=False,
    )

    # The temporary directory (and the archive inside it) is removed when the
    # `with` block exits.
    with tempfile.TemporaryDirectory(dir=models_dir) as tmpdir:
        model_archive_path = Path(tmpdir) / model_artifactory_path.name

        # Download model
        logger.info(f"Downloading the model to : {model_path}")
        with model_archive_path.open("wb") as out:
            model_artifactory_path.writeto(out)

        # Unzip model
        shutil.unpack_archive(model_archive_path, model_path)
        logger.info("Model downloaded")

    logger.info("Model archive removed")
    return True

explain_as_html(content, *args, **kwargs)

Compute explanations about a prediction and return an HTML report

Source code in template_api/model/model_gabarit.py
def explain_as_html(self, content: Any, *args, **kwargs) -> str:
    """Build an HTML explanation report for a prediction.

    List and dict inputs are wrapped in a pandas DataFrame; anything else is
    forwarded untouched. The extra positional and keyword arguments go
    straight to the model explainer.
    """
    needs_frame = isinstance(content, (list, dict))
    payload = pd.DataFrame(content) if needs_frame else content
    explainer = self._model_explainer
    return explainer.explain_instance_as_html(payload, *args, **kwargs)

explain_as_json(content, *args, **kwargs)

Compute explanations about a prediction and return a JSON serializable object

Source code in template_api/model/model_gabarit.py
def explain_as_json(self, content: Any, *args, **kwargs) -> Union[dict, list]:
    """Build a JSON serializable explanation for a prediction.

    List and dict inputs are wrapped in a pandas DataFrame; anything else is
    forwarded untouched. The extra positional and keyword arguments go
    straight to the model explainer.
    """
    needs_frame = isinstance(content, (list, dict))
    payload = pd.DataFrame(content) if needs_frame else content
    explainer = self._model_explainer
    return explainer.explain_instance_as_json(payload, *args, **kwargs)

predict(content, *args, **kwargs)

Make a prediction by calling utils_models.predict with the loaded model

Source code in template_api/model/model_gabarit.py
def predict(self, content: Any, *args, **kwargs) -> Any:
    """Run utils_models.predict on ``content`` with the loaded model.

    List and dict inputs are wrapped in a pandas DataFrame; anything else is
    forwarded untouched. Extra keyword arguments are passed on to
    utils_models.predict; extra positional arguments are not.
    """
    needs_frame = isinstance(content, (list, dict))
    payload = pd.DataFrame(content) if needs_frame else content

    # API calls default to alternative_version=True: it relies on `tf.function`
    # and `model.__call__`, which is way faster for a low number of inputs and
    # also prevents some memory issues with the newest versions of TensorFlow
    # (https://github.com/tensorflow/tensorflow/issues/58676).
    # Change the inference batch size if it doesn't suit your model/project.
    return utils_models.predict(
        payload,
        model=self._model,
        model_conf=self._model_conf,
        inference_batch_size=128,
        alternative_version=True,
        **kwargs,
    )

ModelSettings

Bases: BaseSettings

Download settings

This class is used for settings management purpose, have a look at the pydantic documentation for more details : https://pydantic-docs.helpmanual.io/usage/settings/

By default, it looks for environment variables (case insensitive) to set the settings. If a variable is not found, it looks for a file named .env in your working directory, where you can declare the values of the variables. Finally, it falls back to the default values shown in the source code below.

Source code in template_api/model/model_gabarit.py
class ModelSettings(BaseSettings):
    """Download settings

    This class is used for settings management purposes; have a look at the pydantic
    documentation for more details : https://docs.pydantic.dev/latest/concepts/pydantic_settings/

    By default, it looks for environment variables (case insensitive) to set the settings.
    If a variable is not found, it looks for a file named .env in your working directory
    where you can declare the values of the variables, and finally it falls back to the
    default values declared below.
    """

    # Directory returned by the patched gabarit `get_data_path` when a model is loaded
    data_dir: Path = DEFAULT_DATA_DIR
    # Directory holding the models; created on download if it does not exist
    models_dir: Path = DEFAULT_MODELS_DIR
    # Directory the model is loaded from and the downloaded archive is unpacked into
    model_path: Path = DEFAULT_MODELS_DIR / "model"
    # URL of the model archive in Artifactory
    artifactory_model_url: str = ""
    # Credentials used to authenticate against Artifactory
    artifactory_user: str = ""
    artifactory_password: str = ""
    # When True, download the model even if it is already present
    redownload: bool = False

    # Values may come from a `.env` file and unknown entries are ignored.
    # NOTE(review): protected_namespaces is presumably overridden so that the
    # `model_*` field names above (e.g. model_path) do not clash with pydantic's
    # default protected `model_` namespace - confirm against the pydantic docs.
    model_config = SettingsConfigDict(env_file=".env", extra='ignore', protected_namespaces=('settings', ))