# Source code for evidently_iris.evidently_iris

# Copyright 2025 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional

import pandas as pd
from sklearn.datasets import load_iris

import mlrun.model_monitoring.applications.context as mm_context
from mlrun.common.schemas.model_monitoring.constants import (
    ResultKindApp,
    ResultStatusApp,
)
from mlrun.feature_store.api import norm_column_name
from mlrun.model_monitoring.applications import ModelMonitoringApplicationResult
from mlrun.model_monitoring.applications.evidently import EvidentlyModelMonitoringApplicationBase

from evidently.core.report import Report, Snapshot
from evidently.metrics import DatasetMissingValueCount, ValueDrift
from evidently.presets import DataDriftPreset, DataSummaryPreset
from evidently.ui.workspace import (
    STR_UUID,
    OrgID,
)

_PROJECT_NAME = "Iris Monitoring"
_PROJECT_DESCRIPTION = "Test project using iris dataset"


class EvidentlyIrisMonitoringApp(EvidentlyModelMonitoringApplicationBase):
    """
    Example model monitoring application that combines MLRun with Evidently.

    The scikit-learn iris dataset is loaded once at construction time and used
    as the reference data for every Evidently report the application produces.
    Adapt it to your own project or use it as a reference implementation.
    """

    NAME = "Evidently-App-Test"

    def __init__(
        self,
        evidently_project_id: Optional["STR_UUID"] = None,
        evidently_workspace_path: Optional[str] = None,
        cloud_workspace: bool = False,
        evidently_organization_id: Optional["OrgID"] = None,
    ) -> None:
        """
        Store the organization ID, load the iris reference data, then delegate
        to the base class initializer.

        :param evidently_project_id:      Forwarded unchanged to the base class.
        :param evidently_workspace_path:  Forwarded unchanged to the base class.
        :param cloud_workspace:           Forwarded unchanged to the base class.
        :param evidently_organization_id: Kept on the instance as ``org_id``; it
                                          is not passed to the base initializer.
        """
        self.org_id = evidently_organization_id
        # Instance state is populated before the base initializer is invoked.
        # NOTE(review): presumably the base class may rely on it - confirm.
        self._init_iris_data()

        base_kwargs = {
            "evidently_project_id": evidently_project_id,
            "evidently_workspace_path": evidently_workspace_path,
            "cloud_workspace": cloud_workspace,
        }
        super().__init__(**base_kwargs)

    def _init_iris_data(self) -> None:
        """
        Load the iris dataset and keep it on the instance.

        Sets ``self.columns`` to the feature names after passing each through
        ``norm_column_name`` and ``self.train_set`` to a data frame of the iris
        feature values labelled with those columns.
        """
        dataset = load_iris()
        normalized_names = list(map(norm_column_name, dataset.feature_names))
        self.columns = normalized_names
        self.train_set = pd.DataFrame(dataset.data, columns=normalized_names)

    def do_tracking(
        self, monitoring_context: mm_context.MonitoringApplicationContext
    ) -> ModelMonitoringApplicationResult:
        """
        Build an Evidently report run for the current sample and record it.

        The sample data frame is restricted to the iris feature columns, a
        report run is created against the reference data, added to the
        Evidently workspace under the application's project ID, and logged
        through ``log_evidently_object`` under the name ``"evidently_report"``.

        :param monitoring_context: The monitoring context supplying the logger,
                                   the sample data frame and the end of the
                                   inference window.

        :returns: A hard-coded example result (value ``0.5``, data-drift kind,
                  potential-detection status); it is not derived from the report.
        """
        logger = monitoring_context.logger
        logger.info("Running evidently app")

        # Keep only the columns that exist in the reference data.
        current_features = monitoring_context.sample_df[self.columns]
        report_run = self.create_report_run(
            current_features, monitoring_context.end_infer_time
        )

        self.evidently_workspace.add_run(self.evidently_project_id, report_run)
        self.log_evidently_object(monitoring_context, report_run, "evidently_report")
        logger.info("Logged evidently object")

        return ModelMonitoringApplicationResult(
            name="data_drift_test",
            value=0.5,
            kind=ResultKindApp.data_drift,
            status=ResultStatusApp.potential_detection,
        )

    def create_report_run(
        self, sample_df: pd.DataFrame, schedule_time: pd.Timestamp
    ) -> "Snapshot":
        """
        Run an Evidently report comparing ``sample_df`` with the iris reference.

        The report contains the data drift preset, the dataset missing value
        count, the data summary preset and one ``ValueDrift`` metric per
        feature column using the ``"wasserstein"`` method. Tests are included.

        :param sample_df:     The current data to evaluate.
        :param schedule_time: Stored as a string under the ``"timestamp"`` key
                              of the report metadata.

        :returns: The object returned by running the report.
        """
        dataset_level_metrics = [
            DataDriftPreset(),
            DatasetMissingValueCount(),
            DataSummaryPreset(),
        ]
        column_level_metrics = [
            ValueDrift(column=feature, method="wasserstein")
            for feature in self.columns
        ]

        report = Report(
            metrics=dataset_level_metrics + column_level_metrics,
            metadata={"timestamp": str(schedule_time)},
            include_tests=True,
        )
        snapshot = report.run(current_data=sample_df, reference_data=self.train_set)
        return snapshot