Model Server#

import mlrun
%nuclio config kind="serving"
%nuclio env MODEL_CLASS=ClassifierModel
%nuclio config spec.build.baseImage = "mlrun/mlrun"
%nuclio: setting kind to 'serving'
%nuclio: setting 'MODEL_CLASS' environment variable
%nuclio: setting spec.build.baseImage to 'mlrun/mlrun'
from cloudpickle import load
from typing import List
from datetime import datetime
from sklearn.datasets import load_iris

import warnings
warnings.filterwarnings('ignore')

import os
import numpy as np

class ClassifierModel(mlrun.runtimes.MLModelServer):
    def load(self):
        """Load model from storage."""
        model_file, extra_data = self.get_model('.pkl')
        self.model = load(open(model_file, 'rb'))

    def predict(self, body: dict) -> List:
        """Generate model predictions from sample.
        
        :param body: A dict with an `instances` key holding the observations,
            each a 1-dimensional feature vector.

        Returns the model predictions as a `List`, one entry per observation in
        `body['instances']`.
        """
        try:
            feats = np.asarray(body['instances'])
            result: np.ndarray = self.model.predict(feats)
            resp = result.tolist()
        except Exception as e:
            raise Exception(f"Failed to predict: {e}") from e
        
        return resp
# nuclio: end-code

Test models locally and deploy#

The sklearn-project example generated one or more models, which will be deployed in the server project sklearn-servers.

Test locally#

model = 'https://s3.wasabisys.com/iguazio/models/iris/model.pkl'

iris = load_iris()

x = iris['data'].tolist()
y = iris['target']

my_server = ClassifierModel('classifier', model_dir=model)
my_server.load()

a = my_server.predict({"instances": x})
assert len(a) == 150
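
As a quick optional sanity check (not part of the original flow), the predictions can be compared against the ground-truth labels loaded earlier; on the training data the classifier should score well above chance:

# compare local predictions with the iris target labels
accuracy = np.mean(np.asarray(a) == y)
print(f'accuracy on training data: {accuracy:.3f}')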

Document and save#

fn = mlrun.new_model_server('model-server', model_class='ClassifierModel')
fn.spec.description = "generic sklearn model server"
fn.metadata.categories = ['serving', 'ml']
fn.metadata.labels = {'author': 'yaronh', 'framework': 'sklearn'}
fn.export()
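
`fn.export()` writes the function spec to `function.yaml` in the current directory by default; if needed, the saved function can be reloaded later with `mlrun.import_function` (a minimal sketch, assuming the default export target):

# reload the exported function spec from function.yaml
fn = mlrun.import_function('function.yaml')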

Deploy server#

import mlrun
user_name = os.getenv("V3IO_USER_NAME")
artifact_path = mlrun.set_environment(api_path='http://mlrun-api:8080',
                                      artifact_path=os.path.abspath('./'))
fn.apply(mlrun.mount_v3io())
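# SERVING_MODEL_<name> maps a served model name to its artifact path;
# INFERENCE_STREAM (optional) names the v3io stream that receives inference events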
fn.set_envs({'SERVING_MODEL_iris_dataset_v1': model,
             'INFERENCE_STREAM': 'users/{}/tststream'.format(user_name)})

address = fn.deploy(project='sk-project')
> 2020-12-06 11:26:41,000 [info] Starting remote function deploy
2020-12-06 11:26:41  (info) Deploying function
2020-12-06 11:26:41  (info) Building
2020-12-06 11:26:41  (info) Staging files and preparing base images
2020-12-06 11:26:41  (info) Building processor image
2020-12-06 11:28:28  (info) Build complete
2020-12-06 11:28:34  (info) Function deploy complete
> 2020-12-06 11:28:35,076 [info] function deployed, address=default-tenant.app.yh210.iguazio-cd2.com:31804

Test server#

predict_url = address+"/iris_dataset_v1/predict"
my_data = '''{"instances":[[5.1, 3.5, 1.4, 0.2],[7.7, 3.8, 6.7, 2.2]]}'''
!curl {predict_url} -d '{my_data}'
[0, 2]
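
The same request can also be sent from Python; below is a minimal sketch using the `requests` library (assumed to be available in the environment):

import requests

# prepend a scheme if the deploy address does not include one
url = predict_url if predict_url.startswith('http') else f'http://{predict_url}'
resp = requests.post(url, json={'instances': [[5.1, 3.5, 1.4, 0.2],
                                              [7.7, 3.8, 6.7, 2.2]]})
print(resp.json())  # expected: [0, 2]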