Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
pip install mlserver mlserver-mlflowThe `dict` content type can be _stacked_ with other content types, like
[`np`](../../docs/user-guide/content-type).
This allows the user to use a different set of content types to decode each of
the dict entries./v2/models/{model_name}/versions/{model_version}/docs/dataplane.json



pip install mlserver mlserver-sklearn---
emphasize-lines: 10-12
---
{
"inputs": [
{
"name": "my-input",
"datatype": "INT32",
"shape": [2, 2],
"data": [1, 2, 3, 4]
}
],
"outputs": [
{ "name": "predict_proba" }
]
}pip install mlserver mlserver-lightgbmon_worker_stop(worker: Worker) -> Nonestart()stop(sig: Optional[int] = None)configure_metrics(settings: Settings)log(metrics)register(name: str, description: str) -> Histogramimport mlserver
from mlserver.types import InferenceRequest, InferenceResponse
class MyCustomRuntime(mlserver.MLModel):
async def load(self) -> bool:
self._model = load_my_custom_model()
mlserver.register("my_custom_metric", "This is a custom metric example")
return True
async def predict(self, payload: InferenceRequest) -> InferenceResponse:
mlserver.log(my_custom_metric=34)
# TODO: Replace for custom logic to run inference
return self._model.predict(payload)apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: my-model
spec:
predictor:
sklearn:
protocolVersion: v2
storageUri: gs://seldon-models/sklearn/iriskubectl apply -f my-inferenceservice-manifest.yamlapiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
name: my-model
spec:
predictor:
containers:
- name: classifier
image: my-custom-server:0.1.0
env:
- name: PROTOCOL
value: v2
ports:
- containerPort: 8080
protocol: TCPkubectl apply -f my-inferenceservice-manifest.yamlapiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
name: my-model
spec:
protocol: v2
predictors:
- name: default
graph:
name: classifier
implementation: SKLEARN_SERVER
modelUri: gs://seldon-models/sklearn/iriskubectl apply -f my-seldondeployment-manifest.yamlapiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
name: my-model
spec:
protocol: v2
predictors:
- name: default
graph:
name: classifier
componentSpecs:
- spec:
containers:
- name: classifier
image: my-custom-server:0.1.0kubectl apply -f my-seldondeployment-manifest.yamlpip install mlserver mlserver-xgboostBy default, the runtime will look for a file called `model.[json | ubj | bst]`.
However, this can be modified through the `parameters.uri` field of your
{class}`ModelSettings <mlserver.settings.ModelSettings>` config (see the
section on [Model Settings](../../docs/reference/model-settings.md) for more
details).
```{code-block} json
---
emphasize-lines: 3-5
---
{
"name": "foo",
"parameters": {
"uri": "./my-own-model-filename.json"
}
}
```---
emphasize-lines: 10-12
---
{
"inputs": [
{
"name": "my-input",
"datatype": "INT32",
"shape": [2, 2],
"data": [1, 2, 3, 4]
}
],
"outputs": [
{ "name": "predict_proba" }
]
}load(), predict(), unload().




pip install mlserver mlserver-catboostpip install mlserver spacy wikipedia-apipython -m spacy download en_core_web_lgmkdir -p similarity_modelimport spacynlp = spacy.load("en_core_web_lg")import wikipediaapiwiki_wiki = wikipediaapi.Wikipedia('MyMovieEval ([email protected])', 'en')barbie = wiki_wiki.page('Barbie_(film)').summary
oppenheimer = wiki_wiki.page('Oppenheimer_(film)').summary
print(barbie)
print()
print(oppenheimer)Barbie is a 2023 American fantasy comedy film directed by Greta Gerwig and written by Gerwig and Noah Baumbach. Based on the Barbie fashion dolls by Mattel, it is the first live-action Barbie film after numerous computer-animated direct-to-video and streaming television films. The film stars Margot Robbie as Barbie and Ryan Gosling as Ken, and follows the two on a journey of self-discovery following an existential crisis. The film also features an ensemble cast that includes America Ferrera, Kate McKinnon, Issa Rae, Rhea Perlman, and Will Ferrell...
Oppenheimer is a 2023 biographical thriller film written and directed by Christopher Nolan. Based on the 2005 biography American Prometheus by Kai Bird and Martin J. Sherwin, the film chronicles the life of J. Robert Oppenheimer, a theoretical physicist who was pivotal in developing the first nuclear weapons as part of the Manhattan Project, and thereby ushering in the Atomic Age. Cillian Murphy stars as Oppenheimer, with Emily Blunt as Oppenheimer's wife Katherine "Kitty" Oppenheimer; Matt Damon as General Leslie Groves, director of the Manhattan Project; and Robert Downey Jr. as Lewis Strauss, a senior member of the United States Atomic Energy Commission. The ensemble supporting cast includes Florence Pugh, Josh Hartnett, Casey Affleck, Rami Malek, Gary Oldman and Kenneth Branagh...doc1 = nlp(barbie)
doc2 = nlp(oppenheimer)doc1.similarity(doc2)0.9866910567224084# similarity_model/my_model.py
from mlserver.codecs import decode_args
from mlserver import MLModel
from typing import List
import numpy as np
import spacy
class MyKulModel(MLModel):
async def load(self):
self.model = spacy.load("en_core_web_lg")
@decode_args
async def predict(self, docs: List[str]) -> np.ndarray:
doc1 = self.model(docs[0])
doc2 = self.model(docs[1])
return np.array(doc1.similarity(doc2))# similarity_model/model-settings.json
{
"name": "doc-sim-model",
"implementation": "my_model.MyKulModel"
}mlserver start similarity_model/from mlserver.codecs import StringCodec
import requestsinference_request = {
"inputs": [
StringCodec.encode_input(name='docs', payload=[barbie, oppenheimer], use_bytes=False).model_dump()
]
}
print(inference_request){'inputs': [{'name': 'docs',
'shape': [2, 1],
'datatype': 'BYTES',
'parameters': {'content_type': 'str'},
'data': [
'Barbie is a 2023 American fantasy comedy...',
'Oppenheimer is a 2023 biographical thriller...'
]
}]
}r = requests.post('http://0.0.0.0:8080/v2/models/doc-sim-model/infer', json=inference_request)r.json(){'model_name': 'doc-sim-model',
'id': 'a4665ddb-1868-4523-bd00-a25902d9b124',
'parameters': {},
'outputs': [{'name': 'output-0',
'shape': [1],
'datatype': 'FP64',
'parameters': {'content_type': 'np'},
'data': [0.9866910567224084]}]}print(f"Our movies are {round(r.json()['outputs'][0]['data'][0] * 100, 4)}% similar!")Our movies are 98.6691% similar# similarity_model/settings.json
{
"parallel_workers": 3
}mlserver start similarity_modeldeep_impact = wiki_wiki.page('Deep_Impact_(film)').summary
armageddon = wiki_wiki.page('Armageddon_(1998_film)').summary
antz = wiki_wiki.page('Antz').summary
a_bugs_life = wiki_wiki.page("A_Bug's_Life").summary
the_dark_night = wiki_wiki.page('The_Dark_Knight').summary
mamma_mia = wiki_wiki.page('Mamma_Mia!_(film)').summarydef get_sim_score(movie1, movie2):
response = requests.post(
'http://0.0.0.0:8080/v2/models/doc-sim-model/infer',
json={
"inputs": [
StringCodec.encode_input(name='docs', payload=[movie1, movie2], use_bytes=False).model_dump()
]
})
return response.json()['outputs'][0]['data'][0]get_sim_score(deep_impact, armageddon)0.9569279450151813results = list(
map(get_sim_score, (deep_impact, antz, the_dark_night), (armageddon, a_bugs_life, mamma_mia))
)
results[0.9569279450151813, 0.9725374771538605, 0.9626173937217876]for movie1, movie2 in zip((deep_impact, antz, the_dark_night), (armageddon, a_bugs_life, mamma_mia)):
print(get_sim_score(movie1, movie2))0.9569279450151813
0.9725374771538605
0.9626173937217876# similarity_model/requirements.txt
mlserver
spacy==3.6.0
https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.6.0/en_core_web_lg-3.6.0-py3-none-any.whlmlserver build similarity_model/ -t 'fancy_ml_service'docker imagesdocker run -it --rm -p 8080:8080 fancy_ml_servicepip install mlserver mlserver-huggingface---
emphasize-lines: 5-8
---
{
"name": "qa",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"parameters": {
"extra": {
"task": "question-answering",
"optimum_model": true
}
}
}These settings can also be injected through environment variables prefixed with `MLSERVER_MODEL_HUGGINGFACE_`, e.g.
```bash
MLSERVER_MODEL_HUGGINGFACE_TASK="question-answering"
MLSERVER_MODEL_HUGGINGFACE_OPTIMUM_MODEL=true
```If `parameters.extra.pretrained_model` is specified, it takes precedence over `parameters.uri`.
.. autopydantic_settings:: mlserver_huggingface.settings.HuggingFaceSettingspip install mlserver mlserver-alibi-detect---
emphasize-lines: 6-8
---
{
"name": "drift-detector",
"implementation": "mlserver_alibi_detect.AlibiDetectRuntime",
"parameters": {
"uri": "./alibi-detect-artifact/",
"extra": {
"batch_size": 5
}
}
}
.. autopydantic_settings:: mlserver_alibi_detect.runtime.AlibiDetectSettingsmlserver --helpmlserver [OPTIONS] COMMAND [ARGS]...decode(request_input: RequestInput, default_codec: Union[type[ForwardRef('InputCodec')], ForwardRef('InputCodec'), None] = None) -> Any<text>MLSERVER_INFER_MODEL_NAMEimport lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import os
model_dir = "."
BST_FILE = "iris-lightgbm.bst"
iris = load_iris()
y = iris['target']
X = iris['data']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
dtrain = lgb.Dataset(X_train, label=y_train)
params = {
'objective':'multiclass',
'metric':'softmax',
'num_class': 3
}
lgb_model = lgb.train(params=params, train_set=dtrain)
model_file = os.path.join(model_dir, BST_FILE)
lgb_model.save_model(model_file)%%writefile settings.json
{
"debug": "true"
}%%writefile model-settings.json
{
"name": "iris-lgb",
"implementation": "mlserver_lightgbm.LightGBMModel",
"parameters": {
"uri": "./iris-lightgbm.bst",
"version": "v0.1.0"
}
}mlserver start .import requests
x_0 = X_test[0:1]
inference_request = {
"inputs": [
{
"name": "predict-prob",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8080/v2/models/iris-lgb/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)
response.json()y_test[0]mlserver build [OPTIONS] FOLDERmlserver dockerfile [OPTIONS] FOLDERmlserver infer [OPTIONS]mlserver init [OPTIONS]mlserver start [OPTIONS] FOLDERdecode_request(inference_request: InferenceRequest, default_codec: Union[type[ForwardRef('RequestCodec')], ForwardRef('RequestCodec'), None] = None) -> Anyencode(payload: Any, request_output: RequestOutput, default_codec: Union[type[ForwardRef('InputCodec')], ForwardRef('InputCodec'), None] = None) -> ResponseOutputencode_response(payload: Any, default_codec: Union[type[ForwardRef('RequestCodec')], ForwardRef('RequestCodec'), None] = None) -> InferenceResponseload() -> boolmetadata() -> MetadataModelResponsepredict(payload: InferenceRequest) -> InferenceResponsepredict_stream(payloads: AsyncIterator[InferenceRequest]) -> AsyncIterator[InferenceResponse]unload() -> bool!cp -r ../mms/models/* ./modelsmlserver start .import requests
response = requests.post("http://localhost:8080/v2/repository/index", json={})
response.json()requests.post("http://localhost:8080/v2/repository/models/mushroom-xgboost/unload")response = requests.post("http://localhost:8080/v2/repository/index", json={})
response.json()requests.post("http://localhost:8080/v2/repository/models/mushroom-xgboost/load")response = requests.post("http://localhost:8080/v2/repository/index", json={})
response.json()# Original code and extra details can be found in:
# https://xgboost.readthedocs.io/en/latest/get_started.html#python
import os
import xgboost as xgb
import requests
from urllib.parse import urlparse
from sklearn.datasets import load_svmlight_file
TRAIN_DATASET_URL = 'https://raw.githubusercontent.com/dmlc/xgboost/master/demo/data/agaricus.txt.train'
TEST_DATASET_URL = 'https://raw.githubusercontent.com/dmlc/xgboost/master/demo/data/agaricus.txt.test'
def _download_file(url: str) -> str:
    """Download `url` into the current working directory.

    The local file name is taken from the last path component of the URL.

    :param url: HTTP(S) URL of the file to fetch.
    :return: absolute path of the downloaded file.
    :raises requests.HTTPError: if the server responds with an error status.
    """
    parsed = urlparse(url)
    file_name = os.path.basename(parsed.path)
    file_path = os.path.join(os.getcwd(), file_name)
    res = requests.get(url)
    # Fail loudly on HTTP errors instead of silently writing an error page to disk
    res.raise_for_status()
    with open(file_path, 'wb') as file:
        file.write(res.content)
    return file_path
train_dataset_path = _download_file(TRAIN_DATASET_URL)
test_dataset_path = _download_file(TEST_DATASET_URL)
# NOTE: Workaround to load SVMLight files from the XGBoost example
X_train, y_train = load_svmlight_file(train_dataset_path)
X_test, y_test = load_svmlight_file(test_dataset_path)
X_train = X_train.toarray()
X_test = X_test.toarray()
# read in data
dtrain = xgb.DMatrix(data=X_train, label=y_train)
# specify parameters via map
param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic' }
num_round = 2
bst = xgb.train(param, dtrain, num_round)
bstmodel_file_name = 'mushroom-xgboost.json'
bst.save_model(model_file_name)%%writefile settings.json
{
"debug": "true"
}%%writefile model-settings.json
{
"name": "mushroom-xgboost",
"implementation": "mlserver_xgboost.XGBoostModel",
"parameters": {
"uri": "./mushroom-xgboost.json",
"version": "v0.1.0"
}
}mlserver start .import requests
x_0 = X_test[0:1]
inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8080/v2/models/mushroom-xgboost/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)
response.json()y_test[0]# Original source code and more details can be found in:
# https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# Split data into train and test subsets
X_train, X_test, y_train, y_test = train_test_split(
data, digits.target, test_size=0.5, shuffle=False)
# We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)import joblib
model_file_name = "mnist-svm.joblib"
joblib.dump(classifier, model_file_name)%%writefile settings.json
{
"debug": "true"
}%%writefile model-settings.json
{
"name": "mnist-svm",
"implementation": "mlserver_sklearn.SKLearnModel",
"parameters": {
"uri": "./mnist-svm.joblib",
"version": "v0.1.0"
}
}mlserver start .import requests
x_0 = X_test[0:1]
inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8080/v2/models/mnist-svm/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)
response.json()y_test[0]%%writefile environment.yml
name: old-sklearn
channels:
- conda-forge
dependencies:
- python == 3.8
- scikit-learn == 0.24.2
- joblib == 0.17.0
- requests
- pip
- pip:
- mlserver == 1.1.0
- mlserver-sklearn == 1.1.0!conda env create --force -f environment.yml
!conda activate old-sklearn# Original source code and more details can be found in:
# https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# Split data into train and test subsets
X_train, X_test, y_train, y_test = train_test_split(
data, digits.target, test_size=0.5, shuffle=False)
# We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)import joblib
model_file_name = "model.joblib"
joblib.dump(classifier, model_file_name)!conda pack --force -n old-sklearn -o old-sklearn.tar.gz%%writefile model-settings.json
{
"name": "mnist-svm",
"implementation": "mlserver_sklearn.SKLearnModel"
}docker run -it --rm \
-v "$PWD":/mnt/models \
-e "MLSERVER_ENV_TARBALL=/mnt/models/old-sklearn.tar.gz" \
-p 8080:8080 \
seldonio/mlserver:1.1.0-slimimport requests
x_0 = X_test[0:1]
inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8080/v2/models/mnist-svm/infer"
response = requests.post(endpoint, json=inference_request)
response.json()MLModel <mlserver.MLModel>{
"model": "sum-model",
"implementation": "models.MyCustomRuntime"
}{
"model": "sum-model",
"implementation": "models.MyCustomRuntime",
"parameters": {
"environment_tarball": "./environment.tar.gz"
}
}DOCKER_BUILDKIT=1 docker build . -t my-custom-runtime:0.1.0from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse
class MyCustomRuntime(MLModel):
async def load(self) -> bool:
# TODO: Replace for custom logic to load a model artifact
self._model = load_my_custom_model()
return True
async def predict(self, payload: InferenceRequest) -> InferenceResponse:
# TODO: Replace for custom logic to run inference
return self._model.predict(payload)from mlserver import MLModel
from mlserver.codecs import decode_args
from typing import List
class MyCustomRuntime(MLModel):
async def load(self) -> bool:
# TODO: Replace for custom logic to load a model artifact
self._model = load_my_custom_model()
return True
@decode_args
async def predict(self, questions: List[str], context: List[str]) -> np.ndarray:
# TODO: Replace for custom logic to run inference
return self._model.predict(questions, context)from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse


class CustomHeadersRuntime(MLModel):
    """Example runtime showing how to read incoming HTTP headers / gRPC metadata."""

    ...

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        # Fix: original read `payload.parametes.headers` (typo) and would
        # raise AttributeError whenever parameters were present.
        if payload.parameters and payload.parameters.headers:
            # These are all the incoming HTTP headers / gRPC metadata
            print(payload.parameters.headers)
...from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse
class CustomHeadersRuntime(MLModel):
...
async def predict(self, payload: InferenceRequest) -> InferenceResponse:
...
return InferenceResponse(
# Include any actual outputs from inference
outputs=[],
parameters=Parameters(headers={"foo": "bar"})
).
└── models
└── sum-model
├── model-settings.json
├── models.py.
└── models
└── sum-model
├── environment.tar.gz
├── model-settings.json
├── models.pymlserver build . -t my-custom-servermodels/mushroom-xgboost/model-settings.json: holds the configuration specific to our mushroom-xgboost model (e.g. input type, runtime to use, etc.).# Original source code and more details can be found in:
# https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# Split data into train and test subsets
X_train, X_test_digits, y_train, y_test_digits = train_test_split(
data, digits.target, test_size=0.5, shuffle=False)
# We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)import joblib
import os
mnist_svm_path = os.path.join("models", "mnist-svm")
os.makedirs(mnist_svm_path, exist_ok=True)
mnist_svm_model_path = os.path.join(mnist_svm_path, "model.joblib")
joblib.dump(classifier, mnist_svm_model_path)# Original code and extra details can be found in:
# https://xgboost.readthedocs.io/en/latest/get_started.html#python
import os
import xgboost as xgb
import requests
from urllib.parse import urlparse
from sklearn.datasets import load_svmlight_file
TRAIN_DATASET_URL = 'https://raw.githubusercontent.com/dmlc/xgboost/master/demo/data/agaricus.txt.train'
TEST_DATASET_URL = 'https://raw.githubusercontent.com/dmlc/xgboost/master/demo/data/agaricus.txt.test'
def _download_file(url: str) -> str:
    """Download `url` into the current working directory.

    The local file name is taken from the last path component of the URL.

    :param url: HTTP(S) URL of the file to fetch.
    :return: absolute path of the downloaded file.
    :raises requests.HTTPError: if the server responds with an error status.
    """
    parsed = urlparse(url)
    file_name = os.path.basename(parsed.path)
    file_path = os.path.join(os.getcwd(), file_name)
    res = requests.get(url)
    # Fail loudly on HTTP errors instead of silently writing an error page to disk
    res.raise_for_status()
    with open(file_path, 'wb') as file:
        file.write(res.content)
    return file_path
train_dataset_path = _download_file(TRAIN_DATASET_URL)
test_dataset_path = _download_file(TEST_DATASET_URL)
# NOTE: Workaround to load SVMLight files from the XGBoost example
X_train, y_train = load_svmlight_file(train_dataset_path)
X_test_agar, y_test_agar = load_svmlight_file(test_dataset_path)
X_train = X_train.toarray()
X_test_agar = X_test_agar.toarray()
# read in data
dtrain = xgb.DMatrix(data=X_train, label=y_train)
# specify parameters via map
param = {'max_depth':2, 'eta':1, 'objective':'binary:logistic' }
num_round = 2
bst = xgb.train(param, dtrain, num_round)
bstimport os
mushroom_xgboost_path = os.path.join("models", "mushroom-xgboost")
os.makedirs(mushroom_xgboost_path, exist_ok=True)
mushroom_xgboost_model_path = os.path.join(mushroom_xgboost_path, "model.json")
bst.save_model(mushroom_xgboost_model_path)%%writefile settings.json
{
"debug": "true"
}%%writefile models/mnist-svm/model-settings.json
{
"name": "mnist-svm",
"implementation": "mlserver_sklearn.SKLearnModel",
"parameters": {
"version": "v0.1.0"
}
}%%writefile models/mushroom-xgboost/model-settings.json
{
"name": "mushroom-xgboost",
"implementation": "mlserver_xgboost.XGBoostModel",
"parameters": {
"version": "v0.1.0"
}
}
mlserver start .import requests
x_0 = X_test_digits[0:1]
inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8080/v2/models/mnist-svm/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)
response.json()import requests
x_0 = X_test_agar[0:1]
inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8080/v2/models/mushroom-xgboost/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)
response.json()!wget https://apache.mirrors.nublue.co.uk/kafka/2.8.0/kafka_2.12-2.8.0.tgz
!tar -zxvf kafka_2.12-2.8.0.tgz
!./kafka_2.12-2.8.0/bin/kafka-storage.sh format -t OXn8RTSlQdmxwjhKnSB_6A -c ./kafka_2.12-2.8.0/config/kraft/server.properties!./kafka_2.12-2.8.0/bin/kafka-server-start.sh ./kafka_2.12-2.8.0/config/kraft/server.properties!./kafka_2.12-2.8.0/bin/kafka-topics.sh --create --topic mlserver-input --partitions 1 --replication-factor 1 --bootstrap-server localhost:9092
!./kafka_2.12-2.8.0/bin/kafka-topics.sh --create --topic mlserver-output --partitions 1 --replication-factor 1 --bootstrap-server localhost:9092# Original source code and more details can be found in:
# https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html
# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
# The digits dataset
digits = datasets.load_digits()
# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)
# Split data into train and test subsets
X_train, X_test, y_train, y_test = train_test_split(
data, digits.target, test_size=0.5, shuffle=False)
# We learn the digits on the first half of the digits
classifier.fit(X_train, y_train)import joblib
model_file_name = "mnist-svm.joblib"
joblib.dump(classifier, model_file_name)%%writefile settings.json
{
"debug": "true",
"kafka_enabled": "true"
}%%writefile model-settings.json
{
"name": "mnist-svm",
"implementation": "mlserver_sklearn.SKLearnModel",
"parameters": {
"uri": "./mnist-svm.joblib",
"version": "v0.1.0"
}
}mlserver start .import requests
x_0 = X_test[0:1]
inference_request = {
"inputs": [
{
"name": "predict",
"shape": x_0.shape,
"datatype": "FP32",
"data": x_0.tolist()
}
]
}
endpoint = "http://localhost:8080/v2/models/mnist-svm/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)
response.json()import json
from kafka import KafkaProducer
producer = KafkaProducer(bootstrap_servers="localhost:9092")
headers = {
"mlserver-model": b"mnist-svm",
"mlserver-version": b"v0.1.0",
}
producer.send(
"mlserver-input",
json.dumps(inference_request).encode("utf-8"),
headers=list(headers.items()))from kafka import KafkaConsumer
consumer = KafkaConsumer(
"mlserver-output",
bootstrap_servers="localhost:9092",
auto_offset_reset="earliest")
for msg in consumer:
print(f"key: {msg.key}")
print(f"value: {msg.value}\n")
breakcan_encode(payload: Any) -> booldecode_input(request_input: RequestInput) -> List[bytes]decode_output(response_output: ResponseOutput) -> List[bytes]encode_input(name: str, payload: List[bytes], use_bytes: bool = True, kwargs) -> RequestInputencode_output(name: str, payload: List[bytes], use_bytes: bool = True, kwargs) -> ResponseOutputadd_note(...)with_traceback(...)can_encode(payload: Any) -> booldecode_input(request_input: RequestInput) -> List[datetime]decode_output(response_output: ResponseOutput) -> List[datetime]encode_input(name: str, payload: List[Union[str, datetime]], use_bytes: bool = True, kwargs) -> RequestInputencode_output(name: str, payload: List[Union[str, datetime]], use_bytes: bool = True, kwargs) -> ResponseOutputcan_encode(payload: Any) -> booldecode_input(request_input: RequestInput) -> Anydecode_output(response_output: ResponseOutput) -> Anyencode_input(name: str, payload: Any, kwargs) -> RequestInputencode_output(name: str, payload: Any, kwargs) -> ResponseOutputcan_encode(payload: Any) -> booldecode_input(request_input: RequestInput) -> ndarraydecode_output(response_output: ResponseOutput) -> ndarrayencode_input(name: str, payload: ndarray, kwargs) -> RequestInputencode_output(name: str, payload: ndarray, kwargs) -> ResponseOutputcan_encode(payload: Any) -> booldecode_request(request: InferenceRequest) -> Anydecode_response(response: InferenceResponse) -> Anyencode_request(payload: Any, kwargs) -> InferenceRequestencode_response(model_name: str, payload: Any, model_version: Optional[str] = None, kwargs) -> InferenceResponsecan_encode(payload: Any) -> booldecode_request(request: InferenceRequest) -> DataFramedecode_response(response: InferenceResponse) -> DataFrameencode_outputs(payload: DataFrame, use_bytes: bool = True) -> List[ResponseOutput]encode_request(payload: DataFrame, use_bytes: bool = True, kwargs) -> InferenceRequestencode_response(model_name: str, payload: DataFrame, model_version: Optional[str] = None, use_bytes: bool = 
True, kwargs) -> InferenceResponsecan_encode(payload: Any) -> booldecode_request(request: InferenceRequest) -> Anydecode_response(response: InferenceResponse) -> Anyencode_request(payload: Any, kwargs) -> InferenceRequestencode_response(model_name: str, payload: Any, model_version: Optional[str] = None, kwargs) -> InferenceResponsecan_encode(payload: Any) -> booldecode_input(request_input: RequestInput) -> List[str]decode_output(response_output: ResponseOutput) -> List[str]encode_input(name: str, payload: List[str], use_bytes: bool = True, kwargs) -> RequestInputencode_output(name: str, payload: List[str], use_bytes: bool = True, kwargs) -> ResponseOutputcan_encode(payload: Any) -> booldecode_request(request: InferenceRequest) -> Anydecode_response(response: InferenceResponse) -> Anyencode_request(payload: Any, kwargs) -> InferenceRequestencode_response(model_name: str, payload: Any, model_version: Optional[str] = None, kwargs) -> InferenceResponsedecode_args(predict: Callable) -> Callable[[ForwardRef('MLModel'), <class 'mlserver.types.dataplane.InferenceRequest'>], Coroutine[Any, Any, InferenceResponse]]decode_inference_request(inference_request: InferenceRequest, model_settings: Optional[ModelSettings] = None, metadata_inputs: Dict[str, MetadataTensor] = {}) -> Optional[Any]decode_request_input(request_input: RequestInput, metadata_inputs: Dict[str, MetadataTensor] = {}) -> Optional[Any]encode_inference_response(payload: Any, model_settings: ModelSettings) -> Optional[InferenceResponse]encode_response_output(payload: Any, request_output: RequestOutput, metadata_outputs: Dict[str, MetadataTensor] = {}) -> Optional[ResponseOutput]get_decoded(parametrised_obj: Union[InferenceRequest, RequestInput, RequestOutput, ResponseOutput, InferenceResponse]) -> Anyget_decoded_or_raw(parametrised_obj: Union[InferenceRequest, RequestInput, RequestOutput, ResponseOutput, InferenceResponse]) -> Anyhas_decoded(parametrised_obj: Union[InferenceRequest, RequestInput, RequestOutput, 
ResponseOutput, InferenceResponse]) -> boolregister_input_codec(CodecKlass: Union[type[InputCodec], InputCodec])register_request_codec(CodecKlass: Union[type[RequestCodec], RequestCodec])inputoutputshape{
"parameters": {
"content_type": "pd"
},
"inputs": [
{
"name": "First Name",
"datatype": "BYTES",
"parameters": {
"content_type": "str"
},
"shape": [2],
"data": ["Joanne", "Michael"]
},
{
"name": "Age",
"datatype": "INT32",
"shape": [2],
"data": [34, 22]
}
]
}import pandas as pd
from mlserver.codecs import PandasCodec
dataframe = pd.DataFrame({'First Name': ["Joanne", "Michael"], 'Age': [34, 22]})
inference_request = PandasCodec.encode_request(dataframe)
print(inference_request)import pandas as pd
import requests
from mlserver.codecs import PandasCodec
dataframe = pd.DataFrame({'First Name': ["Joanne", "Michael"], 'Age': [34, 22]})
inference_request = PandasCodec.encode_request(dataframe)
# raw_request will be a Python dictionary compatible with `requests`'s `json` kwarg
raw_request = inference_request.model_dump()
response = requests.post("localhost:8080/v2/models/foo/infer", json=raw_request)
# raw_response will be a dictionary (loaded from the response's JSON),
# therefore we can pass it as the InferenceResponse constructors' kwargs
raw_response = response.json()
inference_response = InferenceResponse(**raw_response)import numpy as np
foo = np.array([[1.2, 2.3], [np.nan, 4.5]]){
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "np"
},
"data": [1.2, 2.3, null, 4.5]
"datatype": "FP64",
"shape": [2, 2],
}
]
}{
"parameters": {
"content_type": "pd"
},
"inputs": [
{
"name": "First Name",
"datatype": "BYTES",
"parameters": {
"content_type": "str"
},
"shape": [-1],
},
{
"name": "Age",
"datatype": "INT32",
"shape": [-1],
},
]
}import numpy as np
foo = np.array([[1, 2], [3, 4]]){
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "np"
},
"data": [1, 2, 3, 4]
"datatype": "INT32",
"shape": [2, 2],
}
]
}from mlserver.codecs import NumpyRequestCodec
# Encode an entire V2 request
inference_request = NumpyRequestCodec.encode_request(foo)from mlserver.types import InferenceRequest
from mlserver.codecs import NumpyCodec
# We can use the `NumpyCodec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
NumpyCodec.encode_input("foo", foo)
]
){
"parameters": {
"content_type": "pd"
},
"inputs": [
{
"name": "A",
"data": ["a1", "a2", "a3", "a4"]
"datatype": "BYTES",
"shape": [4],
},
{
"name": "B",
"data": ["b1", "b2", "b3", "b4"]
"datatype": "BYTES",
"shape": [4],
},
{
"name": "C",
"data": ["c1", "c2", "c3", "c4"]
"datatype": "BYTES",
"shape": [4],
},
]
}import pandas as pd
from mlserver.codecs import PandasCodec
foo = pd.DataFrame({
"A": ["a1", "a2", "a3", "a4"],
"B": ["b1", "b2", "b3", "b4"],
"C": ["c1", "c2", "c3", "c4"]
})
inference_request = PandasCodec.encode_request(foo)foo = ["bar", "bar2"]{
"parameters": {
"content_type": "str"
},
"inputs": [
{
"name": "foo",
"data": ["bar", "bar2"]
"datatype": "BYTES",
"shape": [2],
}
]
}from mlserver.codecs.string import StringRequestCodec
# Encode an entire V2 request
inference_request = StringRequestCodec.encode_request(foo, use_bytes=False)from mlserver.types import InferenceRequest
from mlserver.codecs import StringCodec
# We can use the `StringCodec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
StringCodec.encode_input("foo", foo, use_bytes=False)
]
)foo = b"Python is fun"{
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "base64"
},
"data": ["UHl0aG9uIGlzIGZ1bg=="]
"datatype": "BYTES",
"shape": [1],
}
]
}from mlserver.types import InferenceRequest
from mlserver.codecs import Base64Codec
# We can use the `Base64Codec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
Base64Codec.encode_input("foo", foo, use_bytes=False)
]
)import datetime
foo = datetime.datetime(2022, 1, 11, 11, 0, 0){
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "datetime"
},
"data": ["2022-01-11T11:00:00"]
"datatype": "BYTES",
"shape": [1],
}
]
}from mlserver.types import InferenceRequest
from mlserver.codecs import DatetimeCodec
# We can use the `DatetimeCodec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
DatetimeCodec.encode_input("foo", foo, use_bytes=False)
]
)%%writefile text_model.py
import asyncio
from typing import AsyncIterator
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse
from mlserver.codecs import StringCodec
class TextModel(MLModel):
    """Toy streaming runtime: echoes the input prompt back one word at a time."""

    async def predict_stream(
        self, payloads: AsyncIterator[InferenceRequest]
    ) -> AsyncIterator[InferenceResponse]:
        # Drain the incoming request stream; this example only uses the
        # first request it receives.
        requests = [request async for request in payloads]
        first_request = requests[0]
        text = StringCodec.decode_input(first_request.inputs[0])[0]
        # Re-attach the separating space to every word except the first, so
        # concatenating all streamed chunks reproduces the original text.
        tokens = text.split(" ")
        chunks = [
            token if index == 0 else " " + token
            for index, token in enumerate(tokens)
        ]
        for chunk in chunks:
            await asyncio.sleep(0.5)  # simulate per-token generation latency
            yield InferenceResponse(
                model_name=self._settings.name,
                outputs=[
                    StringCodec.encode_output(
                        name="output",
                        payload=[chunk],
                        use_bytes=True,
                    ),
                ],
            )
%%writefile settings.json
{
"debug": false,
"parallel_workers": 0,
"gzip_enabled": false
}
%%writefile model-settings.json
{
"name": "text-model",
"implementation": "text_model.TextModel",
"versions": ["text-model/v1.2.3"],
"platform": "mlserver",
"inputs": [
{
"datatype": "BYTES",
"name": "prompt",
"shape": [1]
}
],
"outputs": [
{
"datatype": "BYTES",
"name": "output",
"shape": [1]
}
]
}mlserver start .%%writefile generate-request.json
{
"inputs": [
{
"name": "prompt",
"shape": [1],
"datatype": "BYTES",
"data": ["What is the capital of France?"],
"parameters": {
"content_type": "str"
}
}
],
"outputs": [
{
"name": "output"
}
]
}import httpx
from httpx_sse import connect_sse
from mlserver import types
from mlserver.codecs import StringCodec
inference_request = types.InferenceRequest.parse_file("./generate-request.json")
with httpx.Client() as client:
with connect_sse(client, "POST", "http://localhost:8080/v2/models/text-model/generate_stream", json=inference_request.dict()) as event_source:
for sse in event_source.iter_sse():
response = types.InferenceResponse.parse_raw(sse.data)
print(StringCodec.decode_output(response.outputs[0]))
import grpc
import mlserver.types as types
from mlserver.codecs import StringCodec
from mlserver.grpc.converters import ModelInferResponseConverter
import mlserver.grpc.converters as converters
import mlserver.grpc.dataplane_pb2_grpc as dataplane
inference_request = types.InferenceRequest.parse_file("./generate-request.json")
# need to convert from string to bytes for grpc
inference_request.inputs[0] = StringCodec.encode_input("prompt", inference_request.inputs[0].data.root)
inference_request_g = converters.ModelInferRequestConverter.from_types(
inference_request, model_name="text-model", model_version=None
)
async def get_inference_request_stream(inference_request):
yield inference_request
async with grpc.aio.insecure_channel("localhost:8081") as grpc_channel:
grpc_stub = dataplane.GRPCInferenceServiceStub(grpc_channel)
inference_request_stream = get_inference_request_stream(inference_request_g)
async for response in grpc_stub.ModelStreamInfer(inference_request_stream):
response = ModelInferResponseConverter.to_types(response)
print(StringCodec.decode_output(response.outputs[0]))# Original source code and more details can be found in:
# https://nbviewer.jupyter.org/github/pyro-ppl/numpyro/blob/master/notebooks/source/bayesian_regression.ipynb
import numpyro
import numpy as np
import pandas as pd
from numpyro import distributions as dist
from jax import random
from numpyro.infer import MCMC, NUTS
DATASET_URL = "https://raw.githubusercontent.com/rmcelreath/rethinking/master/data/WaffleDivorce.csv"
dset = pd.read_csv(DATASET_URL, sep=";")
standardize = lambda x: (x - x.mean()) / x.std()
dset["AgeScaled"] = dset.MedianAgeMarriage.pipe(standardize)
dset["MarriageScaled"] = dset.Marriage.pipe(standardize)
dset["DivorceScaled"] = dset.Divorce.pipe(standardize)
def model(marriage=None, age=None, divorce=None):
    """NumPyro model: linear regression of divorce rate on optional predictors.

    Any combination of `marriage` and `age` (standardized arrays) may be
    supplied; omitted predictors contribute nothing to the mean. `divorce`
    is the observed outcome (None leaves "obs" unobserved, e.g. for
    posterior-predictive sampling).
    """
    # Intercept prior.
    a = numpyro.sample("a", dist.Normal(0.0, 0.2))
    M, A = 0.0, 0.0
    if marriage is not None:
        # Slope for the marriage-rate predictor.
        bM = numpyro.sample("bM", dist.Normal(0.0, 0.5))
        M = bM * marriage
    if age is not None:
        # Slope for the median-age predictor.
        bA = numpyro.sample("bA", dist.Normal(0.0, 0.5))
        A = bA * age
    # Observation noise scale.
    sigma = numpyro.sample("sigma", dist.Exponential(1.0))
    mu = a + M + A
    numpyro.sample("obs", dist.Normal(mu, sigma), obs=divorce)
# Start from this source of randomness. We will split keys for subsequent operations.
rng_key = random.PRNGKey(0)
rng_key, rng_key_ = random.split(rng_key)
num_warmup, num_samples = 1000, 2000
# Run NUTS.
kernel = NUTS(model)
mcmc = MCMC(kernel, num_warmup=num_warmup, num_samples=num_samples)
mcmc.run(
rng_key_, marriage=dset.MarriageScaled.values, divorce=dset.DivorceScaled.values
)
mcmc.print_summary()import json
samples = mcmc.get_samples()
serialisable = {}
for k, v in samples.items():
serialisable[k] = np.asarray(v).tolist()
model_file_name = "numpyro-divorce.json"
with open(model_file_name, "w") as model_file:
json.dump(serialisable, model_file)# %load models.py
import json
import numpyro
import numpy as np
from jax import random
from mlserver import MLModel
from mlserver.codecs import decode_args
from mlserver.utils import get_model_uri
from numpyro.infer import Predictive
from numpyro import distributions as dist
from typing import Optional
class NumpyroModel(MLModel):
    """MLServer runtime wrapping a pre-fitted NumPyro divorce-rate model.

    Loads posterior samples from a JSON artifact and serves posterior
    predictive means through the V2 inference protocol.
    """

    async def load(self) -> bool:
        # Resolve the artifact URI from the model settings and read the
        # posterior samples serialised by the training notebook.
        model_uri = await get_model_uri(self._settings)
        with open(model_uri) as model_file:
            raw_samples = json.load(model_file)
        # Convert the JSON lists back into arrays usable by NumPyro.
        self._samples = {}
        for k, v in raw_samples.items():
            self._samples[k] = np.array(v)
        # Predictive replays the model forward using the stored posterior samples.
        self._predictive = Predictive(self._model, self._samples)
        return True

    @decode_args
    async def predict(
        self,
        marriage: Optional[np.ndarray] = None,
        age: Optional[np.ndarray] = None,
        divorce: Optional[np.ndarray] = None,
    ) -> np.ndarray:
        # NOTE(review): the fixed PRNG key makes predictions deterministic
        # across calls — presumably intentional for a reproducible example.
        predictions = self._predictive(
            rng_key=random.PRNGKey(0), marriage=marriage, age=age, divorce=divorce
        )
        # Return the mean of the posterior-predictive "obs" site as an array.
        obs = predictions["obs"]
        obs_mean = obs.mean()
        return np.asarray(obs_mean)

    def _model(self, marriage=None, age=None, divorce=None):
        # Same model definition used at training time: linear regression of
        # divorce on the optional marriage/age predictors with Normal priors.
        a = numpyro.sample("a", dist.Normal(0.0, 0.2))
        M, A = 0.0, 0.0
        if marriage is not None:
            bM = numpyro.sample("bM", dist.Normal(0.0, 0.5))
            M = bM * marriage
        if age is not None:
            bA = numpyro.sample("bA", dist.Normal(0.0, 0.5))
            A = bA * age
        sigma = numpyro.sample("sigma", dist.Exponential(1.0))
        mu = a + M + A
        numpyro.sample("obs", dist.Normal(mu, sigma), obs=divorce)
# %load settings.json
{
"debug": "true"
}
# %load model-settings.json
{
"name": "numpyro-divorce",
"implementation": "models.NumpyroModel",
"parameters": {
"uri": "./numpyro-divorce.json"
}
}
mlserver start .import requests
import numpy as np
from mlserver.types import InferenceRequest
from mlserver.codecs import NumpyCodec
x_0 = np.array([28.0])
inference_request = InferenceRequest(
inputs=[
NumpyCodec.encode_input(name="marriage", payload=x_0)
]
)
endpoint = "http://localhost:8080/v2/models/numpyro-divorce/infer"
response = requests.post(endpoint, json=inference_request.model_dump())
response.json()# %load requirements.txt
numpy==1.22.4
numpyro==0.8.0
jax==0.2.24
jaxlib==0.3.7
This section expects that Docker is available and running in the background.%%bash
mlserver build . -t 'my-custom-numpyro-server:0.1.0'docker run -it --rm -p 8080:8080 my-custom-numpyro-server:0.1.0import numpy as np
from mlserver.types import InferenceRequest
from mlserver.codecs import NumpyCodec
x_0 = np.array([28.0])
inference_request = InferenceRequest(
inputs=[
NumpyCodec.encode_input(name="marriage", payload=x_0)
]
)
endpoint = "http://localhost:8080/v2/models/numpyro-divorce/infer"
response = requests.post(endpoint, json=inference_request.model_dump())
response.json()This section expects access to a functional Kubernetes cluster with Seldon Core installed and some familiarity with `kubectl`.Also consider that depending on your Kubernetes installation Seldon Core might expect to get the container image from a public container registry like [Docker hub](https://hub.docker.com/) or [Google Container Registry](https://cloud.google.com/container-registry). For that you need to do an extra step of pushing the container to the registry using `docker tag <image name> <container registry>/<image name>` and `docker push <container registry>/<image name>` and also updating the `image` section of the yaml file to `<container registry>/<image name>`.%%writefile seldondeployment.yaml
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
name: numpyro-model
spec:
protocol: v2
predictors:
- name: default
graph:
name: numpyro-divorce
type: MODEL
componentSpecs:
- spec:
containers:
- name: numpyro-divorce
image: my-custom-numpyro-server:0.1.0from IPython.core.magic import register_line_cell_magic
@register_line_cell_magic
def writetemplate(line, cell):
    """Cell magic: substitute notebook globals into the cell body and write it to the file named on the magic line."""
    rendered = cell.format(**globals())
    with open(line, 'w') as out_file:
        out_file.write(rendered)
# Original source code and more details can be found in:
# https://www.mlflow.org/docs/latest/tutorials-and-examples/tutorial.html
# The data set used in this example is from
# http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties.
# In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
import warnings
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import logging
logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)
def eval_metrics(actual, pred):
    """Return the (rmse, mae, r2) regression metrics for predictions vs. ground truth."""
    root_mean_squared = np.sqrt(mean_squared_error(actual, pred))
    mean_absolute = mean_absolute_error(actual, pred)
    r_squared = r2_score(actual, pred)
    return root_mean_squared, mean_absolute, r_squared
if __name__ == "__main__":
warnings.filterwarnings("ignore")
np.random.seed(40)
# Read the wine-quality csv file from the URL
csv_url = (
"http://archive.ics.uci.edu/ml"
"/machine-learning-databases/wine-quality/winequality-red.csv"
)
try:
data = pd.read_csv(csv_url, sep=";")
except Exception as e:
logger.exception(
"Unable to download training & test CSV, "
"check your internet connection. Error: %s",
e,
)
# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)
# The predicted column is "quality" which is a scalar from [3, 9]
train_x = train.drop(["quality"], axis=1)
test_x = test.drop(["quality"], axis=1)
train_y = train[["quality"]]
test_y = test[["quality"]]
alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
with mlflow.start_run():
lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
lr.fit(train_x, train_y)
predicted_qualities = lr.predict(test_x)
(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
print(" RMSE: %s" % rmse)
print(" MAE: %s" % mae)
print(" R2: %s" % r2)
mlflow.log_param("alpha", alpha)
mlflow.log_param("l1_ratio", l1_ratio)
mlflow.log_metric("rmse", rmse)
mlflow.log_metric("r2", r2)
mlflow.log_metric("mae", mae)
tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
model_signature = infer_signature(train_x, train_y)
# Model registry does not work with file store
if tracking_url_type_store != "file":
# Register the model
# There are other ways to use the Model Registry,
# which depends on the use case,
# please refer to the doc for more information:
# https://mlflow.org/docs/latest/model-registry.html#api-workflow
mlflow.sklearn.log_model(
lr,
"model",
registered_model_name="ElasticnetWineModel",
signature=model_signature,
)
else:
mlflow.sklearn.log_model(lr, "model", signature=model_signature)
!python src/train.pyimport os
[experiment_file_path] = !ls -td ./mlruns/0/* | head -1
model_path = os.path.join(experiment_file_path, "artifacts", "model")
print(model_path)!ls {model_path} %%writetemplate ./model-settings.json
{{
"name": "wine-classifier",
"implementation": "mlserver_mlflow.MLflowRuntime",
"parameters": {{
"uri": "{model_path}"
}}
}}mlserver start .import requests
inference_request = {
"inputs": [
{
"name": "fixed acidity",
"shape": [1],
"datatype": "FP32",
"data": [7.4],
},
{
"name": "volatile acidity",
"shape": [1],
"datatype": "FP32",
"data": [0.7000],
},
{
"name": "citric acid",
"shape": [1],
"datatype": "FP32",
"data": [0],
},
{
"name": "residual sugar",
"shape": [1],
"datatype": "FP32",
"data": [1.9],
},
{
"name": "chlorides",
"shape": [1],
"datatype": "FP32",
"data": [0.076],
},
{
"name": "free sulfur dioxide",
"shape": [1],
"datatype": "FP32",
"data": [11],
},
{
"name": "total sulfur dioxide",
"shape": [1],
"datatype": "FP32",
"data": [34],
},
{
"name": "density",
"shape": [1],
"datatype": "FP32",
"data": [0.9978],
},
{
"name": "pH",
"shape": [1],
"datatype": "FP32",
"data": [3.51],
},
{
"name": "sulphates",
"shape": [1],
"datatype": "FP32",
"data": [0.56],
},
{
"name": "alcohol",
"shape": [1],
"datatype": "FP32",
"data": [9.4],
},
]
}
endpoint = "http://localhost:8080/v2/models/wine-classifier/infer"
response = requests.post(endpoint, json=inference_request)
response.json()import requests
inference_request = {
"dataframe_split": {
"columns": [
"fixed acidity",
"volatile acidity",
"citric acid",
"residual sugar",
"chlorides",
"free sulfur dioxide",
"total sulfur dioxide",
"density",
"pH",
"sulphates",
"alcohol",
],
"data": [[7.4,0.7,0,1.9,0.076,11,34,0.9978,3.51,0.56,9.4]]
}
}
endpoint = "http://localhost:8080/invocations"
response = requests.post(endpoint, json=inference_request)
response.json()!cat {model_path}/MLmodelimport requests
endpoint = "http://localhost:8080/v2/models/wine-classifier"
response = requests.get(endpoint)
response.json()git clone https://github.com/SeldonIO/cassava-example.gitcd cassava-example/pip install -r requirements.txtfrom helpers import plot, preprocess
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
# Fixes an issue with Jax and TF competing for GPU
tf.config.experimental.set_visible_devices([], 'GPU')
# Load the model
model_path = './model'
classifier = hub.KerasLayer(model_path)
# Load the dataset and store the class names
dataset, info = tfds.load('cassava', with_info=True)
class_names = info.features['label'].names + ['unknown']
# Select a batch of examples and plot them
batch_size = 9
batch = dataset['validation'].map(preprocess).batch(batch_size).as_numpy_iterator()
examples = next(batch)
plot(examples, class_names)
# Generate predictions for the batch and plot them against their labels
predictions = classifier(examples['image'])
predictions_max = tf.argmax(predictions, axis=-1)
print(predictions_max)
plot(examples, class_names, predictions_max)python app.pyfrom mlserver import MLModel
from mlserver.codecs import decode_args
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
# Define a class for our Model, inheriting the MLModel class from MLServer
class CassavaModel(MLModel):
    """MLServer runtime serving the cassava leaf-disease classifier from TF Hub."""

    # Load the model into memory
    async def load(self) -> bool:
        # Keep TensorFlow off the GPU (avoids competing with JAX for it).
        tf.config.experimental.set_visible_devices([], 'GPU')
        self._model = hub.KerasLayer('.')
        self.ready = True
        return self.ready

    # Logic for making predictions against our model
    @decode_args
    async def predict(self, payload: np.ndarray) -> np.ndarray:
        """Classify a batch of images and return the argmax class index per image."""
        # Run the classifier on the payload converted to a TF tensor.
        logits = self._model(tf.constant(payload))
        best_classes = tf.argmax(logits, axis=-1)
        # Hand back a plain numpy array so MLServer can encode it.
        return np.array(best_classes)
"name": "cassava",
"implementation": "serve-model.CassavaModel"
}mlserver start model/python test.py --localtensorflow==2.12.0
tensorflow-hub==0.13.0mlserver build model/ -t [YOUR_CONTAINER_REGISTRY]/[IMAGE_NAME]docker imagesdocker push [YOUR_CONTAINER_REGISTRY]/[IMAGE_NAME]apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
name: cassava
spec:
protocol: v2
predictors:
- componentSpecs:
- spec:
containers:
- image: YOUR_CONTAINER_REGISTRY/IMAGE_NAME
name: cassava
imagePullPolicy: Always
graph:
name: cassava
type: MODEL
name: cassavakubectl create -f deployment.yamlkubectl get podspython test.py --remotekubectl scale sdep cassava --replicas=3kubectl get pods --watch






# Import required dependencies
import requests%%writefile ./model-settings.json
{
"name": "transformer",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"parameters": {
"extra": {
"task": "text-generation",
"pretrained_model": "distilgpt2"
}
}
}Overwriting ./model-settings.jsonmlserver start .inference_request = {
"inputs": [
{
"name": "args",
"shape": [1],
"datatype": "BYTES",
"data": ["this is a test"],
}
]
}
requests.post(
"http://localhost:8080/v2/models/transformer/infer", json=inference_request
).json(){'model_name': 'transformer',
'id': 'eb160c6b-8223-4342-ad92-6ac301a9fa5d',
'parameters': {},
'outputs': [{'name': 'output',
'shape': [1, 1],
'datatype': 'BYTES',
'parameters': {'content_type': 'hg_jsonlist'},
'data': ['{"generated_text": "this is a testnet with 1-3,000-bit nodes as nodes."}']}]}%%writefile ./model-settings.json
{
"name": "transformer",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"parameters": {
"extra": {
"task": "text-generation",
"pretrained_model": "distilgpt2",
"optimum_model": true
}
}
}Overwriting ./model-settings.jsonmlserver start .inference_request = {
"inputs": [
{
"name": "args",
"shape": [1],
"datatype": "BYTES",
"data": ["this is a test"],
}
]
}
requests.post(
"http://localhost:8080/v2/models/transformer/infer", json=inference_request
).json(){'model_name': 'transformer',
'id': '9c482c8d-b21e-44b1-8a42-7650a9dc01ef',
'parameters': {},
'outputs': [{'name': 'output',
'shape': [1, 1],
'datatype': 'BYTES',
'parameters': {'content_type': 'hg_jsonlist'},
'data': ['{"generated_text": "this is a test of the \\"safe-code-safe-code-safe-code\\" approach. The method only accepts two parameters as parameters: the code. The parameter \'unsafe-code-safe-code-safe-code\' should"}']}]}%%writefile ./model-settings.json
{
"name": "transformer",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"parameters": {
"extra": {
"task": "question-answering"
}
}
}Overwriting ./model-settings.jsonmlserver start .inference_request = {
"inputs": [
{
"name": "question",
"shape": [1],
"datatype": "BYTES",
"data": ["what is your name?"],
},
{
"name": "context",
"shape": [1],
"datatype": "BYTES",
"data": ["Hello, I am Seldon, how is it going"],
},
]
}
requests.post(
"http://localhost:8080/v2/models/transformer/infer", json=inference_request
).json(){'model_name': 'transformer',
'id': '4efac938-86d8-41a1-b78f-7690b2dcf197',
'parameters': {},
'outputs': [{'name': 'output',
'shape': [1, 1],
'datatype': 'BYTES',
'parameters': {'content_type': 'hg_jsonlist'},
'data': ['{"score": 0.9869915843009949, "start": 12, "end": 18, "answer": "Seldon"}']}]}%%writefile ./model-settings.json
{
"name": "transformer",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"parameters": {
"extra": {
"task": "text-classification"
}
}
}Overwriting ./model-settings.jsonmlserver start .inference_request = {
"inputs": [
{
"name": "args",
"shape": [1],
"datatype": "BYTES",
"data": ["This is terrible!"],
}
]
}
requests.post(
"http://localhost:8080/v2/models/transformer/infer", json=inference_request
).json(){'model_name': 'transformer',
'id': '835eabbd-daeb-4423-a64f-a7c4d7c60a9b',
'parameters': {},
'outputs': [{'name': 'output',
'shape': [1, 1],
'datatype': 'BYTES',
'parameters': {'content_type': 'hg_jsonlist'},
'data': ['{"label": "NEGATIVE", "score": 0.9996137022972107}']}]}%%writefile ./model-settings.json
{
"name": "transformer",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"max_batch_size": 128,
"max_batch_time": 1,
"parameters": {
"extra": {
"task": "text-generation",
"device": -1
}
}
}Overwriting ./model-settings.jsonmlserver start .inference_request = {
"inputs": [
{
"name": "text_inputs",
"shape": [1],
"datatype": "BYTES",
"data": ["This is a generation for the work" for i in range(512)],
}
]
}
# Benchmark time
import time
start_time = time.monotonic()
requests.post(
"http://localhost:8080/v2/models/transformer/infer", json=inference_request
)
print(f"Elapsed time: {time.monotonic() - start_time}")Elapsed time: 66.42268538899953%%writefile ./model-settings.json
{
"name": "transformer",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"parameters": {
"extra": {
"task": "text-generation",
"device": 0
}
}
}Overwriting ./model-settings.jsoninference_request = {
"inputs": [
{
"name": "text_inputs",
"shape": [1],
"datatype": "BYTES",
"data": ["This is a generation for the work" for i in range(512)],
}
]
}
# Benchmark time
import time
start_time = time.monotonic()
requests.post(
"http://localhost:8080/v2/models/transformer/infer", json=inference_request
)
print(f"Elapsed time: {time.monotonic() - start_time}")Elapsed time: 11.27933280000434%%writefile ./model-settings.json
{
"name": "transformer",
"implementation": "mlserver_huggingface.HuggingFaceRuntime",
"max_batch_size": 128,
"max_batch_time": 1,
"parameters": {
"extra": {
"task": "text-generation",
"pretrained_model": "distilgpt2",
"device": 0
}
}
}Overwriting ./model-settings.json%%bash
jq -ncM '{"method": "POST", "header": {"Content-Type": ["application/json"] }, "url": "http://localhost:8080/v2/models/transformer/infer", "body": "{\"inputs\":[{\"name\":\"text_inputs\",\"shape\":[1],\"datatype\":\"BYTES\",\"data\":[\"test\"]}]}" | @base64 }' \
| vegeta \
-cpus="2" \
attack \
-duration="3s" \
-rate="50" \
-format=json \
| vegeta \
report \
-type=textRequests [total, rate, throughput] 150, 50.34, 22.28
Duration [total, attack, wait] 6.732s, 2.98s, 3.753s
Latencies [min, mean, 50, 90, 95, 99, max] 1.975s, 3.168s, 3.22s, 4.065s, 4.183s, 4.299s, 4.318s
Bytes In [total, mean] 60978, 406.52
Bytes Out [total, mean] 12300, 82.00
Success [ratio] 100.00%
Status Codes [code:count] 200:150
Error Set:mlserver --help# request 1
types.RequestInput(
name="parameters-np",
shape=[1],
datatype="BYTES",
data=[],
parameters=types.Parameters(
custom-param='value-1',
)
)
# request 2
types.RequestInput(
name="parameters-np",
shape=[1],
datatype="BYTES",
data=[],
parameters=types.Parameters(
custom-param='value-2',
)
)types.RequestInput(
name="parameters-np",
shape=[2],
datatype="BYTES",
data=[],
parameters=types.Parameters(
custom-param=['value-1', 'value-2'],
)
)types.ResponseOutput(
name="foo",
datatype="INT32",
shape=[3, 3],
data=[1, 2, 3, 4, 5, 6, 7, 8, 9],
parameters=types.Parameters(
content_type="np",
foo=["foo_1", "foo_2"],
bar=["bar_1", "bar_2", "bar_3"],
),
)# Request 1
types.ResponseOutput(
name="foo",
datatype="INT32",
shape=[1, 3],
data=[1, 2, 3],
parameters=types.Parameters(
content_type="np", foo="foo_1", bar="'bar_1"
),
)
# Request 2
types.ResponseOutput(
name="foo",
datatype="INT32",
shape=[1, 3],
data=[4, 5, 6],
parameters=types.Parameters(
content_type="np", foo="foo_2", bar="bar_2"
),
)
# Request 3
types.ResponseOutput(
name="foo",
datatype="INT32",
shape=[1, 3],
data=[7, 8, 9],
parameters=types.Parameters(content_type="np", bar="bar_3"),
)%%writefile runtime.py
import json
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse, ResponseOutput
from mlserver.codecs import DecodedParameterName
_to_exclude = {
"parameters": {DecodedParameterName, "headers"},
'inputs': {"__all__": {"parameters": {DecodedParameterName, "headers"}}}
}
class EchoRuntime(MLModel):
    """Debug runtime: prints each input in encoded and decoded form, then echoes it back."""

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        echoed = []
        for inp in payload.inputs:
            decoded = self.decode(inp)
            print(f"------ Encoded Input ({inp.name}) ------")
            print(json.dumps(inp.dict(exclude=_to_exclude), indent=2))  # type: ignore
            print(f"------ Decoded input ({inp.name}) ------")
            print(decoded)
            echoed.append(
                ResponseOutput(
                    name=inp.name,
                    datatype=inp.datatype,
                    shape=inp.shape,
                    data=inp.data,
                )
            )
        return InferenceResponse(model_name=self.name, outputs=echoed)
%%writefile model-settings.json
{
"name": "content-type-example",
"implementation": "runtime.EchoRuntime"
}import requests
payload = {
"inputs": [
{
"name": "parameters-np",
"datatype": "INT32",
"shape": [2, 2],
"data": [1, 2, 3, 4],
"parameters": {
"content_type": "np"
}
},
{
"name": "parameters-str",
"datatype": "BYTES",
"shape": [1],
"data": "hello world 😁",
"parameters": {
"content_type": "str"
}
}
]
}
response = requests.post(
"http://localhost:8080/v2/models/content-type-example/infer",
json=payload
)import requests
import numpy as np
from mlserver.types import InferenceRequest, InferenceResponse
from mlserver.codecs import NumpyCodec, StringCodec
parameters_np = np.array([[1, 2], [3, 4]])
parameters_str = ["hello world 😁"]
payload = InferenceRequest(
inputs=[
NumpyCodec.encode_input("parameters-np", parameters_np),
# The `use_bytes=False` flag will ensure that the encoded payload is JSON-compatible
StringCodec.encode_input("parameters-str", parameters_str, use_bytes=False),
]
)
response = requests.post(
"http://localhost:8080/v2/models/content-type-example/infer",
json=payload.model_dump()
)
response_payload = InferenceResponse.parse_raw(response.text)
print(NumpyCodec.decode_output(response_payload.outputs[0]))
print(StringCodec.decode_output(response_payload.outputs[1]))%%writefile model-settings.json
{
"name": "content-type-example",
"implementation": "runtime.EchoRuntime",
"inputs": [
{
"name": "metadata-np",
"datatype": "INT32",
"shape": [2, 2],
"parameters": {
"content_type": "np"
}
},
{
"name": "metadata-str",
"datatype": "BYTES",
"shape": [11],
"parameters": {
"content_type": "str"
}
}
]
}import requests
payload = {
"inputs": [
{
"name": "metadata-np",
"datatype": "INT32",
"shape": [2, 2],
"data": [1, 2, 3, 4],
},
{
"name": "metadata-str",
"datatype": "BYTES",
"shape": [11],
"data": "hello world 😁",
}
]
}
response = requests.post(
"http://localhost:8080/v2/models/content-type-example/infer",
json=payload
)%%writefile runtime.py
import io
import json
from PIL import Image
from mlserver import MLModel
from mlserver.types import (
InferenceRequest,
InferenceResponse,
RequestInput,
ResponseOutput,
)
from mlserver.codecs import NumpyCodec, register_input_codec, DecodedParameterName
from mlserver.codecs.utils import InputOrOutput
_to_exclude = {
"parameters": {DecodedParameterName},
"inputs": {"__all__": {"parameters": {DecodedParameterName}}},
}
@register_input_codec
class PillowCodec(NumpyCodec):
    """Codec that (de)serialises Pillow images over the V2 protocol.

    BYTES payloads are treated as raw pixel buffers in ``DefaultMode``;
    any other datatype is decoded as a numeric array and converted via
    ``Image.fromarray``.
    """

    ContentType = "img"
    DefaultMode = "L"  # 8-bit greyscale

    @classmethod
    def can_encode(cls, payload: Image.Image) -> bool:
        # Fix: check against the Image.Image class. `Image` alone is the PIL
        # *module*, and isinstance() against a module raises TypeError.
        return isinstance(payload, Image.Image)

    @classmethod
    def _decode(cls, input_or_output: InputOrOutput) -> Image.Image:
        if input_or_output.datatype != "BYTES":
            # If not bytes, assume it's an array
            image_array = super().decode_input(input_or_output)  # type: ignore
            return Image.fromarray(image_array, mode=cls.DefaultMode)
        encoded = input_or_output.data
        if isinstance(encoded, str):
            encoded = encoded.encode()
        return Image.frombytes(
            mode=cls.DefaultMode, size=input_or_output.shape, data=encoded
        )

    @classmethod
    def encode_output(cls, name: str, payload: Image.Image) -> ResponseOutput:  # type: ignore
        # Fix: emit the raw pixel buffer — the inverse of the Image.frombytes
        # call used in _decode. The previous `payload.save(buf, mode=...)`
        # raised ValueError because no file format can be inferred from a
        # BytesIO object, and a saved file format would not round-trip
        # through frombytes anyway.
        return ResponseOutput(
            name=name,
            shape=payload.size,
            datatype="BYTES",
            data=payload.tobytes(),
        )

    @classmethod
    def decode_output(cls, response_output: ResponseOutput) -> Image.Image:
        return cls._decode(response_output)

    @classmethod
    def encode_input(cls, name: str, payload: Image.Image) -> RequestInput:  # type: ignore
        # Re-use the output encoding and re-wrap it as a request input.
        output = cls.encode_output(name, payload)
        return RequestInput(
            name=output.name,
            shape=output.shape,
            datatype=output.datatype,
            data=output.data,
        )

    @classmethod
    def decode_input(cls, request_input: RequestInput) -> Image.Image:
        return cls._decode(request_input)
class EchoRuntime(MLModel):
    """Debug runtime that logs every input and echoes the request back unchanged."""

    def _dump_input(self, request_input) -> None:
        # Show the wire-level representation followed by the decoded value.
        decoded_input = self.decode(request_input)
        print(f"------ Encoded Input ({request_input.name}) ------")
        print(json.dumps(request_input.dict(exclude=_to_exclude), indent=2))  # type: ignore
        print(f"------ Decoded input ({request_input.name}) ------")
        print(decoded_input)

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        outputs = []
        for request_input in payload.inputs:
            self._dump_input(request_input)
            outputs.append(
                ResponseOutput(
                    name=request_input.name,
                    datatype=request_input.datatype,
                    shape=request_input.shape,
                    data=request_input.data,
                )
            )
        return InferenceResponse(model_name=self.name, outputs=outputs)
payload = {
"inputs": [
{
"name": "image-int32",
"datatype": "INT32",
"shape": [8, 8],
"data": [
1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0
],
"parameters": {
"content_type": "img"
}
},
{
"name": "image-bytes",
"datatype": "BYTES",
"shape": [8, 8],
"data": (
"10101010"
"10101010"
"10101010"
"10101010"
"10101010"
"10101010"
"10101010"
"10101010"
),
"parameters": {
"content_type": "img"
}
}
]
}
response = requests.post(
"http://localhost:8080/v2/models/content-type-example/infer",
json=payload
)%%writefile runtime.py
import json
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse, ResponseOutput
from mlserver.codecs import DecodedParameterName
_to_exclude = {
"parameters": {DecodedParameterName},
'inputs': {"__all__": {"parameters": {DecodedParameterName}}}
}
class EchoRuntime(MLModel):
    """Debug runtime: prints the whole request in encoded and request-level decoded form, then echoes the inputs back."""

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        print("------ Encoded Input (request) ------")
        print(json.dumps(payload.dict(exclude=_to_exclude), indent=2))  # type: ignore
        print("------ Decoded input (request) ------")
        # Request-level decoding (e.g. the "pd" content type) is attached to
        # the parameters object under DecodedParameterName.
        decoded_request = (
            getattr(payload.parameters, DecodedParameterName)
            if payload.parameters
            else None
        )
        print(decoded_request)
        outputs = [
            ResponseOutput(
                name=request_input.name,
                datatype=request_input.datatype,
                shape=request_input.shape,
                data=request_input.data,
            )
            for request_input in payload.inputs
        ]
        return InferenceResponse(model_name=self.name, outputs=outputs)
import requests
# Stacked content types: the request-level `pd` codec assembles a DataFrame
# while each input declares its own codec (`np` for the integer tensor,
# `str` for the string column).
payload = {
"inputs": [
{
"name": "parameters-np",
"datatype": "INT32",
"shape": [2, 2],
"data": [1, 2, 3, 4],
"parameters": {
"content_type": "np"
}
},
{
# BYTES strings may contain non-ASCII characters; shape is
# [elements, max-length].
"name": "parameters-str",
"datatype": "BYTES",
"shape": [2, 11],
"data": ["hello world 😁", "bye bye 😁"],
"parameters": {
"content_type": "str"
}
}
],
"parameters": {
"content_type": "pd"
}
}
response = requests.post(
"http://localhost:8080/v2/models/content-type-example/infer",
json=payload
)pip install mlserverpip install mlserver-sklearn./hack/update-version.sh 0.2.0.dev1make testtox -e py3 -- tests/batch_processing/test_rest.pyInstall `alibi` library for dataset dependencies and `alibi_detect` library for detector configuration from Pypi
```python
!pip install alibi alibi_detect
```import alibi
import matplotlib.pyplot as plt
import numpy as npadult = alibi.datasets.fetch_adult()
# Raw features/labels plus the metadata needed to tell categorical columns
# apart from numerical ones.
X, y = adult.data, adult.target
feature_names = adult.feature_names
category_map = adult.category_mapn_ref = 10000
n_test = 10000
# Reference split (the detector's baseline) and two held-out test splits.
X_ref, X_t0, X_t1 = X[:n_ref], X[n_ref:n_ref + n_test], X[n_ref + n_test:n_ref + 2 * n_test]
# NOTE(review): `None` presumably lets alibi-detect infer category values
# from the reference data — confirm against the TabularDrift docs.
categories_per_feature = {f: None for f in list(category_map.keys())}from alibi_detect.cd import TabularDrift
cd_tabular = TabularDrift(X_ref, p_val=.05, categories_per_feature=categories_per_feature)from alibi_detect.utils.saving import save_detector
filepath = "alibi-detector-artifacts"
save_detector(cd_tabular, filepath)preds = cd_tabular.predict(X_t0,drift_type="feature")
# Per-feature drift report: a feature is flagged as drifted when its
# p-value falls below the detector's threshold.
labels = ['No!', 'Yes!']
print(f"Threshold {preds['data']['threshold']}")
for f in range(cd_tabular.n_features):
fname = feature_names[f]
is_drift = (preds['data']['p_val'][f] < preds['data']['threshold']).astype(int)
stat_val, p_val = preds['data']['distance'][f], preds['data']['p_val'][f]
# NOTE(review): the statistic is labelled "Chi2" for every feature, but
# TabularDrift applies K-S to numerical features — the label is misleading
# for those rows (a later cell computes a `stat` label for this reason).
print(f'{fname} -- Drift? {labels[is_drift]} -- Chi2 {stat_val:.3f} -- p-value {p_val:.3f}')Threshold 0.05
Age -- Drift? No! -- Chi2 0.012 -- p-value 0.508
Workclass -- Drift? No! -- Chi2 8.487 -- p-value 0.387
Education -- Drift? No! -- Chi2 4.753 -- p-value 0.576
Marital Status -- Drift? No! -- Chi2 3.160 -- p-value 0.368
Occupation -- Drift? No! -- Chi2 8.194 -- p-value 0.415
Relationship -- Drift? No! -- Chi2 0.485 -- p-value 0.993
Race -- Drift? No! -- Chi2 0.587 -- p-value 0.965
Sex -- Drift? No! -- Chi2 0.217 -- p-value 0.641
Capital Gain -- Drift? No! -- Chi2 0.002 -- p-value 1.000
Capital Loss -- Drift? No! -- Chi2 0.002 -- p-value 1.000
Hours per week -- Drift? No! -- Chi2 0.012 -- p-value 0.508
Country -- Drift? No! -- Chi2 9.991 -- p-value 0.441%%writefile settings.json
{
"debug": "true"
}Overwriting settings.json%%writefile model-settings.json
{
"name": "income-tabular-drift",
"implementation": "mlserver_alibi_detect.AlibiDetectRuntime",
"parameters": {
"uri": "./alibi-detector-artifacts",
"version": "v0.1.0",
"extra": {
"predict_parameters":{
"drift_type": "feature"
}
}
}
}Overwriting model-settings.jsonmlserver start .import requests
# Send the whole test split to the drift-detector model: the detector
# compares this batch against the reference data it was fitted on.
inference_request = {
"inputs": [
{
"name": "predict",
"shape": X_t0.shape,
"datatype": "FP32",
"data": X_t0.tolist(),
}
]
}
# The version segment must match `version` in model-settings.json.
endpoint = "http://localhost:8080/v2/models/income-tabular-drift/versions/v0.1.0/infer"
response = requests.post(endpoint, json=inference_request)import json
response_dict = json.loads(response.text)
labels = ['No!', 'Yes!']
for f in range(cd_tabular.n_features):
stat = 'Chi2' if f in list(categories_per_feature.keys()) else 'K-S'
fname = feature_names[f]
is_drift = response_dict['outputs'][0]['data'][f]
stat_val, p_val = response_dict['outputs'][1]['data'][f], response_dict['outputs'][2]['data'][f]
print(f'{fname} -- Drift? {labels[is_drift]} -- Chi2 {stat_val:.3f} -- p-value {p_val:.3f}')Age -- Drift? No! -- Chi2 0.012 -- p-value 0.508
Workclass -- Drift? No! -- Chi2 8.487 -- p-value 0.387
Education -- Drift? No! -- Chi2 4.753 -- p-value 0.576
Marital Status -- Drift? No! -- Chi2 3.160 -- p-value 0.368
Occupation -- Drift? No! -- Chi2 8.194 -- p-value 0.415
Relationship -- Drift? No! -- Chi2 0.485 -- p-value 0.993
Race -- Drift? No! -- Chi2 0.587 -- p-value 0.965
Sex -- Drift? No! -- Chi2 0.217 -- p-value 0.641
Capital Gain -- Drift? No! -- Chi2 0.002 -- p-value 1.000
Capital Loss -- Drift? No! -- Chi2 0.002 -- p-value 1.000
Hours per week -- Drift? No! -- Chi2 0.012 -- p-value 0.508
Country -- Drift? No! -- Chi2 9.991 -- p-value 0.441%%writefile jsonmodels.py
import json
from typing import Dict, Any
from mlserver import MLModel, types
from mlserver.codecs import StringCodec
class JsonHelloWorldModel(MLModel):
    """Custom runtime that parses JSON documents out of BYTES inputs and
    echoes them back together with a greeting from the server."""

    async def load(self) -> bool:
        # Perform additional custom initialization here.
        print("Initialize model")
        # Delegating to the parent sets the model's readiness flag.
        return await super().load()

    async def predict(self, payload: types.InferenceRequest) -> types.InferenceResponse:
        request = self._extract_json(payload)
        reply = {
            "request": request,
            "server_response": "Got your request. Hello from the server.",
        }
        encoded_reply = json.dumps(reply).encode("UTF-8")
        # Return the reply as a single BYTES tensor tagged as a string.
        echo_output = types.ResponseOutput(
            name="echo_response",
            shape=[len(encoded_reply)],
            datatype="BYTES",
            data=[encoded_reply],
            parameters=types.Parameters(content_type="str"),
        )
        return types.InferenceResponse(
            id=payload.id,
            model_name=self.name,
            model_version=self.version,
            outputs=[echo_output],
        )

    def _extract_json(self, payload: types.InferenceRequest) -> Dict[str, Any]:
        # Decode each input as string chunks, join them, then parse as JSON.
        return {
            inp.name: json.loads("".join(self.decode(inp, default_codec=StringCodec)))
            for inp in payload.inputs
        }
%%writefile settings.json
{
"debug": "true"
}%%writefile model-settings.json
{
"name": "json-hello-world",
"implementation": "jsonmodels.JsonHelloWorldModel"
}mlserver start .import requests
import json
from mlserver.types import InferenceResponse
from mlserver.codecs.string import StringRequestCodec
from pprint import PrettyPrinter
pp = PrettyPrinter(indent=1)
# Build a small JSON document, serialise it to a string, and ship it to the
# model as a single BYTES tensor over the REST (V2) endpoint.
inputs = {"name": "Foo Bar", "message": "Hello from Client (REST)!"}
# NOTE: this uses characters rather than encoded bytes. It is recommended that you use the `mlserver` types to assist in the correct encoding.
inputs_string = json.dumps(inputs)
inference_request = {
"inputs": [
{
"name": "echo_request",
"shape": [len(inputs_string)],
"datatype": "BYTES",
"data": [inputs_string],
}
]
}
endpoint = "http://localhost:8080/v2/models/json-hello-world/infer"
response = requests.post(endpoint, json=inference_request)
print(f"full response:\n")
print(response)
# Retrieve the text output and parse it back into a dictionary.
inference_response = InferenceResponse.parse_raw(response.text)
raw_json = StringRequestCodec.decode_response(inference_response)
output = json.loads(raw_json[0])
print(f"\ndata part:\n")
pp.pprint(output)import requests
import json
import grpc
from mlserver.codecs.string import StringRequestCodec
import mlserver.grpc.converters as converters
import mlserver.grpc.dataplane_pb2_grpc as dataplane
import mlserver.types as types
from pprint import PrettyPrinter
pp = PrettyPrinter(indent=1)
model_name = "json-hello-world"
# Same round-trip as the REST example, but over gRPC: here the JSON document
# is UTF-8-encoded to bytes up front.
inputs = {"name": "Foo Bar", "message": "Hello from Client (gRPC)!"}
inputs_bytes = json.dumps(inputs).encode("UTF-8")
inference_request = types.InferenceRequest(
inputs=[
types.RequestInput(
name="echo_request",
shape=[len(inputs_bytes)],
datatype="BYTES",
data=[inputs_bytes],
parameters=types.Parameters(content_type="str"),
)
]
)
# Convert the MLServer request type into the gRPC protobuf message.
inference_request_g = converters.ModelInferRequestConverter.from_types(
inference_request, model_name=model_name, model_version=None
)
grpc_channel = grpc.insecure_channel("localhost:8081")
grpc_stub = dataplane.GRPCInferenceServiceStub(grpc_channel)
response = grpc_stub.ModelInfer(inference_request_g)
print(f"full response:\n")
print(response)
# Retrieve the text output and parse it back into a dictionary.
inference_response = converters.ModelInferResponseConverter.to_types(response)
raw_json = StringRequestCodec.decode_response(inference_response)
output = json.loads(raw_json[0])
print(f"\ndata part:\n")
pp.pprint(output)
{
"properties": {
"error": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Error"
}
},
"title": "InferenceErrorResponse",
"type": "object"
}
{
"$defs": {
"Datatype": {
"enum": [
"BOOL",
"UINT8",
"UINT16",
"UINT32",
"UINT64",
"INT8",
"INT16",
"INT32",
"INT64",
"FP16",
"FP32",
"FP64",
"BYTES"
],
"title": "Datatype",
"type": "string"
},
"Parameters": {
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
},
"RequestInput": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"shape": {
"items": {
"type": "integer"
},
"title": "Shape",
"type": "array"
},
"datatype": {
"$ref": "#/$defs/Datatype"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
},
"data": {
"$ref": "#/$defs/TensorData"
}
},
"required": [
"name",
"shape",
"datatype",
"data"
],
"title": "RequestInput",
"type": "object"
},
"RequestOutput": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
}
},
"required": [
"name"
],
"title": "RequestOutput",
"type": "object"
},
"TensorData": {
"anyOf": [
{
"items": {},
"type": "array"
},
{}
],
"title": "TensorData"
}
},
"properties": {
"id": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Id"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
},
"inputs": {
"items": {
"$ref": "#/$defs/RequestInput"
},
"title": "Inputs",
"type": "array"
},
"outputs": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/RequestOutput"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Outputs"
}
},
"required": [
"inputs"
],
"title": "InferenceRequest",
"type": "object"
}
{
"$defs": {
"Datatype": {
"enum": [
"BOOL",
"UINT8",
"UINT16",
"UINT32",
"UINT64",
"INT8",
"INT16",
"INT32",
"INT64",
"FP16",
"FP32",
"FP64",
"BYTES"
],
"title": "Datatype",
"type": "string"
},
"Parameters": {
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
},
"ResponseOutput": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"shape": {
"items": {
"type": "integer"
},
"title": "Shape",
"type": "array"
},
"datatype": {
"$ref": "#/$defs/Datatype"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
},
"data": {
"$ref": "#/$defs/TensorData"
}
},
"required": [
"name",
"shape",
"datatype",
"data"
],
"title": "ResponseOutput",
"type": "object"
},
"TensorData": {
"anyOf": [
{
"items": {},
"type": "array"
},
{}
],
"title": "TensorData"
}
},
"properties": {
"model_name": {
"title": "Model Name",
"type": "string"
},
"model_version": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Model Version"
},
"id": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Id"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
},
"outputs": {
"items": {
"$ref": "#/$defs/ResponseOutput"
},
"title": "Outputs",
"type": "array"
}
},
"required": [
"model_name",
"outputs"
],
"title": "InferenceResponse",
"type": "object"
}
{
"properties": {
"error": {
"title": "Error",
"type": "string"
}
},
"required": [
"error"
],
"title": "MetadataModelErrorResponse",
"type": "object"
}
{
"$defs": {
"Datatype": {
"enum": [
"BOOL",
"UINT8",
"UINT16",
"UINT32",
"UINT64",
"INT8",
"INT16",
"INT32",
"INT64",
"FP16",
"FP32",
"FP64",
"BYTES"
],
"title": "Datatype",
"type": "string"
},
"MetadataTensor": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"datatype": {
"$ref": "#/$defs/Datatype"
},
"shape": {
"items": {
"type": "integer"
},
"title": "Shape",
"type": "array"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
}
},
"required": [
"name",
"datatype",
"shape"
],
"title": "MetadataTensor",
"type": "object"
},
"Parameters": {
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
}
},
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"versions": {
"anyOf": [
{
"items": {
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Versions"
},
"platform": {
"title": "Platform",
"type": "string"
},
"inputs": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/MetadataTensor"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Inputs"
},
"outputs": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/MetadataTensor"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Outputs"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
}
},
"required": [
"name",
"platform"
],
"title": "MetadataModelResponse",
"type": "object"
}
{
"properties": {
"error": {
"title": "Error",
"type": "string"
}
},
"required": [
"error"
],
"title": "MetadataServerErrorResponse",
"type": "object"
}
{
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"version": {
"title": "Version",
"type": "string"
},
"extensions": {
"items": {
"type": "string"
},
"title": "Extensions",
"type": "array"
}
},
"required": [
"name",
"version",
"extensions"
],
"title": "MetadataServerResponse",
"type": "object"
}
{
"$defs": {
"Datatype": {
"enum": [
"BOOL",
"UINT8",
"UINT16",
"UINT32",
"UINT64",
"INT8",
"INT16",
"INT32",
"INT64",
"FP16",
"FP32",
"FP64",
"BYTES"
],
"title": "Datatype",
"type": "string"
},
"Parameters": {
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
}
},
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"datatype": {
"$ref": "#/$defs/Datatype"
},
"shape": {
"items": {
"type": "integer"
},
"title": "Shape",
"type": "array"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
}
},
"required": [
"name",
"datatype",
"shape"
],
"title": "MetadataTensor",
"type": "object"
}
{
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
}
{
"properties": {
"ready": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"title": "Ready"
}
},
"title": "RepositoryIndexRequest",
"type": "object"
}
{
"$defs": {
"RepositoryIndexResponseItem": {
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"version": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Version"
},
"state": {
"$ref": "#/$defs/State"
},
"reason": {
"title": "Reason",
"type": "string"
}
},
"required": [
"name",
"state",
"reason"
],
"title": "RepositoryIndexResponseItem",
"type": "object"
},
"State": {
"enum": [
"UNKNOWN",
"READY",
"UNAVAILABLE",
"LOADING",
"UNLOADING"
],
"title": "State",
"type": "string"
}
},
"items": {
"$ref": "#/$defs/RepositoryIndexResponseItem"
},
"title": "RepositoryIndexResponse",
"type": "array"
}
{
"$defs": {
"State": {
"enum": [
"UNKNOWN",
"READY",
"UNAVAILABLE",
"LOADING",
"UNLOADING"
],
"title": "State",
"type": "string"
}
},
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"version": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Version"
},
"state": {
"$ref": "#/$defs/State"
},
"reason": {
"title": "Reason",
"type": "string"
}
},
"required": [
"name",
"state",
"reason"
],
"title": "RepositoryIndexResponseItem",
"type": "object"
}
{
"properties": {
"error": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Error"
}
},
"title": "RepositoryLoadErrorResponse",
"type": "object"
}
{
"properties": {
"error": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Error"
}
},
"title": "RepositoryUnloadErrorResponse",
"type": "object"
}
{
"$defs": {
"Datatype": {
"enum": [
"BOOL",
"UINT8",
"UINT16",
"UINT32",
"UINT64",
"INT8",
"INT16",
"INT32",
"INT64",
"FP16",
"FP32",
"FP64",
"BYTES"
],
"title": "Datatype",
"type": "string"
},
"Parameters": {
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
},
"TensorData": {
"anyOf": [
{
"items": {},
"type": "array"
},
{}
],
"title": "TensorData"
}
},
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"shape": {
"items": {
"type": "integer"
},
"title": "Shape",
"type": "array"
},
"datatype": {
"$ref": "#/$defs/Datatype"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
},
"data": {
"$ref": "#/$defs/TensorData"
}
},
"required": [
"name",
"shape",
"datatype",
"data"
],
"title": "RequestInput",
"type": "object"
}
{
"$defs": {
"Parameters": {
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
}
},
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
}
},
"required": [
"name"
],
"title": "RequestOutput",
"type": "object"
}
{
"$defs": {
"Datatype": {
"enum": [
"BOOL",
"UINT8",
"UINT16",
"UINT32",
"UINT64",
"INT8",
"INT16",
"INT32",
"INT64",
"FP16",
"FP32",
"FP64",
"BYTES"
],
"title": "Datatype",
"type": "string"
},
"Parameters": {
"additionalProperties": true,
"properties": {
"content_type": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Content Type"
},
"headers": {
"anyOf": [
{
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Headers"
}
},
"title": "Parameters",
"type": "object"
},
"TensorData": {
"anyOf": [
{
"items": {},
"type": "array"
},
{}
],
"title": "TensorData"
}
},
"properties": {
"name": {
"title": "Name",
"type": "string"
},
"shape": {
"items": {
"type": "integer"
},
"title": "Shape",
"type": "array"
},
"datatype": {
"$ref": "#/$defs/Datatype"
},
"parameters": {
"anyOf": [
{
"$ref": "#/$defs/Parameters"
},
{
"type": "null"
}
],
"default": null
},
"data": {
"$ref": "#/$defs/TensorData"
}
},
"required": [
"name",
"shape",
"datatype",
"data"
],
"title": "ResponseOutput",
"type": "object"
}
{
"anyOf": [
{
"items": {},
"type": "array"
},
{}
],
"title": "TensorData"
}