Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
Loading...
import mlserver
from mlserver.types import InferenceRequest, InferenceResponse
class MyCustomRuntime(mlserver.MLModel):
    """Example custom runtime that registers and logs a custom metric."""

    async def load(self) -> bool:
        """Load the model artifact and register ``my_custom_metric``."""
        self._model = load_my_custom_model()
        mlserver.register(
            "my_custom_metric",
            "This is a custom metric example",
        )
        return True

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        """Log a value for ``my_custom_metric`` and run inference."""
        mlserver.log(my_custom_metric=34)
        # TODO: Replace for custom logic to run inference
        prediction = self._model.predict(payload)
        return prediction

# SeldonDeployment using the SKLEARN_SERVER implementation with protocol v2.
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  name: my-model
spec:
  protocol: v2
  predictors:
    - name: default
      graph:
        name: classifier
        implementation: SKLEARN_SERVER
        modelUri: gs://seldon-models/sklearn/iris
kubectl apply -f my-seldondeployment-manifest.yaml
apiVersion: machinelearning.seldon.io/v1
# SeldonDeployment whose `classifier` graph node runs in a custom container image.
kind: SeldonDeployment
metadata:
  name: my-model
spec:
  protocol: v2
  predictors:
    - name: default
      graph:
        name: classifier
      componentSpecs:
        - spec:
            containers:
              - name: classifier
                image: my-custom-server:0.1.0
kubectl apply -f my-seldondeployment-manifest.yaml
apiVersion: serving.kserve.io/v1beta1
# InferenceService using the `sklearn` predictor with protocol version v2.
kind: InferenceService
metadata:
  name: my-model
spec:
  predictor:
    sklearn:
      protocolVersion: v2
      storageUri: gs://seldon-models/sklearn/iris
kubectl apply -f my-inferenceservice-manifest.yaml
apiVersion: serving.kserve.io/v1beta1
# InferenceService running a custom container image, with PROTOCOL set to v2.
kind: InferenceService
metadata:
  name: my-model
spec:
  predictor:
    containers:
      - name: classifier
        image: my-custom-server:0.1.0
        env:
          - name: PROTOCOL
            value: v2
        ports:
          - containerPort: 8080
            protocol: TCP
kubectl apply -f my-inferenceservice-manifest.yaml
# request 1
# Input carrying its own value for the custom parameter.
# NOTE: "custom-param" contains a hyphen, so it cannot be written as a keyword
# argument; it is passed through dict unpacking to keep the key unchanged.
types.RequestInput(
    name="parameters-np",
    shape=[1],
    datatype="BYTES",
    data=[],
    parameters=types.Parameters(
        **{"custom-param": "value-1"},
    ),
)
# request 2
types.RequestInput(
    name="parameters-np",
    shape=[1],
    datatype="BYTES",
    data=[],
    parameters=types.Parameters(
        **{"custom-param": "value-2"},
    ),
)
# Single input of shape [2] whose custom parameter holds both values as a list.
types.RequestInput(
    name="parameters-np",
    shape=[2],
    datatype="BYTES",
    data=[],
    parameters=types.Parameters(
        **{"custom-param": ["value-1", "value-2"]},
    ),
)
# Single output of shape [3, 3] whose `foo` / `bar` parameters are lists.
types.ResponseOutput(
    name="foo",
    datatype="INT32",
    shape=[3, 3],
    data=[1, 2, 3, 4, 5, 6, 7, 8, 9],
    parameters=types.Parameters(
        content_type="np",
        foo=["foo_1", "foo_2"],
        bar=["bar_1", "bar_2", "bar_3"],
    ),
)
# Request 1
types.ResponseOutput(
    name="foo",
    datatype="INT32",
    shape=[1, 3],
    data=[1, 2, 3],
    parameters=types.Parameters(
        content_type="np", foo="foo_1", bar="bar_1"
    ),
)
# Request 2
types.ResponseOutput(
    name="foo",
    datatype="INT32",
    shape=[1, 3],
    data=[4, 5, 6],
    parameters=types.Parameters(
        content_type="np", foo="foo_2", bar="bar_2"
    ),
)
# Request 3 (the `foo` list above has only two entries, so none is set here)
types.ResponseOutput(
    name="foo",
    datatype="INT32",
    shape=[1, 3],
    data=[7, 8, 9],
    parameters=types.Parameters(content_type="np", bar="bar_3"),
)

np.ndarray
mlserver build
settings.json
model-settings.json
mlserver dockerfile
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse
class MyCustomRuntime(MLModel):
    """Skeleton custom runtime: loads a model artifact and serves predictions."""

    async def load(self) -> bool:
        """Load the model artifact and report success."""
        # TODO: Replace for custom logic to load a model artifact
        self._model = load_my_custom_model()
        return True

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        """Run inference on the incoming request."""
        # TODO: Replace for custom logic to run inference
        prediction = self._model.predict(payload)
        return prediction
from mlserver import MLModel
from mlserver.codecs import decode_args
from typing import List

# Needed for the `np.ndarray` return annotation on `predict`.
import numpy as np


class MyCustomRuntime(MLModel):
    """Custom runtime whose `predict` is wrapped with `decode_args`."""

    async def load(self) -> bool:
        """Load the model artifact and report success."""
        # TODO: Replace for custom logic to load a model artifact
        self._model = load_my_custom_model()
        return True

    @decode_args
    async def predict(self, questions: List[str], context: List[str]) -> np.ndarray:
        """Run inference on the `questions` and `context` arguments."""
        # TODO: Replace for custom logic to run inference
        return self._model.predict(questions, context)
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse
class CustomHeadersRuntime(MLModel):
    """Example runtime that reads the headers attached to the request."""

    ...

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        """Print the request headers when the payload carries any."""
        if payload.parameters and payload.parameters.headers:
            # These are all the incoming HTTP headers / gRPC metadata
            print(payload.parameters.headers)
        ...
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse
class CustomHeadersRuntime(MLModel):
...
async def predict(self, payload: InferenceRequest) -> InferenceResponse:
...
return InferenceResponse(
# Include any actual outputs from inference
outputs=[],
parameters=Parameters(headers={"foo": "bar"})
).
└── models
    └── sum-model
        ├── model-settings.json
        ├── models.py
{
  "model": "sum-model",
  "implementation": "models.MyCustomRuntime"
}
.
└── models
    └── sum-model
        ├── environment.tar.gz
        ├── model-settings.json
        ├── models.py
{
  "model": "sum-model",
  "implementation": "models.MyCustomRuntime",
  "parameters": {
    "environment_tarball": "./environment.tar.gz"
  }
}
mlserver build . -t my-custom-server
DOCKER_BUILDKIT=1 docker build . -t my-custom-runtime:0.1.0
output
{
"parameters": {
"content_type": "pd"
},
"inputs": [
{
"name": "First Name",
"datatype": "BYTES",
"parameters": {
"content_type": "str"
},
"shape": [2],
"data": ["Joanne", "Michael"]
},
{
"name": "Age",
"datatype": "INT32",
"shape": [2],
"data": [34, 22]
}
]
}
import pandas as pd
from mlserver.codecs import PandasCodec
dataframe = pd.DataFrame({'First Name': ["Joanne", "Michael"], 'Age': [34, 22]})
inference_request = PandasCodec.encode_request(dataframe)
print(inference_request)
import pandas as pd
import requests
from mlserver.codecs import PandasCodec
from mlserver.types import InferenceResponse

dataframe = pd.DataFrame({'First Name': ["Joanne", "Michael"], 'Age': [34, 22]})
inference_request = PandasCodec.encode_request(dataframe)
# raw_request will be a Python dictionary compatible with `requests`'s `json` kwarg
raw_request = inference_request.dict()
# `requests` requires an explicit scheme; a bare "localhost:8080/..." URL is
# rejected before any request is sent.
response = requests.post("http://localhost:8080/v2/models/foo/infer", json=raw_request)
# raw_response will be a dictionary (loaded from the response's JSON),
# therefore we can pass it as the InferenceResponse constructors' kwargs
raw_response = response.json()
inference_response = InferenceResponse(**raw_response)
import numpy as np
foo = np.array([[1.2, 2.3], [np.NaN, 4.5]]){
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "np"
},
"data": [1.2, 2.3, null, 4.5],
"datatype": "FP64",
"shape": [2, 2]
}
]
}
{
"parameters": {
"content_type": "pd"
},
"inputs": [
{
"name": "First Name",
"datatype": "BYTES",
"parameters": {
"content_type": "str"
},
"shape": [-1]
},
{
"name": "Age",
"datatype": "INT32",
"shape": [-1]
}
]
}
import numpy as np
foo = np.array([[1, 2], [3, 4]])
{
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "np"
},
"data": [1, 2, 3, 4],
"datatype": "INT32",
"shape": [2, 2]
}
]
}
from mlserver.codecs import NumpyRequestCodec
# Encode an entire V2 request
inference_request = NumpyRequestCodec.encode_request(foo)
from mlserver.types import InferenceRequest
from mlserver.codecs import NumpyCodec
# We can use the `NumpyCodec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
NumpyCodec.encode_input("foo", foo)
]
)
{
"parameters": {
"content_type": "pd"
},
"inputs": [
{
"name": "A",
"data": ["a1", "a2", "a3", "a4"],
"datatype": "BYTES",
"shape": [4]
},
{
"name": "B",
"data": ["b1", "b2", "b3", "b4"],
"datatype": "BYTES",
"shape": [4]
},
{
"name": "C",
"data": ["c1", "c2", "c3", "c4"],
"datatype": "BYTES",
"shape": [4]
}
]
}
import pandas as pd
from mlserver.codecs import PandasCodec
foo = pd.DataFrame({
"A": ["a1", "a2", "a3", "a4"],
"B": ["b1", "b2", "b3", "b4"],
"C": ["c1", "c2", "c3", "c4"]
})
inference_request = PandasCodec.encode_request(foo)
foo = ["bar", "bar2"]
{
"parameters": {
"content_type": "str"
},
"inputs": [
{
"name": "foo",
"data": ["bar", "bar2"],
"datatype": "BYTES",
"shape": [2]
}
]
}
from mlserver.codecs.string import StringRequestCodec
# Encode an entire V2 request
inference_request = StringRequestCodec.encode_request(foo, use_bytes=False)
from mlserver.types import InferenceRequest
from mlserver.codecs import StringCodec
# We can use the `StringCodec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
StringCodec.encode_input("foo", foo, use_bytes=False)
]
)
foo = b"Python is fun"
{
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "base64"
},
"data": ["UHl0aG9uIGlzIGZ1bg=="],
"datatype": "BYTES",
"shape": [1]
}
]
}
from mlserver.types import InferenceRequest
from mlserver.codecs import Base64Codec
# We can use the `Base64Codec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
Base64Codec.encode_input("foo", foo, use_bytes=False)
]
)
import datetime
foo = datetime.datetime(2022, 1, 11, 11, 0, 0)
{
"inputs": [
{
"name": "foo",
"parameters": {
"content_type": "datetime"
},
"data": ["2022-01-11T11:00:00"],
"datatype": "BYTES",
"shape": [1]
}
]
}
from mlserver.types import InferenceRequest
from mlserver.codecs import DatetimeCodec
# We can use the `DatetimeCodec` to encode a single input head with name `foo`
# within a larger request
inference_request = InferenceRequest(
inputs=[
DatetimeCodec.encode_input("foo", foo, use_bytes=False)
]
)