Note: The Seldon CLI allows you to view information about underlying Seldon resources and make changes to them through the scheduler in non-Kubernetes environments. However, it cannot modify underlying manifests within a Kubernetes cluster. Therefore, using the Seldon CLI for control plane operations in a Kubernetes environment is not recommended. For more details, see Seldon CLI.
import os
os.environ["NAMESPACE"] = "seldon-mesh"
MESH_IP=!kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
os.environ['MESH_IP'] = MESH_IP
MESH_IP
'172.18.255.2'
cat ./models/sklearn-iris-gs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn"
requirements:
- sklearn
memory: 100Ki
kubectl create -f ./models/sklearn-iris-gs.yaml -n ${NAMESPACE}
model.mlops.seldon.io/iris created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/iris condition met
kubectl get model iris -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M .
{
"conditions": [
{
"lastTransitionTime": "2023-06-30T10:01:52Z",
"message": "ModelAvailable",
"status": "True",
"type": "ModelReady"
},
{
"lastTransitionTime": "2023-06-30T10:01:52Z",
"status": "True",
"type": "Ready"
}
],
"replicas": 1
}
seldon model infer iris --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "7fd401e1-3dce-46f5-9668-902aea652b89",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
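For reference, the same REST call can be made directly from Python. A minimal sketch using the requests library, assuming MESH_IP is set as above and the Seldon-Model header is used for routing (as the CLI does):
import os
import requests

# Open Inference Protocol (V2) REST inference call against the mesh.
url = f"http://{os.environ['MESH_IP']}/v2/models/iris/infer"
payload = {"inputs": [{"name": "predict", "shape": [1, 4],
                       "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}
r = requests.post(url, json=payload, headers={"Seldon-Model": "iris"})
print(r.json()["outputs"][0]["data"])  # e.g. [2]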
seldon model infer iris --inference-mode grpc --inference-host ${MESH_IP}:80 \
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' | jq -M .
{
"modelName": "iris_1",
"modelVersion": "1",
"outputs": [
{
"name": "predict",
"datatype": "INT64",
"shape": [
"1",
"1"
],
"parameters": {
"content_type": {
"stringParam": "np"
}
},
"contents": {
"int64Contents": [
"2"
]
}
}
]
}
kubectl get server mlserver -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M .
{
"conditions": [
{
"lastTransitionTime": "2023-06-30T09:59:12Z",
"status": "True",
"type": "Ready"
},
{
"lastTransitionTime": "2023-06-30T09:59:12Z",
"reason": "StatefulSet replicas matches desired replicas",
"status": "True",
"type": "StatefulSetReady"
}
],
"loadedModels": 1,
"replicas": 1,
"selector": "seldon-server-name=mlserver"
}
kubectl delete -f ./models/sklearn-iris-gs.yaml -n ${NAMESPACE}
model.mlops.seldon.io "iris" deleted
cat ./models/sklearn1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "gs://seldon-models/mlserver/iris"
requirements:
- sklearn
cat ./models/sklearn2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris2
spec:
storageUri: "gs://seldon-models/mlserver/iris"
requirements:
- sklearn
kubectl create -f ./models/sklearn1.yaml -n ${NAMESPACE}
kubectl create -f ./models/sklearn2.yaml -n ${NAMESPACE}
model.mlops.seldon.io/iris created
model.mlops.seldon.io/iris2 created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/iris condition met
model.mlops.seldon.io/iris2 condition met
cat ./experiments/ab-default-model.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
name: experiment-sample
spec:
default: iris
candidates:
- name: iris
weight: 50
- name: iris2
weight: 50
kubectl create -f ./experiments/ab-default-model.yaml -n ${NAMESPACE}
experiment.mlops.seldon.io/experiment-sample created
kubectl wait --for condition=ready --timeout=300s experiment --all -n ${NAMESPACE}
experiment.mlops.seldon.io/experiment-sample condition met
seldon model infer --inference-host ${MESH_IP}:80 -i 50 iris \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::29 :iris_1::21]
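The map shows per-model request counts: the 50 calls split roughly evenly between iris and iris2, as the experiment weights specify. A minimal sketch of counting routes client-side with Python, assuming the x-seldon-route response header shown later in this document:
import collections
import os
import requests

url = f"http://{os.environ['MESH_IP']}/v2/models/iris/infer"
payload = {"inputs": [{"name": "predict", "shape": [1, 4],
                       "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}
counts = collections.Counter()
for _ in range(50):
    r = requests.post(url, json=payload, headers={"Seldon-Model": "iris"})
    counts[r.headers.get("x-seldon-route")] += 1
print(counts)  # roughly even split between :iris_1: and :iris2_1: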
kubectl delete -f ./experiments/ab-default-model.yaml -n ${NAMESPACE}
kubectl delete -f ./models/sklearn1.yaml -n ${NAMESPACE}
kubectl delete -f ./models/sklearn2.yaml -n ${NAMESPACE}
experiment.mlops.seldon.io "experiment-sample" deleted
model.mlops.seldon.io "iris" deleted
model.mlops.seldon.io "iris2" deleted
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
kubectl create -f ./models/tfsimple1.yaml -n ${NAMESPACE}
kubectl create -f ./models/tfsimple2.yaml -n ${NAMESPACE}
model.mlops.seldon.io/tfsimple1 created
model.mlops.seldon.io/tfsimple2 created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/tfsimple1 condition met
model.mlops.seldon.io/tfsimple2 condition met
cat ./pipelines/tfsimples.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimples
spec:
steps:
- name: tfsimple1
- name: tfsimple2
inputs:
- tfsimple1
tensorMap:
tfsimple1.outputs.OUTPUT0: INPUT0
tfsimple1.outputs.OUTPUT1: INPUT1
output:
steps:
- tfsimple2
kubectl create -f ./pipelines/tfsimples.yaml -n ${NAMESPACE}
pipeline.mlops.seldon.io/tfsimples created
kubectl wait --for condition=ready --timeout=300s pipeline --all -n ${NAMESPACE}
pipeline.mlops.seldon.io/tfsimples condition met
seldon pipeline infer tfsimples --inference-mode grpc --inference-host ${MESH_IP}:80 \
'{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
}
]
}
kubectl delete -f ./pipelines/tfsimples.yaml -n ${NAMESPACE}
pipeline.mlops.seldon.io "tfsimples" deleted
kubectl delete -f ./models/tfsimple1.yaml -n ${NAMESPACE}
kubectl delete -f ./models/tfsimple2.yaml -n ${NAMESPACE}
model.mlops.seldon.io "tfsimple1" deleted
model.mlops.seldon.io "tfsimple2" deleted
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
cat ./models/tfsimple3.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple3
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
kubectl create -f ./models/tfsimple1.yaml -n ${NAMESPACE}
kubectl create -f ./models/tfsimple2.yaml -n ${NAMESPACE}
kubectl create -f ./models/tfsimple3.yaml -n ${NAMESPACE}
model.mlops.seldon.io/tfsimple1 created
model.mlops.seldon.io/tfsimple2 created
model.mlops.seldon.io/tfsimple3 created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/tfsimple1 condition met
model.mlops.seldon.io/tfsimple2 condition met
model.mlops.seldon.io/tfsimple3 condition met
cat ./pipelines/tfsimples-join.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: join
spec:
steps:
- name: tfsimple1
- name: tfsimple2
- name: tfsimple3
inputs:
- tfsimple1.outputs.OUTPUT0
- tfsimple2.outputs.OUTPUT1
tensorMap:
tfsimple1.outputs.OUTPUT0: INPUT0
tfsimple2.outputs.OUTPUT1: INPUT1
output:
steps:
- tfsimple3
kubectl create -f ./pipelines/tfsimples-join.yaml -n ${NAMESPACE}
pipeline.mlops.seldon.io/join created
kubectl wait --for condition=ready --timeout=300s pipeline --all -n ${NAMESPACE}
pipeline.mlops.seldon.io/join condition met
seldon pipeline infer join --inference-mode grpc --inference-host ${MESH_IP}:80 \
'{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
}
]
}
kubectl delete -f ./pipelines/tfsimples-join.yaml -n ${NAMESPACE}
pipeline.mlops.seldon.io "join" deleted
kubectl delete -f ./models/tfsimple1.yaml -n ${NAMESPACE}
kubectl delete -f ./models/tfsimple2.yaml -n ${NAMESPACE}
kubectl delete -f ./models/tfsimple3.yaml -n ${NAMESPACE}
model.mlops.seldon.io "tfsimple1" deleted
model.mlops.seldon.io "tfsimple2" deleted
model.mlops.seldon.io "tfsimple3" deleted
cat ./models/income.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: income
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/classifier"
requirements:
- sklearn
kubectl create -f ./models/income.yaml -n ${NAMESPACE}
model.mlops.seldon.io/income created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/income condition met
kubectl get model income -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M .
{
"conditions": [
{
"lastTransitionTime": "2023-06-30T10:02:53Z",
"message": "ModelAvailable",
"status": "True",
"type": "ModelReady"
},
{
"lastTransitionTime": "2023-06-30T10:02:53Z",
"status": "True",
"type": "Ready"
}
],
"replicas": 1
}
seldon model infer income --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 12], "datatype": "FP32", "data": [[47,4,1,1,1,3,4,1,0,0,40,9]]}]}'
{
"model_name": "income_1",
"model_version": "1",
"id": "f52acfeb-0f22-429f-8c7a-785ef17cd470",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
0
]
}
]
}
cat ./models/income-explainer.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: income-explainer
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/explainer"
explainer:
type: anchor_tabular
modelRef: income
kubectl create -f ./models/income-explainer.yaml -n ${NAMESPACE}
model.mlops.seldon.io/income-explainer created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/income condition met
model.mlops.seldon.io/income-explainer condition met
kubectl get model income-explainer -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M .
{
"conditions": [
{
"lastTransitionTime": "2023-06-30T10:03:07Z",
"message": "ModelAvailable",
"status": "True",
"type": "ModelReady"
},
{
"lastTransitionTime": "2023-06-30T10:03:07Z",
"status": "True",
"type": "Ready"
}
],
"replicas": 1
}
seldon model infer income-explainer --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 12], "datatype": "FP32", "data": [[47,4,1,1,1,3,4,1,0,0,40,9]]}]}'
{
"model_name": "income-explainer_1",
"model_version": "1",
"id": "3028a904-9bb3-42d7-bdb7-6e6993323ed7",
"parameters": {},
"outputs": [
{
"name": "explanation",
"shape": [
1,
1
],
"datatype": "BYTES",
"parameters": {
"content_type": "str"
},
"data": [
"{\"meta\": {\"name\": \"AnchorTabular\", \"type\": [\"blackbox\"], \"explanations\": [\"local\"], \"params\": {\"seed\": 1, \"disc_perc\": [25, 50, 75], \"threshold\": 0.95, \"delta\": 0.1, \"tau\": 0.15, \"batch_size\": 100, \"coverage_samples\": 10000, \"beam_size\": 1, \"stop_on_first\": false, \"max_anchor_size\": null, \"min_samples_start\": 100, \"n_covered_ex\": 10, \"binary_cache_size\": 10000, \"cache_margin\": 1000, \"verbose\": false, \"verbose_every\": 1, \"kwargs\": {}}, \"version\": \"0.9.1\"}, \"data\": {\"anchor\": [\"Marital Status = Never-Married\", \"Relationship = Own-child\"], \"precision\": 0.9705882352941176, \"coverage\": 0.0699, \"raw\": {\"feature\": [3, 5], \"mean\": [0.8094218415417559, 0.9705882352941176], \"precision\": [0.8094218415417559, 0.9705882352941176], \"coverage\": [0.3036, 0.0699], \"examples\": [{\"covered_true\": [[23, 4, 1, 1, 5, 1, 4, 0, 0, 0, 40, 9], [44, 4, 1, 1, 8, 0, 4, 1, 0, 0, 40, 9], [60, 2, 5, 1, 5, 1, 4, 0, 0, 0, 25, 9], [52, 4, 1, 1, 2, 0, 4, 1, 0, 0, 50, 9], [66, 6, 1, 1, 8, 0, 4, 1, 0, 0, 8, 9], [52, 4, 1, 1, 8, 0, 4, 1, 0, 0, 40, 9], [27, 4, 1, 1, 1, 1, 4, 1, 0, 0, 35, 9], [48, 4, 1, 1, 6, 0, 4, 1, 0, 0, 45, 9], [45, 6, 1, 1, 5, 0, 4, 1, 0, 0, 40, 9], [40, 2, 1, 1, 5, 4, 4, 0, 0, 0, 45, 9]], \"covered_false\": [[42, 6, 5, 1, 6, 0, 4, 1, 99999, 0, 80, 9], [29, 4, 1, 1, 8, 1, 4, 1, 0, 0, 50, 9], [49, 4, 1, 1, 8, 0, 4, 1, 0, 0, 50, 9], [34, 4, 5, 1, 8, 0, 4, 1, 0, 0, 40, 9], [38, 2, 1, 1, 5, 5, 4, 0, 7688, 0, 40, 9], [45, 7, 5, 1, 5, 0, 4, 1, 0, 0, 45, 9], [43, 4, 2, 1, 5, 0, 4, 1, 99999, 0, 55, 9], [47, 4, 5, 1, 6, 1, 4, 1, 27828, 0, 60, 9], [42, 6, 1, 1, 2, 0, 4, 1, 15024, 0, 60, 9], [56, 4, 1, 1, 6, 0, 2, 1, 7688, 0, 45, 9]], \"uncovered_true\": [], \"uncovered_false\": []}, {\"covered_true\": [[23, 4, 1, 1, 4, 3, 4, 1, 0, 0, 40, 9], [50, 2, 5, 1, 8, 3, 2, 1, 0, 0, 45, 9], [24, 4, 1, 1, 7, 3, 4, 0, 0, 0, 40, 3], [62, 4, 5, 1, 5, 3, 4, 1, 0, 0, 40, 9], [22, 4, 1, 1, 5, 3, 4, 1, 0, 0, 40, 9], [44, 4, 1, 1, 1, 3, 4, 0, 0, 0, 40, 9], [46, 4, 1, 1, 4, 3, 4, 1, 0, 0, 40, 9], [44, 4, 1, 1, 2, 3, 4, 1, 0, 0, 40, 9], [25, 4, 5, 1, 5, 3, 4, 1, 0, 0, 35, 9], [32, 2, 5, 1, 5, 3, 4, 1, 0, 0, 50, 9]], \"covered_false\": [[57, 5, 5, 1, 6, 3, 4, 1, 99999, 0, 40, 9], [44, 4, 1, 1, 8, 3, 4, 1, 7688, 0, 60, 9], [43, 2, 5, 1, 4, 3, 2, 0, 8614, 0, 47, 9], [56, 5, 2, 1, 5, 3, 4, 1, 99999, 0, 70, 9]], \"uncovered_true\": [], \"uncovered_false\": []}], \"all_precision\": 0, \"num_preds\": 1000000, \"success\": true, \"names\": [\"Marital Status = Never-Married\", \"Relationship = Own-child\"], \"prediction\": [0], \"instance\": [47.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 40.0, 9.0], \"instances\": [[47.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 40.0, 9.0]]}}}"
]
}
]
}
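The explanation is returned as a JSON string inside a BYTES tensor. A minimal sketch of extracting the anchor rules client-side with Python, assuming the response shape shown above:
import json
import os
import requests

url = f"http://{os.environ['MESH_IP']}/v2/models/income-explainer/infer"
payload = {"inputs": [{"name": "predict", "shape": [1, 12], "datatype": "FP32",
                       "data": [[47, 4, 1, 1, 1, 3, 4, 1, 0, 0, 40, 9]]}]}
resp = requests.post(url, json=payload,
                     headers={"Seldon-Model": "income-explainer"}).json()
# The first data element of the BYTES output holds the Alibi explanation JSON.
explanation = json.loads(resp["outputs"][0]["data"][0])
print(explanation["data"]["anchor"])     # e.g. ["Marital Status = Never-Married", ...]
print(explanation["data"]["precision"])  # e.g. 0.97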
kubectl delete -f ./models/income.yaml -n ${NAMESPACE}
kubectl delete -f ./models/income-explainer.yaml -n ${NAMESPACE}
model.mlops.seldon.io "income" deleted
model.mlops.seldon.io "income-explainer" deleted
cat ./models/hf-text-gen.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: text-gen
spec:
storageUri: "gs://seldon-models/mlserver/huggingface/text-generation"
requirements:
- huggingface
Load the model
seldon model load -f ./models/hf-text-gen.yaml
{}
seldon model status text-gen -w ModelAvailable | jq -M .
{}
seldon model infer text-gen \
'{"inputs": [{"name": "args","shape": [1],"datatype": "BYTES","data": ["Once upon a time in a galaxy far away"]}]}'
{
"model_name": "text-gen_1",
"model_version": "1",
"id": "121ff5f4-1d4a-46d0-9a5e-4cd3b11040df",
"parameters": {},
"outputs": [
{
"name": "output",
"shape": [
1,
1
],
"datatype": "BYTES",
"parameters": {
"content_type": "hg_jsonlist"
},
"data": [
"{\"generated_text\": \"Once upon a time in a galaxy far away, the planet is full of strange little creatures. A very strange combination of creatures in that universe, that is. A strange combination of creatures in that universe, that is. A kind of creature that is\"}"
]
}
]
}
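The bytes_contents value below is the base64 encoding of the prompt "Once upon a time in a galaxy far away\n"; the response's bytesContents are then decoded back to JSON.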
res = !seldon model infer text-gen --inference-mode grpc \
'{"inputs":[{"name":"args","contents":{"bytes_contents":["T25jZSB1cG9uIGEgdGltZSBpbiBhIGdhbGF4eSBmYXIgYXdheQo="]},"datatype":"BYTES","shape":[1]}]}'
import json
import base64
r = json.loads(res[0])
base64.b64decode(r["outputs"][0]["contents"]["bytesContents"][0])
b'{"generated_text": "Once upon a time in a galaxy far away\\n\\nThe Universe is a big and massive place. How can you feel any of this? Your body doesn\'t make sense if the Universe is in full swing \\u2014 you don\'t have to remember whether the"}'
Unload the model
seldon model unload text-gen
cat ./models/hf-text-gen-custom-tiny-stories.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: custom-tiny-stories-text-gen
spec:
storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/huggingface-text-gen-custom-tiny-stories"
requirements:
- huggingface
Load the model
seldon model load -f ./models/hf-text-gen-custom-tiny-stories.yaml
{}
seldon model status custom-tiny-stories-text-gen -w ModelAvailable | jq -M .
{}
seldon model infer custom-tiny-stories-text-gen \
'{"inputs": [{"name": "args","shape": [1],"datatype": "BYTES","data": ["Once upon a time in a galaxy far away"]}]}'
{
"model_name": "custom-tiny-stories-text-gen_1",
"model_version": "1",
"id": "d0fce59c-76e2-4f81-9711-1c93d08bcbf9",
"parameters": {},
"outputs": [
{
"name": "output",
"shape": [
1,
1
],
"datatype": "BYTES",
"parameters": {
"content_type": "hg_jsonlist"
},
"data": [
"{\"generated_text\": \"Once upon a time in a galaxy far away. It was a very special place to live.\\n\"}"
]
}
]
}
res = !seldon model infer custom-tiny-stories-text-gen --inference-mode grpc \
'{"inputs":[{"name":"args","contents":{"bytes_contents":["T25jZSB1cG9uIGEgdGltZSBpbiBhIGdhbGF4eSBmYXIgYXdheQo="]},"datatype":"BYTES","shape":[1]}]}'
import json
import base64
r = json.loads(res[0])
base64.b64decode(r["outputs"][0]["contents"]["bytesContents"][0])
b'{"generated_text": "Once upon a time in a galaxy far away\\nOne night, a little girl named Lily went to"}'
Unload the model
seldon model unload custom-tiny-stories-text-gen
As a next step, why not try running a larger-scale model? You can find a definition for one in ./models/hf-text-gen-custom-gpt2.yaml. However, you may need to request and allocate more memory!
This notebook illustrates a series of Pipelines showing different ways of combining flows of data and conditional logic. We assume you have Seldon Core 2 running locally.
gs://seldon-models/triton/simple is an example Triton TensorFlow model that takes two inputs, INPUT0 and INPUT1, and adds them to produce OUTPUT0, while also subtracting INPUT1 from INPUT0 to produce OUTPUT1. See here for the original source code and license. Other models can be found at https://github.com/SeldonIO/triton-python-examples
Chain the output of one model into the next. This also shows changing the tensor names via tensorMap to conform to the expected input tensor names of the second model.
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
The pipeline below chains the output of tfsimple1 into tfsimple2. As these models have compatible shapes and data types, this can be done. However, the output tensor names from tfsimple1 need to be renamed to match the input tensor names for tfsimple2. We do this with the tensorMap feature. The output of the Pipeline is the output from tfsimple2.
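A quick numpy check of what the chained pipeline should produce, assuming the pure elementwise add/subtract semantics of the simple model described above:
import numpy as np

INPUT0 = np.arange(1, 17, dtype=np.int32)
INPUT1 = np.arange(1, 17, dtype=np.int32)

# tfsimple1: add and subtract.
out0 = INPUT0 + INPUT1  # 2, 4, ..., 32
out1 = INPUT0 - INPUT1  # 0, 0, ..., 0

# tfsimple2 consumes the renamed tensors (OUTPUT0 -> INPUT0, OUTPUT1 -> INPUT1).
print(out0 + out1)  # OUTPUT0 = 2, 4, ..., 32
print(out0 - out1)  # OUTPUT1 = 2, 4, ..., 32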
cat ./pipelines/tfsimples.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimples
spec:
steps:
- name: tfsimple1
- name: tfsimple2
inputs:
- tfsimple1
tensorMap:
tfsimple1.outputs.OUTPUT0: INPUT0
tfsimple1.outputs.OUTPUT1: INPUT1
output:
steps:
- tfsimple2
seldon pipeline load -f ./pipelines/tfsimples.yaml
seldon pipeline status tfsimples -w PipelineReady | jq -M .
{
"pipelineName": "tfsimples",
"versions": [
{
"pipeline": {
"name": "tfsimples",
"uid": "ciep26qi8ufs73flaiqg",
"version": 2,
"steps": [
{
"name": "tfsimple1"
},
{
"name": "tfsimple2",
"inputs": [
"tfsimple1.outputs"
],
"tensorMap": {
"tfsimple1.outputs.OUTPUT0": "INPUT0",
"tfsimple1.outputs.OUTPUT1": "INPUT1"
}
}
],
"output": {
"steps": [
"tfsimple2.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 2,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T14:11:40.101677847Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimples \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline infer tfsimples --inference-mode grpc \
'{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
}
]
}
We use the Seldon CLI's pipeline inspect feature to look at the data for all steps of the pipeline for the last data item passed through the pipeline (the default). This can be useful for debugging.
seldon pipeline inspect tfsimples
seldon.default.model.tfsimple1.inputs ciep298fh5ss73dpdir0 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.model.tfsimple1.outputs ciep298fh5ss73dpdir0 {"modelName":"tfsimple1_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon.default.model.tfsimple2.inputs ciep298fh5ss73dpdir0 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.model.tfsimple2.outputs ciep298fh5ss73dpdir0 {"modelName":"tfsimple2_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon.default.pipeline.tfsimples.inputs ciep298fh5ss73dpdir0 {"modelName":"tfsimples", "inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.pipeline.tfsimples.outputs ciep298fh5ss73dpdir0 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
Next, we get the output as JSON and use the jq tool to extract just one value.
seldon pipeline inspect tfsimples --format json | jq -M .topics[0].msgs[0].value
{
"inputs": [
{
"name": "INPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
]
}
},
{
"name": "INPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
]
}
}
]
}
seldon pipeline unload tfsimples
seldon model unload tfsimple1
seldon model unload tfsimple2
Chain the output of one model into the next. This shows how a step can combine both the inputs and outputs of a previous step.
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
cat ./pipelines/tfsimples-input.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimples-input
spec:
steps:
- name: tfsimple1
- name: tfsimple2
inputs:
- tfsimple1.inputs.INPUT0
- tfsimple1.outputs.OUTPUT1
tensorMap:
tfsimple1.outputs.OUTPUT1: INPUT1
output:
steps:
- tfsimple2
seldon pipeline load -f ./pipelines/tfsimples-input.yaml
seldon pipeline status tfsimples-input -w PipelineReady | jq -M .
{
"pipelineName": "tfsimples-input",
"versions": [
{
"pipeline": {
"name": "tfsimples-input",
"uid": "ciep2fii8ufs73flair0",
"version": 1,
"steps": [
{
"name": "tfsimple1"
},
{
"name": "tfsimple2",
"inputs": [
"tfsimple1.inputs.INPUT0",
"tfsimple1.outputs.OUTPUT1"
],
"tensorMap": {
"tfsimple1.outputs.OUTPUT1": "INPUT1"
}
}
],
"output": {
"steps": [
"tfsimple2.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T14:12:14.711416101Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimples-input \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline infer tfsimples-input --inference-mode grpc \
'{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16
]
}
}
]
}
seldon pipeline unload tfsimples-input
seldon model unload tfsimple1
seldon model unload tfsimple2
Join two flows of data from two models as input to a third model. This shows how individual flows of data can be combined.
cat ./models/tfsimple1.yaml
echo "---"
cat ./models/tfsimple2.yaml
echo "---"
cat ./models/tfsimple3.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple3
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
seldon model load -f ./models/tfsimple3.yaml
{}
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
seldon model status tfsimple3 -w ModelAvailable | jq -M .
{}
{}
{}
In the pipeline below, for the input to tfsimple3 we join one output tensor from each of the two previous models tfsimple1 and tfsimple2. We need to use the tensorMap feature to rename each output tensor to one of the expected input tensors of the tfsimple3 model.
cat ./pipelines/tfsimples-join.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: join
spec:
steps:
- name: tfsimple1
- name: tfsimple2
- name: tfsimple3
inputs:
- tfsimple1.outputs.OUTPUT0
- tfsimple2.outputs.OUTPUT1
tensorMap:
tfsimple1.outputs.OUTPUT0: INPUT0
tfsimple2.outputs.OUTPUT1: INPUT1
output:
steps:
- tfsimple3
seldon pipeline load -f ./pipelines/tfsimples-join.yaml
seldon pipeline status join -w PipelineReady | jq -M .
{
"pipelineName": "join",
"versions": [
{
"pipeline": {
"name": "join",
"uid": "ciep2k2i8ufs73flairg",
"version": 1,
"steps": [
{
"name": "tfsimple1"
},
{
"name": "tfsimple2"
},
{
"name": "tfsimple3",
"inputs": [
"tfsimple1.outputs.OUTPUT0",
"tfsimple2.outputs.OUTPUT1"
],
"tensorMap": {
"tfsimple1.outputs.OUTPUT0": "INPUT0",
"tfsimple2.outputs.OUTPUT1": "INPUT1"
}
}
],
"output": {
"steps": [
"tfsimple3.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T14:12:32.938603415Z",
"modelsReady": true
}
}
]
}
The outputs are the sequence "2,4,6...", which conforms to the logic of this model (addition and subtraction) when fed the outputs of the first two models.
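To see why, trace the arithmetic; a quick numpy check under the same add/subtract semantics:
import numpy as np

x = np.arange(1, 17, dtype=np.int32)  # both pipeline inputs are 1..16
t1_out0 = x + x  # tfsimple1 OUTPUT0 = 2, 4, ..., 32
t2_out1 = x - x  # tfsimple2 OUTPUT1 = 0, 0, ..., 0
# tfsimple3 receives these as INPUT0/INPUT1 via the tensorMap:
print(t1_out0 + t2_out1)  # OUTPUT0 = 2, 4, ..., 32
print(t1_out0 - t2_out1)  # OUTPUT1 = 2, 4, ..., 32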
seldon pipeline infer join --inference-mode grpc \
'{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
}
]
}
seldon pipeline unload join
seldon model unload tfsimple1
seldon model unload tfsimple2
seldon model unload tfsimple3
Shows conditional data flows: one of two models is run based on the output tensors from the first.
cat ./models/conditional.yaml
echo "---"
cat ./models/add10.yaml
echo "---"
cat ./models/mul10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: conditional
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/conditional"
requirements:
- triton
- python
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: add10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/add10"
requirements:
- triton
- python
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: mul10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mul10"
requirements:
- triton
- python
seldon model load -f ./models/conditional.yaml
seldon model load -f ./models/add10.yaml
seldon model load -f ./models/mul10.yaml
{}
{}
{}
seldon model status conditional -w ModelAvailable | jq -M .
seldon model status add10 -w ModelAvailable | jq -M .
seldon model status mul10 -w ModelAvailable | jq -M .
{}
{}
{}
Here we assume the conditional model can output two tensors, OUTPUT0 and OUTPUT1, but it only outputs the former if the CHOICE input tensor is set to 0; otherwise it outputs tensor OUTPUT1. By this means only one of the two downstream models will receive data and run. The output steps perform an any join from both models, and whichever data appears first will be sent as the pipeline output. As only one of the two models add10 and mul10 runs in this case, we will receive that model's output.
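For intuition, a minimal sketch of what such conditional logic could look like in Triton's Python backend (the actual model code lives in the storage bucket above; the structure here is an illustrative assumption):
import numpy as np
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            choice = pb_utils.get_input_tensor_by_name(request, "CHOICE").as_numpy()
            in0 = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            in1 = pb_utils.get_input_tensor_by_name(request, "INPUT1").as_numpy()
            # Emit only one of the two optional outputs; downstream steps that
            # depend on the missing tensor never run.
            if choice[0] == 0:
                out = pb_utils.Tensor("OUTPUT0", in0.astype(np.float32))
            else:
                out = pb_utils.Tensor("OUTPUT1", in1.astype(np.float32))
            responses.append(pb_utils.InferenceResponse(output_tensors=[out]))
        return responses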
cat ./pipelines/conditional.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-conditional
spec:
steps:
- name: conditional
- name: mul10
inputs:
- conditional.outputs.OUTPUT0
tensorMap:
conditional.outputs.OUTPUT0: INPUT
- name: add10
inputs:
- conditional.outputs.OUTPUT1
tensorMap:
conditional.outputs.OUTPUT1: INPUT
output:
steps:
- mul10
- add10
stepsJoin: any
seldon pipeline load -f ./pipelines/conditional.yaml
seldon pipeline status tfsimple-conditional -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple-conditional",
"versions": [
{
"pipeline": {
"name": "tfsimple-conditional",
"uid": "ciepga2i8ufs73flais0",
"version": 1,
"steps": [
{
"name": "add10",
"inputs": [
"conditional.outputs.OUTPUT1"
],
"tensorMap": {
"conditional.outputs.OUTPUT1": "INPUT"
}
},
{
"name": "conditional"
},
{
"name": "mul10",
"inputs": [
"conditional.outputs.OUTPUT0"
],
"tensorMap": {
"conditional.outputs.OUTPUT0": "INPUT"
}
}
],
"output": {
"steps": [
"mul10.outputs",
"add10.outputs"
],
"stepsJoin": "ANY"
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T14:41:45.133142725Z",
"modelsReady": true
}
}
]
}
The mul10 model will run as the CHOICE tensor is set to 0.
seldon pipeline infer tfsimple-conditional --inference-mode grpc \
'{"model_name":"conditional","inputs":[{"name":"CHOICE","contents":{"int_contents":[0]},"datatype":"INT32","shape":[1]},{"name":"INPUT0","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]},{"name":"INPUT1","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
10,
20,
30,
40
]
}
}
]
}
The add10 model will run as the CHOICE tensor is not set to 0.
seldon pipeline infer tfsimple-conditional --inference-mode grpc \
'{"model_name":"conditional","inputs":[{"name":"CHOICE","contents":{"int_contents":[1]},"datatype":"INT32","shape":[1]},{"name":"INPUT0","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]},{"name":"INPUT1","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
11,
12,
13,
14
]
}
}
]
}
seldon pipeline unload tfsimple-conditional
seldon model unload conditional
seldon model unload add10
seldon model unload mul10
Access to individual tensors in pipeline inputs
cat ./models/mul10.yaml
echo "---"
cat ./models/add10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: mul10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mul10"
requirements:
- triton
- python
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: add10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/add10"
requirements:
- triton
- python
seldon model load -f ./models/mul10.yaml
seldon model load -f ./models/add10.yaml
{}
{}
seldon model status mul10 -w ModelAvailable | jq -M .
seldon model status add10 -w ModelAvailable | jq -M .
{}
{}
This pipeline shows how we can access pipeline inputs INPUT0 and INPUT1 from different steps.
cat ./pipelines/pipeline-inputs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: pipeline-inputs
spec:
steps:
- name: mul10
inputs:
- pipeline-inputs.inputs.INPUT0
tensorMap:
pipeline-inputs.inputs.INPUT0: INPUT
- name: add10
inputs:
- pipeline-inputs.inputs.INPUT1
tensorMap:
pipeline-inputs.inputs.INPUT1: INPUT
output:
steps:
- mul10
- add10
seldon pipeline load -f ./pipelines/pipeline-inputs.yaml
seldon pipeline status pipeline-inputs -w PipelineReady | jq -M .
{
"pipelineName": "pipeline-inputs",
"versions": [
{
"pipeline": {
"name": "pipeline-inputs",
"uid": "ciepgeqi8ufs73flaisg",
"version": 1,
"steps": [
{
"name": "add10",
"inputs": [
"pipeline-inputs.inputs.INPUT1"
],
"tensorMap": {
"pipeline-inputs.inputs.INPUT1": "INPUT"
}
},
{
"name": "mul10",
"inputs": [
"pipeline-inputs.inputs.INPUT0"
],
"tensorMap": {
"pipeline-inputs.inputs.INPUT0": "INPUT"
}
}
],
"output": {
"steps": [
"mul10.outputs",
"add10.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T14:42:04.202598715Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer pipeline-inputs --inference-mode grpc \
'{"model_name":"pipeline","inputs":[{"name":"INPUT0","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]},{"name":"INPUT1","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
10,
20,
30,
40
]
}
},
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
11,
12,
13,
14
]
}
}
]
}
seldon pipeline unload pipeline-inputs
seldon model unload mul10
seldon model unload add10
Shows how joins can be used for triggers as well.
cat ./models/mul10.yaml
cat ./models/add10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: mul10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mul10"
requirements:
- triton
- python
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: add10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/add10"
requirements:
- triton
- python
seldon model load -f ./models/mul10.yaml
seldon model load -f ./models/add10.yaml
{}
{}
seldon model status mul10 -w ModelAvailable | jq -M .
seldon model status add10 -w ModelAvailable | jq -M .
{}
{}
Here we require a tensor named ok1 or ok2 to exist on the pipeline inputs to run the mul10 model, but require the tensor ok3 to exist on the pipeline inputs to run the add10 model. The logic on mul10 is handled by a trigger join of any, meaning either of these input tensors can exist to satisfy the trigger join.
cat ./pipelines/trigger-joins.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: trigger-joins
spec:
steps:
- name: mul10
inputs:
- trigger-joins.inputs.INPUT
triggers:
- trigger-joins.inputs.ok1
- trigger-joins.inputs.ok2
triggersJoinType: any
- name: add10
inputs:
- trigger-joins.inputs.INPUT
triggers:
- trigger-joins.inputs.ok3
output:
steps:
- mul10
- add10
stepsJoin: any
seldon pipeline load -f ./pipelines/trigger-joins.yaml
seldon pipeline status trigger-joins -w PipelineReady | jq -M .
{
"pipelineName": "trigger-joins",
"versions": [
{
"pipeline": {
"name": "trigger-joins",
"uid": "ciepgkqi8ufs73flait0",
"version": 1,
"steps": [
{
"name": "add10",
"inputs": [
"trigger-joins.inputs.INPUT"
],
"triggers": [
"trigger-joins.inputs.ok3"
]
},
{
"name": "mul10",
"inputs": [
"trigger-joins.inputs.INPUT"
],
"triggers": [
"trigger-joins.inputs.ok1",
"trigger-joins.inputs.ok2"
],
"triggersJoin": "ANY"
}
],
"output": {
"steps": [
"mul10.outputs",
"add10.outputs"
],
"stepsJoin": "ANY"
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T14:42:27.595300698Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer trigger-joins --inference-mode grpc \
'{"model_name":"pipeline","inputs":[{"name":"ok1","contents":{"fp32_contents":[1]},"datatype":"FP32","shape":[1]},{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
10,
20,
30,
40
]
}
}
]
}
seldon pipeline infer trigger-joins --inference-mode grpc \
'{"model_name":"pipeline","inputs":[{"name":"ok3","contents":{"fp32_contents":[1]},"datatype":"FP32","shape":[1]},{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
11,
12,
13,
14
]
}
}
]
}
seldon pipeline unload trigger-joins
seldon model unload mul10
seldon model unload add10
Note: The Seldon CLI allows you to view information about underlying Seldon resources and make changes to them through the scheduler in non-Kubernetes environments. However, it cannot modify underlying manifests within a Kubernetes cluster. Therefore, using the Seldon CLI for control plane operations in a Kubernetes environment is not recommended. For more details, see Seldon CLI.
We have a Triton model that has two version folders. Model version 1 adds 10 to the input; model version 2 multiplies the input by 10. The structure of the artifact repo is shown below:
config.pbtxt
1/model.py <add 10>
2/model.py <mul 10>
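For illustration, the two versioned model.py files might differ only in one line of arithmetic; a hypothetical sketch using Triton's Python backend (not the bucket's actual code):
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            x = pb_utils.get_input_tensor_by_name(request, "INPUT").as_numpy()
            y = x + 10  # version 1; version 2 would compute x * 10 instead
            out = pb_utils.Tensor("OUTPUT", y)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out]))
        return responses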
import os
os.environ["NAMESPACE"] = "seldon-mesh"
MESH_IP=!kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
os.environ['MESH_IP'] = MESH_IP
MESH_IP
'172.19.255.1'
cat ./models/multi-version-1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: math
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/multi-version"
artifactVersion: 1
requirements:
- triton
- python
kubectl apply -f ./models/multi-version-1.yaml -n ${NAMESPACE}
model.mlops.seldon.io/math created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/math condition met
seldon model infer math --inference-mode grpc --inference-host ${MESH_IP}:80 \
'{"model_name":"math","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"modelName": "math_1",
"modelVersion": "1",
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
11,
12,
13,
14
]
}
}
]
}
cat ./models/multi-version-2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: math
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/multi-version"
artifactVersion: 2
requirements:
- triton
- python
kubectl apply -f ./models/multi-version-2.yaml -n ${NAMESPACE}
model.mlops.seldon.io/math configured
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/math condition met
seldon model infer math --inference-mode grpc --inference-host ${MESH_IP}:80 \
'{"model_name":"math","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"modelName": "math_2",
"modelVersion": "1",
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
10,
20,
30,
40
]
}
}
]
}
kubectl delete -f ./models/multi-version-1.yaml -n ${NAMESPACE}
model.mlops.seldon.io "math" deleted
We use a simple sklearn iris classification model.
cat ./models/sklearn-iris-gs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn"
requirements:
- sklearn
memory: 100Ki
Load the model
seldon model load -f ./models/sklearn-iris-gs.yaml
{}
Wait for the model to be ready
seldon model status iris -w ModelAvailable | jq -M .
{}
Do a REST inference call
seldon model infer iris \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "983bd95f-4b4d-4ff1-95b2-df9d6d089164",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
Do a gRPC inference call
seldon model infer iris --inference-mode grpc \
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' | jq -M .
{
"modelName": "iris_1",
"modelVersion": "1",
"outputs": [
{
"name": "predict",
"datatype": "INT64",
"shape": [
"1",
"1"
],
"parameters": {
"content_type": {
"stringParam": "np"
}
},
"contents": {
"int64Contents": [
"2"
]
}
}
]
}
Unload the model
seldon model unload iris
We run a simple tensorflow model. Note the requirements section specifying tensorflow.
cat ./models/tfsimple1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
Load the model.
seldon model load -f ./models/tfsimple1.yaml
{}
Wait for the model to be ready.
seldon model status tfsimple1 -w ModelAvailable | jq -M .
{}
Get model metadata
seldon model metadata tfsimple1
{
"name": "tfsimple1_1",
"versions": [
"1"
],
"platform": "tensorflow_graphdef",
"inputs": [
{
"name": "INPUT0",
"datatype": "INT32",
"shape": [
-1,
16
]
},
{
"name": "INPUT1",
"datatype": "INT32",
"shape": [
-1,
16
]
}
],
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
-1,
16
]
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
-1,
16
]
}
]
}
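The same metadata is also available over the Open Inference Protocol REST endpoint. A minimal sketch with Python's requests, assuming the local inference endpoint on localhost:9000 that the CLI uses by default (visible in the header dumps elsewhere in this document):
import requests

# V2 model metadata endpoint; the Seldon-Model header routes the request.
r = requests.get("http://localhost:9000/v2/models/tfsimple1",
                 headers={"Seldon-Model": "tfsimple1"})
print(r.json()["inputs"])  # INPUT0/INPUT1, datatype INT32, shape [-1, 16]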
Do a REST inference call.
seldon model infer tfsimple1 \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "tfsimple1_1",
"model_version": "1",
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
1,
16
],
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
1,
16
],
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
]
}
]
}
Do a gRPC inference call
seldon model infer tfsimple1 --inference-mode grpc \
'{"model_name":"tfsimple1","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"modelName": "tfsimple1_1",
"modelVersion": "1",
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
]
}
}
]
}
Unload the model
seldon model unload tfsimple1
We will use two sklearn iris classification models to illustrate an experiment.
cat ./models/sklearn1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "gs://seldon-models/mlserver/iris"
requirements:
- sklearn
cat ./models/sklearn2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris2
spec:
storageUri: "gs://seldon-models/mlserver/iris"
requirements:
- sklearn
Load both models.
seldon model load -f ./models/sklearn1.yaml
seldon model load -f ./models/sklearn2.yaml
{}
{}
Wait for both models to be ready.
seldon model status iris | jq -M .
seldon model status iris2 | jq -M .
{
"modelName": "iris",
"versions": [
{
"version": 1,
"serverName": "mlserver",
"kubernetesMeta": {},
"modelReplicaState": {
"0": {
"state": "Available",
"lastChangeTimestamp": "2023-06-29T14:01:41.362720538Z"
}
},
"state": {
"state": "ModelAvailable",
"availableReplicas": 1,
"lastChangeTimestamp": "2023-06-29T14:01:41.362720538Z"
},
"modelDefn": {
"meta": {
"name": "iris",
"kubernetesMeta": {}
},
"modelSpec": {
"uri": "gs://seldon-models/mlserver/iris",
"requirements": [
"sklearn"
]
},
"deploymentSpec": {
"replicas": 1
}
}
}
]
}
{
"modelName": "iris2",
"versions": [
{
"version": 1,
"serverName": "mlserver",
"kubernetesMeta": {},
"modelReplicaState": {
"0": {
"state": "Available",
"lastChangeTimestamp": "2023-06-29T14:01:41.362845079Z"
}
},
"state": {
"state": "ModelAvailable",
"availableReplicas": 1,
"lastChangeTimestamp": "2023-06-29T14:01:41.362845079Z"
},
"modelDefn": {
"meta": {
"name": "iris2",
"kubernetesMeta": {}
},
"modelSpec": {
"uri": "gs://seldon-models/mlserver/iris",
"requirements": [
"sklearn"
]
},
"deploymentSpec": {
"replicas": 1
}
}
}
]
}
Create an experiment that modifies the iris model to add a second model, splitting traffic 50/50 between the two.
cat ./experiments/ab-default-model.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
name: experiment-sample
spec:
default: iris
candidates:
- name: iris
weight: 50
- name: iris2
weight: 50
Start the experiment.
seldon experiment start -f ./experiments/ab-default-model.yaml
Wait for the experiment to be ready.
seldon experiment status experiment-sample -w | jq -M .
{
"experimentName": "experiment-sample",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
Run a set of calls and record which route the traffic took. There should be roughly a 50/50 split.
seldon model infer iris -i 100 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::57 :iris_1::43]
Run one more request
seldon model infer iris \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "fa425bdf-737c-41fe-894d-58868f70fe5d",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
Use the sticky session key passed by the last inference request to ensure the same route is taken each time. We will test both REST and gRPC.
seldon model infer iris -s -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris --inference-mode grpc -s -i 50\
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}'
Success: map[:iris_1::50]
Stop the experiment
seldon experiment stop experiment-sample
Show that all requests now go to the original model.
seldon model infer iris -i 100 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::100]
Unload both models.
seldon model unload iris
seldon model unload iris2
We will use two sklearn iris classification models to illustrate experiments.
cat ./models/sklearn1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "gs://seldon-models/mlserver/iris"
requirements:
- sklearn
cat ./models/sklearn2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris2
spec:
storageUri: "gs://seldon-models/mlserver/iris"
requirements:
- sklearn
Load both models.
seldon model load -f ./models/sklearn1.yaml
seldon model load -f ./models/sklearn2.yaml
{}
{}
Wait for both models to be ready.
seldon model status iris -w ModelAvailable
seldon model status iris2 -w ModelAvailable
{}
{}
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris2 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::50]
Create an experiment that modifies the iris model to add a second model, splitting traffic 50/50 between the two.
cat ./experiments/ab-default-model.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
name: experiment-sample
spec:
default: iris
candidates:
- name: iris
weight: 50
- name: iris2
weight: 50
Start the experiment.
seldon experiment start -f ./experiments/ab-default-model.yaml
Wait for the experiment to be ready.
seldon experiment status experiment-sample -w | jq -M .
{
"experimentName": "experiment-sample",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
Run a set of calls and record which route the traffic took. There should be roughly a 50/50 split.
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::27 :iris_1::23]
Show the sticky session header x-seldon-route that is returned.
seldon model infer iris --show-headers \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
> POST /v2/models/iris/infer HTTP/1.1
> Host: localhost:9000
> Content-Type:[application/json]
> Seldon-Model:[iris]
< X-Seldon-Route:[:iris_1:]
< Ce-Id:[463e96ad-645f-4442-8890-4c340b58820b]
< Traceparent:[00-fe9e87fcbe4be98ed82fb76166e15ceb-d35e7ac96bd8b718-01]
< X-Envoy-Upstream-Service-Time:[3]
< Ce-Specversion:[0.3]
< Date:[Thu, 29 Jun 2023 14:03:03 GMT]
< Ce-Source:[io.seldon.serving.deployment.mlserver]
< Content-Type:[application/json]
< Server:[envoy]
< X-Request-Id:[cieou5ofh5ss73fbjdu0]
< Ce-Endpoint:[iris_1]
< Ce-Modelid:[iris_1]
< Ce-Type:[io.seldon.serving.inference.response]
< Content-Length:[213]
< Ce-Inferenceservicename:[mlserver]
< Ce-Requestid:[463e96ad-645f-4442-8890-4c340b58820b]
{
"model_name": "iris_1",
"model_version": "1",
"id": "463e96ad-645f-4442-8890-4c340b58820b",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
Use the sticky session key passed by the last inference request to ensure the same route is taken each time.
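Under the hood the CLI captures the x-seldon-route response header and replays it on subsequent requests. A minimal sketch of the same flow with Python's requests, assuming the local endpoint shown in the header dump above:
import requests

url = "http://localhost:9000/v2/models/iris/infer"  # local endpoint used by the CLI
payload = {"inputs": [{"name": "predict", "shape": [1, 4],
                       "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}

# First request: capture the route that was taken.
first = requests.post(url, json=payload, headers={"Seldon-Model": "iris"})
route = first.headers["x-seldon-route"]

# Replaying the header pins subsequent requests to the same model.
sticky = requests.post(url, json=payload,
                       headers={"Seldon-Model": "iris", "x-seldon-route": route})
print(sticky.json()["model_name"])  # same model every time, e.g. iris_1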
seldon model infer iris -s -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris --inference-mode grpc -s -i 50\
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}'
Success: map[:iris_1::50]
Stop the experiment
seldon experiment stop experiment-sample
Unload both models.
seldon model unload iris
seldon model unload iris2
cat ./models/add10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: add10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/add10"
requirements:
- triton
- python
cat ./models/mul10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: mul10
spec:
storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mul10"
requirements:
- triton
- python
seldon model load -f ./models/add10.yaml
seldon model load -f ./models/mul10.yaml
{}
{}
seldon model status add10 -w ModelAvailable
seldon model status mul10 -w ModelAvailable
{}
{}
cat ./pipelines/mul10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: pipeline-mul10
spec:
steps:
- name: mul10
output:
steps:
- mul10
cat ./pipelines/add10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: pipeline-add10
spec:
steps:
- name: add10
output:
steps:
- add10
seldon pipeline load -f ./pipelines/add10.yaml
seldon pipeline load -f ./pipelines/mul10.yaml
seldon pipeline status pipeline-add10 -w PipelineReady
seldon pipeline status pipeline-mul10 -w PipelineReady
{"pipelineName":"pipeline-add10", "versions":[{"pipeline":{"name":"pipeline-add10", "uid":"cieov47l80lc739juklg", "version":1, "steps":[{"name":"add10"}], "output":{"steps":["add10.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:05:04.460868091Z", "modelsReady":true}}]}
{"pipelineName":"pipeline-mul10", "versions":[{"pipeline":{"name":"pipeline-mul10", "uid":"cieov47l80lc739jukm0", "version":1, "steps":[{"name":"mul10"}], "output":{"steps":["mul10.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:05:04.631980330Z", "modelsReady":true}}]}
seldon pipeline infer pipeline-add10 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
11,
12,
13,
14
]
}
}
]
}
seldon pipeline infer pipeline-mul10 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
10,
20,
30,
40
]
}
}
]
}
cat ./experiments/addmul10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
  name: addmul10
spec:
  default: pipeline-add10
  resourceType: pipeline
  candidates:
  - name: pipeline-add10
    weight: 50
  - name: pipeline-mul10
    weight: 50
seldon experiment start -f ./experiments/addmul10.yaml
seldon experiment status addmul10 -w | jq -M .
{
"experimentName": "addmul10",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
seldon pipeline infer pipeline-add10 -i 50 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
Success: map[:add10_1::28 :mul10_1::22 :pipeline-add10.pipeline::28 :pipeline-mul10.pipeline::22]
Use the sticky session key passed by the last inference request to ensure the same route is taken each time.
seldon pipeline infer pipeline-add10 --show-headers --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
> /inference.GRPCInferenceService/ModelInfer HTTP/2
> Host: localhost:9000
> seldon-model:[pipeline-add10.pipeline]
< x-envoy-expected-rq-timeout-ms:[60000]
< x-request-id:[cieov8ofh5ss739277i0]
< date:[Thu, 29 Jun 2023 14:05:23 GMT]
< server:[envoy]
< content-type:[application/grpc]
< x-envoy-upstream-service-time:[6]
< x-seldon-route:[:add10_1: :pipeline-add10.pipeline:]
< x-forwarded-proto:[http]
{"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[11, 12, 13, 14]}}]}
seldon pipeline infer pipeline-add10 -s --show-headers --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
> /inference.GRPCInferenceService/ModelInfer HTTP/2
> Host: localhost:9000
> x-seldon-route:[:add10_1: :pipeline-add10.pipeline:]
> seldon-model:[pipeline-add10.pipeline]
< content-type:[application/grpc]
< x-forwarded-proto:[http]
< x-envoy-expected-rq-timeout-ms:[60000]
< x-seldon-route:[:add10_1: :pipeline-add10.pipeline: :pipeline-add10.pipeline:]
< x-request-id:[cieov90fh5ss739277ig]
< x-envoy-upstream-service-time:[7]
< date:[Thu, 29 Jun 2023 14:05:24 GMT]
< server:[envoy]
{"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[11, 12, 13, 14]}}]}
seldon pipeline infer pipeline-add10 -s -i 50 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
Success: map[:add10_1::50 :pipeline-add10.pipeline::150]
cat ./models/add20.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: add20
spec:
  storageUri: "gs://seldon-models/triton/add20"
  requirements:
  - triton
  - python
seldon model load -f ./models/add20.yaml
{}
seldon model status add20 -w ModelAvailable
{}
cat ./experiments/add1020.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
  name: add1020
spec:
  default: add10
  candidates:
  - name: add10
    weight: 50
  - name: add20
    weight: 50
seldon experiment start -f ./experiments/add1020.yaml
seldon experiment status add1020 -w | jq -M .
{
"experimentName": "add1020",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
seldon model infer add10 -i 50 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
Success: map[:add10_1::22 :add20_1::28]
seldon pipeline infer pipeline-add10 -i 100 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
Success: map[:add10_1::24 :add20_1::32 :mul10_1::44 :pipeline-add10.pipeline::56 :pipeline-mul10.pipeline::44]
seldon pipeline infer pipeline-add10 --show-headers --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
> /inference.GRPCInferenceService/ModelInfer HTTP/2
> Host: localhost:9000
> seldon-model:[pipeline-add10.pipeline]
< x-request-id:[cieovf0fh5ss739279u0]
< x-envoy-upstream-service-time:[5]
< x-seldon-route:[:add10_1: :pipeline-add10.pipeline:]
< date:[Thu, 29 Jun 2023 14:05:48 GMT]
< server:[envoy]
< content-type:[application/grpc]
< x-forwarded-proto:[http]
< x-envoy-expected-rq-timeout-ms:[60000]
{"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[11, 12, 13, 14]}}]}
seldon pipeline infer pipeline-add10 -s --show-headers --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
> /inference.GRPCInferenceService/ModelInfer HTTP/2
> Host: localhost:9000
> x-seldon-route:[:add10_1: :pipeline-add10.pipeline:]
> seldon-model:[pipeline-add10.pipeline]
< x-forwarded-proto:[http]
< x-envoy-expected-rq-timeout-ms:[60000]
< x-request-id:[cieovf8fh5ss739279ug]
< x-envoy-upstream-service-time:[6]
< date:[Thu, 29 Jun 2023 14:05:49 GMT]
< server:[envoy]
< content-type:[application/grpc]
< x-seldon-route:[:add10_1: :pipeline-add10.pipeline: :add20_1: :pipeline-add10.pipeline:]
{"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[21, 22, 23, 24]}}]}
seldon experiment stop addmul10
seldon experiment stop add1020
seldon pipeline unload pipeline-add10
seldon pipeline unload pipeline-mul10
seldon model unload add10
seldon model unload add20
seldon model unload mul10
We will use two SKlearn Iris classification models to illustrate a model with a mirror.
cat ./models/sklearn1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: iris
spec:
  storageUri: "gs://seldon-models/mlserver/iris"
  requirements:
  - sklearn
cat ./models/sklearn2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: iris2
spec:
  storageUri: "gs://seldon-models/mlserver/iris"
  requirements:
  - sklearn
Load both models.
seldon model load -f ./models/sklearn1.yaml
seldon model load -f ./models/sklearn2.yaml
{}
{}
Wait for both models to be ready.
seldon model status iris -w ModelAvailable
seldon model status iris2 -w ModelAvailable
{}
{}
Create an experiment in which traffic sent to iris is also mirrored to iris2.
cat ./experiments/sklearn-mirror.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
  name: sklearn-mirror
spec:
  default: iris
  candidates:
  - name: iris
    weight: 100
  mirror:
    name: iris2
    percent: 100
Start the experiment.
seldon experiment start -f ./experiments/sklearn-mirror.yaml
Wait for the experiment to be ready.
seldon experiment status sklearn-mirror -w | jq -M .
{
"experimentName": "sklearn-mirror",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
We get responses from iris, but all requests are also mirrored to iris2.
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
We can check the agent's local Prometheus port to validate that the mirrored requests reached iris2.
curl -s 0.0.0.0:9006/metrics | grep seldon_model_infer_total | grep iris2_1
seldon_model_infer_total{code="200",method_type="rest",model="iris",model_internal="iris2_1",server="mlserver",server_replica="0"} 50
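The same check can be scripted. A small sketch that scrapes the agent's metrics endpoint (assuming it is exposed on 0.0.0.0:9006 as above):
import requests
# Print every per-model inference counter exposed by the agent.
metrics = requests.get("http://0.0.0.0:9006/metrics").text
for line in metrics.splitlines():
    if line.startswith("seldon_model_infer_total"):
        print(line)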
Stop the experiment.
seldon experiment stop sklearn-mirror
Unload both models.
seldon model unload iris
seldon model unload iris2
cat ./models/add10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: add10
spec:
  storageUri: "gs://seldon-models/scv2/samples/triton_23-03/add10"
  requirements:
  - triton
  - python
cat ./models/mul10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: mul10
spec:
  storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mul10"
  requirements:
  - triton
  - python
seldon model load -f ./models/add10.yaml
seldon model load -f ./models/mul10.yaml
{}
{}
seldon model status add10 -w ModelAvailable
seldon model status mul10 -w ModelAvailable
{}
{}
cat ./pipelines/mul10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: pipeline-mul10
spec:
  steps:
  - name: mul10
  output:
    steps:
    - mul10
cat ./pipelines/add10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: pipeline-add10
spec:
  steps:
  - name: add10
  output:
    steps:
    - add10
seldon pipeline load -f ./pipelines/add10.yaml
seldon pipeline load -f ./pipelines/mul10.yaml
seldon pipeline status pipeline-add10 -w PipelineReady
seldon pipeline status pipeline-mul10 -w PipelineReady
{"pipelineName":"pipeline-add10", "versions":[{"pipeline":{"name":"pipeline-add10", "uid":"ciep072i8ufs73flaipg", "version":1, "steps":[{"name":"add10"}], "output":{"steps":["add10.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:07:24.903503109Z", "modelsReady":true}}]}
{"pipelineName":"pipeline-mul10", "versions":[{"pipeline":{"name":"pipeline-mul10", "uid":"ciep072i8ufs73flaiq0", "version":1, "steps":[{"name":"mul10"}], "output":{"steps":["mul10.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:07:25.082642153Z", "modelsReady":true}}]}
seldon pipeline infer pipeline-add10 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
{"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[11, 12, 13, 14]}}]}
seldon pipeline infer pipeline-mul10 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
{"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[10, 20, 30, 40]}}]}
cat ./experiments/addmul10-mirror.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
  name: addmul10-mirror
spec:
  default: pipeline-add10
  resourceType: pipeline
  candidates:
  - name: pipeline-add10
    weight: 100
  mirror:
    name: pipeline-mul10
    percent: 100
seldon experiment start -f ./experiments/addmul10-mirror.yaml
seldon experiment status addmul10-mirror -w | jq -M .
{
"experimentName": "addmul10-mirror",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
seldon pipeline infer pipeline-add10 -i 1 --inference-mode grpc \
'{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}'
{"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[11, 12, 13, 14]}}]}
Let's check that the mul10 model was called.
curl -s 0.0.0.0:9007/metrics | grep seldon_model_infer_total | grep mul10_1
seldon_model_infer_total{code="OK",method_type="grpc",model="mul10",model_internal="mul10_1",server="triton",server_replica="0"} 2
curl -s 0.0.0.0:9007/metrics | grep seldon_model_infer_total | grep add10_1
seldon_model_infer_total{code="OK",method_type="grpc",model="add10",model_internal="add10_1",server="triton",server_replica="0"} 2
Let's make an HTTP call and check the two models again.
seldon pipeline infer pipeline-add10 -i 1 \
'{"model_name":"add10","inputs":[{"name":"INPUT","data":[1,2,3,4],"datatype":"FP32","shape":[4]}]}'
{
"model_name": "",
"outputs": [
{
"data": [
11,
12,
13,
14
],
"name": "OUTPUT",
"shape": [
4
],
"datatype": "FP32"
}
]
}
curl -s 0.0.0.0:9007/metrics | grep seldon_model_infer_total | grep mul10_1
seldon_model_infer_total{code="OK",method_type="grpc",model="mul10",model_internal="mul10_1",server="triton",server_replica="0"} 3
curl -s 0.0.0.0:9007/metrics | grep seldon_model_infer_total | grep add10_1
seldon_model_infer_total{code="OK",method_type="grpc",model="add10",model_internal="add10_1",server="triton",server_replica="0"} 3
seldon pipeline inspect pipeline-mul10
seldon.default.model.mul10.inputs ciep0bofh5ss73dpdiq0 {"inputs":[{"name":"INPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[1, 2, 3, 4]}}]}
seldon.default.model.mul10.outputs ciep0bofh5ss73dpdiq0 {"modelName":"mul10_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[10, 20, 30, 40]}}]}
seldon.default.pipeline.pipeline-mul10.inputs ciep0bofh5ss73dpdiq0 {"inputs":[{"name":"INPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[1, 2, 3, 4]}}]}
seldon.default.pipeline.pipeline-mul10.outputs ciep0bofh5ss73dpdiq0 {"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[10, 20, 30, 40]}}]}
seldon experiment stop addmul10-mirror
seldon pipeline unload pipeline-add10
seldon pipeline unload pipeline-mul10
seldon model unload add10
seldon model unload mul10
Note: The Seldon CLI allows you to view information about underlying Seldon resources and make changes to them through the scheduler in non-Kubernetes environments. However, it cannot modify underlying manifests within a Kubernetes cluster. Therefore, using the Seldon CLI for control plane operations in a Kubernetes environment is not recommended. For more details, see Seldon CLI.
import os
os.environ["NAMESPACE"] = "seldon-mesh"
MESH_IP=!kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
import os
os.environ['MESH_IP'] = MESH_IP
MESH_IP
'172.18.255.2'
The capabilities field replaces the capabilities from the ServerConfig.
cat ./servers/custom-mlserver-capabilities.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Server
metadata:
  name: mlserver-134
spec:
  serverConfig: mlserver
  capabilities:
  - mlserver-1.3.4
  podSpec:
    containers:
    - image: seldonio/mlserver:1.3.4
      name: mlserver
kubectl create -f ./servers/custom-mlserver-capabilities.yaml -n ${NAMESPACE}
server.mlops.seldon.io/mlserver-134 created
kubectl wait --for condition=ready --timeout=300s server --all -n ${NAMESPACE}
server.mlops.seldon.io/mlserver condition met
server.mlops.seldon.io/mlserver-134 condition met
server.mlops.seldon.io/triton condition met
cat ./models/iris-custom-requirements.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: iris
spec:
  storageUri: "gs://seldon-models/mlserver/iris"
  requirements:
  - mlserver-1.3.4
kubectl create -f ./models/iris-custom-requirements.yaml -n ${NAMESPACE}
model.mlops.seldon.io/iris created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/iris condition met
seldon model infer iris --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "057ae95c-e6bc-4f57-babf-0817ff171729",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
kubectl delete -f ./models/iris-custom-requirements.yaml -n ${NAMESPACE}
model.mlops.seldon.io "iris" deleted
kubectl delete -f ./servers/custom-mlserver-capabilities.yaml -n ${NAMESPACE}
server.mlops.seldon.io "mlserver-134" deleted
The extraCapabilities field extends the existing list from the ServerConfig.
cat ./servers/custom-mlserver.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Server
metadata:
  name: mlserver-134
spec:
  serverConfig: mlserver
  extraCapabilities:
  - mlserver-1.3.4
  podSpec:
    containers:
    - image: seldonio/mlserver:1.3.4
      name: mlserver
kubectl create -f ./servers/custom-mlserver.yaml -n ${NAMESPACE}
server.mlops.seldon.io/mlserver-134 created
kubectl wait --for condition=ready --timeout=300s server --all -n ${NAMESPACE}
server.mlops.seldon.io/mlserver condition met
server.mlops.seldon.io/mlserver-134 condition met
server.mlops.seldon.io/triton condition met
cat ./models/iris-custom-server.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: iris
spec:
  storageUri: "gs://seldon-models/mlserver/iris"
  server: mlserver-134
kubectl create -f ./models/iris-custom-server.yaml -n ${NAMESPACE}
model.mlops.seldon.io/iris created
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
model.mlops.seldon.io/iris condition met
seldon model infer iris --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "a3e17c6c-ee3f-4a51-b890-6fb16385a757",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
kubectl delete -f ./models/iris-custom-server.yaml -n ${NAMESPACE}
model.mlops.seldon.io "iris" deleted
kubectl delete -f ./servers/custom-mlserver.yaml -n ${NAMESPACE}
server.mlops.seldon.io "mlserver-134" deleted
cat ./models/income.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: income
spec:
  storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/classifier"
  requirements:
  - sklearn
seldon model load -f ./models/income.yaml
{}
seldon model status income -w ModelAvailable
{}
seldon model infer income \
'{"inputs": [{"name": "predict", "shape": [1, 12], "datatype": "FP32", "data": [[47,4,1,1,1,3,4,1,0,0,40,9]]}]}'
{
"model_name": "income_1",
"model_version": "1",
"id": "c65b8302-85af-4bac-aac5-91e3bedebee8",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"data": [
0
]
}
]
}
cat ./models/income-explainer.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: income-explainer
spec:
  storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/explainer"
  explainer:
    type: anchor_tabular
    modelRef: income
seldon model load -f ./models/income-explainer.yaml
{}
seldon model status income-explainer -w ModelAvailable
{}
seldon model infer income-explainer \
'{"inputs": [{"name": "predict", "shape": [1, 12], "datatype": "FP32", "data": [[47,4,1,1,1,3,4,1,0,0,40,9]]}]}'
{
"model_name": "income-explainer_1",
"model_version": "1",
"id": "a22c3785-ff3b-4504-9b3c-199aa48a62d6",
"parameters": {},
"outputs": [
{
"name": "explanation",
"shape": [
1,
1
],
"datatype": "BYTES",
"parameters": {
"content_type": "str"
},
"data": [
"{\"meta\": {\"name\": \"AnchorTabular\", \"type\": [\"blackbox\"], \"explanations\": [\"local\"], \"params\": {\"seed\": 1, \"disc_perc\": [25, 50, 75], \"threshold\": 0.95, \"delta\": 0.1, \"tau\": 0.15, \"batch_size\": 100, \"coverage_samples\": 10000, \"beam_size\": 1, \"stop_on_first\": false, \"max_anchor_size\": null, \"min_samples_start\": 100, \"n_covered_ex\": 10, \"binary_cache_size\": 10000, \"cache_margin\": 1000, \"verbose\": false, \"verbose_every\": 1, \"kwargs\": {}}, \"version\": \"0.9.0\"}, \"data\": {\"anchor\": [\"Marital Status = Never-Married\", \"Relationship = Own-child\"], \"precision\": 0.9518716577540107, \"coverage\": 0.07165109034267912, \"raw\": {\"feature\": [3, 5], \"mean\": [0.7959381044487428, 0.9518716577540107], \"precision\": [0.7959381044487428, 0.9518716577540107], \"coverage\": [0.3037383177570093, 0.07165109034267912], \"examples\": [{\"covered_true\": [[52, 5, 5, 1, 8, 1, 2, 0, 0, 0, 50, 9], [49, 4, 1, 1, 4, 4, 1, 0, 0, 0, 40, 1], [23, 4, 1, 1, 6, 1, 4, 1, 0, 0, 40, 9], [55, 2, 1, 1, 5, 1, 4, 0, 0, 0, 48, 9], [22, 4, 1, 1, 2, 3, 4, 0, 0, 0, 15, 9], [51, 4, 2, 1, 5, 0, 1, 1, 0, 0, 99, 4], [40, 4, 1, 1, 5, 1, 4, 0, 0, 0, 40, 9], [40, 6, 1, 1, 2, 0, 4, 1, 0, 0, 50, 9], [50, 5, 5, 1, 6, 0, 4, 1, 0, 0, 55, 9], [41, 4, 1, 1, 6, 0, 4, 1, 0, 0, 40, 9]], \"covered_false\": [[42, 4, 1, 1, 8, 0, 4, 1, 0, 2415, 60, 9], [48, 6, 2, 1, 5, 4, 4, 0, 0, 0, 60, 9], [37, 4, 1, 1, 5, 0, 4, 1, 0, 0, 45, 9], [57, 4, 5, 1, 8, 0, 4, 1, 0, 0, 50, 9], [63, 7, 2, 1, 8, 0, 4, 1, 0, 1902, 50, 9], [51, 4, 5, 1, 8, 0, 4, 1, 0, 1887, 47, 9], [51, 2, 2, 1, 8, 1, 4, 0, 0, 0, 45, 9], [68, 7, 5, 1, 5, 0, 4, 1, 0, 2377, 42, 0], [45, 4, 1, 1, 8, 0, 4, 1, 15024, 0, 40, 9], [45, 4, 1, 1, 8, 0, 4, 1, 0, 1977, 60, 9]], \"uncovered_true\": [], \"uncovered_false\": []}, {\"covered_true\": [[44, 6, 5, 1, 8, 3, 4, 0, 0, 1902, 60, 9], [58, 7, 2, 1, 5, 3, 1, 1, 4064, 0, 40, 1], [50, 7, 1, 1, 1, 3, 2, 0, 0, 0, 37, 9], [34, 4, 2, 1, 5, 3, 4, 1, 0, 0, 45, 9], [45, 4, 1, 1, 5, 3, 4, 1, 0, 0, 40, 9], [33, 7, 5, 1, 5, 3, 1, 1, 0, 0, 30, 6], [61, 7, 2, 1, 5, 3, 4, 1, 0, 0, 40, 0], [35, 4, 5, 1, 1, 3, 4, 1, 0, 0, 40, 9], [71, 2, 1, 1, 5, 3, 4, 0, 0, 0, 6, 9], [44, 4, 1, 1, 8, 3, 2, 1, 0, 0, 35, 9]], \"covered_false\": [[30, 4, 5, 1, 5, 3, 4, 1, 10520, 0, 40, 9], [54, 7, 2, 1, 8, 3, 4, 1, 0, 1902, 50, 9], [66, 6, 2, 1, 6, 3, 4, 1, 0, 2377, 25, 9], [35, 4, 2, 1, 5, 3, 4, 1, 7298, 0, 40, 9], [44, 4, 1, 1, 8, 3, 4, 1, 7298, 0, 48, 9], [31, 4, 1, 1, 8, 3, 4, 0, 13550, 0, 50, 9], [35, 4, 1, 1, 8, 3, 4, 1, 8614, 0, 45, 9]], \"uncovered_true\": [], \"uncovered_false\": []}], \"all_precision\": 0, \"num_preds\": 1000000, \"success\": true, \"names\": [\"Marital Status = Never-Married\", \"Relationship = Own-child\"], \"prediction\": [0], \"instance\": [47.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 40.0, 9.0], \"instances\": [[47.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 40.0, 9.0]]}}}"
]
}
]
}
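The explanation itself comes back as a JSON string inside a BYTES tensor. A minimal sketch for pulling out the anchor, assuming the response above has been parsed into a Python dict called response (a hypothetical variable):
import json
# The first element of the BYTES output holds the Alibi explanation as JSON.
explanation = json.loads(response["outputs"][0]["data"][0])
print(explanation["data"]["anchor"])     # ["Marital Status = Never-Married", "Relationship = Own-child"]
print(explanation["data"]["precision"])  # 0.9518...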
seldon model unload income-explainer
{}
seldon model unload income
{}
cat ./models/moviesentiment.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: sentiment
spec:
  storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/moviesentiment-sklearn"
  requirements:
  - sklearn
seldon model load -f ./models/moviesentiment.yaml
{}
seldon model status sentiment -w ModelAvailable
{}
seldon model infer sentiment \
'{"parameters": {"content_type": "str"}, "inputs": [{"name": "foo", "data": ["I am good"], "datatype": "BYTES","shape": [1]}]}'
{
"model_name": "sentiment_2",
"model_version": "1",
"id": "f5c07363-7e9d-4f09-aa30-228c81fdf4a4",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
0
]
}
]
}
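If you'd rather call the model over plain HTTP than via the CLI, a sketch of the equivalent request (endpoint assumed as elsewhere in these examples):
import requests
payload = {
    "parameters": {"content_type": "str"},
    "inputs": [{"name": "foo", "data": ["I am good"], "datatype": "BYTES", "shape": [1]}],
}
headers = {"Content-Type": "application/json", "seldon-model": "sentiment"}
response_raw = requests.post("http://0.0.0.0:9000/v2/models/model/infer", json=payload, headers=headers)
print(response_raw.json()["outputs"][0]["data"])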
cat ./models/moviesentiment-explainer.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: sentiment-explainer
spec:
  storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/moviesentiment-sklearn-explainer"
  explainer:
    type: anchor_text
    modelRef: sentiment
seldon model load -f ./models/moviesentiment-explainer.yaml
{}
seldon model status sentiment-explainer -w ModelAvailable
{}
seldon model infer sentiment-explainer \
'{"parameters": {"content_type": "str"}, "inputs": [{"name": "foo", "data": ["I am good"], "datatype": "BYTES","shape": [1]}]}'
Error: V2 server error: 500 Traceback (most recent call last):
File "/opt/conda/lib/python3.8/site-packages/starlette/middleware/errors.py", line 162, in __call__
await self.app(scope, receive, _send)
File "/opt/conda/lib/python3.8/site-packages/starlette_exporter/middleware.py", line 307, in __call__
await self.app(scope, receive, wrapped_send)
File "/opt/conda/lib/python3.8/site-packages/starlette/middleware/gzip.py", line 24, in __call__
await responder(scope, receive, send)
File "/opt/conda/lib/python3.8/site-packages/starlette/middleware/gzip.py", line 44, in __call__
await self.app(scope, receive, self.send_with_gzip)
File "/opt/conda/lib/python3.8/site-packages/starlette/middleware/exceptions.py", line 79, in __call__
raise exc
File "/opt/conda/lib/python3.8/site-packages/starlette/middleware/exceptions.py", line 68, in __call__
await self.app(scope, receive, sender)
File "/opt/conda/lib/python3.8/site-packages/fastapi/middleware/asyncexitstack.py", line 21, in __call__
raise e
File "/opt/conda/lib/python3.8/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
await self.app(scope, receive, send)
File "/opt/conda/lib/python3.8/site-packages/starlette/routing.py", line 706, in __call__
await route.handle(scope, receive, send)
File "/opt/conda/lib/python3.8/site-packages/starlette/routing.py", line 276, in handle
await self.app(scope, receive, send)
File "/opt/conda/lib/python3.8/site-packages/starlette/routing.py", line 66, in app
response = await func(request)
File "/opt/conda/lib/python3.8/site-packages/mlserver/rest/app.py", line 42, in custom_route_handler
return await original_route_handler(request)
File "/opt/conda/lib/python3.8/site-packages/fastapi/routing.py", line 237, in app
raw_response = await run_endpoint_function(
File "/opt/conda/lib/python3.8/site-packages/fastapi/routing.py", line 163, in run_endpoint_function
return await dependant.call(**values)
File "/opt/conda/lib/python3.8/site-packages/mlserver/rest/endpoints.py", line 99, in infer
inference_response = await self._data_plane.infer(
File "/opt/conda/lib/python3.8/site-packages/mlserver/handlers/dataplane.py", line 103, in infer
prediction = await model.predict(payload)
File "/opt/conda/lib/python3.8/site-packages/mlserver_alibi_explain/runtime.py", line 86, in predict
output_data = await self._async_explain_impl(input_data, payload.parameters)
File "/opt/conda/lib/python3.8/site-packages/mlserver_alibi_explain/runtime.py", line 119, in _async_explain_impl
explanation = await loop.run_in_executor(self._executor, explain_call)
File "/opt/conda/lib/python3.8/concurrent/futures/thread.py", line 57, in run
result = self.fn(*self.args, **self.kwargs)
File "/opt/conda/lib/python3.8/site-packages/mlserver_alibi_explain/explainers/black_box_runtime.py", line 62, in _explain_impl
input_data = input_data[0]
KeyError: 0
seldon model unload sentiment-explainer
{}
seldon model unload sentiment
{}
This example runs you through a series of batch inference requests made to both models and pipelines running on Seldon Core locally.
Deprecated: The MLServer CLI infer feature is experimental and will be removed in future work.
If you haven't already, you'll need to clone the Seldon Core repository and run it locally before you run through this example.
Note: By default, the CLI expects your inference endpoint to be at 0.0.0.0:9000. If you have customized this, you'll need to redirect the CLI.
First, let's jump into the samples folder, where we'll find some sample models and pipelines we can use:
cd samples/
Let's take a look at a sample model before we deploy it:
cat models/sklearn-iris-gs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: iris
spec:
  storageUri: "gs://seldon-models/mlserver/iris"
  requirements:
  - sklearn
  memory: 100Ki
The above manifest will deploy a simple scikit-learn model based on the iris dataset.
Let's now deploy that model using the Seldon CLI:
seldon model load -f models/sklearn-iris-gs.yaml
Now that we've deployed our iris model, let's create a pipeline around the model.
cat pipelines/iris.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: iris-pipeline
spec:
  steps:
  - name: iris
  output:
    steps:
    - iris
We see that this pipeline has only one step: calling the iris model we deployed earlier. We can create the pipeline by running:
seldon pipeline load -f pipelines/iris.yaml
To demonstrate batch inference requests to different types of models, we'll also deploy a simple tensorflow model:
cat models/tfsimple1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: tfsimple1
spec:
  storageUri: "gs://seldon-models/triton/simple"
  requirements:
  - tensorflow
  memory: 100Ki
The tensorflow model takes two arrays as inputs and returns two arrays as outputs. The first output is the element-wise sum of the two inputs, and the second output is their difference (first input minus second input).
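For reference, we can compute the outputs we should expect for the request we'll send later; a quick local sketch:
import numpy as np
# The same [1, 16] inputs used in the inference requests below.
input0 = np.arange(1, 17, dtype=np.int32)
input1 = np.arange(1, 17, dtype=np.int32)
print((input0 + input1).tolist())  # expected OUTPUT0: [2, 4, ..., 32]
print((input0 - input1).tolist())  # expected OUTPUT1: all zeros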
Let's deploy the model:
seldon model load -f models/tfsimple1.yaml
Just as we did for the scikit-learn model, we'll deploy a simple pipeline for our tensorflow model:
Inspect the pipeline manifest:
cat pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: tfsimple
spec:
  steps:
  - name: tfsimple1
  output:
    steps:
    - tfsimple1
and deploy it:
seldon pipeline load -f pipelines/tfsimple.yaml
Once we've deployed a model or pipeline to Seldon Core, we can list them and check their status by running:
seldon model list
and
seldon pipeline list
Your models and pipelines should be showing a state of ModelAvailable and PipelineReady respectively.
Before we run a large batch job of predictions through our models and pipelines, let's quickly check that they work with a single standalone inference request. We can do this using the seldon model infer command.
seldon model infer iris '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' | jq
{
"model_name": "iris_1",
"model_version": "1",
"id": "a67233c2-2f8c-4fbc-a87e-4e4d3d034c9f",
"parameters": {
"content_type": null,
"headers": null
},
"outputs": [
{
"name": "predict",
"shape": [
1
],
"datatype": "INT64",
"parameters": null,
"data": [
2
]
}
]
}
The prediction request body needs to be an Open Inference Protocol compatible payload and must also match the expected inputs for the model you've deployed. In this case, the iris model expects data of shape [1, 4] and of type FP32.
You'll notice that the prediction results for this request come back on outputs[0].data.
seldon pipeline infer iris-pipeline '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' | jq
{
"model_name": "",
"outputs": [
{
"data": [
2
],
"name": "predict",
"shape": [
1
],
"datatype": "INT64"
}
]
}
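Pipelines can also be called over plain HTTP. A sketch, assuming the endpoint and header convention used in the Python examples elsewhere in this documentation; the .pipeline suffix routes the request to the pipeline rather than a model:
import requests
payload = {"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}
# The .pipeline suffix tells the mesh to route to the pipeline, not a model.
headers = {"Content-Type": "application/json", "seldon-model": "iris-pipeline.pipeline"}
response_raw = requests.post("http://0.0.0.0:9000/v2/models/model/infer", json=payload, headers=headers)
print(response_raw.json()["outputs"][0]["data"])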
seldon model infer tfsimple1 '{"outputs":[{"name":"OUTPUT0"}], "inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq
{
"model_name": "tfsimple1_1",
"model_version": "1",
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
1,
16
],
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
]
}
You'll notice that the inputs for our tensorflow model look different from the ones we sent to the iris model. This time, we're sending two arrays of shape [1, 16]. When sending an inference request, we can optionally choose which outputs we want back by including an {"outputs":...} object.
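As a sketch, the same request with output selection can be sent from Python (endpoint and header convention assumed as in the other Python examples):
import requests
data = list(range(1, 17))
payload = {
    # Naming OUTPUT0 here asks the server to return only that tensor.
    "outputs": [{"name": "OUTPUT0"}],
    "inputs": [
        {"name": "INPUT0", "data": data, "datatype": "INT32", "shape": [1, 16]},
        {"name": "INPUT1", "data": data, "datatype": "INT32", "shape": [1, 16]},
    ],
}
headers = {"Content-Type": "application/json", "seldon-model": "tfsimple1"}
response_raw = requests.post("http://0.0.0.0:9000/v2/models/model/infer", json=payload, headers=headers)
print(response_raw.json()["outputs"][0]["data"])  # [2, 4, ..., 32]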
seldon pipeline infer tfsimple '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
In the samples folder there is a batch request input file, batch-inputs/iris-input.txt, which contains 100 input payloads for our iris model. Let's take a look at the first line in that file:
cat batch-inputs/iris-input.txt | head -n 1 | jq
{
"inputs": [
{
"name": "predict",
"data": [
0.38606369295833043,
0.006894049558299753,
0.6104082981607108,
0.3958954239450676
],
"datatype": "FP64",
"shape": [
1,
4
]
}
]
}
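If you want to produce a file like this for your own model, one JSON payload per line is all the batch client needs. A sketch, where the output path /tmp/my-input.txt and the random data are just placeholders:
import json
import numpy as np
# Write 100 single-row V2 inference payloads, one JSON object per line.
with open("/tmp/my-input.txt", "w") as f:
    for _ in range(100):
        payload = {
            "inputs": [{
                "name": "predict",
                "data": np.random.rand(4).tolist(),
                "datatype": "FP64",
                "shape": [1, 4],
            }]
        }
        f.write(json.dumps(payload) + "\n")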
To run a batch inference job, we'll use the MLServer CLI. If you don't already have it installed, you can install it using:
pip install mlserver
The inference job can be executed by running the following command:
mlserver infer -u localhost:9000 -m iris -i batch-inputs/iris-input.txt -o /tmp/iris-output.txt --workers 5
2023-01-22 18:24:17,272 [mlserver] INFO - Using asyncio event-loop policy: uvloop
2023-01-22 18:24:17,273 [mlserver] INFO - server url: localhost:9000
2023-01-22 18:24:17,273 [mlserver] INFO - model name: iris
2023-01-22 18:24:17,273 [mlserver] INFO - request headers: {}
2023-01-22 18:24:17,273 [mlserver] INFO - input file path: batch-inputs/iris-input.txt
2023-01-22 18:24:17,273 [mlserver] INFO - output file path: /tmp/iris-output.txt
2023-01-22 18:24:17,273 [mlserver] INFO - workers: 5
2023-01-22 18:24:17,273 [mlserver] INFO - retries: 3
2023-01-22 18:24:17,273 [mlserver] INFO - batch interval: 0.0
2023-01-22 18:24:17,274 [mlserver] INFO - batch jitter: 0.0
2023-01-22 18:24:17,274 [mlserver] INFO - connection timeout: 60
2023-01-22 18:24:17,274 [mlserver] INFO - micro-batch size: 1
2023-01-22 18:24:17,420 [mlserver] INFO - Finalizer: processed instances: 100
2023-01-22 18:24:17,421 [mlserver] INFO - Total processed instances: 100
2023-01-22 18:24:17,421 [mlserver] INFO - Time taken: 0.15 seconds
The mlserver batch component will take your input file batch-inputs/iris-input.txt, distribute those payloads across 5 different workers (--workers 5), collect the responses, and write them to the file /tmp/iris-output.txt. For a full set of options, check out the MLServer CLI Reference.
We can check the inference responses by looking at the contents of the output file:
cat /tmp/iris-output.txt | head -n 1 | jq
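Because the workers run concurrently, responses aren't guaranteed to be written in input order; each one carries a batch_index parameter (visible in the tfsimple output further below) that maps it back to its input line. A sketch for reading the results back in order:
import json
# Load every response and sort by the original input line.
with open("/tmp/iris-output.txt") as f:
    responses = [json.loads(line) for line in f]
responses.sort(key=lambda r: r["parameters"]["batch_index"])
print(responses[0]["outputs"][0]["data"])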
We can run the same batch job for our iris pipeline and store the outputs in a different file:
mlserver infer -u localhost:9000 -m iris-pipeline.pipeline -i batch-inputs/iris-input.txt -o /tmp/iris-pipeline-output.txt --workers 5
2023-01-22 18:25:18,651 [mlserver] INFO - Using asyncio event-loop policy: uvloop
2023-01-22 18:25:18,653 [mlserver] INFO - server url: localhost:9000
2023-01-22 18:25:18,653 [mlserver] INFO - model name: iris-pipeline.pipeline
2023-01-22 18:25:18,653 [mlserver] INFO - request headers: {}
2023-01-22 18:25:18,653 [mlserver] INFO - input file path: batch-inputs/iris-input.txt
2023-01-22 18:25:18,653 [mlserver] INFO - output file path: /tmp/iris-pipeline-output.txt
2023-01-22 18:25:18,653 [mlserver] INFO - workers: 5
2023-01-22 18:25:18,653 [mlserver] INFO - retries: 3
2023-01-22 18:25:18,653 [mlserver] INFO - batch interval: 0.0
2023-01-22 18:25:18,653 [mlserver] INFO - batch jitter: 0.0
2023-01-22 18:25:18,653 [mlserver] INFO - connection timeout: 60
2023-01-22 18:25:18,653 [mlserver] INFO - micro-batch size: 1
2023-01-22 18:25:18,963 [mlserver] INFO - Finalizer: processed instances: 100
2023-01-22 18:25:18,963 [mlserver] INFO - Total processed instances: 100
2023-01-22 18:25:18,963 [mlserver] INFO - Time taken: 0.31 seconds
We can check the inference responses by looking at the contents of the output file:
cat /tmp/iris-pipeline-output.txt | head -n 1 | jq
The samples folder contains an example batch input for the tensorflow model, just as it did for the scikit-learn model. You can find it at batch-inputs/tfsimple-input.txt. Let's take a look at the first inference request in the file:
cat batch-inputs/tfsimple-input.txt | head -n 1 | jq
{
"inputs": [
{
"name": "INPUT0",
"data": [
75,
39,
9,
44,
32,
97,
99,
40,
13,
27,
25,
36,
18,
77,
62,
60
],
"datatype": "INT32",
"shape": [
1,
16
]
},
{
"name": "INPUT1",
"data": [
39,
7,
14,
58,
13,
88,
98,
66,
97,
57,
49,
3,
49,
63,
37,
12
],
"datatype": "INT32",
"shape": [
1,
16
]
}
]
}
As before, we can run the inference batch job using the mlserver infer command:
mlserver infer -u localhost:9000 -m tfsimple1 -i batch-inputs/tfsimple-input.txt -o /tmp/tfsimple-output.txt --workers 10
2023-01-23 14:56:10,870 [mlserver] INFO - Using asyncio event-loop policy: uvloop
2023-01-23 14:56:10,872 [mlserver] INFO - server url: localhost:9000
2023-01-23 14:56:10,872 [mlserver] INFO - model name: tfsimple1
2023-01-23 14:56:10,872 [mlserver] INFO - request headers: {}
2023-01-23 14:56:10,872 [mlserver] INFO - input file path: batch-inputs/tfsimple-input.txt
2023-01-23 14:56:10,872 [mlserver] INFO - output file path: /tmp/tfsimple-output.txt
2023-01-23 14:56:10,872 [mlserver] INFO - workers: 10
2023-01-23 14:56:10,872 [mlserver] INFO - retries: 3
2023-01-23 14:56:10,872 [mlserver] INFO - batch interval: 0.0
2023-01-23 14:56:10,872 [mlserver] INFO - batch jitter: 0.0
2023-01-23 14:56:10,872 [mlserver] INFO - connection timeout: 60
2023-01-23 14:56:10,872 [mlserver] INFO - micro-batch size: 1
2023-01-23 14:56:11,077 [mlserver] INFO - Finalizer: processed instances: 100
2023-01-23 14:56:11,077 [mlserver] INFO - Total processed instances: 100
2023-01-23 14:56:11,078 [mlserver] INFO - Time taken: 0.21 seconds
We can check the inference responses by looking at the contents of the output file:
cat /tmp/tfsimple-output.txt | head -n 1 | jq
You should get the following response:
{
"model_name": "tfsimple1_1",
"model_version": "1",
"id": "54e6c237-8356-4c3c-96b5-2dca4596dbe9",
"parameters": {
"batch_index": 0,
"inference_id": "54e6c237-8356-4c3c-96b5-2dca4596dbe9"
},
"outputs": [
{
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32",
"parameters": {},
"data": [
114,
46,
23,
102,
45,
185,
197,
106,
110,
84,
74,
39,
67,
140,
99,
72
]
},
{
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32",
"parameters": {},
"data": [
36,
32,
-5,
-14,
19,
9,
1,
-26,
-84,
-30,
-24,
33,
-31,
14,
25,
48
]
}
]
}
We can run the same batch job for our tensorflow pipeline and store the outputs in a different file:
mlserver infer -u localhost:9000 -m tfsimple.pipeline -i batch-inputs/tfsimple-input.txt -o /tmp/tfsimple-pipeline-output.txt --workers 10
2023-01-23 14:56:10,870 [mlserver] INFO - Using asyncio event-loop policy: uvloop
2023-01-23 14:56:10,872 [mlserver] INFO - server url: localhost:9000
2023-01-23 14:56:10,872 [mlserver] INFO - model name: tfsimple.pipeline
2023-01-23 14:56:10,872 [mlserver] INFO - request headers: {}
2023-01-23 14:56:10,872 [mlserver] INFO - input file path: batch-inputs/tfsimple-input.txt
2023-01-23 14:56:10,872 [mlserver] INFO - output file path: /tmp/tfsimple-pipeline-output.txt
2023-01-23 14:56:10,872 [mlserver] INFO - workers: 10
2023-01-23 14:56:10,872 [mlserver] INFO - retries: 3
2023-01-23 14:56:10,872 [mlserver] INFO - batch interval: 0.0
2023-01-23 14:56:10,872 [mlserver] INFO - batch jitter: 0.0
2023-01-23 14:56:10,872 [mlserver] INFO - connection timeout: 60
2023-01-23 14:56:10,872 [mlserver] INFO - micro-batch size: 1
2023-01-23 14:56:11,077 [mlserver] INFO - Finalizer: processed instances: 100
2023-01-23 14:56:11,077 [mlserver] INFO - Total processed instances: 100
2023-01-23 14:56:11,078 [mlserver] INFO - Time taken: 0.25 seconds
We can check the inference responses by looking at the contents of the output file:
cat /tmp/tfsimple-pipeline-output.txt | head -n 1 | jq
Now that we've run our batch examples, let's remove the models and pipelines we created:
seldon model unload iris
seldon model unload tfsimple1
seldon pipeline unload iris-pipeline
seldon pipeline unload tfsimple
And finally let's spin down our local instance of Seldon Core:
cd ../ && make undeploy-local
Examples of various model artifact types from different frameworks running under Seldon Core 2:
SKlearn
Tensorflow
XGBoost
ONNX
Lightgbm
MLFlow
PyTorch
The Python requirements are listed in model-zoo-requirements.txt.
The training code for this model can be found at scripts/models/iris in the SCv2 repo.
cat ./models/sklearn-iris-gs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: iris
spec:
  storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn"
  requirements:
  - sklearn
  memory: 100Ki
seldon model load -f ./models/sklearn-iris-gs.yaml
{}
seldon model status iris -w ModelAvailable | jq -M .
{}
seldon model infer iris \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "09263298-ca66-49c5-acb9-0ca75b06f825",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"data": [
2
]
}
]
}
seldon model unload iris
{}
import requests
import json
from typing import Dict, List
import numpy as np
import os
import tensorflow as tf
from alibi_detect.utils.perturbation import apply_mask
from alibi_detect.datasets import fetch_cifar10c
import matplotlib.pyplot as plt
tf.keras.backend.clear_session()
2023-03-09 19:43:43.637892: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-03-09 19:43:43.637906: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
train, test = tf.keras.datasets.cifar10.load_data()
X_train, y_train = train
X_test, y_test = test
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
classes = (
    "plane",
    "car",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
reqJson = json.loads('{"inputs":[{"name":"input_1","data":[],"datatype":"FP32","shape":[]}]}')
url = "http://0.0.0.0:9000/v2/models/model/infer"
def infer(resourceName: str, idx: int):
    # Select one training image, display it, and send it for inference.
    rows = X_train[idx:idx+1]
    show(rows[0])
    reqJson["inputs"][0]["data"] = rows.flatten().tolist()
    reqJson["inputs"][0]["shape"] = [1, 32, 32, 3]
    # The seldon-model header routes the request to the named model.
    headers = {"Content-Type": "application/json", "seldon-model": resourceName}
    response_raw = requests.post(url, json=reqJson, headers=headers)
    probs = np.array(response_raw.json()["outputs"][0]["data"])
    print(classes[probs.argmax(axis=0)])

def show(X):
    plt.imshow(X.reshape(32, 32, 3))
    plt.axis("off")
    plt.show()
cat ./models/cifar10-no-config.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: cifar10
spec:
  storageUri: "gs://seldon-models/scv2/samples/tensorflow/cifar10"
  requirements:
  - tensorflow
seldon model load -f ./models/cifar10-no-config.yaml
{}
seldon model status cifar10 -w ModelAvailable | jq -M .
{}
infer("cifar10",4)
car
seldon model unload cifar10
{}
The training code for this model can be found at ./scripts/models/income-xgb.
cat ./models/income-xgb.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: income-xgb
spec:
  storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/income-xgb"
  requirements:
  - xgboost
seldon model load -f ./models/income-xgb.yaml
{}
seldon model status income-xgb -w ModelAvailable | jq -M .
{}
seldon model infer income-xgb \
'{ "parameters": {"content_type": "pd"}, "inputs": [{"name": "Age", "shape": [1, 1], "datatype": "INT64", "data": [47]},{"name": "Workclass", "shape": [1, 1], "datatype": "INT64", "data": [4]},{"name": "Education", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Marital Status", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Occupation", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Relationship", "shape": [1, 1], "datatype": "INT64", "data": [3]},{"name": "Race", "shape": [1, 1], "datatype": "INT64", "data": [4]},{"name": "Sex", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Capital Gain", "shape": [1, 1], "datatype": "INT64", "data": [0]},{"name": "Capital Loss", "shape": [1, 1], "datatype": "INT64", "data": [0]},{"name": "Hours per week", "shape": [1, 1], "datatype": "INT64", "data": [40]},{"name": "Country", "shape": [1, 1], "datatype": "INT64", "data": [9]}]}'
{
"model_name": "income-xgb_1",
"model_version": "1",
"id": "e30c3b44-fa14-4e5f-88f5-d6f4d287da20",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "FP32",
"data": [
-1.8380107879638672
]
}
]
}
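The FP32 value here looks like a raw margin rather than a probability, which is an assumption about how this particular booster was exported; under that assumption, the logistic function recovers a class probability:
import math
margin = -1.8380107879638672  # the model output above
prob = 1.0 / (1.0 + math.exp(-margin))
print(prob)  # ~0.14, i.e. a low probability of the positive class under this assumption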
seldon model unload income-xgb
{}
This is a pretrained model, as defined in the ./scripts/models/Makefile target mnist-onnx.
import matplotlib.pyplot as plt
import json
import requests
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np
training_data = MNIST(
    root=".",
    download=True,
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)
reqJson = json.loads('{"inputs":[{"name":"Input3","data":[],"datatype":"FP32","shape":[]}]}')
url = "http://0.0.0.0:9000/v2/models/model/infer"
dl = DataLoader(training_data, batch_size=1, shuffle=False)
dlIter = iter(dl)

def infer_mnist():
    # Take the next test image and fill in the request payload.
    x, y = next(dlIter)
    data = x.cpu().numpy()
    reqJson["inputs"][0]["data"] = data.flatten().tolist()
    reqJson["inputs"][0]["shape"] = [1, 1, 28, 28]
    # The seldon-model header routes the request to the mnist-onnx model.
    headers = {"Content-Type": "application/json", "seldon-model": "mnist-onnx"}
    response_raw = requests.post(url, json=reqJson, headers=headers)
    show_mnist(x)
    probs = np.array(response_raw.json()["outputs"][0]["data"])
    print(probs.argmax(axis=0))

def show_mnist(X):
    plt.imshow(X.reshape(28, 28))
    plt.axis("off")
    plt.show()
cat ./models/mnist-onnx.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: mnist-onnx
spec:
  storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mnist-onnx"
  requirements:
  - onnx
seldon model load -f ./models/mnist-onnx.yaml
{}
seldon model status mnist-onnx -w ModelAvailable | jq -M .
{}
infer_mnist()
7
seldon model unload mnist-onnx
{}
The training code for this model can be found at ./scripts/models/income-lgb.
cat ./models/income-lgb.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: income-lgb
spec:
  storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/income-lgb"
  requirements:
  - lightgbm
seldon model load -f ./models/income-lgb.yaml
{}
seldon model status income-lgb -w ModelAvailable | jq -M .
{}
seldon model infer income-lgb \
'{ "parameters": {"content_type": "pd"}, "inputs": [{"name": "Age", "shape": [1, 1], "datatype": "INT64", "data": [47]},{"name": "Workclass", "shape": [1, 1], "datatype": "INT64", "data": [4]},{"name": "Education", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Marital Status", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Occupation", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Relationship", "shape": [1, 1], "datatype": "INT64", "data": [3]},{"name": "Race", "shape": [1, 1], "datatype": "INT64", "data": [4]},{"name": "Sex", "shape": [1, 1], "datatype": "INT64", "data": [1]},{"name": "Capital Gain", "shape": [1, 1], "datatype": "INT64", "data": [0]},{"name": "Capital Loss", "shape": [1, 1], "datatype": "INT64", "data": [0]},{"name": "Hours per week", "shape": [1, 1], "datatype": "INT64", "data": [40]},{"name": "Country", "shape": [1, 1], "datatype": "INT64", "data": [9]}]}'
{
"model_name": "income-lgb_1",
"model_version": "1",
"id": "4437a71e-9af1-4e3b-aa4b-cb95d2cd86b9",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "FP64",
"data": [
0.06279460120044741
]
}
]
}
seldon model unload income-lgb
{}
The training code for this model can be found at ./scripts/models/wine-mlflow.
cat ./models/wine-mlflow.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: wine
spec:
  storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/wine-mlflow"
  requirements:
  - mlflow
seldon model load -f ./models/wine-mlflow.yaml
{}
seldon model status wine -w ModelAvailable | jq -M .
{}
import requests
url = "http://0.0.0.0:9000/v2/models/model/infer"
inference_request = {
    "inputs": [
        {"name": "fixed acidity", "shape": [1], "datatype": "FP32", "data": [7.4]},
        {"name": "volatile acidity", "shape": [1], "datatype": "FP32", "data": [0.7000]},
        {"name": "citric acid", "shape": [1], "datatype": "FP32", "data": [0]},
        {"name": "residual sugar", "shape": [1], "datatype": "FP32", "data": [1.9]},
        {"name": "chlorides", "shape": [1], "datatype": "FP32", "data": [0.076]},
        {"name": "free sulfur dioxide", "shape": [1], "datatype": "FP32", "data": [11]},
        {"name": "total sulfur dioxide", "shape": [1], "datatype": "FP32", "data": [34]},
        {"name": "density", "shape": [1], "datatype": "FP32", "data": [0.9978]},
        {"name": "pH", "shape": [1], "datatype": "FP32", "data": [3.51]},
        {"name": "sulphates", "shape": [1], "datatype": "FP32", "data": [0.56]},
        {"name": "alcohol", "shape": [1], "datatype": "FP32", "data": [9.4]},
    ]
}
headers = {"Content-Type": "application/json", "seldon-model":"wine"}
response_raw = requests.post(url, json=inference_request, headers=headers)
print(response_raw.json())
{'model_name': 'wine_1', 'model_version': '1', 'id': '0d7e44f8-b46c-4438-b8af-a749e6aa6039', 'parameters': {}, 'outputs': [{'name': 'output-1', 'shape': [1, 1], 'datatype': 'FP64', 'data': [5.576883936610762]}]}
seldon model unload wine
{}
This example model is downloaded and trained via the ./scripts/models/Makefile target mnist-pytorch.
import numpy as np
import matplotlib.pyplot as plt
import json
import requests
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision import transforms
from torch.utils.data import DataLoader
training_data = MNIST(
    root=".",
    download=True,
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)
reqJson = json.loads('{"inputs":[{"name":"x__0","data":[],"datatype":"FP32","shape":[]}]}')
url = "http://0.0.0.0:9000/v2/models/model/infer"
dl = DataLoader(training_data, batch_size=1, shuffle=False)
dlIter = iter(dl)

def infer_mnist():
    # Take the next test image and fill in the request payload.
    x, y = next(dlIter)
    data = x.cpu().numpy()
    reqJson["inputs"][0]["data"] = data.flatten().tolist()
    reqJson["inputs"][0]["shape"] = [1, 1, 28, 28]
    # The seldon-model header routes the request to the mnist-pytorch model.
    headers = {"Content-Type": "application/json", "seldon-model": "mnist-pytorch"}
    response_raw = requests.post(url, json=reqJson, headers=headers)
    show_mnist(x)
    probs = np.array(response_raw.json()["outputs"][0]["data"])
    print(probs.argmax(axis=0))

def show_mnist(X):
    plt.imshow(X.reshape(28, 28))
    plt.axis("off")
    plt.show()
cat ./models/mnist-pytorch.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: mnist-pytorch
spec:
  storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mnist-pytorch"
  requirements:
  - pytorch
seldon model load -f ./models/mnist-pytorch.yaml
{}
seldon model status mnist-pytorch -w ModelAvailable | jq -M .
{}
infer_mnist()
7
seldon model unload mnist-pytorch
{}
Seldon inference is built from atomic Model components. Models as shown here cover a wide range of artifacts including:
Core machine learning models, e.g. a PyTorch model.
Feature transformations that might be built with custom python code.
Drift detectors.
Outlier detectors.
Explainers.
Adversarial detectors.
A typical workflow for a production machine learning setup might be as follows:
You create a Tensorflow model for your core application use case and test this model in isolation to validate.
You create an SKLearn feature transformation component that runs before your model to convert the input into the correct form. You also create drift and outlier detectors using Seldon's open source Alibi-detect library and test these in isolation.
You join these components together into a Pipeline for the final production setup.
These steps are shown in the diagram below:
This section will provide some examples to allow operations with Seldon to be tested so you can run your own models, experiments, pipelines and explainers.
This notebook illustrates a series of Pipelines that are joined together.
gs://seldon-models/triton/simple is an example Triton tensorflow model that takes two inputs, INPUT0 and INPUT1, adds them to produce OUTPUT0, and subtracts INPUT1 from INPUT0 to produce OUTPUT1. See here for the original source code and license.
Other models can be found at https://github.com/SeldonIO/triton-python-examples
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: tfsimple1
spec:
  storageUri: "gs://seldon-models/triton/simple"
  requirements:
  - tensorflow
  memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: tfsimple2
spec:
  storageUri: "gs://seldon-models/triton/simple"
  requirements:
  - tensorflow
  memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
cat ./pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: tfsimple
spec:
  steps:
  - name: tfsimple1
  output:
    steps:
    - tfsimple1
seldon pipeline load -f ./pipelines/tfsimple.yaml
seldon pipeline status tfsimple -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple",
"versions": [
{
"pipeline": {
"name": "tfsimple",
"uid": "cieq5dqi8ufs73flaj4g",
"version": 1,
"steps": [
{
"name": "tfsimple1"
}
],
"output": {
"steps": [
"tfsimple1.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T15:26:48.074696631Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimple \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
The second pipeline takes its input from the first: externalInputs references the outputs of the tfsimple pipeline, and tensorMap renames those tensors to the INPUT0 and INPUT1 names that tfsimple2 expects.
cat ./pipelines/tfsimple-extended.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: tfsimple-extended
spec:
  input:
    externalInputs:
    - tfsimple.outputs
    tensorMap:
      tfsimple.outputs.OUTPUT0: INPUT0
      tfsimple.outputs.OUTPUT1: INPUT1
  steps:
  - name: tfsimple2
  output:
    steps:
    - tfsimple2
seldon pipeline load -f ./pipelines/tfsimple-extended.yaml
seldon pipeline status tfsimple-extended -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple-extended",
"versions": [
{
"pipeline": {
"name": "tfsimple-extended",
"uid": "cieq5h2i8ufs73flaj50",
"version": 1,
"steps": [
{
"name": "tfsimple2"
}
],
"output": {
"steps": [
"tfsimple2.outputs"
]
},
"kubernetesMeta": {},
"input": {
"externalInputs": [
"tfsimple.outputs"
],
"tensorMap": {
"tfsimple.outputs.OUTPUT0": "INPUT0",
"tfsimple.outputs.OUTPUT1": "INPUT1"
}
}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T15:27:01.095715504Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimple --header x-request-id=test-id \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline inspect tfsimple
seldon.default.model.tfsimple1.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.model.tfsimple1.outputs test-id {"modelName":"tfsimple1_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon.default.pipeline.tfsimple.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.pipeline.tfsimple.outputs test-id {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon pipeline inspect tfsimple-extended
seldon.default.model.tfsimple2.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.model.tfsimple2.outputs test-id {"modelName":"tfsimple2_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon.default.pipeline.tfsimple-extended.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended.outputs test-id {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
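The topic names shown by inspect reflect how pipelines communicate: each pipeline and model gets Kafka topics of the form seldon.<namespace>.<pipeline|model>.<name>.<inputs|outputs>. As a rough sketch, you could tail one of these topics yourself, assuming kafka-python is installed and the broker is reachable (the bootstrap address below is an assumption for an in-cluster setup; substitute your own):
# Sketch: tail the tfsimple pipeline's output topic directly.
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    "seldon.default.pipeline.tfsimple.outputs",
    bootstrap_servers="seldon-kafka-bootstrap:9092",  # assumption: adjust for your cluster
    auto_offset_reset="earliest",
)
for msg in consumer:
    # Payloads are binary-encoded v2 inference responses
    print(msg.key, msg.value[:80])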
seldon pipeline unload tfsimple-extended
seldon pipeline unload tfsimple
seldon model unload tfsimple1
seldon model unload tfsimple2
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
cat ./pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple
spec:
steps:
- name: tfsimple1
output:
steps:
- tfsimple1
seldon pipeline load -f ./pipelines/tfsimple.yaml
seldon pipeline status tfsimple -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple",
"versions": [
{
"pipeline": {
"name": "tfsimple",
"uid": "cieq6aai8ufs73flaj5g",
"version": 1,
"steps": [
{
"name": "tfsimple1"
}
],
"output": {
"steps": [
"tfsimple1.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T15:28:41.766794892Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimple \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
cat ./pipelines/tfsimple-extended.yaml
echo "---"
cat ./pipelines/tfsimple-extended2.yaml
echo "---"
cat ./pipelines/tfsimple-combined.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-extended
spec:
input:
externalInputs:
- tfsimple.outputs
tensorMap:
tfsimple.outputs.OUTPUT0: INPUT0
tfsimple.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-extended2
spec:
input:
externalInputs:
- tfsimple.outputs
tensorMap:
tfsimple.outputs.OUTPUT0: INPUT0
tfsimple.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-combined
spec:
input:
externalInputs:
- tfsimple-extended.outputs.OUTPUT0
- tfsimple-extended2.outputs.OUTPUT1
tensorMap:
tfsimple-extended.outputs.OUTPUT0: INPUT0
tfsimple-extended2.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
seldon pipeline load -f ./pipelines/tfsimple-extended.yaml
seldon pipeline load -f ./pipelines/tfsimple-extended2.yaml
seldon pipeline load -f ./pipelines/tfsimple-combined.yaml
seldon pipeline status tfsimple-extended -w PipelineReady
seldon pipeline status tfsimple-extended2 -w PipelineReady
seldon pipeline status tfsimple-combined -w PipelineReady
{"pipelineName":"tfsimple-extended", "versions":[{"pipeline":{"name":"tfsimple-extended", "uid":"cieq6dai8ufs73flaj60", "version":1, "steps":[{"name":"tfsimple2"}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}, "input":{"externalInputs":["tfsimple.outputs"], "tensorMap":{"tfsimple.outputs.OUTPUT0":"INPUT0", "tfsimple.outputs.OUTPUT1":"INPUT1"}}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T15:28:53.963808852Z", "modelsReady":true}}]}
{"pipelineName":"tfsimple-extended2", "versions":[{"pipeline":{"name":"tfsimple-extended2", "uid":"cieq6dai8ufs73flaj6g", "version":1, "steps":[{"name":"tfsimple2"}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}, "input":{"externalInputs":["tfsimple.outputs"], "tensorMap":{"tfsimple.outputs.OUTPUT0":"INPUT0", "tfsimple.outputs.OUTPUT1":"INPUT1"}}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T15:28:54.087670106Z", "modelsReady":true}}]}
{"pipelineName":"tfsimple-combined", "versions":[{"pipeline":{"name":"tfsimple-combined", "uid":"cieq6dii8ufs73flaj70", "version":1, "steps":[{"name":"tfsimple2"}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}, "input":{"externalInputs":["tfsimple-extended.outputs.OUTPUT0", "tfsimple-extended2.outputs.OUTPUT1"], "tensorMap":{"tfsimple-extended.outputs.OUTPUT0":"INPUT0", "tfsimple-extended2.outputs.OUTPUT1":"INPUT1"}}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T15:28:54.330770841Z", "modelsReady":true}}]}
seldon pipeline infer tfsimple --header x-request-id=test-id2 \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline inspect tfsimple
seldon.default.model.tfsimple1.inputs test-id2 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.model.tfsimple1.outputs test-id2 {"modelName":"tfsimple1_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon.default.pipeline.tfsimple.inputs test-id2 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.pipeline.tfsimple.outputs test-id2 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon pipeline inspect tfsimple-extended --offset 2 --verbose
seldon.default.model.tfsimple2.inputs test-id2 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]} x-request-id=[test-id2] x-forwarded-proto=[http] x-seldon-route=[:tfsimple1_1:] x-envoy-upstream-service-time=[1] pipeline=[tfsimple-extended] traceparent=[00-e438b82ad361ac2d5481bcfc494074d2-e468d06afdab8f52-01] x-envoy-expected-rq-timeout-ms=[60000]
seldon.default.model.tfsimple2.outputs test-id2 {"modelName":"tfsimple2_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]} x-envoy-expected-rq-timeout-ms=[60000] x-request-id=[test-id2] x-forwarded-proto=[http] x-seldon-route=[:tfsimple1_1: :tfsimple2_1:] x-envoy-upstream-service-time=[1] pipeline=[tfsimple-extended] traceparent=[00-e438b82ad361ac2d5481bcfc494074d2-73bd1ee54a94d8fb-01]
seldon.default.pipeline.tfsimple-extended.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]} pipeline=[tfsimple-extended] traceparent=[00-3a6047efa647efc2b3fc5266ae023d23-fee12926788ce3b6-01] x-envoy-expected-rq-timeout-ms=[60000] x-request-id=[test-id] x-forwarded-proto=[http] x-envoy-upstream-service-time=[5] x-seldon-route=[:tfsimple1_1:]
seldon.default.pipeline.tfsimple-extended.inputs test-id2 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]} x-forwarded-proto=[http] x-seldon-route=[:tfsimple1_1:] x-envoy-upstream-service-time=[1] pipeline=[tfsimple-extended] traceparent=[00-e438b82ad361ac2d5481bcfc494074d2-4df8459a992e0278-01] x-envoy-expected-rq-timeout-ms=[60000] x-request-id=[test-id2]
seldon.default.pipeline.tfsimple-extended.outputs test-id {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]} pipeline=[tfsimple-extended] traceparent=[00-3a6047efa647efc2b3fc5266ae023d23-b2f899a739c5cafd-01] x-envoy-expected-rq-timeout-ms=[60000] x-request-id=[test-id] x-forwarded-proto=[http] x-envoy-upstream-service-time=[5] x-seldon-route=[:tfsimple1_1: :tfsimple2_1:]
seldon.default.pipeline.tfsimple-extended.outputs test-id2 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]} x-envoy-upstream-service-time=[1] pipeline=[tfsimple-extended] traceparent=[00-e438b82ad361ac2d5481bcfc494074d2-dfa399143feec23d-01] x-envoy-expected-rq-timeout-ms=[60000] x-request-id=[test-id2] x-forwarded-proto=[http] x-seldon-route=[:tfsimple1_1: :tfsimple2_1:]
seldon pipeline inspect tfsimple-extended2 --offset 2
seldon.default.pipeline.tfsimple-extended2.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended2.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended2.outputs test-id3 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon.default.pipeline.tfsimple-extended2.outputs test-id {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon pipeline inspect tfsimple-combined
seldon.default.model.tfsimple2.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.model.tfsimple2.outputs test-id {"modelName":"tfsimple2_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon.default.pipeline.tfsimple-combined.inputs test-id {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.pipeline.tfsimple-combined.outputs test-id {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
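The final 4x values follow from the simple model's semantics: it returns the sum and difference of its two input tensors, so each extended pipeline carries 2x the original data, and tfsimple-combined then adds two 2x tensors together. A quick Python sanity check reproduces the tensors seen above:
# Sanity check: reproduce tfsimple-combined's outputs by hand.
def tfsimple(input0, input1):
    output0 = [a + b for a, b in zip(input0, input1)]
    output1 = [a - b for a, b in zip(input0, input1)]
    return output0, output1

data = list(range(1, 17))              # original request: 1..16
out0, out1 = tfsimple(data, data)      # tfsimple: 2x and zeros
ext0, ext1 = tfsimple(out0, out1)      # tfsimple-extended: both tensors 2x
ext2_0, ext2_1 = tfsimple(out0, out1)  # tfsimple-extended2: both tensors 2x
# tfsimple-combined: INPUT0 = extended.OUTPUT0, INPUT1 = extended2.OUTPUT1
comb0, comb1 = tfsimple(ext0, ext2_1)
print(comb0)  # [4, 8, ..., 64]
print(comb1)  # all zeros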
seldon pipeline unload tfsimple-extended
seldon pipeline unload tfsimple-extended2
seldon pipeline unload tfsimple-combined
seldon pipeline unload tfsimple
seldon model unload tfsimple1
seldon model unload tfsimple2
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
cat ./pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple
spec:
steps:
- name: tfsimple1
output:
steps:
- tfsimple1
seldon pipeline load -f ./pipelines/tfsimple.yaml
seldon pipeline status tfsimple -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple",
"versions": [
{
"pipeline": {
"name": "tfsimple",
"uid": "ciepkmii8ufs73flaj2g",
"version": 1,
"steps": [
{
"name": "tfsimple1"
}
],
"output": {
"steps": [
"tfsimple1.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-29T14:51:06.822716088Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimple \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
cat ./pipelines/tfsimple-extended.yaml
echo "---"
cat ./pipelines/tfsimple-extended2.yaml
echo "---"
cat ./pipelines/tfsimple-combined-trigger.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-extended
spec:
input:
externalInputs:
- tfsimple.outputs
tensorMap:
tfsimple.outputs.OUTPUT0: INPUT0
tfsimple.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-extended2
spec:
input:
externalInputs:
- tfsimple.outputs
tensorMap:
tfsimple.outputs.OUTPUT0: INPUT0
tfsimple.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-combined-trigger
spec:
input:
externalInputs:
- tfsimple-extended.outputs
externalTriggers:
- tfsimple-extended2.outputs
tensorMap:
tfsimple-extended.outputs.OUTPUT0: INPUT0
tfsimple-extended.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
seldon pipeline load -f ./pipelines/tfsimple-extended.yaml
seldon pipeline load -f ./pipelines/tfsimple-extended2.yaml
seldon pipeline load -f ./pipelines/tfsimple-combined-trigger.yaml
seldon pipeline status tfsimple-extended -w PipelineReady
seldon pipeline status tfsimple-extended2 -w PipelineReady
seldon pipeline status tfsimple-combined-trigger -w PipelineReady
{"pipelineName":"tfsimple-extended", "versions":[{"pipeline":{"name":"tfsimple-extended", "uid":"ciepkoii8ufs73flaj30", "version":1, "steps":[{"name":"tfsimple2"}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}, "input":{"externalInputs":["tfsimple.outputs"], "tensorMap":{"tfsimple.outputs.OUTPUT0":"INPUT0", "tfsimple.outputs.OUTPUT1":"INPUT1"}}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:51:14.937544974Z", "modelsReady":true}}]}
{"pipelineName":"tfsimple-extended2", "versions":[{"pipeline":{"name":"tfsimple-extended2", "uid":"ciepkoii8ufs73flaj3g", "version":1, "steps":[{"name":"tfsimple2"}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}, "input":{"externalInputs":["tfsimple.outputs"], "tensorMap":{"tfsimple.outputs.OUTPUT0":"INPUT0", "tfsimple.outputs.OUTPUT1":"INPUT1"}}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:51:15.062097751Z", "modelsReady":true}}]}
{"pipelineName":"tfsimple-combined-trigger", "versions":[{"pipeline":{"name":"tfsimple-combined-trigger", "uid":"ciepkoqi8ufs73flaj40", "version":1, "steps":[{"name":"tfsimple2"}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}, "input":{"externalInputs":["tfsimple-extended.outputs"], "externalTriggers":["tfsimple-extended2.outputs"], "tensorMap":{"tfsimple-extended.outputs.OUTPUT0":"INPUT0", "tfsimple-extended.outputs.OUTPUT1":"INPUT1"}}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:51:15.326170068Z", "modelsReady":true}}]}
seldon pipeline infer tfsimple --header x-request-id=test-id3 \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline inspect tfsimple
seldon.default.model.tfsimple1.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.model.tfsimple1.outputs test-id3 {"modelName":"tfsimple1_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon.default.pipeline.tfsimple.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]}}]}
seldon.default.pipeline.tfsimple.outputs test-id3 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon pipeline inspect tfsimple-extended --offset 2
seldon.default.model.tfsimple2.outputs test-id3 {"modelName":"tfsimple2_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon.default.pipeline.tfsimple-extended.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended.outputs test-id3 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon.default.pipeline.tfsimple-extended.outputs test-id3 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon pipeline inspect tfsimple-extended2 --offset 2
seldon.default.model.tfsimple2.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended2.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended2.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended2.outputs test-id3 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon.default.pipeline.tfsimple-extended2.outputs test-id3 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}]}
seldon pipeline inspect tfsimple-combined-trigger
seldon.default.model.tfsimple2.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.model.tfsimple2.outputs test-id3 {"modelName":"tfsimple2_1", "modelVersion":"1", "outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon.default.pipeline.tfsimple-combined-trigger.inputs test-id3 {"inputs":[{"name":"INPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}, {"name":"INPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]}}], "rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==", "AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.pipeline.tfsimple-combined-trigger.outputs test-id3 {"outputs":[{"name":"OUTPUT0", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64]}}, {"name":"OUTPUT1", "datatype":"INT32", "shape":["1", "16"], "contents":{"intContents":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}}]}
seldon pipeline unload tfsimple-extended
seldon pipeline unload tfsimple-extended2
seldon pipeline unload tfsimple-combined-trigger
seldon pipeline unload tfsimple
seldon model unload tfsimple1
seldon model unload tfsimple2
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
cat ./pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple
spec:
steps:
- name: tfsimple1
output:
steps:
- tfsimple1
seldon pipeline load -f ./pipelines/tfsimple.yaml
{}
seldon pipeline status tfsimple -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple",
"versions": [
{
"pipeline": {
"name": "tfsimple",
"uid": "cg5g6m46dpcs73c4qhl0",
"version": 1,
"steps": [
{
"name": "tfsimple1"
}
],
"output": {
"steps": [
"tfsimple1.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-03-10T10:15:52.515491456Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimple \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
cat ./pipelines/tfsimple-extended-step.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-extended-step
spec:
input:
externalInputs:
- tfsimple.step.tfsimple1.outputs
tensorMap:
tfsimple.step.tfsimple1.outputs.OUTPUT0: INPUT0
tfsimple.step.tfsimple1.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
seldon pipeline load -f ./pipelines/tfsimple-extended-step.yaml
{}
seldon pipeline status tfsimple-extended-step -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple-extended-step",
"versions": [
{
"pipeline": {
"name": "tfsimple-extended-step",
"uid": "cg5g6ns6dpcs73c4qhlg",
"version": 1,
"steps": [
{
"name": "tfsimple2"
}
],
"output": {
"steps": [
"tfsimple2.outputs"
]
},
"kubernetesMeta": {},
"input": {
"externalInputs": [
"tfsimple.step.tfsimple1.outputs"
],
"tensorMap": {
"tfsimple.step.tfsimple1.outputs.OUTPUT0": "INPUT0",
"tfsimple.step.tfsimple1.outputs.OUTPUT1": "INPUT1"
}
}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-03-10T10:15:59.634720740Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimple \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline inspect tfsimple --verbose
seldon.default.model.tfsimple1.inputs cg5g6ogfh5ss73a44vvg {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}}]} pipeline=[tfsimple] traceparent=[00-2c66ff815d920ad238365be52a4467f5-90824e4cb70c3242-01] x-forwarded-proto=[http] x-envoy-expected-rq-timeout-ms=[60000] x-request-id=[cg5g6ogfh5ss73a44vvg]
seldon.default.model.tfsimple1.outputs cg5g6ogfh5ss73a44vvg {"modelName":"tfsimple1_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]} x-request-id=[cg5g6ogfh5ss73a44vvg] pipeline=[tfsimple] x-envoy-upstream-service-time=[8] x-seldon-route=[:tfsimple1_1:] traceparent=[00-2c66ff815d920ad238365be52a4467f5-ca023a540fa463b3-01] x-forwarded-proto=[http] x-envoy-expected-rq-timeout-ms=[60000]
seldon.default.pipeline.tfsimple.inputs cg5g6ogfh5ss73a44vvg {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}}]} pipeline=[tfsimple] x-request-id=[cg5g6ogfh5ss73a44vvg] traceparent=[00-2c66ff815d920ad238365be52a4467f5-843d6ce39292396d-01] x-forwarded-proto=[http] x-envoy-expected-rq-timeout-ms=[60000]
seldon.default.pipeline.tfsimple.outputs cg5g6ogfh5ss73a44vvg {"outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]} x-envoy-expected-rq-timeout-ms=[60000] x-request-id=[cg5g6ogfh5ss73a44vvg] x-envoy-upstream-service-time=[8] x-seldon-route=[:tfsimple1_1:] pipeline=[tfsimple] traceparent=[00-2c66ff815d920ad238365be52a4467f5-ee7527353e9fe5a2-01] x-forwarded-proto=[http]
seldon pipeline inspect tfsimple-extended-step
seldon.default.model.tfsimple2.inputs cg5g6ogfh5ss73a44vvg {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.model.tfsimple2.outputs cg5g6ogfh5ss73a44vvg {"modelName":"tfsimple2_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}]}
seldon.default.pipeline.tfsimple-extended-step.inputs cg5g6ogfh5ss73a44vvg {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended-step.outputs cg5g6ogfh5ss73a44vvg {"outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}]}
seldon pipeline unload tfsimple-extended-step
seldon pipeline unload tfsimple
{}
{}
seldon model unload tfsimple1
seldon model unload tfsimple2
{}
{}
cat ./models/tfsimple1.yaml
cat ./models/tfsimple2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple2
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
{}
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
cat ./pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple
spec:
steps:
- name: tfsimple1
output:
steps:
- tfsimple1
seldon pipeline load -f ./pipelines/tfsimple.yaml
{}
seldon pipeline status tfsimple -w PipelineReady | jq -M .
{
"pipelineName": "tfsimple",
"versions": [
{
"pipeline": {
"name": "tfsimple",
"uid": "cg5g6u46dpcs73c4qhm0",
"version": 1,
"steps": [
{
"name": "tfsimple1"
}
],
"output": {
"steps": [
"tfsimple1.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-03-10T10:16:24.433333171Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer tfsimple \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
cat ./pipelines/tfsimple-extended.yaml
echo "---"
cat ./pipelines/tfsimple-extended2.yaml
echo "---"
cat ./pipelines/tfsimple-combined-step.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-extended
spec:
input:
externalInputs:
- tfsimple.outputs
tensorMap:
tfsimple.outputs.OUTPUT0: INPUT0
tfsimple.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-extended2
spec:
input:
externalInputs:
- tfsimple.outputs
tensorMap:
tfsimple.outputs.OUTPUT0: INPUT0
tfsimple.outputs.OUTPUT1: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple-combined-step
spec:
input:
externalInputs:
- tfsimple-extended.step.tfsimple2.outputs.OUTPUT0
- tfsimple-extended2.step.tfsimple2.outputs.OUTPUT0
tensorMap:
tfsimple-extended.step.tfsimple2.outputs.OUTPUT0: INPUT0
tfsimple-extended2.step.tfsimple2.outputs.OUTPUT0: INPUT1
steps:
- name: tfsimple2
output:
steps:
- tfsimple2
seldon pipeline load -f ./pipelines/tfsimple-extended.yaml
seldon pipeline load -f ./pipelines/tfsimple-extended2.yaml
seldon pipeline load -f ./pipelines/tfsimple-combined-step.yaml
{}
{}
{}
seldon pipeline status tfsimple-extended -w PipelineReady
seldon pipeline status tfsimple-extended2 -w PipelineReady
seldon pipeline status tfsimple-combined-step -w PipelineReady
{"pipelineName":"tfsimple-extended","versions":[{"pipeline":{"name":"tfsimple-extended","uid":"cg5g7046dpcs73c4qhmg","version":1,"steps":[{"name":"tfsimple2"}],"output":{"steps":["tfsimple2.outputs"]},"kubernetesMeta":{},"input":{"externalInputs":["tfsimple.outputs"],"tensorMap":{"tfsimple.outputs.OUTPUT0":"INPUT0","tfsimple.outputs.OUTPUT1":"INPUT1"}}},"state":{"pipelineVersion":1,"status":"PipelineReady","reason":"created pipeline","lastChangeTimestamp":"2023-03-10T10:16:32.576588675Z","modelsReady":true}}]}
{"pipelineName":"tfsimple-extended2","versions":[{"pipeline":{"name":"tfsimple-extended2","uid":"cg5g7046dpcs73c4qhn0","version":1,"steps":[{"name":"tfsimple2"}],"output":{"steps":["tfsimple2.outputs"]},"kubernetesMeta":{},"input":{"externalInputs":["tfsimple.outputs"],"tensorMap":{"tfsimple.outputs.OUTPUT0":"INPUT0","tfsimple.outputs.OUTPUT1":"INPUT1"}}},"state":{"pipelineVersion":1,"status":"PipelineReady","reason":"created pipeline","lastChangeTimestamp":"2023-03-10T10:16:32.711813099Z","modelsReady":true}}]}
{"pipelineName":"tfsimple-combined-step","versions":[{"pipeline":{"name":"tfsimple-combined-step","uid":"cg5g7046dpcs73c4qhng","version":1,"steps":[{"name":"tfsimple2"}],"output":{"steps":["tfsimple2.outputs"]},"kubernetesMeta":{},"input":{"externalInputs":["tfsimple-extended.step.tfsimple2.outputs.OUTPUT0","tfsimple-extended2.step.tfsimple2.outputs.OUTPUT0"],"tensorMap":{"tfsimple-extended.step.tfsimple2.outputs.OUTPUT0":"INPUT0","tfsimple-extended2.step.tfsimple2.outputs.OUTPUT0":"INPUT1"}}},"state":{"pipelineVersion":1,"status":"PipelineReady","reason":"created pipeline","lastChangeTimestamp":"2023-03-10T10:16:33.017843490Z","modelsReady":true}}]}
seldon pipeline infer tfsimple \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline inspect tfsimple
seldon.default.model.tfsimple1.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}}]}
seldon.default.model.tfsimple1.outputs cg5g710fh5ss73a4500g {"modelName":"tfsimple1_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
seldon.default.pipeline.tfsimple.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}}]}
seldon.default.pipeline.tfsimple.outputs cg5g710fh5ss73a4500g {"outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
seldon pipeline inspect tfsimple-extended
seldon.default.model.tfsimple2.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.model.tfsimple2.outputs cg5g710fh5ss73a4500g {"modelName":"tfsimple2_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
seldon.default.pipeline.tfsimple-extended.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended.outputs cg5g710fh5ss73a4500g {"outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}]}
seldon pipeline inspect tfsimple-extended2
seldon.default.model.tfsimple2.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.model.tfsimple2.outputs cg5g710fh5ss73a4500g {"modelName":"tfsimple2_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
seldon.default.pipeline.tfsimple-extended2.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]}
seldon.default.pipeline.tfsimple-extended2.outputs cg5g710fh5ss73a4500g {"outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}]}
seldon pipeline inspect tfsimple-combined-step
seldon.default.model.tfsimple2.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.model.tfsimple2.outputs cg5g710fh5ss73a4500g {"modelName":"tfsimple2_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
seldon.default.pipeline.tfsimple-combined-step.inputs cg5g710fh5ss73a4500g {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="]}
seldon.default.pipeline.tfsimple-combined-step.outputs cg5g710fh5ss73a4500g {"outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[4,8,12,16,20,24,28,32,36,40,44,48,52,56,60,64]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
seldon pipeline unload tfsimple-extended
seldon pipeline unload tfsimple-extended2
seldon pipeline unload tfsimple-combined-step
seldon pipeline unload tfsimple
{}
{}
{}
{}
seldon model unload tfsimple1
seldon model unload tfsimple2
{}
{}
Requires mlserver to be installed.
Deprecated: The MLServer CLI infer feature is experimental and will be removed in future work.
Note: The Seldon CLI allows you to view information about underlying Seldon resources and make changes to them through the scheduler in non-Kubernetes environments. However, it cannot modify underlying manifests within a Kubernetes cluster. Therefore, using the Seldon CLI for control plane operations in a Kubernetes environment is not recommended. For more details, see Seldon CLI.
To run this example in Kind we need to start Kind with access to a local folder where our models are located. In this example we will use a folder in /tmp and associate that with a path in the container.
To start a Kind cluster with these settings using our Ansible script, you can run the kind-cluster playbook from the project root folder (see the ansible-playbook command below).
Create the local folder we will use for our models and copy an example iris sklearn model to it.
Here we create a storage class and an associated persistent volume referencing the /models folder where our models are stored.
Now we create a new Server based on the provided MLServer configuration, extended with our PVC: the PVC is added to the rclone container, which allows rclone to move models from the PVC onto the server.
We also add a new capability, pvc, so that models requiring the PVC can be scheduled to this server.
We use a simple sklearn iris classification model with the added pvc requirement, so that our MLServer instance with the PVC is targeted during scheduling.
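As a sketch of what that model might look like, expressed through the Kubernetes Python client (the storageUri path and namespace are assumptions; the path must match wherever the PVC is mounted in the server pod):
# Sketch: a Model with the extra pvc requirement, created via the
# Kubernetes Python client. Paths and namespace are illustrative.
from kubernetes import client, config

config.load_kube_config()
model = {
    "apiVersion": "mlops.seldon.io/v1alpha1",
    "kind": "Model",
    "metadata": {"name": "iris", "namespace": "seldon-mesh"},
    "spec": {
        "storageUri": "/var/run/models/iris",  # local path on the PVC (assumed)
        "requirements": ["sklearn", "pvc"],    # pvc matches the server's extra capability
    },
}
client.CustomObjectsApi().create_namespaced_custom_object(
    group="mlops.seldon.io", version="v1alpha1",
    namespace="seldon-mesh", plural="models", body=model,
)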
Do a gRPC inference call
To run this notebook you need the inference data. This can be acquired in two ways:
Run make train, or
gsutil cp -R gs://seldon-models/scv2/examples/income/infer-data .
Show predictions from the reference set. There should be no drift and no outliers.
Show predictions from the drift data. There should be drift and probably no outliers.
Show predictions from the outlier data. There should be outliers and probably no drift.
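The predictions shown later in this example come from plain HTTP calls against the pipeline. A rough sketch of such a call is below; the pipeline name income-production, the input tensor name, and the feature row are assumptions to adjust for your deployment, while the payload shape follows the Open Inference Protocol and MESH_IP is the mesh address set earlier:
# Sketch: call a pipeline over HTTP using the Open Inference Protocol.
import os
import requests

row = [47.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 40.0, 9.0]  # illustrative features
payload = {
    "inputs": [{
        "name": "predict",          # assumed input tensor name
        "datatype": "FP32",
        "shape": [1, len(row)],
        "data": row,
    }]
}
r = requests.post(
    f"http://{os.environ['MESH_IP']}/v2/models/income-production/infer",
    json=payload,
    headers={"Seldon-Model": "income-production.pipeline"},  # route to the pipeline
)
print(r)          # e.g. <Response [200]>
print(r.json())   # outputs include predict and is_outlier tensors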
Note: The Seldon CLI allows you to view information about underlying Seldon resources and make changes to them through the scheduler in non-Kubernetes environments. However, it cannot modify underlying manifests within a Kubernetes cluster. Therefore, using the Seldon CLI for control plane operations in a Kubernetes environment is not recommended. For more details, see Seldon CLI.
To install tritonclient
Note: for compatibility of Tritonclient check .
Note: binary data support in HTTP is blocked by https://github.com/SeldonIO/seldon-core-v2/issues/475
kubectl delete -f models/sklearn-iris-gs.yaml -n ${NAMESPACE}
kubectl delete -f pipelines/iris.yaml -n ${NAMESPACE}
model.mlops.seldon.io "iris" deleted
pipeline.mlops.seldon.io "iris-pipeline" deleted
kubectl delete -f models/tfsimple1.yaml -n ${NAMESPACE}
kubectl delete -f pipelines/tfsimple.yaml -n ${NAMESPACE}
model.mlops.seldon.io "tfsimple1" deleted
pipeline.mlops.seldon.io "tfsimple" deleted
model.mlops.seldon.io/iris created
pipeline.mlops.seldon.io/iris-pipeline created
model.mlops.seldon.io/tfsimple1 created
pipeline.mlops.seldon.io/tfsimple created
model.mlops.seldon.io/iris condition met
model.mlops.seldon.io/tfsimple1 condition met
pipeline.mlops.seldon.io/iris-pipeline condition met
pipeline.mlops.seldon.io/tfsimple condition met
2023-06-30 11:05:32,389 [mlserver] INFO - server url: 172.18.255.2
2023-06-30 11:05:32,389 [mlserver] INFO - model name: iris
2023-06-30 11:05:32,389 [mlserver] INFO - request headers: {}
2023-06-30 11:05:32,389 [mlserver] INFO - input file path: batch-inputs/iris-input.txt
2023-06-30 11:05:32,389 [mlserver] INFO - output file path: /tmp/iris-output.txt
2023-06-30 11:05:32,389 [mlserver] INFO - workers: 5
2023-06-30 11:05:32,389 [mlserver] INFO - retries: 3
2023-06-30 11:05:32,389 [mlserver] INFO - batch interval: 0.0
2023-06-30 11:05:32,389 [mlserver] INFO - batch jitter: 0.0
2023-06-30 11:05:32,389 [mlserver] INFO - connection timeout: 60
2023-06-30 11:05:32,389 [mlserver] INFO - micro-batch size: 1
2023-06-30 11:05:32,503 [mlserver] INFO - Finalizer: processed instances: 100
2023-06-30 11:05:32,503 [mlserver] INFO - Total processed instances: 100
2023-06-30 11:05:32,503 [mlserver] INFO - Time taken: 0.11 seconds
2023-06-30 11:05:35,857 [mlserver] INFO - server url: 172.18.255.2
2023-06-30 11:05:35,858 [mlserver] INFO - model name: iris-pipeline.pipeline
2023-06-30 11:05:35,858 [mlserver] INFO - request headers: {}
2023-06-30 11:05:35,858 [mlserver] INFO - input file path: batch-inputs/iris-input.txt
2023-06-30 11:05:35,858 [mlserver] INFO - output file path: /tmp/iris-pipeline-output.txt
2023-06-30 11:05:35,858 [mlserver] INFO - workers: 5
2023-06-30 11:05:35,858 [mlserver] INFO - retries: 3
2023-06-30 11:05:35,858 [mlserver] INFO - batch interval: 0.0
2023-06-30 11:05:35,858 [mlserver] INFO - batch jitter: 0.0
2023-06-30 11:05:35,858 [mlserver] INFO - connection timeout: 60
2023-06-30 11:05:35,858 [mlserver] INFO - micro-batch size: 1
2023-06-30 11:05:36,145 [mlserver] INFO - Finalizer: processed instances: 100
2023-06-30 11:05:36,146 [mlserver] INFO - Total processed instances: 100
2023-06-30 11:05:36,146 [mlserver] INFO - Time taken: 0.29 seconds
2023-06-30 11:22:52,662 [mlserver] INFO - server url: 172.18.255.2
2023-06-30 11:22:52,662 [mlserver] INFO - model name: tfsimple1
2023-06-30 11:22:52,662 [mlserver] INFO - request headers: {}
2023-06-30 11:22:52,662 [mlserver] INFO - input file path: batch-inputs/tfsimple-input.txt
2023-06-30 11:22:52,662 [mlserver] INFO - output file path: /tmp/tfsimple-output.txt
2023-06-30 11:22:52,662 [mlserver] INFO - workers: 5
2023-06-30 11:22:52,662 [mlserver] INFO - retries: 3
2023-06-30 11:22:52,662 [mlserver] INFO - batch interval: 0.0
2023-06-30 11:22:52,662 [mlserver] INFO - batch jitter: 0.0
2023-06-30 11:22:52,662 [mlserver] INFO - connection timeout: 60
2023-06-30 11:22:52,662 [mlserver] INFO - micro-batch size: 1
2023-06-30 11:22:52,755 [mlserver] INFO - Finalizer: processed instances: 100
2023-06-30 11:22:52,755 [mlserver] INFO - Total processed instances: 100
2023-06-30 11:22:52,756 [mlserver] INFO - Time taken: 0.09 seconds
2023-06-30 11:22:54,065 [mlserver] INFO - server url: 172.18.255.2
2023-06-30 11:22:54,065 [mlserver] INFO - model name: tfsimple.pipeline
2023-06-30 11:22:54,065 [mlserver] INFO - request headers: {}
2023-06-30 11:22:54,065 [mlserver] INFO - input file path: batch-inputs/tfsimple-input.txt
2023-06-30 11:22:54,065 [mlserver] INFO - output file path: /tmp/tfsimple-pipeline-output.txt
2023-06-30 11:22:54,065 [mlserver] INFO - workers: 5
2023-06-30 11:22:54,065 [mlserver] INFO - retries: 3
2023-06-30 11:22:54,065 [mlserver] INFO - batch interval: 0.0
2023-06-30 11:22:54,065 [mlserver] INFO - batch jitter: 0.0
2023-06-30 11:22:54,065 [mlserver] INFO - connection timeout: 60
2023-06-30 11:22:54,065 [mlserver] INFO - micro-batch size: 1
2023-06-30 11:22:54,302 [mlserver] INFO - Finalizer: processed instances: 100
2023-06-30 11:22:54,302 [mlserver] INFO - Total processed instances: 100
2023-06-30 11:22:54,303 [mlserver] INFO - Time taken: 0.24 seconds
model.mlops.seldon.io "iris" deleted
pipeline.mlops.seldon.io "iris-pipeline" deleted
model.mlops.seldon.io "tfsimple1" deleted
pipeline.mlops.seldon.io "tfsimple" deleted
ansible-playbook ansible/playbooks/kind-cluster.yaml -e kind_config_file=${PWD}/samples/examples/local-pvc/kind-config.yaml
model.mlops.seldon.io/iris created
model.mlops.seldon.io/iris condition met
{
"conditions": [
{
"lastTransitionTime": "2022-12-24T11:04:37Z",
"status": "True",
"type": "ModelReady"
},
{
"lastTransitionTime": "2022-12-24T11:04:37Z",
"status": "True",
"type": "Ready"
}
],
"replicas": 1
}
{
"model_name": "iris_1",
"model_version": "1",
"id": "dc032bcc-3f4e-4395-a2e4-7c1e3ef56e9e",
"parameters": {
"content_type": null,
"headers": null
},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": null,
"data": [
2
]
}
]
}
{
"modelName": "iris_1",
"modelVersion": "1",
"outputs": [
{
"name": "predict",
"datatype": "INT64",
"shape": [
"1",
"1"
],
"contents": {
"int64Contents": [
"2"
]
}
}
]
}
model.mlops.seldon.io "iris" deleted
[0 0 1 1 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1]
<Response [200]>
{'model_name': '', 'outputs': [{'data': [0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1], 'name': 'predict', 'shape': [20, 1], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}, {'data': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'name': 'is_outlier', 'shape': [1, 20], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}]}
seldon.default.model.income-drift.outputs cifej9gfh5ss738i5br0 {"name":"is_drift", "datatype":"INT64", "shape":["1", "1"], "parameters":{"content_type":{"stringParam":"np"}}, "contents":{"int64Contents":["0"]}}
[0 0 1 1 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1]
<Response [200]>
{'model_name': '', 'outputs': [{'data': [0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1], 'name': 'predict', 'shape': [20, 1], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}, {'data': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'name': 'is_outlier', 'shape': [1, 20], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}]}
seldon.default.model.income-drift.outputs cifejaofh5ss738i5brg {"name":"is_drift", "datatype":"INT64", "shape":["1", "1"], "parameters":{"content_type":{"stringParam":"np"}}, "contents":{"int64Contents":["1"]}}
[0 0 1 1 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 1]
<Response [200]>
{'model_name': '', 'outputs': [{'data': [0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1], 'name': 'predict', 'shape': [20, 1], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}, {'data': [1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1], 'name': 'is_outlier', 'shape': [1, 20], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}]}
seldon.default.model.income-drift.outputs cifejb8fh5ss738i5bs0 {"name":"is_drift", "datatype":"INT64", "shape":["1", "1"], "parameters":{"content_type":{"stringParam":"np"}}, "contents":{"int64Contents":["0"]}}
[0]
<Response [200]>
{'model_name': 'income-explainer_1', 'model_version': '1', 'id': 'cdd68ba5-c569-4930-886f-fbdc26e24866', 'parameters': {}, 'outputs': [{'name': 'explanation', 'shape': [1, 1], 'datatype': 'BYTES', 'parameters': {'content_type': 'str'}, 'data': ['{"meta": {"name": "AnchorTabular", "type": ["blackbox"], "explanations": ["local"], "params": {"seed": 1, "disc_perc": [25, 50, 75], "threshold": 0.95, "delta": 0.1, "tau": 0.15, "batch_size": 100, "coverage_samples": 10000, "beam_size": 1, "stop_on_first": false, "max_anchor_size": null, "min_samples_start": 100, "n_covered_ex": 10, "binary_cache_size": 10000, "cache_margin": 1000, "verbose": false, "verbose_every": 1, "kwargs": {}}, "version": "0.9.1"}, "data": {"anchor": ["Marital Status = Never-Married", "Relationship = Own-child", "Capital Gain <= 0.00"], "precision": 0.9942028985507246, "coverage": 0.0657, "raw": {"feature": [3, 5, 8], "mean": [0.7914951989026063, 0.9400749063670412, 0.9942028985507246], "precision": [0.7914951989026063, 0.9400749063670412, 0.9942028985507246], "coverage": [0.3043, 0.069, 0.0657], "examples": [{"covered_true": [[30, 0, 1, 1, 0, 1, 1, 0, 0, 0, 50, 2], [49, 4, 2, 1, 6, 0, 4, 1, 0, 0, 60, 9], [39, 2, 5, 1, 5, 0, 4, 1, 0, 0, 40, 9], [33, 4, 2, 1, 5, 0, 4, 1, 0, 0, 40, 9], [63, 4, 1, 1, 8, 1, 4, 0, 0, 0, 40, 9], [23, 4, 1, 1, 7, 1, 4, 1, 0, 0, 66, 8], [45, 4, 1, 1, 8, 0, 1, 1, 0, 0, 40, 1], [54, 4, 1, 1, 8, 4, 4, 1, 0, 0, 45, 9], [32, 6, 1, 1, 8, 4, 2, 0, 0, 0, 30, 9], [40, 5, 1, 1, 2, 0, 4, 1, 0, 0, 40, 9]], "covered_false": [[57, 4, 5, 1, 5, 0, 4, 1, 0, 1977, 45, 9], [53, 0, 5, 1, 0, 1, 4, 0, 8614, 0, 35, 9], [37, 4, 1, 1, 5, 0, 4, 1, 0, 0, 45, 9], [53, 4, 5, 1, 8, 0, 4, 1, 0, 1977, 55, 9], [35, 4, 1, 1, 8, 0, 4, 1, 7688, 0, 50, 9], [32, 4, 1, 1, 5, 1, 4, 1, 0, 0, 40, 9], [42, 4, 1, 1, 5, 0, 4, 1, 99999, 0, 40, 9], [32, 4, 1, 1, 8, 0, 4, 1, 15024, 0, 50, 9], [53, 7, 5, 1, 8, 0, 4, 1, 0, 0, 42, 9], [52, 1, 1, 1, 8, 0, 4, 1, 0, 0, 45, 9]], "uncovered_true": [], "uncovered_false": []}, {"covered_true": [[52, 7, 5, 1, 5, 3, 4, 1, 0, 0, 40, 9], [27, 4, 1, 1, 8, 3, 4, 1, 0, 0, 40, 9], [28, 4, 1, 1, 6, 3, 4, 1, 0, 0, 60, 9], [46, 6, 5, 1, 2, 3, 4, 1, 0, 0, 50, 9], [53, 2, 5, 1, 5, 3, 2, 0, 0, 1669, 35, 9], [27, 4, 5, 1, 8, 3, 4, 0, 0, 0, 40, 9], [25, 4, 1, 1, 8, 3, 4, 0, 0, 0, 40, 9], [29, 6, 5, 1, 2, 3, 4, 1, 0, 0, 30, 9], [64, 0, 1, 1, 0, 3, 4, 1, 0, 0, 50, 9], [63, 0, 5, 1, 0, 3, 4, 1, 0, 0, 30, 9]], "covered_false": [[50, 5, 1, 1, 8, 3, 4, 1, 15024, 0, 60, 9], [45, 6, 1, 1, 6, 3, 4, 1, 14084, 0, 45, 9], [37, 4, 1, 1, 8, 3, 4, 1, 15024, 0, 40, 9], [33, 4, 1, 1, 8, 3, 4, 1, 15024, 0, 60, 9], [41, 6, 5, 1, 8, 3, 4, 1, 7298, 0, 70, 9], [42, 6, 1, 1, 2, 3, 4, 1, 15024, 0, 60, 9]], "uncovered_true": [], "uncovered_false": []}, {"covered_true": [[41, 4, 1, 1, 1, 3, 4, 1, 0, 0, 40, 9], [55, 2, 5, 1, 8, 3, 4, 1, 0, 0, 50, 9], [35, 4, 5, 1, 5, 3, 4, 0, 0, 0, 32, 9], [31, 4, 1, 1, 2, 3, 4, 1, 0, 0, 40, 9], [47, 4, 1, 1, 1, 3, 4, 1, 0, 0, 40, 9], [33, 4, 5, 1, 5, 3, 4, 1, 0, 0, 40, 9], [58, 0, 1, 1, 0, 3, 4, 0, 0, 0, 50, 9], [44, 6, 1, 1, 2, 3, 4, 1, 0, 0, 90, 9], [30, 4, 1, 1, 6, 3, 4, 1, 0, 0, 40, 9], [25, 4, 1, 1, 5, 3, 4, 1, 0, 0, 40, 9]], "covered_false": [], "uncovered_true": [], "uncovered_false": []}], "all_precision": 0, "num_preds": 1000000, "success": true, "names": ["Marital Status = Never-Married", "Relationship = Own-child", "Capital Gain <= 0.00"], "prediction": [0], "instance": [47.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 40.0, 9.0], "instances": [[47.0, 4.0, 1.0, 1.0, 1.0, 3.0, 4.0, 1.0, 0.0, 0.0, 40.0, 
9.0]]}}}']}]}
model.mlops.seldon.io/iris created
pipeline.mlops.seldon.io/iris-pipeline created
model.mlops.seldon.io/iris condition met
pipeline.mlops.seldon.io/iris-pipeline condition met
model ready: True
model metadata: {'name': 'iris_1', 'versions': [], 'platform': '', 'inputs': [], 'outputs': [], 'parameters': {}}
array([[2]])
array([[2]])
model ready: True
name: "iris_1"
array([[2]])
array([[2]])
model.mlops.seldon.io/tfsimple1 created
pipeline.mlops.seldon.io/tfsimple created
model.mlops.seldon.io/tfsimple1 condition met
pipeline.mlops.seldon.io/tfsimple condition met
model ready: True
model metadata: {'name': 'iris_1', 'versions': [], 'platform': '', 'inputs': [], 'outputs': [], 'parameters': {}}
array([[ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],
dtype=int32)
array([[ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],
dtype=int32)
array([[ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],
dtype=int32)
model ready: True
name: "tfsimple1_1"
versions: "1"
platform: "tensorflow_graphdef"
inputs {
name: "INPUT0"
datatype: "INT32"
shape: -1
shape: 16
}
inputs {
name: "INPUT1"
datatype: "INT32"
shape: -1
shape: 16
}
outputs {
name: "OUTPUT0"
datatype: "INT32"
shape: -1
shape: 16
}
outputs {
name: "OUTPUT1"
datatype: "INT32"
shape: -1
shape: 16
}
array([[ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],
dtype=int32)
array([[ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32]],
dtype=int32)
pip install mlserver
import os
os.environ["NAMESPACE"] = "seldon-mesh"
MESH_IP=!kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
import os
os.environ['MESH_IP'] = MESH_IP
MESH_IP
'172.18.255.2'
cat models/sklearn-iris-gs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn"
requirements:
- sklearn
memory: 100Ki
cat pipelines/iris.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: iris-pipeline
spec:
steps:
- name: iris
output:
steps:
- iris
cat models/tfsimple1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
cat pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple
spec:
steps:
- name: tfsimple1
output:
steps:
- tfsimple1
kubectl apply -f models/sklearn-iris-gs.yaml -n ${NAMESPACE}
kubectl apply -f pipelines/iris.yaml -n ${NAMESPACE}
kubectl apply -f models/tfsimple1.yaml -n ${NAMESPACE}
kubectl apply -f pipelines/tfsimple.yaml -n ${NAMESPACE}
kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
kubectl wait --for condition=ready --timeout=300s pipelines --all -n ${NAMESPACE}
seldon model infer iris --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' | jq -M .
{
"model_name": "iris_1",
"model_version": "1",
"id": "25e1c1b9-a20f-456d-bdff-c75d5ba83b1f",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
seldon pipeline infer iris-pipeline --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2
],
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
}
}
]
}
seldon model infer tfsimple1 --inference-host ${MESH_IP}:80 \
'{"outputs":[{"name":"OUTPUT0"}], "inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "tfsimple1_1",
"model_version": "1",
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
1,
16
],
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
]
}
seldon pipeline infer tfsimple --inference-host ${MESH_IP}:80 \
'{"outputs":[{"name":"OUTPUT0"}], "inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
cat batch-inputs/iris-input.txt | head -n 1 | jq -M .
{
"inputs": [
{
"name": "predict",
"data": [
0.38606369295833043,
0.006894049558299753,
0.6104082981607108,
0.3958954239450676
],
"datatype": "FP64",
"shape": [
1,
4
]
}
]
}
%%bash
mlserver infer -u ${MESH_IP} -m iris -i batch-inputs/iris-input.txt -o /tmp/iris-output.txt --workers 5
%%bash
mlserver infer -u ${MESH_IP} -m iris-pipeline.pipeline -i batch-inputs/iris-input.txt -o /tmp/iris-pipeline-output.txt --workers 5
cat /tmp/iris-output.txt | head -n 1 | jq -M .
{
"model_name": "iris_1",
"model_version": "1",
"id": "46bdfca2-8805-4a72-b1ce-95e4f38c1a19",
"parameters": {
"inference_id": "46bdfca2-8805-4a72-b1ce-95e4f38c1a19",
"batch_index": 0
},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
1
]
}
]
}
cat /tmp/iris-pipeline-output.txt | head -n 1 | jq .
{
"model_name": "",
"id": "37e8c013-b348-41e8-89b9-fea86a4f9632",
"parameters": {
"batch_index": 1
},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
1
]
}
]
}
cat batch-inputs/tfsimple-input.txt | head -n 1 | jq -M .
{
"inputs": [
{
"name": "INPUT0",
"data": [
75,
39,
9,
44,
32,
97,
99,
40,
13,
27,
25,
36,
18,
77,
62,
60
],
"datatype": "INT32",
"shape": [
1,
16
]
},
{
"name": "INPUT1",
"data": [
39,
7,
14,
58,
13,
88,
98,
66,
97,
57,
49,
3,
49,
63,
37,
12
],
"datatype": "INT32",
"shape": [
1,
16
]
}
]
}
%%bash
mlserver infer -u ${MESH_IP} -m tfsimple1 -i batch-inputs/tfsimple-input.txt -o /tmp/tfsimple-output.txt --workers 5 -b
%%bash
mlserver infer -u ${MESH_IP} -m tfsimple.pipeline -i batch-inputs/tfsimple-input.txt -o /tmp/tfsimple-pipeline-output.txt --workers 5
cat /tmp/tfsimple-output.txt | head -n 1 | jq -M .
{
"model_name": "tfsimple1_1",
"model_version": "1",
"id": "19952272-b023-4079-aa08-f1880ded05e5",
"parameters": {
"inference_id": "19952272-b023-4079-aa08-f1880ded05e5",
"batch_index": 1
},
"outputs": [
{
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32",
"parameters": {},
"data": [
115,
69,
97,
112,
73,
106,
58,
182,
114,
66,
64,
110,
100,
24,
22,
77
]
},
{
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32",
"parameters": {},
"data": [
-77,
33,
25,
-52,
-49,
-88,
-48,
0,
-50,
26,
-44,
46,
-2,
18,
-6,
-47
]
}
]
}
cat /tmp/tfsimple-pipeline-output.txt | head -n 1 | jq -M .
{
"model_name": "",
"id": "46b05aab-07d9-414d-be96-c03d1863552a",
"parameters": {
"batch_index": 3
},
"outputs": [
{
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32",
"data": [
140,
164,
85,
58,
152,
76,
70,
56,
100,
141,
98,
181,
115,
177,
106,
193
]
},
{
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32",
"data": [
-10,
0,
-11,
-38,
2,
-36,
-52,
-8,
-18,
57,
94,
-5,
-27,
17,
58,
-1
]
}
]
}
kubectl delete -f models/sklearn-iris-gs.yaml -n ${NAMESPACE}
kubectl delete -f pipelines/iris.yaml -n ${NAMESPACE}
kubectl delete -f models/tfsimple1.yaml -n ${NAMESPACE}
kubectl delete -f pipelines/tfsimple.yaml -n ${NAMESPACE}
%env INFER_ENDPOINT=0.0.0.0:9000
env: INFER_ENDPOINT=0.0.0.0:9000
cat ./models/tfsimple1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
seldon model load -f ./models/tfsimple1.yaml
{}
seldon model status tfsimple1 -w ModelAvailable | jq -M .
{}
seldon model infer tfsimple1 --inference-host ${INFER_ENDPOINT} \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{
"model_name": "tfsimple1_1",
"model_version": "1",
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
1,
16
],
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
1,
16
],
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
]
}
]
}
seldon model infer tfsimple1 --inference-mode grpc --inference-host ${INFER_ENDPOINT} \
'{"model_name":"tfsimple1","inputs":[{"name":"INPUT0","contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}'
{"modelName":"tfsimple1_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
curl http://${INFER_ENDPOINT}/v2/models/tfsimple1/infer -H "Content-Type: application/json" -H "seldon-model: tfsimple1" \
-d '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{"model_name":"tfsimple1_1","model_version":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":[1,16],"data":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]},{"name":"OUTPUT1","datatype":"INT32","shape":[1,16],"data":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}]}
grpcurl -d '{"model_name":"tfsimple1","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimple1 \
${INFER_ENDPOINT} inference.GRPCInferenceService/ModelInfer
{
"modelName": "tfsimple1_1",
"modelVersion": "1",
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
]
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
]
}
],
"rawOutputContents": [
"AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==",
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="
]
}
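The tensor data in rawOutputContents is base64-encoded little-endian binary. A minimal Python sketch to decode OUTPUT0 from the response above:

import base64
import numpy as np

# First entry of rawOutputContents above (OUTPUT0: INT32, shape [1, 16])
raw = "AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA=="
decoded = np.frombuffer(base64.b64decode(raw), dtype="<i4").reshape(1, 16)
print(decoded)  # [[ 2  4  6  8 10 12 14 16 18 20 22 24 26 28 30 32]]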
cat ./pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple
spec:
steps:
- name: tfsimple1
output:
steps:
- tfsimple1
seldon pipeline load -f ./pipelines/tfsimple.yaml
{}
seldon pipeline status tfsimple -w PipelineReady
{"pipelineName":"tfsimple","versions":[{"pipeline":{"name":"tfsimple","uid":"cg5fm6c6dpcs73c4qhe0","version":1,"steps":[{"name":"tfsimple1"}],"output":{"steps":["tfsimple1.outputs"]},"kubernetesMeta":{}},"state":{"pipelineVersion":1,"status":"PipelineReady","reason":"created pipeline","lastChangeTimestamp":"2023-03-10T09:40:41.317797761Z","modelsReady":true}}]}
seldon pipeline infer tfsimple --inference-host ${INFER_ENDPOINT} \
'{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{
"model_name": "",
"outputs": [
{
"data": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
],
"name": "OUTPUT0",
"shape": [
1,
16
],
"datatype": "INT32"
},
{
"data": [
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"name": "OUTPUT1",
"shape": [
1,
16
],
"datatype": "INT32"
}
]
}
seldon pipeline infer tfsimple --inference-mode grpc --inference-host ${INFER_ENDPOINT} \
'{"model_name":"tfsimple1","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}'
{"outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]}
curl http://${INFER_ENDPOINT}/v2/models/tfsimple1/infer -H "Content-Type: application/json" -H "seldon-model: tfsimple.pipeline" \
-d '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
{"model_name":"","outputs":[{"data":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32],"name":"OUTPUT0","shape":[1,16],"datatype":"INT32"},{"data":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"name":"OUTPUT1","shape":[1,16],"datatype":"INT32"}]}
grpcurl -d '{"model_name":"tfsimple1","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimple.pipeline \
${INFER_ENDPOINT} inference.GRPCInferenceService/ModelInfer
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
]
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
]
}
],
"rawOutputContents": [
"AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==",
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="
]
}
seldon pipeline unload tfsimple
seldon model unload tfsimple1
{}
{}
import os
os.environ["NAMESPACE"] = "seldon-mesh"
MESH_IP=!kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
import os
os.environ['MESH_IP'] = MESH_IP
MESH_IP
'172.19.255.1'
!cat kind-config.yaml
apiVersion: kind.x-k8s.io/v1alpha4
kind: Cluster
nodes:
- role: control-plane
extraMounts:
- hostPath: /tmp/models
containerPath: /models
!mkdir -p /tmp/models
!gsutil cp -r gs://seldon-models/mlserver/iris /tmp/models
!cat pvc.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: local-path-immediate
provisioner: rancher.io/local-path
reclaimPolicy: Delete
mountOptions:
- debug
volumeBindingMode: Immediate
---
kind: PersistentVolume
apiVersion: v1
metadata:
name: ml-models-pv
namespace: seldon-mesh
labels:
type: local
spec:
storageClassName: local-path-immediate
capacity:
storage: 1Gi
accessModes:
- ReadWriteOnce
hostPath:
path: "/models"
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
name: ml-models-pvc
namespace: seldon-mesh
spec:
storageClassName: local-path-immediate
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
selector:
matchLabels:
type: local
!cat server.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Server
metadata:
name: mlserver-pvc
spec:
serverConfig: mlserver
extraCapabilities:
- "pvc"
podSpec:
volumes:
- name: models-pvc
persistentVolumeClaim:
claimName: ml-models-pvc
containers:
- name: rclone
volumeMounts:
- name: models-pvc
mountPath: /var/models
!cat ./iris.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "/var/models/iris"
requirements:
- sklearn
- pvc
!kubectl create -f iris.yaml -n ${NAMESPACE}
!kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE}
!kubectl get model iris -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M .
!seldon model infer iris --inference-host ${MESH_IP}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
!seldon model infer iris --inference-mode grpc --inference-host ${MESH_IP}:80 \
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' | jq -M .
!kubectl delete -f ./iris.yaml -n ${NAMESPACE}
import numpy as np
import json
import requests
with open('./infer-data/test.npy', 'rb') as f:
    x_ref = np.load(f)
    x_h1 = np.load(f)
    y_ref = np.load(f)
    x_outlier = np.load(f)
reqJson = json.loads('{"inputs":[{"name":"input_1","data":[],"datatype":"FP32","shape":[]}]}')
url = "http://0.0.0.0:9000/v2/models/model/infer"
def infer(resourceName: str, batchSz: int, requestType: str):
    # Select rows from the outlier, drift (h1) or reference dataset
    if requestType == "outlier":
        rows = x_outlier[0:0+batchSz]
    elif requestType == "drift":
        rows = x_h1[0:0+batchSz]
    else:
        rows = x_ref[0:0+batchSz]
    # Fill the v2 inference request with the flattened batch
    reqJson["inputs"][0]["data"] = rows.flatten().tolist()
    reqJson["inputs"][0]["shape"] = [batchSz, rows.shape[1]]
    # The seldon-model header routes the request to the named model or pipeline
    headers = {"Content-Type": "application/json", "seldon-model":resourceName}
    response_raw = requests.post(url, json=reqJson, headers=headers)
    print(response_raw)
    print(response_raw.json())
cat ../../models/income-preprocess.yaml
echo "---"
cat ../../models/income.yaml
echo "---"
cat ../../models/income-drift.yaml
echo "---"
cat ../../models/income-outlier.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: income-preprocess
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/preprocessor"
requirements:
- sklearn
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: income
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/classifier"
requirements:
- sklearn
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: income-drift
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/drift-detector"
requirements:
- mlserver
- alibi-detect
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: income-outlier
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/outlier-detector"
requirements:
- mlserver
- alibi-detect
seldon model load -f ../../models/income-preprocess.yaml
seldon model load -f ../../models/income.yaml
seldon model load -f ../../models/income-drift.yaml
seldon model load -f ../../models/income-outlier.yaml
{}
{}
{}
{}
seldon model status income-preprocess -w ModelAvailable | jq .
seldon model status income -w ModelAvailable | jq .
seldon model status income-drift -w ModelAvailable | jq .
seldon model status income-outlier -w ModelAvailable | jq .
{}
{}
{}
{}
cat ../../pipelines/income.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: income-production
spec:
steps:
- name: income
- name: income-preprocess
- name: income-outlier
inputs:
- income-preprocess
- name: income-drift
batch:
size: 20
output:
steps:
- income
- income-outlier.outputs.is_outlier
seldon pipeline load -f ../../pipelines/income.yaml
seldon pipeline status income-production -w PipelineReady | jq -M .
{
"pipelineName": "income-production",
"versions": [
{
"pipeline": {
"name": "income-production",
"uid": "cifej8iufmbc73e5int0",
"version": 1,
"steps": [
{
"name": "income"
},
{
"name": "income-drift",
"batch": {
"size": 20
}
},
{
"name": "income-outlier",
"inputs": [
"income-preprocess.outputs"
]
},
{
"name": "income-preprocess"
}
],
"output": {
"steps": [
"income.outputs",
"income-outlier.outputs.is_outlier"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-30T14:41:38.343754921Z",
"modelsReady": true
}
}
]
}
batchSz=20
print(y_ref[0:batchSz])
infer("income-production.pipeline",batchSz,"normal")
seldon pipeline inspect income-production.income-drift.outputs.is_drift
batchSz=20
print(y_ref[0:batchSz])
infer("income-production.pipeline",batchSz,"drift")
seldon pipeline inspect income-production.income-drift.outputs.is_drift
batchSz=20
print(y_ref[0:batchSz])
infer("income-production.pipeline",batchSz,"outlier")
seldon pipeline inspect income-production.income-drift.outputs.is_drift
cat ../../models/income-explainer.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: income-explainer
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/income/explainer"
explainer:
type: anchor_tabular
modelRef: income
seldon model load -f ../../models/income-explainer.yaml
{}
seldon model status income-explainer -w ModelAvailable | jq .
{}
batchSz=1
print(y_ref[0:batchSz])
infer("income-explainer",batchSz,"normal")
seldon pipeline unload income-production
seldon model unload income-preprocess
seldon model unload income
seldon model unload income-drift
seldon model unload income-outlier
seldon model unload income-explainer
pip install tritonclient[all]
import os
os.environ["NAMESPACE"] = "seldon-mesh"
MESH_IP=!kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
import os
os.environ['MESH_IP'] = MESH_IP
MESH_IP
'172.19.255.1'
cat models/sklearn-iris-gs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: iris
spec:
storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn"
requirements:
- sklearn
memory: 100Ki
cat pipelines/iris.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: iris-pipeline
spec:
steps:
- name: iris
output:
steps:
- iris
kubectl apply -f models/sklearn-iris-gs.yaml -n ${NAMESPACE}
kubectl apply -f pipelines/iris.yaml -n ${NAMESPACE}
kubectl wait --for condition=ready --timeout=300s model iris -n ${NAMESPACE}
kubectl wait --for condition=ready --timeout=300s pipelines iris-pipeline -n ${NAMESPACE}
import tritonclient.http as httpclient
import numpy as np
http_triton_client = httpclient.InferenceServerClient(
url=f"{MESH_IP}:80",
verbose=False,
)
print("model ready:", http_triton_client.is_model_ready("iris"))
print("model metadata:", http_triton_client.get_model_metadata("iris"))
# Against model
binary_data = False
inputs = [httpclient.InferInput("predict", (1, 4), "FP64")]
inputs[0].set_data_from_numpy(np.array([[1, 2, 3, 4]]).astype("float64"), binary_data=binary_data)
outputs = [httpclient.InferRequestedOutput("predict", binary_data=binary_data)]
result = http_triton_client.infer("iris", inputs, outputs=outputs)
result.as_numpy("predict")
# Against pipeline
binary_data = False
inputs = [httpclient.InferInput("predict", (1, 4), "FP64")]
inputs[0].set_data_from_numpy(np.array([[1, 2, 3, 4]]).astype("float64"), binary_data=binary_data)
outputs = [httpclient.InferRequestedOutput("predict", binary_data=binary_data)]
result = http_triton_client.infer("iris-pipeline.pipeline", inputs, outputs=outputs)
result.as_numpy("predict")
import tritonclient.grpc as grpcclient
import numpy as np
grpc_triton_client = grpcclient.InferenceServerClient(
url=f"{MESH_IP}:80",
verbose=False,
)
model_name = "iris"
headers = {"seldon-model": model_name}
print("model ready:", grpc_triton_client.is_model_ready(model_name, headers=headers))
print(grpc_triton_client.get_model_metadata(model_name, headers=headers))
model_name = "iris"
headers = {"seldon-model": model_name}
inputs = [
grpcclient.InferInput("predict", (1, 4), "FP64"),
]
inputs[0].set_data_from_numpy(np.array([[1, 2, 3, 4]]).astype("float64"))
outputs = [grpcclient.InferRequestedOutput("predict")]
result = grpc_triton_client.infer(model_name, inputs, outputs=outputs, headers=headers)
result.as_numpy("predict")
model_name = "iris-pipeline.pipeline"
headers = {"seldon-model": model_name}
inputs = [
grpcclient.InferInput("predict", (1, 4), "FP64"),
]
inputs[0].set_data_from_numpy(np.array([[1, 2, 3, 4]]).astype("float64"))
outputs = [grpcclient.InferRequestedOutput("predict")]
result = grpc_triton_client.infer(model_name, inputs, outputs=outputs, headers=headers)
result.as_numpy("predict")
cat models/tfsimple1.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: tfsimple1
spec:
storageUri: "gs://seldon-models/triton/simple"
requirements:
- tensorflow
memory: 100Ki
cat pipelines/tfsimple.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: tfsimple
spec:
steps:
- name: tfsimple1
output:
steps:
- tfsimple1
kubectl apply -f models/tfsimple1.yaml -n ${NAMESPACE}
kubectl apply -f pipelines/tfsimple.yaml -n ${NAMESPACE}
kubectl wait --for condition=ready --timeout=300s model tfsimple1 -n ${NAMESPACE}
kubectl wait --for condition=ready --timeout=300s pipelines tfsimple -n ${NAMESPACE}
import tritonclient.http as httpclient
import numpy as np
http_triton_client = httpclient.InferenceServerClient(
url=f"{MESH_IP}:80",
verbose=False,
)
print("model ready:", http_triton_client.is_model_ready("iris"))
print("model metadata:", http_triton_client.get_model_metadata("iris"))
# Against model (no binary data)
binary_data = False
inputs = [
httpclient.InferInput("INPUT0", (1, 16), "INT32"),
httpclient.InferInput("INPUT1", (1, 16), "INT32"),
]
inputs[0].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
inputs[1].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=binary_data)]
result = http_triton_client.infer("tfsimple1", inputs, outputs=outputs)
result.as_numpy("OUTPUT0")
# Against model (with binary data)
binary_data = True
inputs = [
httpclient.InferInput("INPUT0", (1, 16), "INT32"),
httpclient.InferInput("INPUT1", (1, 16), "INT32"),
]
inputs[0].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
inputs[1].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=binary_data)]
result = http_triton_client.infer("tfsimple1", inputs, outputs=outputs)
result.as_numpy("OUTPUT0")
# Against Pipeline (no binary data)
binary_data = False
inputs = [
httpclient.InferInput("INPUT0", (1, 16), "INT32"),
httpclient.InferInput("INPUT1", (1, 16), "INT32"),
]
inputs[0].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
inputs[1].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=binary_data)]
result = http_triton_client.infer("tfsimple.pipeline", inputs, outputs=outputs)
result.as_numpy("OUTPUT0")
## binary data does not work with http behind pipeline
# import numpy as np
# binary_data = True
# inputs = [
# httpclient.InferInput("INPUT0", (1, 16), "INT32"),
# httpclient.InferInput("INPUT1", (1, 16), "INT32"),
# ]
# inputs[0].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
# inputs[1].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"), binary_data=binary_data)
# outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=binary_data)]
# result = http_triton_client.infer("tfsimple.pipeline", inputs, outputs=outputs)
# result.as_numpy("OUTPUT0")
import tritonclient.grpc as grpcclient
import numpy as np
grpc_triton_client = grpcclient.InferenceServerClient(
url=f"{MESH_IP}:80",
verbose=False,
)
model_name = "tfsimple1"
headers = {"seldon-model": model_name}
print("model ready:", grpc_triton_client.is_model_ready(model_name, headers=headers))
print(grpc_triton_client.get_model_metadata(model_name, headers=headers))
# Against Model
model_name = "tfsimple1"
headers = {"seldon-model": model_name}
inputs = [
grpcclient.InferInput("INPUT0", (1, 16), "INT32"),
grpcclient.InferInput("INPUT1", (1, 16), "INT32"),
]
inputs[0].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"))
inputs[1].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"))
outputs = [grpcclient.InferRequestedOutput("OUTPUT0")]
result = grpc_triton_client.infer(model_name, inputs, outputs=outputs, headers=headers)
result.as_numpy("OUTPUT0")
# Against Pipeline
model_name = "tfsimple.pipeline"
headers = {"seldon-model": model_name}
inputs = [
grpcclient.InferInput("INPUT0", (1, 16), "INT32"),
grpcclient.InferInput("INPUT1", (1, 16), "INT32"),
]
inputs[0].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"))
inputs[1].set_data_from_numpy(np.arange(1, 17).reshape(-1, 16).astype("int32"))
outputs = [grpcclient.InferRequestedOutput("OUTPUT0")]
result = grpc_triton_client.infer(model_name, inputs, outputs=outputs, headers=headers)
result.as_numpy("OUTPUT0")
This notebook will show how we can update running experiments.
We will use three SKlearn Iris classification models to illustrate experiment updates.
Load all models.
seldon model load -f ./models/sklearn1.yaml
seldon model load -f ./models/sklearn2.yaml
seldon model load -f ./models/sklearn3.yaml
{}
{}
{}
seldon model status iris -w ModelAvailable
seldon model status iris2 -w ModelAvailable
seldon model status iris3 -w ModelAvailable
{}
{}
{}
Let's call all three models individually first.
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris2 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::50]
seldon model infer iris3 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris3_1::50]
We will start an experiment to change the iris endpoint to split traffic with the iris2 model.
cat ./experiments/ab-default-model.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
name: experiment-sample
spec:
default: iris
candidates:
- name: iris
weight: 50
- name: iris2
weight: 50
seldon experiment start -f ./experiments/ab-default-model.yaml
{}
seldon experiment status experiment-sample -w | jq -M .
{
"experimentName": "experiment-sample",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
Now when we call the iris model we should see a roughly 50/50 split between the two models.
seldon model infer iris -i 100 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::48 :iris_1::52]
Now we update the experiment to split traffic with the iris3 model instead.
cat ./experiments/ab-default-model2.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
name: experiment-sample
spec:
default: iris
candidates:
- name: iris
weight: 50
- name: iris3
weight: 50
seldon experiment start -f ./experiments/ab-default-model2.yaml
{}
seldon experiment status experiment-sample -w | jq -M .
{
"experimentName": "experiment-sample",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
Now we should see a split with the iris3 model.
seldon model infer iris -i 100 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris3_1::42 :iris_1::58]
seldon experiment stop experiment-sample
{}
Now that the experiment has been stopped, we check everything is as before.
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris2 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::50]
seldon model infer iris3 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris3_1::50]
seldon model unload iris
seldon model unload iris2
seldon model unload iris3
{}
{}
{}
Here we test changing the model we want to split traffic on. We will use three SKlearn Iris classification models to illustrate.
seldon model load -f ./models/sklearn1.yaml
seldon model load -f ./models/sklearn2.yaml
seldon model load -f ./models/sklearn3.yaml
{}
{}
{}
seldon model status iris -w ModelAvailable
seldon model status iris2 -w ModelAvailable
seldon model status iris3 -w ModelAvailable
{}
{}
{}
Let's call all three models to verify initial conditions.
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris2 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::50]
seldon model infer iris3 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris3_1::50]
Now we start an experiment so that calls to the iris model are split with the iris2 model.
cat ./experiments/ab-default-model.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
name: experiment-sample
spec:
default: iris
candidates:
- name: iris
weight: 50
- name: iris2
weight: 50
seldon experiment start -f ./experiments/ab-default-model.yaml
{}
seldon experiment status experiment-sample -w | jq -M .
{
"experimentName": "experiment-sample",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
Run a set of calls and record which route the traffic took. There should be roughly a 50/50 split.
seldon model infer iris -i 100 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::51 :iris_1::49]
Now let's change the model the experiment modifies to the iris3 model, splitting traffic between it and the iris2 model.
cat ./experiments/ab-default-model3.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Experiment
metadata:
name: experiment-sample
spec:
default: iris3
candidates:
- name: iris3
weight: 50
- name: iris2
weight: 50
seldon experiment start -f ./experiments/ab-default-model3.yaml
{}
seldon experiment status experiment-sample -w | jq -M .
{
"experimentName": "experiment-sample",
"active": true,
"candidatesReady": true,
"mirrorReady": true,
"statusDescription": "experiment active",
"kubernetesMeta": {}
}
Let's check that the iris model now behaves as before, while the iris3 model has its traffic split.
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris3 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::25 :iris3_1::25]
seldon model infer iris2 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::50]
seldon experiment stop experiment-sample
{}
Finally, let's check that, now the experiment has stopped, everything is as it was at the start.
seldon model infer iris -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris2 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris2_1::50]
seldon model infer iris3 -i 50 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris3_1::50]
seldon model unload iris
seldon model unload iris2
seldon model unload iris3
{}
{}
{}
Run these examples from the samples/examples/image_classifier folder.
We show an image classifier (CIFAR10) with associated outlier and drift detectors using a Pipeline.
The model is a TensorFlow CIFAR10 image classifier.
The outlier detector is created from the CIFAR10 VAE Outlier example.
The drift detector is created from the CIFAR10 KS Drift example.
To train these artifacts locally, run the training notebook.
import requests
import json
from typing import Dict, List
import numpy as np
import os
import tensorflow as tf
from alibi_detect.utils.perturbation import apply_mask
from alibi_detect.datasets import fetch_cifar10c
import matplotlib.pyplot as plt
tf.keras.backend.clear_session()
2023-06-30 15:39:28.732453: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-06-30 15:39:28.732465: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
train, test = tf.keras.datasets.cifar10.load_data()
X_train, y_train = train
X_test, y_test = test
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
classes = (
"plane",
"car",
"bird",
"cat",
"deer",
"dog",
"frog",
"horse",
"ship",
"truck",
)
(50000, 32, 32, 3) (50000, 1) (10000, 32, 32, 3) (10000, 1)
outliers = []
for idx in range(0,X_train.shape[0]):
    X_mask, mask = apply_mask(X_train[idx].reshape(1, 32, 32, 3),
                              mask_size=(14,14),
                              n_masks=1,
                              channels=[0,1,2],
                              mask_type='normal',
                              noise_distr=(0,1),
                              clip_rng=(0,1))
    outliers.append(X_mask)
X_outliers = np.vstack(outliers)
X_outliers.shape
(50000, 32, 32, 3)
corruption = ['brightness']
X_corr, y_corr = fetch_cifar10c(corruption=corruption, severity=5, return_X_y=True)
X_corr = X_corr.astype('float32') / 255
reqJson = json.loads('{"inputs":[{"name":"input_1","data":[],"datatype":"FP32","shape":[]}]}')
url = "http://0.0.0.0:9000/v2/models/model/infer"
def infer(resourceName: str, batchSz: int, requestType: str):
    # Select images from the outlier, corrupted (drift) or training set
    if requestType == "outlier":
        rows = X_outliers[0:0+batchSz]
    elif requestType == "drift":
        rows = X_corr[0:0+batchSz]
    else:
        rows = X_train[0:0+batchSz]
    # Display the images being sent
    for i in range(batchSz):
        show(rows[i])
    # Fill the v2 inference request with the flattened batch
    reqJson["inputs"][0]["data"] = rows.flatten().tolist()
    reqJson["inputs"][0]["shape"] = [batchSz, 32, 32, 3]
    # The seldon-model header routes the request to the named model or pipeline
    headers = {"Content-Type": "application/json", "seldon-model":resourceName}
    response_raw = requests.post(url, json=reqJson, headers=headers)
    print(response_raw)
    print(response_raw.json())
def show(X):
    plt.imshow(X.reshape(32, 32, 3))
    plt.axis("off")
    plt.show()
cat ../../models/cifar10.yaml
echo "---"
cat ../../models/cifar10-outlier-detect.yaml
echo "---"
cat ../../models/cifar10-drift-detect.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: cifar10
spec:
storageUri: "gs://seldon-models/triton/tf_cifar10"
requirements:
- tensorflow
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: cifar10-outlier
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/cifar10/outlier-detector"
requirements:
- mlserver
- alibi-detect
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
name: cifar10-drift
spec:
storageUri: "gs://seldon-models/scv2/examples/mlserver_1.3.5/cifar10/drift-detector"
requirements:
- mlserver
- alibi-detect
seldon model load -f ../../models/cifar10.yaml
seldon model load -f ../../models/cifar10-outlier-detect.yaml
seldon model load -f ../../models/cifar10-drift-detect.yaml
{}
{}
{}
seldon model status cifar10 -w ModelAvailable | jq .
seldon model status cifar10-outlier -w ModelAvailable | jq .
seldon model status cifar10-drift -w ModelAvailable | jq .
{}
{}
{}
cat ../../pipelines/cifar10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
name: cifar10-production
spec:
steps:
- name: cifar10
- name: cifar10-outlier
- name: cifar10-drift
batch:
size: 20
output:
steps:
- cifar10
- cifar10-outlier.outputs.is_outlier
seldon pipeline load -f ../../pipelines/cifar10.yaml
seldon pipeline status cifar10-production -w PipelineReady | jq -M .
{
"pipelineName": "cifar10-production",
"versions": [
{
"pipeline": {
"name": "cifar10-production",
"uid": "cifeii2ufmbc73e5insg",
"version": 1,
"steps": [
{
"name": "cifar10"
},
{
"name": "cifar10-drift",
"batch": {
"size": 20
}
},
{
"name": "cifar10-outlier"
}
],
"output": {
"steps": [
"cifar10.outputs",
"cifar10-outlier.outputs.is_outlier"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-30T14:40:09.047429817Z",
"modelsReady": true
}
}
]
}
infer("cifar10-production.pipeline",20, "normal")
<Response [200]>
{'model_name': '', 'outputs': [{'data': [1.45001495e-08, 1.2525752e-09, 1.6298458e-07, 0.11529388, 1.7431412e-07, 6.1856604e-06, 0.8846994, 6.0739285e-09, 7.437921e-08, 4.7317337e-09, 1.26449e-06, 4.8814868e-09, 1.5153439e-09, 8.490656e-09, 5.5131194e-10, 1.1617216e-09, 5.7729294e-10, 2.8839776e-07, 0.0006149016, 0.99938357, 0.888746, 2.5331951e-06, 0.00012967695, 0.10531583, 2.4284174e-05, 6.3332986e-06, 0.0016261435, 1.13079e-05, 0.0013286703, 0.0028091935, 2.0993439e-06, 3.680449e-08, 0.0013269952, 2.1766558e-05, 0.99841356, 0.00015300694, 6.9472035e-06, 1.3277059e-05, 6.1860555e-05, 3.4072806e-07, 1.1205097e-05, 0.99997175, 1.9948227e-07, 6.9880834e-08, 3.3387135e-08, 5.2603138e-08, 3.0352305e-07, 4.3738982e-08, 5.3243946e-07, 1.5870584e-05, 0.0006525102, 0.013322109, 1.480307e-06, 0.9766325, 4.9847167e-05, 0.00058075984, 0.008405659, 5.2234273e-06, 0.00023390084, 0.000116047224, 1.6682397e-06, 5.7737526e-10, 0.9975605, 6.45564e-05, 0.002371972, 1.0392675e-07, 9.747962e-08, 1.4484569e-07, 8.762438e-07, 2.4758325e-08, 5.028761e-09, 6.856381e-11, 5.9932094e-12, 4.921233e-10, 1.471166e-07, 2.7940719e-06, 3.4563383e-09, 0.99999714, 5.9420524e-10, 9.445026e-11, 4.1854888e-05, 5.041549e-08, 8.0302314e-08, 1.2119854e-07, 6.781646e-09, 1.2616152e-08, 1.1878505e-08, 1.628573e-09, 0.9999578, 3.281738e-08, 0.08930307, 1.4065135e-07, 4.1117343e-07, 0.90898305, 8.933351e-07, 0.0015637449, 0.00013868928, 9.092981e-06, 4.8759745e-07, 4.3976044e-07, 0.00016094849, 3.5653954e-07, 0.0760521, 0.8927447, 0.0011777573, 0.00265573, 0.027189083, 4.1892267e-06, 1.329405e-05, 1.8564688e-06, 1.3373891e-06, 1.0251247e-07, 8.651912e-09, 4.458202e-06, 1.4646349e-05, 1.260957e-06, 1.046087e-08, 0.9998946, 8.332438e-05, 3.900894e-07, 6.53852e-05, 3.012202e-08, 1.0247197e-07, 1.8824371e-06, 0.0004958526, 3.533475e-05, 2.739997e-07, 0.99939275, 4.840305e-06, 3.5346695e-06, 0.0005518078, 3.1597017e-07, 0.99902296, 0.00031509742, 8.07886e-07, 1.6366084e-06, 2.795575e-06, 6.112367e-06, 9.817249e-05, 2.602709e-07, 0.0004561966, 5.360607e-06, 2.8656412e-05, 0.000116040654, 6.881144e-05, 8.844774e-06, 4.4655946e-05, 3.5564542e-05, 0.006564381, 0.9926715, 0.007300911, 1.766928e-06, 3.0520596e-07, 0.026906287, 1.3769699e-06, 0.00027539674, 5.583593e-06, 3.792553e-06, 0.0003876767, 0.9651169, 0.18114138, 2.8360228e-05, 0.00019927241, 0.007685872, 0.00014663498, 3.9361137e-05, 5.941682e-05, 7.36174e-05, 0.79936546, 0.01126067, 2.3992783e-11, 7.6336457e-16, 1.4644799e-15, 1, 2.4652159e-14, 1.1786078e-10, 1.9402116e-13, 4.2408636e-15, 1.209294e-15, 2.9042784e-15, 1.5366902e-08, 1.2476195e-09, 1.3560152e-07, 0.999997, 4.3113017e-11, 2.8163534e-08, 2.4494727e-06, 1.3122828e-10, 3.8081083e-07, 2.1628158e-11, 0.0004926238, 6.9424555e-06, 2.827196e-05, 0.92534137, 9.500486e-06, 0.00036133997, 0.072713904, 1.2831057e-07, 0.0010457055, 2.8514464e-07], 'name': 'fc10', 'shape': [20, 10], 'datatype': 'FP32'}, {'data': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'name': 'is_outlier', 'shape': [1, 20], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}]}
seldon pipeline inspect cifar10-production.cifar10-drift.outputs.is_drift
seldon.default.model.cifar10-drift.outputs cifeij8fh5ss738i5bp0 {"name":"is_drift", "datatype":"INT64", "shape":["1", "1"], "parameters":{"content_type":{"stringParam":"np"}}, "contents":{"int64Contents":["0"]}}
infer("cifar10-production.pipeline",20, "drift")
<Response [200]>
{'model_name': '', 'outputs': [{'data': [8.080701e-09, 2.3025173e-12, 2.2681688e-09, 1, 4.1828953e-11, 4.48467e-09, 3.216822e-08, 2.8404365e-13, 5.217064e-09, 3.3497323e-13, 0.96965235, 4.7030144e-06, 1.6964266e-07, 1.7355454e-05, 2.6667e-06, 1.9505828e-06, 1.1363079e-07, 3.3352034e-08, 0.030320557, 1.7086056e-07, 0.03725602, 6.8623276e-06, 7.5557014e-05, 0.00018132397, 2.2838503e-05, 0.000110639296, 2.3732607e-06, 2.1210687e-06, 0.9623351, 7.131072e-06, 0.999079, 4.207448e-09, 1.5788535e-08, 2.723756e-08, 2.6555508e-11, 2.1526697e-10, 2.7599315e-10, 2.0737433e-10, 0.0009210062, 3.0885383e-09, 6.665241e-07, 1.7765576e-09, 1.4911559e-07, 0.9765331, 1.9476123e-07, 2.8244015e-06, 0.023463126, 5.8030287e-09, 3.243206e-09, 1.12179785e-08, 4.4123663e-06, 4.7628927e-09, 1.1727273e-08, 0.9761534, 1.1409252e-08, 8.922882e-05, 0.023752932, 3.1563903e-08, 2.7916305e-09, 8.7746266e-10, 1.0166265e-05, 0.999703, 4.5408615e-05, 0.00022673907, 1.7365853e-07, 1.0147362e-06, 6.253448e-06, 2.9711526e-07, 7.811687e-07, 6.183683e-06, 0.86618125, 5.47548e-07, 0.00038408802, 0.013155022, 3.6916779e-06, 0.0006137024, 0.11965008, 3.6425424e-06, 6.7638084e-06, 1.2372367e-06, 1.9545263e-05, 1.1281859e-13, 1.6811868e-14, 0.9999777, 1.9805435e-11, 2.7563674e-06, 2.9651657e-09, 1.1363432e-12, 2.9902746e-13, 1.220973e-12, 2.9895918e-05, 3.4964305e-07, 1.1331837e-08, 1.7012125e-06, 3.6088227e-07, 3.035954e-08, 2.2102333e-06, 1.7414077e-08, 0.9999455, 1.9921794e-05, 0.9999999, 5.3446598e-11, 6.3188843e-10, 1.0956511e-07, 1.1538642e-10, 8.113561e-10, 4.7179572e-08, 1.4544753e-11, 5.490219e-08, 1.3347151e-10, 1.5363307e-07, 6.604881e-09, 2.424105e-10, 9.963063e-09, 3.9349533e-09, 1.5709017e-09, 7.705774e-10, 4.8085802e-08, 1.8885139e-05, 0.9999809, 7.147243e-08, 3.143131e-13, 2.1447092e-13, 0.00042652222, 6.945973e-12, 0.9995734, 6.174434e-09, 4.1128205e-11, 3.4031404e-13, 8.573159e-15, 1.2226405e-09, 2.3768018e-10, 2.822187e-07, 8.016278e-08, 4.0692296e-08, 6.8023346e-06, 2.3926754e-07, 0.9999925, 6.652648e-09, 7.743497e-09, 7.6360675e-06, 5.9386625e-09, 1.5675019e-09, 2.136716e-07, 1.3074002e-06, 3.700079e-10, 1.0984521e-09, 6.2138824e-08, 0.9609078, 0.03908287, 0.0008332255, 7.696685e-08, 2.4428939e-09, 7.186676e-05, 1.4520063e-09, 1.4521317e-08, 1.09093e-06, 1.2531165e-10, 0.9990938, 5.798501e-09, 5.785368e-05, 3.82365e-09, 7.404351e-08, 0.008338481, 8.048078e-10, 0.99157715, 1.1663455e-05, 1.4583546e-05, 8.3543476e-08, 3.274394e-08, 2.4682688e-05, 1.3951502e-09, 1.0260489e-08, 0.9998845, 1.9418138e-08, 8.667954e-07, 2.1851054e-07, 8.917964e-05, 4.4437223e-07, 1.1292918e-07, 4.5302792e-07, 5.631744e-08, 2.9086214e-08, 3.1013877e-07, 7.695681e-09, 2.1452344e-09, 1.1493902e-08, 6.1980093e-10, 0.99999917, 1.1436694e-08, 2.42685e-05, 8.557389e-08, 0.024081504, 0.0073837163, 4.8152968e-05, 5.128531e-07, 0.9684405, 9.630179e-08, 2.1060101e-05, 1.901065e-07], 'name': 'fc10', 'shape': [20, 10], 'datatype': 'FP32'}, {'data': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'name': 'is_outlier', 'shape': [1, 20], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}]}
seldon pipeline inspect cifar10-production.cifar10-drift.outputs.is_drift
seldon.default.model.cifar10-drift.outputs cifeimgfh5ss738i5bpg {"name":"is_drift", "datatype":"INT64", "shape":["1", "1"], "parameters":{"content_type":{"stringParam":"np"}}, "contents":{"int64Contents":["1"]}}
infer("cifar10-production.pipeline",1, "outlier")
<Response [200]>
{'model_name': '', 'outputs': [{'data': [6.3606867e-06, 0.0006106364, 0.0054279356, 0.6536454, 1.4738829e-05, 2.6104701e-06, 0.3397848, 1.3538776e-05, 0.0004458526, 4.807229e-05], 'name': 'fc10', 'shape': [1, 10], 'datatype': 'FP32'}, {'data': [1], 'name': 'is_outlier', 'shape': [1, 1], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}]}
infer("cifar10-production.pipeline",1, "ok")
<Response [200]>
{'model_name': '', 'outputs': [{'data': [1.45001495e-08, 1.2525752e-09, 1.6298458e-07, 0.11529388, 1.7431412e-07, 6.1856604e-06, 0.8846994, 6.0739285e-09, 7.43792e-08, 4.7317337e-09], 'name': 'fc10', 'shape': [1, 10], 'datatype': 'FP32'}, {'data': [0], 'name': 'is_outlier', 'shape': [1, 1], 'datatype': 'INT64', 'parameters': {'content_type': 'np'}}]}
Use the Seldon CLI to look at the outputs from the CIFAR10 model. It decodes the Triton binary outputs for us.
seldon pipeline inspect cifar10-production.cifar10.outputs
seldon.default.model.cifar10.outputs cifeiq8fh5ss738i5bqg {"modelName":"cifar10_1", "modelVersion":"1", "outputs":[{"name":"fc10", "datatype":"FP32", "shape":["1", "10"], "contents":{"fp32Contents":[1.45001495e-8, 1.2525752e-9, 1.6298458e-7, 0.11529388, 1.7431412e-7, 0.0000061856604, 0.8846994, 6.0739285e-9, 7.43792e-8, 4.7317337e-9]}}]}
seldon pipeline unload cifar10-production
seldon model unload cifar10
seldon model unload cifar10-outlier
seldon model unload cifar10-drift
Note: The Seldon CLI allows you to view information about underlying Seldon resources and make changes to them through the scheduler in non-Kubernetes environments. However, it cannot modify underlying manifests within a Kubernetes cluster. Therefore, using the Seldon CLI for control plane operations in a Kubernetes environment is not recommended. For more details, see Seldon CLI.
kubectl wait --for condition=ready --timeout=1s pipeline --all -n ${NAMESPACE}
error: timed out waiting for the condition on pipelines/tfsimples
kubectl get pipeline tfsimples -o jsonpath='{.status.conditions[0]}' -n ${NAMESPACE}
{"lastTransitionTime":"2022-11-14T10:25:31Z","status":"False","type":"ModelsReady"}
kubectl create -f ./models/tfsimple1.yaml -n ${NAMESPACE}
kubectl create -f ./models/tfsimple2.yaml -n ${NAMESPACE}
model.mlops.seldon.io/tfsimple1 created
model.mlops.seldon.io/tfsimple2 created
kubectl wait --for condition=ready --timeout=300s pipeline --all -n ${NAMESPACE}
pipeline.mlops.seldon.io/tfsimples condition met
kubectl get pipeline tfsimples -o jsonpath='{.status.conditions[0]}' -n ${NAMESPACE}
{"lastTransitionTime":"2022-11-14T10:25:49Z","status":"True","type":"ModelsReady"}
kubectl delete -f ./models/tfsimple1.yaml -n ${NAMESPACE}
kubectl delete -f ./models/tfsimple2.yaml -n ${NAMESPACE}
kubectl delete -f ./pipelines/tfsimples.yaml -n ${NAMESPACE}
model.mlops.seldon.io "tfsimple1" deleted
model.mlops.seldon.io "tfsimple2" deleted
pipeline.mlops.seldon.io "tfsimples" deleted
%env INFER_REST_ENDPOINT=http://0.0.0.0:9000
%env INFER_GRPC_ENDPOINT=0.0.0.0:9000
%env SELDON_SCHEDULE_HOST=0.0.0.0:9004
env: INFER_REST_ENDPOINT=http://0.0.0.0:9000
env: INFER_GRPC_ENDPOINT=0.0.0.0:9000
env: SELDON_SCHEDULE_HOST=0.0.0.0:9004
#%env INFER_REST_ENDPOINT=http://172.19.255.1:80
#%env INFER_GRPC_ENDPOINT=172.19.255.1:80
#%env SELDON_SCHEDULE_HOST=172.19.255.2:9004
cat ./pipelines/tfsimples.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: tfsimples
spec:
  steps:
    - name: tfsimple1
    - name: tfsimple2
      inputs:
      - tfsimple1
      tensorMap:
        tfsimple1.outputs.OUTPUT0: INPUT0
        tfsimple1.outputs.OUTPUT1: INPUT1
  output:
    steps:
    - tfsimple2
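For orientation: the tfsimple models are Triton's standard example model, which returns the element-wise sum and difference of its two inputs, and the tensorMap above wires tfsimple1's outputs into tfsimple2's inputs. Conceptually the chained pipeline computes the following (an illustrative Python sketch of the dataflow, not how Core v2 executes it):

import numpy as np

def tfsimple(i0: np.ndarray, i1: np.ndarray):
    # Triton's example "simple" model: element-wise sum and difference
    return i0 + i1, i0 - i1

def tfsimples_pipeline(INPUT0: np.ndarray, INPUT1: np.ndarray):
    out0, out1 = tfsimple(INPUT0, INPUT1)
    # tensorMap: tfsimple1.outputs.OUTPUT0 -> INPUT0, OUTPUT1 -> INPUT1
    return tfsimple(out0, out1)

x = np.arange(1, 17)
print(tfsimples_pipeline(x, x))  # both outputs are the doubled inputs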
curl -Ik ${INFER_REST_ENDPOINT}/v2/pipelines/tfsimples/ready
grpcurl -d '{"name":"tfsimples"}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimples.pipeline \
${INFER_GRPC_ENDPOINT} inference.GRPCInferenceService/ModelReady
ERROR:
Code: Unimplemented
Message:
seldon pipeline load -f ./pipelines/tfsimples.yaml
seldon pipeline status tfsimples -w PipelineReady
{"pipelineName":"tfsimples", "versions":[{"pipeline":{"name":"tfsimples", "uid":"ciepit2i8ufs73flaitg", "version":1, "steps":[{"name":"tfsimple1"}, {"name":"tfsimple2", "inputs":["tfsimple1.outputs"], "tensorMap":{"tfsimple1.outputs.OUTPUT0":"INPUT0", "tfsimple1.outputs.OUTPUT1":"INPUT1"}}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:47:16.365934922Z"}}]}
seldon pipeline status tfsimples | jq .versions[0].state.modelsReady
curl -Ik ${INFER_REST_ENDPOINT}/v2/pipelines/tfsimples/ready
grpcurl -d '{"name":"tfsimples"}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimples.pipeline \
${INFER_GRPC_ENDPOINT} inference.GRPCInferenceService/ModelReady
{
}
seldon model load -f ./models/tfsimple1.yaml
seldon model status tfsimple1 -w ModelAvailable
{}
{}
curl -Ik ${INFER_REST_ENDPOINT}/v2/pipelines/tfsimples/ready
grpcurl -d '{"name":"tfsimples"}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimples.pipeline \
${INFER_GRPC_ENDPOINT} inference.GRPCInferenceService/ModelReady
{
}
seldon model load -f ./models/tfsimple2.yaml
seldon model status tfsimple2 -w ModelAvailable | jq -M .
{}
{}
curl -Ik ${INFER_REST_ENDPOINT}/v2/pipelines/tfsimples/ready
grpcurl -d '{"name":"tfsimples"}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimples.pipeline \
${INFER_GRPC_ENDPOINT} inference.GRPCInferenceService/ModelReady
{
"ready": true
}
seldon pipeline status tfsimples | jq .versions[0].state.modelsReady
seldon pipeline unload tfsimples
curl -Ik ${INFER_REST_ENDPOINT}/v2/pipelines/tfsimples/ready
grpcurl -d '{"name":"tfsimples"}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimples.pipeline \
${INFER_GRPC_ENDPOINT} inference.GRPCInferenceService/ModelReady
ERROR:
Code: Unimplemented
Message:
seldon pipeline status tfsimples | jq .versions[0].state.modelsReady
seldon pipeline load -f ./pipelines/tfsimples.yaml
seldon pipeline status tfsimples -w PipelineReady
{"pipelineName":"tfsimples", "versions":[{"pipeline":{"name":"tfsimples", "uid":"ciepj5qi8ufs73flaiu0", "version":1, "steps":[{"name":"tfsimple1"}, {"name":"tfsimple2", "inputs":["tfsimple1.outputs"], "tensorMap":{"tfsimple1.outputs.OUTPUT0":"INPUT0", "tfsimple1.outputs.OUTPUT1":"INPUT1"}}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2023-06-29T14:47:51.626155116Z", "modelsReady":true}}]}
curl -Ik ${INFER_REST_ENDPOINT}/v2/pipelines/tfsimples/ready
grpcurl -d '{"name":"tfsimples"}' \
-plaintext \
-import-path ../apis \
-proto ../apis/mlops/v2_dataplane/v2_dataplane.proto \
-rpc-header seldon-model:tfsimples.pipeline \
${INFER_GRPC_ENDPOINT} inference.GRPCInferenceService/ModelReady
{
"ready": true
}
seldon pipeline status tfsimples | jq .versions[0].state.modelsReady
seldon model unload tfsimple1
seldon model unload tfsimple2
seldon pipeline status tfsimples | jq .versions[0].state.modelsReady
seldon pipeline unload tfsimples
import os
os.environ["NAMESPACE"] = "seldon-mesh"
MESH_IP=!kubectl get svc seldon-mesh -n ${NAMESPACE} -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP=MESH_IP[0]
import os
os.environ['MESH_IP'] = MESH_IP
MESH_IP
kubectl create -f ./pipelines/tfsimples.yaml -n ${NAMESPACE}
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse
from mlserver.codecs import PandasCodec
from mlserver.errors import MLServerError
import pandas as pd
from fastapi import status
from mlserver.logging import logger

QUERY_KEY = "query"


class ModelParametersMissing(MLServerError):
    def __init__(self, model_name: str, reason: str):
        super().__init__(
            f"Parameters missing for model {model_name} {reason}", status.HTTP_400_BAD_REQUEST
        )


class PandasQueryRuntime(MLModel):

    async def load(self) -> bool:
        logger.info("Loading with settings %s", self.settings)
        if self.settings.parameters is None or \
           self.settings.parameters.extra is None:
            raise ModelParametersMissing(self.name, "no settings.parameters.extra found")
        self.query = self.settings.parameters.extra[QUERY_KEY]
        if self.query is None:
            raise ModelParametersMissing(self.name, "no settings.parameters.extra.query found")
        self.ready = True
        return self.ready

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        input_df: pd.DataFrame = PandasCodec.decode_request(payload)
        # run query on input_df and save in output_df
        output_df = input_df.query(self.query)
        if output_df.empty:
            output_df = pd.DataFrame({'status': ["no rows satisfied " + self.query]})
        else:
            output_df["status"] = "row satisfied " + self.query
        return PandasCodec.encode_response(self.name, output_df, self.version)
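Since the query string arrives via the Model's parameters, the predict logic can be sanity-checked standalone; a minimal sketch with illustrative data (not part of the example's assets):

import pandas as pd

query = "choice == 1"  # would normally come from settings.parameters.extra["query"]
input_df = pd.DataFrame({"choice": [1, 2, 1]})

# Same logic as PandasQueryRuntime.predict, minus the V2 codec round trip
output_df = input_df.query(query).copy()
if output_df.empty:
    output_df = pd.DataFrame({"status": ["no rows satisfied " + query]})
else:
    output_df["status"] = "row satisfied " + query
print(output_df)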
cat ../../models/choice1.yaml
echo "---"
cat ../../models/choice2.yaml
echo "---"
cat ../../models/add10.yaml
echo "---"
cat ../../models/mul10.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: choice-is-one
spec:
  storageUri: "gs://seldon-models/scv2/examples/pandasquery"
  requirements:
  - mlserver
  - python
  parameters:
  - name: query
    value: "choice == 1"
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: choice-is-two
spec:
  storageUri: "gs://seldon-models/scv2/examples/pandasquery"
  requirements:
  - mlserver
  - python
  parameters:
  - name: query
    value: "choice == 2"
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: add10
spec:
  storageUri: "gs://seldon-models/scv2/samples/triton_23-03/add10"
  requirements:
  - triton
  - python
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: mul10
spec:
  storageUri: "gs://seldon-models/scv2/samples/triton_23-03/mul10"
  requirements:
  - triton
  - python
seldon model load -f ../../models/choice1.yaml
seldon model load -f ../../models/choice2.yaml
seldon model load -f ../../models/add10.yaml
seldon model load -f ../../models/mul10.yaml
{}
{}
{}
{}
seldon model status choice-is-one -w ModelAvailable
seldon model status choice-is-two -w ModelAvailable
seldon model status add10 -w ModelAvailable
seldon model status mul10 -w ModelAvailable
{}
{}
{}
{}
cat ../../pipelines/choice.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: choice
spec:
  steps:
    - name: choice-is-one
    - name: mul10
      inputs:
      - choice.inputs.INPUT
      triggers:
      - choice-is-one.outputs.choice
    - name: choice-is-two
    - name: add10
      inputs:
      - choice.inputs.INPUT
      triggers:
      - choice-is-two.outputs.choice
  output:
    steps:
    - mul10
    - add10
    stepsJoin: any
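Each branch is gated by a trigger on one of the pandasquery models, and stepsJoin: any takes the pipeline output from whichever branch fired. Given that add10 adds 10 and mul10 multiplies by 10 (as the inference calls below confirm), the routing is conceptually equivalent to this Python sketch:

import numpy as np

def choice_pipeline(choice: int, INPUT: np.ndarray) -> np.ndarray:
    # mul10 is triggered only when choice-is-one matched (choice == 1);
    # add10 is triggered only when choice-is-two matched (choice == 2)
    if choice == 1:
        return INPUT * 10
    if choice == 2:
        return INPUT + 10
    raise RuntimeError("no branch triggered: the pipeline would produce no output")

print(choice_pipeline(1, np.array([5, 6, 7, 8])))  # [50 60 70 80]
print(choice_pipeline(2, np.array([5, 6, 7, 8])))  # [15 16 17 18]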
seldon pipeline load -f ../../pipelines/choice.yaml
seldon pipeline status choice -w PipelineReady | jq -M .
{
"pipelineName": "choice",
"versions": [
{
"pipeline": {
"name": "choice",
"uid": "cifel9aufmbc73e5intg",
"version": 1,
"steps": [
{
"name": "add10",
"inputs": [
"choice.inputs.INPUT"
],
"triggers": [
"choice-is-two.outputs.choice"
]
},
{
"name": "choice-is-one"
},
{
"name": "choice-is-two"
},
{
"name": "mul10",
"inputs": [
"choice.inputs.INPUT"
],
"triggers": [
"choice-is-one.outputs.choice"
]
}
],
"output": {
"steps": [
"mul10.outputs",
"add10.outputs"
],
"stepsJoin": "ANY"
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 1,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-06-30T14:45:57.284684328Z",
"modelsReady": true
}
}
]
}
seldon pipeline infer choice --inference-mode grpc \
'{"model_name":"choice","inputs":[{"name":"choice","contents":{"int_contents":[1]},"datatype":"INT32","shape":[1]},{"name":"INPUT","contents":{"fp32_contents":[5,6,7,8]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
50,
60,
70,
80
]
}
}
]
}
seldon pipeline infer choice --inference-mode grpc \
'{"model_name":"choice","inputs":[{"name":"choice","contents":{"int_contents":[2]},"datatype":"INT32","shape":[1]},{"name":"INPUT","contents":{"fp32_contents":[5,6,7,8]},"datatype":"FP32","shape":[4]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT",
"datatype": "FP32",
"shape": [
"4"
],
"contents": {
"fp32Contents": [
15,
16,
17,
18
]
}
}
]
}
seldon model unload choice-is-one
seldon model unload choice-is-two
seldon model unload add10
seldon model unload mul10
seldon pipeline unload choice
helm upgrade --install seldon-core-v2-crds ../k8s/helm-charts/seldon-core-v2-crds -n seldon-mesh
Release "seldon-core-v2-crds" does not exist. Installing it now.
NAME: seldon-core-v2-crds
LAST DEPLOYED: Tue Aug 15 11:01:03 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 1
TEST SUITE: None
The setup below also illustrates using Kafka-specific prefixes for topics and consumer group IDs, for isolation where the Kafka cluster is shared with other applications and you want to enforce constraints. Strictly speaking this is not needed in this example, as we install Kafka here just for Seldon.
helm upgrade --install seldon-v2 ../k8s/helm-charts/seldon-core-v2-setup/ -n seldon-mesh \
--set controller.clusterwide=true \
--set kafka.topicPrefix=myorg \
--set kafka.consumerGroupIdPrefix=myorg
Release "seldon-v2" does not exist. Installing it now.
NAME: seldon-v2
LAST DEPLOYED: Tue Aug 15 11:01:07 2023
NAMESPACE: seldon-mesh
STATUS: deployed
REVISION: 1
TEST SUITE: None
kubectl create namespace ns1
kubectl create namespace ns2
namespace/ns1 created
namespace/ns2 created
helm install seldon-v2-runtime ../k8s/helm-charts/seldon-core-v2-runtime -n ns1 --wait
NAME: seldon-v2-runtime
LAST DEPLOYED: Tue Aug 15 11:01:11 2023
NAMESPACE: ns1
STATUS: deployed
REVISION: 1
TEST SUITE: None
helm install seldon-v2-servers ../k8s/helm-charts/seldon-core-v2-servers -n ns1 --wait
NAME: seldon-v2-servers
LAST DEPLOYED: Tue Aug 15 10:47:31 2023
NAMESPACE: ns1
STATUS: deployed
REVISION: 1
TEST SUITE: None
helm install seldon-v2-runtime ../k8s/helm-charts/seldon-core-v2-runtime -n ns2 --wait
NAME: seldon-v2-runtime
LAST DEPLOYED: Tue Aug 15 10:53:12 2023
NAMESPACE: ns2
STATUS: deployed
REVISION: 1
TEST SUITE: None
helm install seldon-v2-servers ../k8s/helm-charts/seldon-core-v2-servers -n ns2 --wait
NAME: seldon-v2-servers
LAST DEPLOYED: Tue Aug 15 10:53:28 2023
NAMESPACE: ns2
STATUS: deployed
REVISION: 1
TEST SUITE: None
kubectl wait --for condition=ready --timeout=300s server --all -n ns1
server.mlops.seldon.io/mlserver condition met
server.mlops.seldon.io/triton condition met
kubectl wait --for condition=ready --timeout=300s server --all -n ns2
server.mlops.seldon.io/mlserver condition met
server.mlops.seldon.io/triton condition met
MESH_IP=!kubectl get svc seldon-mesh -n ns1 -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP_NS1=MESH_IP[0]
import os
os.environ['MESH_IP_NS1'] = MESH_IP_NS1
MESH_IP_NS1
'172.18.255.2'
MESH_IP=!kubectl get svc seldon-mesh -n ns2 -o jsonpath='{.status.loadBalancer.ingress[0].ip}'
MESH_IP_NS2=MESH_IP[0]
import os
os.environ['MESH_IP_NS2'] = MESH_IP_NS2
MESH_IP_NS2
'172.18.255.4'
Note: The Seldon CLI allows you to view information about underlying Seldon resources and make changes to them through the scheduler in non-Kubernetes environments. However, it cannot modify underlying manifests within a Kubernetes cluster. Therefore, using the Seldon CLI for control plane operations in a Kubernetes environment is not recommended. For more details, see Seldon CLI.
cat ./models/sklearn-iris-gs.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: iris
spec:
  storageUri: "gs://seldon-models/scv2/samples/mlserver_1.3.5/iris-sklearn"
  requirements:
  - sklearn
  memory: 100Ki
kubectl create -f ./models/sklearn-iris-gs.yaml -n ns1
model.mlops.seldon.io/iris created
kubectl wait --for condition=ready --timeout=300s model --all -n ns1
model.mlops.seldon.io/iris condition met
seldon model infer iris --inference-host ${MESH_IP_NS1}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "3ca1757c-df02-4e57-87c1-38311bcc5943",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
seldon model infer iris --inference-mode grpc --inference-host ${MESH_IP_NS1}:80 \
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' | jq -M .
{
"modelName": "iris_1",
"modelVersion": "1",
"outputs": [
{
"name": "predict",
"datatype": "INT64",
"shape": [
"1",
"1"
],
"parameters": {
"content_type": {
"stringParam": "np"
}
},
"contents": {
"int64Contents": [
"2"
]
}
}
]
}
kubectl create -f ./models/sklearn-iris-gs.yaml -n ns2
model.mlops.seldon.io/iris created
kubectl wait --for condition=ready --timeout=300s model --all -n ns2
model.mlops.seldon.io/iris condition met
seldon model infer iris --inference-host ${MESH_IP_NS2}:80 \
'{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
{
"model_name": "iris_1",
"model_version": "1",
"id": "f706a23e-775f-4765-bd18-2e98d83bf7d5",
"parameters": {},
"outputs": [
{
"name": "predict",
"shape": [
1,
1
],
"datatype": "INT64",
"parameters": {
"content_type": "np"
},
"data": [
2
]
}
]
}
seldon model infer iris --inference-mode grpc --inference-host ${MESH_IP_NS2}:80 \
'{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' | jq -M .
{
"modelName": "iris_1",
"modelVersion": "1",
"outputs": [
{
"name": "predict",
"datatype": "INT64",
"shape": [
"1",
"1"
],
"parameters": {
"content_type": {
"stringParam": "np"
}
},
"contents": {
"int64Contents": [
"2"
]
}
}
]
}
kubectl delete -f ./models/sklearn-iris-gs.yaml -n ns1
kubectl delete -f ./models/sklearn-iris-gs.yaml -n ns2
model.mlops.seldon.io "iris" deleted
model.mlops.seldon.io "iris" deleted
kubectl create -f ./models/tfsimple1.yaml -n ns1
kubectl create -f ./models/tfsimple2.yaml -n ns1
kubectl create -f ./models/tfsimple1.yaml -n ns2
kubectl create -f ./models/tfsimple2.yaml -n ns2
model.mlops.seldon.io/tfsimple1 created
model.mlops.seldon.io/tfsimple2 created
model.mlops.seldon.io/tfsimple1 created
model.mlops.seldon.io/tfsimple2 created
kubectl wait --for condition=ready --timeout=300s model --all -n ns1
kubectl wait --for condition=ready --timeout=300s model --all -n ns2
model.mlops.seldon.io/tfsimple1 condition met
model.mlops.seldon.io/tfsimple2 condition met
model.mlops.seldon.io/tfsimple1 condition met
model.mlops.seldon.io/tfsimple2 condition met
kubectl create -f ./pipelines/tfsimples.yaml -n ns1
kubectl create -f ./pipelines/tfsimples.yaml -n ns2
pipeline.mlops.seldon.io/tfsimples created
pipeline.mlops.seldon.io/tfsimples created
kubectl wait --for condition=ready --timeout=300s pipeline --all -n ns1
kubectl wait --for condition=ready --timeout=300s pipeline --all -n ns2
pipeline.mlops.seldon.io/tfsimples condition met
pipeline.mlops.seldon.io/tfsimples condition met
seldon pipeline infer tfsimples --inference-mode grpc --inference-host ${MESH_IP_NS1}:80 \
'{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
}
]
}
seldon pipeline infer tfsimples --inference-mode grpc --inference-host ${MESH_IP_NS2}:80 \
'{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M .
{
"outputs": [
{
"name": "OUTPUT0",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
},
{
"name": "OUTPUT1",
"datatype": "INT32",
"shape": [
"1",
"16"
],
"contents": {
"intContents": [
2,
4,
6,
8,
10,
12,
14,
16,
18,
20,
22,
24,
26,
28,
30,
32
]
}
}
]
}
If you have installed Kafka via the Ansible playbook setup-ecosystem, you can use the following command to see the consumer group IDs, which reflect the prefixes we configured.
kubectl exec seldon-kafka-0 -n seldon-mesh -- bin/kafka-consumer-groups.sh --list --bootstrap-server localhost:9092
myorg-ns2-seldon-pipelinegateway-dfd61b49-4bb9-4684-adce-0b7cc215d3af
myorg-ns2-seldon-modelgateway-17
myorg-ns1-seldon-pipelinegateway-d4fc83e6-29cb-442e-90cd-92a389961cfe
myorg-ns2-seldon-modelgateway-60
myorg-ns2-seldon-dataflow-73d465744b7b1b5be20e88d6245e50bd
myorg-ns1-seldon-modelgateway-60
myorg-ns1-seldon-modelgateway-17
myorg-ns1-seldon-dataflow-f563e04e093caa20c03e6eced084331b
We can similarly show the topics that have been created.
kubectl exec seldon-kafka-0 -n seldon-mesh -- bin/kafka-topics.sh --bootstrap-server=localhost:9092 --list
__consumer_offsets
myorg.ns1.errors.errors
myorg.ns1.model.iris.inputs
myorg.ns1.model.iris.outputs
myorg.ns1.model.tfsimple1.inputs
myorg.ns1.model.tfsimple1.outputs
myorg.ns1.model.tfsimple2.inputs
myorg.ns1.model.tfsimple2.outputs
myorg.ns1.pipeline.tfsimples.inputs
myorg.ns1.pipeline.tfsimples.outputs
myorg.ns2.errors.errors
myorg.ns2.model.iris.inputs
myorg.ns2.model.iris.outputs
myorg.ns2.model.tfsimple1.inputs
myorg.ns2.model.tfsimple1.outputs
myorg.ns2.model.tfsimple2.inputs
myorg.ns2.model.tfsimple2.outputs
myorg.ns2.pipeline.tfsimples.inputs
myorg.ns2.pipeline.tfsimples.outputs
kubectl delete -f ./pipelines/tfsimples.yaml -n ns1
kubectl delete -f ./pipelines/tfsimples.yaml -n ns2
pipeline.mlops.seldon.io "tfsimples" deleted
pipeline.mlops.seldon.io "tfsimples" deleted
kubectl delete -f ./models/tfsimple1.yaml -n ns1
kubectl delete -f ./models/tfsimple2.yaml -n ns1
kubectl delete -f ./models/tfsimple1.yaml -n ns2
kubectl delete -f ./models/tfsimple2.yaml -n ns2
model.mlops.seldon.io "tfsimple1" deleted
model.mlops.seldon.io "tfsimple2" deleted
model.mlops.seldon.io "tfsimple1" deleted
model.mlops.seldon.io "tfsimple2" deleted
helm delete seldon-v2-servers -n ns1 --wait
helm delete seldon-v2-servers -n ns2 --wait
release "seldon-v2-servers" uninstalled
release "seldon-v2-servers" uninstalled
helm delete seldon-v2-runtime -n ns1 --wait
helm delete seldon-v2-runtime -n ns2 --wait
release "seldon-v2-runtime" uninstalled
release "seldon-v2-runtime" uninstalled
helm delete seldon-v2 -n seldon-mesh --wait
release "seldon-v2" uninstalled
helm delete seldon-core-v2-crds -n seldon-mesh
release "seldon-core-v2-crds" uninstalled
kubectl delete namespace ns1
kubectl delete namespace ns2
namespace "ns1" deleted
namespace "ns2" deleted
In this example we create a Pipeline that chains two Huggingface models to provide speech-to-sentiment functionality, and we add an explainer to understand the result.
This example also illustrates how explainers can target pipelines, allowing complex explanation flows.
This example requires the ffmpeg package to be installed locally. Run make install-requirements for the Python dependencies.
from ipywebrtc import AudioRecorder, CameraStream
import torchaudio
from IPython.display import Audio
import base64
import json
import requests
import os
import time
Create a method that loads speech from the recorder, transforms it into mp3, and sends it as base64 data. When the result returns, it extracts and shows the text and sentiment.
reqJson = json.loads('{"inputs":[{"name":"args", "parameters": {"content_type": "base64"}, "data":[],"datatype":"BYTES","shape":[1]}]}')
url = "http://0.0.0.0:9000/v2/models/model/infer"

def infer(resource: str):
    # Save the browser recording and convert it to mp3 with ffmpeg
    with open('recording.webm', 'wb') as f:
        f.write(recorder.audio.value)
    !ffmpeg -i recording.webm -vn -ab 128k -ar 44100 file.mp3 -y -hide_banner -loglevel panic
    with open("file.mp3", mode='rb') as file:
        fileContent = file.read()
        encoded = base64.b64encode(fileContent)
        base64_message = encoded.decode('utf-8')
    # Send the base64-encoded audio to the pipeline named in the seldon-model header
    reqJson["inputs"][0]["data"] = [str(base64_message)]
    headers = {"Content-Type": "application/json", "seldon-model": resource}
    response_raw = requests.post(url, json=reqJson, headers=headers)
    j = response_raw.json()
    sentiment = j["outputs"][0]["data"][0]
    text = j["outputs"][1]["data"][0]
    # Remember the request id so we can fetch the asynchronous explanation later
    reqId = response_raw.headers["x-request-id"]
    print(reqId)
    os.environ["REQUEST_ID"] = reqId
    print(base64.b64decode(text))
    print(base64.b64decode(sentiment))
We will load two Huggingface models: one for speech-to-text and one for text-to-sentiment.
cat ../../models/hf-whisper.yaml
echo "---"
cat ../../models/hf-sentiment.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: whisper
spec:
  storageUri: "gs://seldon-models/mlserver/huggingface/whisper"
  requirements:
  - huggingface
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: sentiment
spec:
  storageUri: "gs://seldon-models/mlserver/huggingface/sentiment"
  requirements:
  - huggingface
seldon model load -f ../../models/hf-whisper.yaml
seldon model load -f ../../models/hf-sentiment.yaml
{}
{}
seldon model status whisper -w ModelAvailable | jq -M .
seldon model status sentiment -w ModelAvailable | jq -M .
{}
{}
To allow Alibi-Explain to more easily explain the sentiment, we will need:
input and output transforms that take the Dict values consumed and produced by the Huggingface sentiment model and turn them into values Alibi-Explain can easily understand: the core values we want to explain and the outputs from the sentiment model;
a separate Pipeline that joins the sentiment model with the output transform.
These transform models are MLServer custom runtimes, as shown below:
cat ./sentiment-input-transform/model.py | pygmentize
# Copyright (c) 2024 Seldon Technologies Ltd.
# Use of this software is governed BY
# (1) the license included in the LICENSE file or
# (2) if the license included in the LICENSE file is the Business Source License 1.1,
# the Change License after the Change Date as each is defined in accordance with the LICENSE file.
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse, ResponseOutput
from mlserver.codecs.string import StringRequestCodec
from mlserver.logging import logger
import json


class SentimentInputTransformRuntime(MLModel):

    async def load(self) -> bool:
        return self.ready

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        logger.info("payload (input-transform): %s", payload)
        res_list = self.decode_request(payload, default_codec=StringRequestCodec)
        logger.info("res list (input-transform): %s", res_list)
        texts = []
        for res in res_list:
            logger.info("decoded data (input-transform): %s", res)
            #text = json.loads(res)
            text = res
            texts.append(text["text"])
        logger.info("transformed data (input-transform): %s", texts)
        response = StringRequestCodec.encode_response(
            model_name="sentiment",
            payload=texts
        )
        logger.info("response (input-transform): %s", response)
        return response
cat ./sentiment-output-transform/model.py | pygmentize
# Copyright (c) 2024 Seldon Technologies Ltd.
# Use of this software is governed BY
# (1) the license included in the LICENSE file or
# (2) if the license included in the LICENSE file is the Business Source License 1.1,
# the Change License after the Change Date as each is defined in accordance with the LICENSE file.
from mlserver import MLModel
from mlserver.types import InferenceRequest, InferenceResponse, ResponseOutput
from mlserver.codecs import StringCodec, Base64Codec, NumpyRequestCodec
from mlserver.codecs.string import StringRequestCodec
from mlserver.codecs.numpy import NumpyRequestCodec
import base64
from mlserver.logging import logger
import numpy as np
import json


class SentimentOutputTransformRuntime(MLModel):

    async def load(self) -> bool:
        return self.ready

    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        logger.info("payload (output-transform): %s", payload)
        res_list = self.decode_request(payload, default_codec=StringRequestCodec)
        logger.info("res list (output-transform): %s", res_list)
        scores = []
        for res in res_list:
            logger.debug("decoded data (output transform): %s", res)
            #sentiment = json.loads(res)
            sentiment = res
            if sentiment["label"] == "POSITIVE":
                scores.append(1)
            else:
                scores.append(0)
        response = NumpyRequestCodec.encode_response(
            model_name="sentiments",
            payload=np.array(scores)
        )
        logger.info("response (output-transform): %s", response)
        return response
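Taken together, the two transforms bridge between the Huggingface Dict payloads and the plain values the explainer pipeline works with; roughly, with illustrative values:

import numpy as np

# Input transform: whisper output dicts -> plain strings for the explainer input
whisper_out = [{"text": " Cambridge is a great place."}]
texts = [d["text"] for d in whisper_out]

# Output transform: sentiment label dicts -> a binary numpy array (1 = POSITIVE)
sentiment_out = [{"label": "POSITIVE", "score": 0.9998}]
scores = np.array([1 if d["label"] == "POSITIVE" else 0 for d in sentiment_out])

print(texts, scores)  # [' Cambridge is a great place.'] [1]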
cat ../../models/hf-sentiment-input-transform.yaml
echo "---"
cat ../../models/hf-sentiment-output-transform.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: sentiment-input-transform
spec:
  storageUri: "gs://seldon-models/scv2/examples/huggingface/mlserver_1.3.5/sentiment-input-transform"
  requirements:
  - mlserver
  - python
---
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: sentiment-output-transform
spec:
  storageUri: "gs://seldon-models/scv2/examples/huggingface/mlserver_1.3.5/sentiment-output-transform"
  requirements:
  - mlserver
  - python
seldon model load -f ../../models/hf-sentiment-input-transform.yaml
seldon model load -f ../../models/hf-sentiment-output-transform.yaml
{}
{}
seldon model status sentiment-input-transform -w ModelAvailable | jq -M .
seldon model status sentiment-output-transform -w ModelAvailable | jq -M .
{}
{}
cat ../../pipelines/sentiment-explain.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: sentiment-explain
spec:
  steps:
    - name: sentiment
      tensorMap:
        sentiment-explain.inputs.predict: array_inputs
    - name: sentiment-output-transform
      inputs:
      - sentiment
  output:
    steps:
    - sentiment-output-transform
seldon pipeline load -f ../../pipelines/sentiment-explain.yaml
seldon pipeline status sentiment-explain -w PipelineReady | jq -M .
{
"pipelineName": "sentiment-explain",
"versions": [
{
"pipeline": {
"name": "sentiment-explain",
"uid": "cihuo3svgtec73bj6ncg",
"version": 2,
"steps": [
{
"name": "sentiment",
"tensorMap": {
"sentiment-explain.inputs.predict": "array_inputs"
}
},
{
"name": "sentiment-output-transform",
"inputs": [
"sentiment.outputs"
]
}
],
"output": {
"steps": [
"sentiment-output-transform.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 2,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-07-04T09:53:19.250753906Z",
"modelsReady": true
}
}
]
}
cat ../../models/hf-sentiment-explainer.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Model
metadata:
  name: sentiment-explainer
spec:
  storageUri: "gs://seldon-models/scv2/examples/huggingface/speech-sentiment/explainer"
  explainer:
    type: anchor_text
    pipelineRef: sentiment-explain
seldon model load -f ../../models/hf-sentiment-explainer.yaml
{}
seldon model status sentiment-explainer -w ModelAvailable | jq -M .
Error: Model wait status timeout
We can now create the final pipeline that will take speech and generate sentiment, along with an explanation of why that sentiment was predicted.
cat ../../pipelines/speech-to-sentiment.yaml
apiVersion: mlops.seldon.io/v1alpha1
kind: Pipeline
metadata:
  name: speech-to-sentiment
spec:
  steps:
    - name: whisper
    - name: sentiment
      inputs:
      - whisper
      tensorMap:
        whisper.outputs.output: args
    - name: sentiment-input-transform
      inputs:
      - whisper
    - name: sentiment-explainer
      inputs:
      - sentiment-input-transform
  output:
    steps:
    - sentiment
    - whisper
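Read as a dataflow, the pipeline fans the whisper transcript out to both the sentiment model and the explainer branch, while only the sentiment and whisper outputs are joined into the synchronous response. A rough sketch of the equivalent call graph, with stub functions standing in for the deployed models:

# Illustrative stubs standing in for the deployed models
whisper = lambda audio: {"text": " Cambridge is a great place."}
sentiment_model = lambda out: {"label": "POSITIVE", "score": 0.99}
sentiment_input_transform = lambda out: [out["text"]]
sentiment_explainer = lambda texts: None  # runs asynchronously; not joined into the output

def speech_to_sentiment(audio):
    text = whisper(audio)                                  # step: whisper
    sentiment = sentiment_model(text)                      # tensorMap: whisper output -> args
    sentiment_explainer(sentiment_input_transform(text))   # explainer branch
    return sentiment, text                                 # output steps: sentiment, whisper

print(speech_to_sentiment(b"...audio bytes..."))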
seldon pipeline load -f ../../pipelines/speech-to-sentiment.yaml
seldon pipeline status speech-to-sentiment -w PipelineReady | jq -M .
{
"pipelineName": "speech-to-sentiment",
"versions": [
{
"pipeline": {
"name": "speech-to-sentiment",
"uid": "cihuqb4vgtec73bj6nd0",
"version": 2,
"steps": [
{
"name": "sentiment",
"inputs": [
"whisper.outputs"
],
"tensorMap": {
"whisper.outputs.output": "args"
}
},
{
"name": "sentiment-explainer",
"inputs": [
"sentiment-input-transform.outputs"
]
},
{
"name": "sentiment-input-transform",
"inputs": [
"whisper.outputs"
]
},
{
"name": "whisper"
}
],
"output": {
"steps": [
"sentiment.outputs",
"whisper.outputs"
]
},
"kubernetesMeta": {}
},
"state": {
"pipelineVersion": 2,
"status": "PipelineReady",
"reason": "created pipeline",
"lastChangeTimestamp": "2023-07-04T09:58:04.277171896Z",
"modelsReady": true
}
}
]
}
camera = CameraStream(constraints={'audio': True,'video':False})
recorder = AudioRecorder(stream=camera)
recorder
AudioRecorder(audio=Audio(value=b'', format='webm'), stream=CameraStream(constraints={'audio': True, 'video': …
infer("speech-to-sentiment.pipeline")
cihuqm8fh5ss73der5gg
b'{"text": " Cambridge is a great place."}'
b'{"label": "POSITIVE", "score": 0.9998548030853271}'
We will wait for the explanation, which runs asynchronously to the functional output from the Pipeline above.
while True:
    base64Res = !seldon pipeline inspect speech-to-sentiment.sentiment-explainer.outputs --format json \
        --request-id ${REQUEST_ID}
    j = json.loads(base64Res[0])
    if j["topics"][0]["msgs"] is not None:
        expBase64 = j["topics"][0]["msgs"][0]["value"]["outputs"][0]["contents"]["bytesContents"][0]
        expRaw = base64.b64decode(expBase64)
        exp = json.loads(expRaw)
        print("")
        print("Explanation anchors:", exp["data"]["anchor"])
        break
    else:
        print(".", end='')
        time.sleep(1)
......
Explanation anchors: ['great']
seldon pipeline unload speech-to-sentiment
seldon pipeline unload sentiment-explain
seldon model unload whisper
seldon model unload sentiment
seldon model unload sentiment-explainer
seldon model unload sentiment-output-transform
seldon model unload sentiment-input-transform