Local Metrics

Run these examples from the samples folder at the root of the repo.

This notebook tests the exposed Prometheus metrics of model and pipeline servers.

Requires: the prometheus_client and requests libraries. See the docs for the full set of available metrics.

# Prometheus metrics endpoints of the locally running servers.
# host:port values match the sample local install — adjust if yours differ.
mlserver_metrics_host="0.0.0.0:9006"  # MLServer model server
triton_metrics_host="0.0.0.0:9007"  # Triton model server
pipeline_metrics_host="0.0.0.0:9009"  # pipeline gateway
from prometheus_client.parser import text_string_to_metric_families
import requests

def scrape_metrics(host, timeout=10):
    """Fetch and parse the Prometheus metrics exposed at ``host``.

    Parameters:
        host: "address:port" string of the metrics endpoint.
        timeout: seconds to wait for the HTTP response; without one,
            requests would block indefinitely on a hung endpoint.

    Returns:
        dict mapping metric family name to its parsed metric family.

    Raises:
        requests.RequestException: on connection failure, timeout,
            or a non-2xx HTTP status.
    """
    response = requests.get(f"http://{host}/metrics", timeout=timeout)
    # Fail loudly on HTTP errors rather than parsing an error page as metrics.
    response.raise_for_status()
    return {
        family.name: family
        for family in text_string_to_metric_families(response.text)
    }

def print_sample(family, label, value):
    """Print every sample in ``family`` whose ``label`` equals ``value``.

    Parameters:
        family: a parsed Prometheus metric family with a ``samples`` list.
        label: label name to match on.
        value: required label value.

    Samples that do not carry ``label`` at all are skipped via ``dict.get``
    instead of raising KeyError, since samples in one family may carry
    different label sets.
    """
    for sample in family.samples:
        if sample.labels.get(label) == value:
            print(sample)

def get_model_infer_count(host, model_name):
    """Scrape ``host`` and print the ``seldon_model_infer`` samples for ``model_name``."""
    print_sample(scrape_metrics(host)["seldon_model_infer"], "model", model_name)

def get_pipeline_infer_count(host, pipeline_name):
    """Scrape ``host`` and print the ``seldon_pipeline_infer`` samples for ``pipeline_name``."""
    families = scrape_metrics(host)
    infer_family = families["seldon_pipeline_infer"]
    print_sample(infer_family, "pipeline", pipeline_name)

MLServer Model

seldon model load -f ./models/sklearn-iris-gs.yaml
seldon model status iris -w ModelAvailable | jq -M .
{}
{}
seldon model infer iris -i 50 \
  '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}'
Success: map[:iris_1::50]
seldon model infer iris --inference-mode grpc -i 100 \
   '{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}'
Success: map[:iris_1::100]
get_model_infer_count(mlserver_metrics_host,"iris")
Sample(name='seldon_model_infer_total', labels={'code': '200', 'method_type': 'rest', 'model': 'iris', 'model_internal': 'iris_1', 'server': 'mlserver', 'server_replica': '0'}, value=50.0, timestamp=None, exemplar=None)
Sample(name='seldon_model_infer_total', labels={'code': 'OK', 'method_type': 'grpc', 'model': 'iris', 'model_internal': 'iris_1', 'server': 'mlserver', 'server_replica': '0'}, value=100.0, timestamp=None, exemplar=None)
seldon model unload iris
{}

Triton Model

Load the model.

seldon model load -f ./models/tfsimple1.yaml
seldon model status tfsimple1 -w ModelAvailable | jq -M .
{}
{}
seldon model infer tfsimple1 -i 50 \
    '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
Success: map[:tfsimple1_1::50]
seldon model infer tfsimple1 --inference-mode grpc -i 100 \
    '{"model_name":"tfsimple1","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}'
Success: map[:tfsimple1_1::100]
get_model_infer_count(triton_metrics_host,"tfsimple1")
Sample(name='seldon_model_infer_total', labels={'code': '200', 'method_type': 'rest', 'model': 'tfsimple1', 'model_internal': 'tfsimple1_1', 'server': 'triton', 'server_replica': '0'}, value=50.0, timestamp=None, exemplar=None)
Sample(name='seldon_model_infer_total', labels={'code': 'OK', 'method_type': 'grpc', 'model': 'tfsimple1', 'model_internal': 'tfsimple1_1', 'server': 'triton', 'server_replica': '0'}, value=100.0, timestamp=None, exemplar=None)
seldon model unload tfsimple1
{}

Pipeline

seldon model load -f ./models/tfsimple1.yaml
seldon model load -f ./models/tfsimple2.yaml
seldon model status tfsimple1 -w ModelAvailable | jq -M .
seldon model status tfsimple2 -w ModelAvailable | jq -M .
seldon pipeline load -f ./pipelines/tfsimples.yaml
seldon pipeline status tfsimples -w PipelineReady
{}
{}
{}
{}
{}
{"pipelineName":"tfsimples", "versions":[{"pipeline":{"name":"tfsimples", "uid":"cdqji39qa12c739ab3o0", "version":2, "steps":[{"name":"tfsimple1"}, {"name":"tfsimple2", "inputs":["tfsimple1.outputs"], "tensorMap":{"tfsimple1.outputs.OUTPUT0":"INPUT0", "tfsimple1.outputs.OUTPUT1":"INPUT1"}}], "output":{"steps":["tfsimple2.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":2, "status":"PipelineReady", "reason":"created pipeline", "lastChangeTimestamp":"2022-11-16T19:25:01.255955114Z"}}]}
seldon pipeline infer tfsimples -i 50 \
    '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}'
Success: map[:tfsimple1_1::50 :tfsimple2_1::50 :tfsimples.pipeline::50]
get_pipeline_infer_count(pipeline_metrics_host,"tfsimples")
Sample(name='seldon_pipeline_infer_total', labels={'code': '200', 'method_type': 'rest', 'pipeline': 'tfsimples', 'server': 'pipeline-gateway'}, value=50.0, timestamp=None, exemplar=None)
seldon model unload tfsimple1
seldon model unload tfsimple2
seldon pipeline unload tfsimples
{}
{}
{}

Last updated

Was this helpful?