Server Config
Server configurations define how to create an inference server. By default one is provided for Seldon MLServer and one for NVIDIA Triton Inference Server. Both these servers support the Open Inference Protocol which is a requirement for all inference servers. They define how the Kubernetes ReplicaSet is defined which includes the Seldon Agent reverse proxy as well as an Rclone server for downloading artifacts for the server. The Kustomize ServerConfig for MlServer is shown below:
https://github.com/SeldonIO/seldon-core/blob/v2/operator/config/serverconfigs/mlserver.yaml
---
apiVersion: mlops.seldon.io/v1alpha1
kind: ServerConfig
metadata:
name: mlserver
spec:
podSpec:
terminationGracePeriodSeconds: 120
serviceAccountName: seldon-server
containers:
- image: rclone:latest
imagePullPolicy: IfNotPresent
name: rclone
ports:
- containerPort: 5572
name: rclone
protocol: TCP
lifecycle:
preStop:
httpGet:
port: 9007
path: terminate
resources:
requests:
cpu: "200m"
memory: '100M'
readinessProbe:
failureThreshold: 3
initialDelaySeconds: 5
periodSeconds: 5
successThreshold: 1
tcpSocket:
port: 5572
timeoutSeconds: 1
volumeMounts:
- mountPath: /mnt/agent
name: mlserver-models
- image: agent:latest
imagePullPolicy: IfNotPresent
command:
- /bin/agent
args:
- --tracing-config-path=/mnt/tracing/tracing.json
name: agent
env:
- name: SELDON_SERVER_CAPABILITIES
value: "mlserver,alibi-detect,alibi-explain,huggingface,lightgbm,mlflow,python,sklearn,spark-mlib,xgboost"
- name: SELDON_OVERCOMMIT_PERCENTAGE
value: "10"
- name: SELDON_MODEL_INFERENCE_LAG_THRESHOLD
value: "30"
- name: SELDON_MODEL_INACTIVE_SECONDS_THRESHOLD
value: "600"
- name: SELDON_SCALING_STATS_PERIOD_SECONDS
value: "20"
- name: SELDON_SERVER_HTTP_PORT
value: "9000"
- name: SELDON_SERVER_GRPC_PORT
value: "9500"
- name: SELDON_REVERSE_PROXY_HTTP_PORT
value: "9001"
- name: SELDON_REVERSE_PROXY_GRPC_PORT
value: "9501"
- name: SELDON_SCHEDULER_HOST
value: "seldon-scheduler"
- name: SELDON_SCHEDULER_PORT
value: "9005"
- name: SELDON_SCHEDULER_TLS_PORT
value: "9055"
- name: SELDON_METRICS_PORT
value: "9006"
- name: SELDON_DRAINER_PORT
value: "9007"
- name: SELDON_READINESS_PORT
value: "9008"
- name: AGENT_TLS_SECRET_NAME
value: ""
- name: AGENT_TLS_FOLDER_PATH
value: ""
- name: SELDON_SERVER_TYPE
value: "mlserver"
- name: SELDON_ENVOY_HOST
value: "seldon-mesh"
- name: SELDON_ENVOY_PORT
value: "80"
- name: SELDON_LOG_LEVEL
value: "warn"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MEMORY_REQUEST
valueFrom:
resourceFieldRef:
containerName: mlserver
resource: requests.memory
- name: SELDON_USE_DEPLOYMENTS_FOR_SERVERS
value: "false"
ports:
- containerPort: 9501
name: grpc
protocol: TCP
- containerPort: 9001
name: http
protocol: TCP
- containerPort: 9006
name: metrics
protocol: TCP
- containerPort: 9008
name: readiness-port
lifecycle:
preStop:
httpGet:
port: 9007
path: terminate
readinessProbe:
httpGet:
path: /ready
port: 9008
failureThreshold: 1
periodSeconds: 5
startupProbe:
httpGet:
path: /ready
port: 9008
failureThreshold: 60
periodSeconds: 15
resources:
requests:
cpu: "500m"
memory: '500M'
volumeMounts:
- mountPath: /mnt/agent
name: mlserver-models
- name: config-volume
mountPath: /mnt/config
- name: tracing-config-volume
mountPath: /mnt/tracing
- image: mlserver:latest
imagePullPolicy: IfNotPresent
env:
- name: MLSERVER_HTTP_PORT
value: "9000"
- name: MLSERVER_GRPC_PORT
value: "9500"
- name: MLSERVER_MODELS_DIR
value: "/mnt/agent/models"
- name: MLSERVER_MODEL_PARALLEL_WORKERS
value: "1"
- name: MLSERVER_LOAD_MODELS_AT_STARTUP
value: "false"
- name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH
value: "1048576000" # 100MB (100 * 1024 * 1024)
resources:
requests:
cpu: 1
memory: '1G'
lifecycle:
preStop:
httpGet:
port: 9007
path: terminate
livenessProbe:
httpGet:
path: /v2/health/live
port: server-http
readinessProbe:
httpGet:
path: /v2/health/live
port: server-http
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
httpGet:
path: /v2/health/live
port: server-http
failureThreshold: 10
periodSeconds: 10
name: mlserver
ports:
- containerPort: 9500
name: server-grpc
protocol: TCP
- containerPort: 9000
name: server-http
protocol: TCP
- containerPort: 8082
name: server-metrics
volumeMounts:
- mountPath: /mnt/agent
name: mlserver-models
readOnly: true
- mountPath: /mnt/certs
name: downstream-ca-certs
readOnly: true
securityContext:
fsGroup: 2000
runAsUser: 1000
runAsNonRoot: true
volumes:
- name: config-volume
configMap:
name: seldon-agent
- name: tracing-config-volume
configMap:
name: seldon-tracing
- name: downstream-ca-certs
secret:
secretName: seldon-downstream-server
optional: true
volumeClaimTemplates:
- name: mlserver-models
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 1Gi
Last updated
Was this helpful?