Copy ---
apiVersion: mlops.seldon.io/v1alpha1
kind: ServerConfig
metadata:
name: mlserver
spec:
podSpec:
terminationGracePeriodSeconds: 120
serviceAccountName: seldon-server
containers:
- image: rclone:latest
imagePullPolicy: IfNotPresent
name: rclone
command:
- rclone
args:
- rcd
- --rc-no-auth
- --config=/rclone/rclone.conf
- --rc-addr=0.0.0.0:5572
- --max-buffer-memory=$(MAX_BUFFER_MEMORY)
env:
- name: MAX_BUFFER_MEMORY
value: "64M"
ports:
- containerPort: 5572
name: rclone
protocol: TCP
lifecycle:
preStop:
httpGet:
port: 9007
path: terminate
resources:
limits:
memory: '256M'
requests:
cpu: "200m"
memory: '100M'
readinessProbe:
failureThreshold: 3
initialDelaySeconds: 5
periodSeconds: 5
successThreshold: 1
tcpSocket:
port: 5572
timeoutSeconds: 1
livenessProbe:
failureThreshold: 1
initialDelaySeconds: 5
periodSeconds: 5
successThreshold: 1
exec:
command:
- rclone
- rc
- rc/noop
timeoutSeconds: 1
volumeMounts:
- mountPath: /mnt/agent
name: mlserver-models
- image: agent:latest
imagePullPolicy: IfNotPresent
command:
- /bin/agent
args:
- --tracing-config-path=/mnt/tracing/tracing.json
name: agent
env:
- name: SELDON_SERVER_CAPABILITIES
value: "mlserver,alibi-detect,alibi-explain,huggingface,lightgbm,mlflow,python,sklearn,spark-mlib,xgboost"
- name: SELDON_OVERCOMMIT_PERCENTAGE
value: "10"
- name: SELDON_MODEL_INFERENCE_LAG_THRESHOLD
value: "30"
- name: SELDON_MODEL_INACTIVE_SECONDS_THRESHOLD
value: "600"
- name: SELDON_SCALING_STATS_PERIOD_SECONDS
value: "20"
- name: SELDON_SERVER_HTTP_PORT
value: "9000"
- name: SELDON_SERVER_GRPC_PORT
value: "9500"
- name: SELDON_REVERSE_PROXY_HTTP_PORT
value: "9001"
- name: SELDON_REVERSE_PROXY_GRPC_PORT
value: "9501"
- name: SELDON_SCHEDULER_HOST
value: "seldon-scheduler"
- name: SELDON_SCHEDULER_PORT
value: "9005"
- name: SELDON_SCHEDULER_TLS_PORT
value: "9055"
- name: SELDON_METRICS_PORT
value: "9006"
- name: SELDON_DRAINER_PORT
value: "9007"
- name: SELDON_READINESS_PORT
value: "9008"
- name: AGENT_TLS_SECRET_NAME
value: ""
- name: AGENT_TLS_FOLDER_PATH
value: ""
- name: SELDON_SERVER_TYPE
value: "mlserver"
- name: SELDON_ENVOY_HOST
value: "seldon-mesh"
- name: SELDON_ENVOY_PORT
value: "80"
- name: SELDON_LOG_LEVEL
value: "warn"
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: MEMORY_REQUEST
valueFrom:
resourceFieldRef:
containerName: mlserver
resource: requests.memory
- name: SELDON_USE_DEPLOYMENTS_FOR_SERVERS
value: "false"
ports:
- containerPort: 9501
name: grpc
protocol: TCP
- containerPort: 9001
name: http
protocol: TCP
- containerPort: 9006
name: metrics
protocol: TCP
- containerPort: 9008
name: readiness-port
lifecycle:
preStop:
httpGet:
port: 9007
path: terminate
readinessProbe:
httpGet:
path: /ready
port: 9008
failureThreshold: 1
periodSeconds: 5
startupProbe:
httpGet:
path: /ready
port: 9008
failureThreshold: 60
periodSeconds: 15
resources:
requests:
cpu: "500m"
memory: '500M'
volumeMounts:
- mountPath: /mnt/agent
name: mlserver-models
- name: config-volume
mountPath: /mnt/config
- name: tracing-config-volume
mountPath: /mnt/tracing
- image: mlserver:latest
imagePullPolicy: IfNotPresent
env:
- name: MLSERVER_HTTP_PORT
value: "9000"
- name: MLSERVER_GRPC_PORT
value: "9500"
- name: MLSERVER_MODELS_DIR
value: "/mnt/agent/models"
- name: MLSERVER_PARALLEL_WORKERS
value: "1"
- name: MLSERVER_LOAD_MODELS_AT_STARTUP
value: "false"
- name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH
value: "1048576000" # 100MB (100 * 1024 * 1024)
resources:
requests:
cpu: 1
memory: '1G'
lifecycle:
preStop:
httpGet:
port: 9007
path: terminate
livenessProbe:
httpGet:
path: /v2/health/live
port: server-http
readinessProbe:
httpGet:
path: /v2/health/live
port: server-http
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
httpGet:
path: /v2/health/live
port: server-http
failureThreshold: 10
periodSeconds: 10
name: mlserver
ports:
- containerPort: 9500
name: server-grpc
protocol: TCP
- containerPort: 9000
name: server-http
protocol: TCP
- containerPort: 8082
name: server-metrics
volumeMounts:
- mountPath: /mnt/agent
name: mlserver-models
readOnly: true
- mountPath: /mnt/certs
name: downstream-ca-certs
readOnly: true
securityContext:
fsGroup: 2000
runAsUser: 1000
runAsNonRoot: true
volumes:
- name: config-volume
configMap:
name: seldon-agent
- name: tracing-config-volume
configMap:
name: seldon-tracing
- name: downstream-ca-certs
secret:
secretName: seldon-downstream-server
optional: true
volumeClaimTemplates:
- name: mlserver-models
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 1Gi