Seldon provides some internal gRPC services it uses to manage components.
This API handles communication between the Seldon Scheduler and the Seldon Agent, which runs alongside each inference server. The Agent manages the loading and unloading of models onto that server and acts as a reverse proxy in the data plane for requests to the inference server. A sketch of an agent-side client follows the service definition below.
syntax = "proto3";
package seldon.mlops.agent;
option go_package = "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent";
import "mlops/scheduler/scheduler.proto";
// [START Messages]
message ModelEventMessage {
  string serverName = 1;
  uint32 replicaIdx = 2;
  string modelName = 3;
  uint32 modelVersion = 4;
  enum Event {
    UNKNOWN_EVENT = 0;
    LOAD_FAIL_MEMORY = 1;
    LOADED = 2;
    LOAD_FAILED = 3;
    UNLOADED = 4;
    UNLOAD_FAILED = 5;
    REMOVED = 6; // unloaded and removed from local PVC
    REMOVE_FAILED = 7;
    RSYNC = 9; // Ask server for all models that need to be loaded
  }
  Event event = 5;
  string message = 6;
  uint64 availableMemoryBytes = 7;
}
message ModelEventResponse {
}
message ModelScalingTriggerMessage {
  string serverName = 1;
  uint32 replicaIdx = 2;
  string modelName = 3;
  uint32 modelVersion = 4;
  enum Trigger {
    SCALE_UP = 0;
    SCALE_DOWN = 1;
  }
  Trigger trigger = 5;
  uint32 amount = 6; // number of replicas required
  map<string,uint32> metrics = 7; // optional metrics to expose to the scheduler
}
message ModelScalingTriggerResponse {
}
message AgentDrainRequest {
  string serverName = 1;
  uint32 replicaIdx = 2;
}
message AgentDrainResponse {
  bool success = 1;
}
message AgentSubscribeRequest {
  string serverName = 1;
  bool shared = 2;
  uint32 replicaIdx = 3;
  ReplicaConfig replicaConfig = 4;
  repeated ModelVersion loadedModels = 5;
  uint64 availableMemoryBytes = 6;
}
message ReplicaConfig {
  string inferenceSvc = 1; // inference DNS service name
  int32 inferenceHttpPort = 2; // inference HTTP port
  int32 inferenceGrpcPort = 3; // inference gRPC port
  uint64 memoryBytes = 4; // The memory capacity of the server replica
  repeated string capabilities = 5; // The list of capabilities of the server, e.g. sklearn, pytorch, xgboost, mlflow
  uint32 overCommitPercentage = 6; // The percentage of memory over-commit to allow; set to 0 to disable over-commit
}
message ModelOperationMessage {
  enum Operation {
    UNKNOWN_EVENT = 0;
    LOAD_MODEL = 1;
    UNLOAD_MODEL = 2;
  }
  Operation operation = 1;
  ModelVersion modelVersion = 2;
  bool autoscalingEnabled = 3;
}
message ModelVersion {
  scheduler.Model model = 1;
  uint32 version = 2;
}
// [END Messages]
// [START Services]
service AgentService {
  rpc AgentEvent(ModelEventMessage) returns (ModelEventResponse) {};
  rpc Subscribe(AgentSubscribeRequest) returns (stream ModelOperationMessage) {};
  rpc ModelScalingTrigger(stream ModelScalingTriggerMessage) returns (ModelScalingTriggerResponse) {};
  rpc AgentDrain(AgentDrainRequest) returns (AgentDrainResponse) {};
}
// [END Services]
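To make the contract concrete, here is a minimal sketch of the agent side in Go, assuming stubs generated from the proto above (imported as pb via its go_package path); the scheduler address, server name, ports, and memory figures are illustrative placeholders. The agent subscribes with its replica configuration, receives a stream of load/unload operations, and reports each outcome back with AgentEvent.
package main

import (
    "context"
    "io"
    "log"

    pb "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

func main() {
    // Placeholder address for the scheduler's agent-facing gRPC endpoint.
    const schedulerAddr = "seldon-scheduler:9005"

    conn, err := grpc.Dial(schedulerAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatalf("dial scheduler: %v", err)
    }
    defer conn.Close()
    client := pb.NewAgentServiceClient(conn)

    // Subscribe with this replica's capacity and capabilities; the scheduler
    // replies with a stream of model load/unload operations for this replica.
    stream, err := client.Subscribe(context.Background(), &pb.AgentSubscribeRequest{
        ServerName: "mlserver",
        ReplicaIdx: 0,
        ReplicaConfig: &pb.ReplicaConfig{
            InferenceSvc:      "mlserver-0",
            InferenceHttpPort: 9000,
            InferenceGrpcPort: 9500,
            MemoryBytes:       1_000_000_000,
            Capabilities:      []string{"sklearn", "xgboost"},
        },
        AvailableMemoryBytes: 1_000_000_000,
    })
    if err != nil {
        log.Fatalf("subscribe: %v", err)
    }

    for {
        op, err := stream.Recv()
        if err == io.EOF {
            return
        }
        if err != nil {
            log.Fatalf("recv: %v", err)
        }
        if op.GetOperation() != pb.ModelOperationMessage_LOAD_MODEL {
            continue
        }
        // Load the model onto the inference server here (the model details sit
        // in the embedded scheduler.Model message), then report the result.
        _, err = client.AgentEvent(context.Background(), &pb.ModelEventMessage{
            ServerName:   "mlserver",
            ReplicaIdx:   0,
            ModelName:    "example-model", // illustrative; taken from op.ModelVersion in practice
            ModelVersion: op.GetModelVersion().GetVersion(),
            Event:        pb.ModelEventMessage_LOADED,
        })
        if err != nil {
            log.Printf("report load: %v", err)
        }
    }
}
In the real system the agent also proxies inference traffic and uses ModelScalingTrigger and AgentDrain, but the subscribe/event loop above is the core of the model-management flow.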
A second internal API, the Chainer API, is used for communication between the Seldon Scheduler and the dataflow engine that builds the Kafka streaming topology for Pipelines; a sketch of an engine-side client follows the service definition.
syntax = "proto3";
package seldon.mlops.chainer;
option go_package = "github.com/seldonio/seldon-core/apis/go/v2/mlops/chainer";
option java_package = "io.seldon.mlops.chainer";
message PipelineSubscriptionRequest {
  string name = 1;
}
message PipelineUpdateMessage {
  enum PipelineOperation {
    Unknown = 0;
    Create = 1;
    Delete = 2;
  }
  PipelineOperation op = 1;
  string pipeline = 2;
  uint32 version = 3;
  string uid = 4;
  repeated PipelineStepUpdate updates = 5;
}
message PipelineStepUpdate {
  enum PipelineJoinType {
    Unknown = 0;
    Inner = 1;
    Outer = 2;
    Any = 3;
  }
  // https://docs.google.com/document/d/1tX-uaOvngx1RpEyWEZ4EbEcU8D0OgYuRWVb2UAi85n4/edit
  // Pipeline Resource example, e.g. transform.outputs.traffic
  // seldon.<namespace>.<model name>.<inputs|outputs>.<tensor name>
  repeated PipelineTopic sources = 1;
  repeated PipelineTopic triggers = 2;
  PipelineTopic sink = 3;
  PipelineJoinType inputJoinTy = 4;
  PipelineJoinType triggersJoinTy = 5;
  bool passEmptyResponses = 6; // Forward empty responses to following steps, default false
  optional uint32 joinWindowMs = 7; // Join window in milliseconds, some nonzero default (TBD)
  repeated PipelineTensorMapping tensorMap = 8; // optional list of tensor name mappings
  Batch batch = 9; // Batch settings
}
message PipelineTensorMapping {
  string pipelineName = 1;
  string topicAndTensor = 2;
  string tensorName = 3;
}
message PipelineTopic {
  string pipelineName = 1;
  string topicName = 2;
  optional string tensor = 3;
}
message Batch {
  optional uint32 size = 1;
  optional uint32 windowMs = 2;
  bool rolling = 3;
}
message PipelineUpdateStatusMessage {
  // TODO - include `name` to identify which transformer the message comes from
  PipelineUpdateMessage update = 1;
  bool success = 2;
  string reason = 3;
}
message PipelineUpdateStatusResponse {
}
service Chainer {
  rpc SubscribePipelineUpdates(PipelineSubscriptionRequest) returns (stream PipelineUpdateMessage) {};
  rpc PipelineUpdateEvent(PipelineUpdateStatusMessage) returns (PipelineUpdateStatusResponse) {};
}
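Similarly, a minimal sketch of the dataflow-engine side of the Chainer contract in Go (the real engine is a JVM component, per the java_package option, so this is purely illustrative), again assuming generated stubs imported as pb and a placeholder scheduler address. The engine subscribes under a stable name, applies each streamed pipeline update to its streaming topology, and acknowledges it with PipelineUpdateEvent.
package main

import (
    "context"
    "io"
    "log"

    pb "github.com/seldonio/seldon-core/apis/go/v2/mlops/chainer"
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

func main() {
    // Placeholder address for the scheduler's chainer-facing gRPC endpoint.
    const schedulerAddr = "seldon-scheduler:9008"

    conn, err := grpc.Dial(schedulerAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatalf("dial scheduler: %v", err)
    }
    defer conn.Close()
    client := pb.NewChainerClient(conn)

    // Subscribe under a stable name so the scheduler can track this engine.
    stream, err := client.SubscribePipelineUpdates(context.Background(),
        &pb.PipelineSubscriptionRequest{Name: "dataflow-engine-0"})
    if err != nil {
        log.Fatalf("subscribe: %v", err)
    }

    for {
        update, err := stream.Recv()
        if err == io.EOF {
            return
        }
        if err != nil {
            log.Fatalf("recv: %v", err)
        }
        // Create or delete the streaming topology for the pipeline here, then
        // acknowledge the update (success or failure) back to the scheduler.
        _, err = client.PipelineUpdateEvent(context.Background(), &pb.PipelineUpdateStatusMessage{
            Update:  update,
            Success: true,
        })
        if err != nil {
            log.Printf("ack pipeline %s: %v", update.GetPipeline(), err)
        }
    }
}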