// Protocol buffer definitions for the seldon.mlops.agent package: the
// messages and the AgentService gRPC service declared in this file.
syntax = "proto3";

package seldon.mlops.agent;

option go_package = "github.com/seldonio/seldon-core/apis/go/v2/mlops/agent";
import "mlops/scheduler/scheduler.proto" ;
// [START Messages]
// Request message of AgentService.AgentEvent: reports an event concerning one
// model version on one server replica.
message ModelEventMessage {
  // Kind of event being reported.
  enum Event {
    // Zero value; used when no event kind has been set.
    UNKNOWN_EVENT = 0;
    LOAD_FAIL_MEMORY = 1;
    LOADED = 2;
    LOAD_FAILED = 3;
    UNLOADED = 4;
    UNLOAD_FAILED = 5;
    // Unloaded and removed from local PVC.
    REMOVED = 6;
    REMOVE_FAILED = 7;
    // NOTE(review): value 8 is not assigned here. If it belonged to a removed
    // entry it should be protected with `reserved 8;` - confirm history.
    // Ask server for all models that need to be loaded.
    RSYNC = 9;
  }

  // Name of the server the event relates to.
  string serverName = 1;
  // Index of the server replica the event relates to.
  uint32 replicaIdx = 2;
  // Name of the model the event relates to.
  string modelName = 3;
  // Version of the model the event relates to.
  uint32 modelVersion = 4;
  // The event being reported.
  Event event = 5;
  // Free-form text accompanying the event (presumably a failure reason or
  // detail; confirm against the sender).
  string message = 6;
  // Available memory, in bytes, as named by the field (presumably on the
  // reporting replica; confirm against the sender).
  uint64 availableMemoryBytes = 7;
  // Runtime information for the model, as defined in the scheduler package.
  scheduler.ModelRuntimeInfo runtimeInfo = 8;
}
// Response message of AgentService.AgentEvent. It currently carries no fields.
message ModelEventResponse {}
// Message streamed to AgentService.ModelScalingTrigger: asks for a model
// version on a server replica to be scaled up or down.
message ModelScalingTriggerMessage {
  // Direction of the requested scaling.
  enum Trigger {
    // NOTE(review): SCALE_UP is the zero value, so a message with no trigger
    // set cannot be told apart from an explicit scale-up request.
    SCALE_UP = 0;
    SCALE_DOWN = 1;
  }

  // Name of the server raising the trigger.
  string serverName = 1;
  // Index of the server replica raising the trigger.
  uint32 replicaIdx = 2;
  // Name of the model to scale.
  string modelName = 3;
  // Version of the model to scale.
  uint32 modelVersion = 4;
  // Whether to scale up or down.
  Trigger trigger = 5;
  // Number of replicas required.
  uint32 amount = 6;
  // Optional metrics to expose to the scheduler.
  map<string, uint32> metrics = 7;
}
// Response message of AgentService.ModelScalingTrigger. It currently carries
// no fields.
message ModelScalingTriggerResponse {}
// Request message of AgentService.AgentDrain, identifying a server replica by
// server name and replica index.
message AgentDrainRequest {
  // Name of the server.
  string serverName = 1;
  // Index of the replica within that server.
  uint32 replicaIdx = 2;
}
// Response message of AgentService.AgentDrain.
message AgentDrainResponse {
  // Outcome flag for the drain request (presumably true when the drain
  // succeeded; confirm against the server implementation).
  bool success = 1;
}
// Request message of AgentService.Subscribe, describing the subscribing
// server replica.
message AgentSubscribeRequest {
  // Name of the server.
  string serverName = 1;
  // NOTE(review): the meaning of "shared" is not visible in this file;
  // confirm with the scheduler implementation before relying on it.
  bool shared = 2;
  // Index of the replica within the server.
  uint32 replicaIdx = 3;
  // Configuration of the replica (see ReplicaConfig).
  ReplicaConfig replicaConfig = 4;
  // Model versions listed as loaded (presumably those already loaded on the
  // replica at subscription time; confirm against the agent).
  repeated ModelVersion loadedModels = 5;
  // Available memory, in bytes, as named by the field.
  uint64 availableMemoryBytes = 6;
}
// Configuration of a server replica, carried in AgentSubscribeRequest.
message ReplicaConfig {
  // Inference DNS service name.
  string inferenceSvc = 1;
  // Inference HTTP port.
  int32 inferenceHttpPort = 2;
  // Inference gRPC port.
  int32 inferenceGrpcPort = 3;
  // The memory capacity of the server replica, in bytes.
  uint64 memoryBytes = 4;
  // The list of capabilities of the server, e.g. sklearn, pytorch, xgboost,
  // mlflow.
  repeated string capabilities = 5;
  // The percentage of over commit to allow; set to 0 (%) to disable over
  // commit.
  uint32 overCommitPercentage = 6;
}
// Message streamed back from AgentService.Subscribe: an operation to apply to
// a model version.
message ModelOperationMessage {
  // Operation to perform on the model version.
  enum Operation {
    // Zero value; used when no operation has been set. The name says "EVENT"
    // although this enum lists operations; it is kept as-is because renaming
    // would change generated code.
    UNKNOWN_EVENT = 0;
    LOAD_MODEL = 1;
    UNLOAD_MODEL = 2;
  }

  // The operation to perform.
  Operation operation = 1;
  // The model version the operation targets.
  ModelVersion modelVersion = 2;
  // Autoscaling flag for the model (presumably whether autoscaling is enabled
  // for it; confirm against the scheduler).
  bool autoscalingEnabled = 3;
}
// A model paired with a version number.
message ModelVersion {
  // The model, as defined in the scheduler package.
  scheduler.Model model = 1;
  // Version number of the model.
  uint32 version = 2;
}
// [END Messages]
// [START Services]
// gRPC service exposing the agent-related RPCs declared in this file.
service AgentService {
  // Unary RPC: reports a single model event.
  rpc AgentEvent(ModelEventMessage) returns (ModelEventResponse);

  // Server-streaming RPC: one subscribe request is answered with a stream of
  // model operations.
  rpc Subscribe(AgentSubscribeRequest) returns (stream ModelOperationMessage);

  // Client-streaming RPC: a stream of scaling triggers is answered with a
  // single response.
  rpc ModelScalingTrigger(stream ModelScalingTriggerMessage)
      returns (ModelScalingTriggerResponse);

  // Unary RPC: requests a drain for the server replica named in the request.
  rpc AgentDrain(AgentDrainRequest) returns (AgentDrainResponse);
}
// [END Services]