In this documentation page we provide an example upgrade path from the kfserving-based to the rclone-based storage initializer. This is required because the secret format expected by these two storage initializers is different.
Storage initializers are used by Seldon's pre-packaged model servers to download model binaries. As explained in the SC 1.8 upgrading notes, seldonio/rclone-storage-initializer became the default storage initializer in v1.8.0.
In this tutorial we aim to provide an intuition of the steps you will have to carry out to migrate to the new rclone-based Storage Initializer. Every cluster configuration will be different, so you should treat this as something you can build from.
Set the starting storage initializer to be the kfserving one
Setup MinIO
Use the provided notebook to install MinIO in your cluster and configure the mc CLI tool.
Copy iris model into local MinIO
Deploy SKLearn Server with kfserving-storage-initializer
First we deploy the model using kfserving-storage-initializer. This is the default Storage Initializer for Seldon Core versions prior to v1.8.0.
Using envSecretRefName
Using serviceAccountName
Preparing rclone-compatible secret
The rclone-based storage initializer expects one to define a new secret. General documentation on credentials handling can be found here, with constantly updated examples of tested configurations.
If we do not yet have an example for the Cloud Storage solution that you are using, please consult the relevant page of the RClone documentation.
Updating envSecretRefName-specified secrets
Updating serviceAccountName-specified secrets and deployments
Upgrade Seldon Core to use new storage initializer
from typing import Dict, List, Tuple, Union
from kubernetes import client, config
# Keys that new_fields_for_secret requires in a secret's ``data`` before it
# builds the rclone-compatible fields; a secret missing any of them is rejected
# with a ValueError.
AWS_SECRET_REQUIRED_FIELDS = [
    "AWS_ACCESS_KEY_ID",
    "AWS_ENDPOINT_URL",
    "AWS_SECRET_ACCESS_KEY",
]
def get_secrets_to_update(namespace: str) -> List[str]:
    """Collect the secrets referenced by Seldon Deployments in a namespace.

    Lists every ``seldondeployments`` custom object in ``namespace`` and reads
    ``envSecretRefName`` from the top-level ``graph`` node of each predictor.

    Parameters
    ----------
    namespace: str
        Namespace in which to look for secrets attached to Seldon Deployments.

    Returns
    -------
    secret_names: List[str]
        Unique secret names, in the order in which they were first encountered.
    """
    api_instance = client.CustomObjectsApi()
    sdeps = api_instance.list_namespaced_custom_object(
        "machinelearning.seldon.io",
        "v1",
        namespace,
        "seldondeployments",
    )

    secret_names = []
    for sdep in sdeps.get("items", []):
        for predictor in sdep.get("spec", {}).get("predictors", []):
            secret_name = predictor.get("graph", {}).get("envSecretRefName")
            # A secret shared by several predictors or deployments is reported
            # only once, so the caller does not patch it repeatedly.
            if secret_name and secret_name not in secret_names:
                secret_names.append(secret_name)
    return secret_names
def new_fields_for_secret(secret: client.V1Secret, provider: str) -> Dict:
    """Build the rclone fields that need to be added to an AWS-style secret.

    Parameters
    ----------
    secret: client.V1Secret
        Kubernetes secret that needs to be updated
    provider: str
        S3 provider: must be minio or aws

    Returns
    -------
    new_fields: dict
        New fields for the secret partitioned into 'data' and 'stringData' fields

    Raises
    ------
    ValueError
        If the secret lacks any key listed in AWS_SECRET_REQUIRED_FIELDS
        (including the case where the secret holds no data at all).
    """
    # ``secret.data`` is None for a secret without data; treat it as empty so
    # the problem surfaces as the ValueError callers already handle instead of
    # a TypeError from ``key not in None``.
    data = secret.data or {}
    for key in AWS_SECRET_REQUIRED_FIELDS:
        if key not in data:
            raise ValueError(
                f"Secret '{secret.metadata.name}' does not contain '{key}' field."
            )

    return {
        # Existing values are copied as-is from the secret's ``data`` into the
        # new ``data`` keys, so no re-encoding is performed here.
        "data": {
            "RCLONE_CONFIG_S3_ACCESS_KEY_ID": data.get("AWS_ACCESS_KEY_ID"),
            "RCLONE_CONFIG_S3_SECRET_ACCESS_KEY": data.get("AWS_SECRET_ACCESS_KEY"),
            "RCLONE_CONFIG_S3_ENDPOINT": data.get("AWS_ENDPOINT_URL"),
        },
        # Plain-text constants go through ``stringData``.
        "stringData": {
            "RCLONE_CONFIG_S3_TYPE": "s3",
            "RCLONE_CONFIG_S3_PROVIDER": provider,
            "RCLONE_CONFIG_S3_ENV_AUTH": "false",
        },
    }
def update_aws_secrets(namespaces: List[str], provider: str):
    """Add rclone fields to the AWS secrets used by Seldon Deployments.

    For every namespace, the secrets returned by ``get_secrets_to_update`` are
    read, extended with the fields from ``new_fields_for_secret`` and patched
    in place. A secret for which the new fields cannot be built is reported
    and skipped.

    Parameters
    ----------
    namespaces: List[str]
        List of namespaces in which to look for Seldon Deployments
    provider: str
        S3 provider: must be minio or aws

    Raises
    ------
    ValueError
        If ``provider`` is neither 'minio' nor 'aws'.
    """
    if provider not in ("minio", "aws"):
        raise ValueError("Provider must be 'minio' or 'aws'")

    core_api = client.CoreV1Api()
    for namespace in namespaces:
        print(f"Updating secrets in namespace {namespace}")
        for secret_name in get_secrets_to_update(namespace):
            current = core_api.read_namespaced_secret(secret_name, namespace)
            try:
                rclone_fields = new_fields_for_secret(current, provider)
            except ValueError as e:
                # Secret is not in the expected AWS format: report and move on.
                print(f" Couldn't upgrade a secret: {e}.")
            else:
                patch = client.V1Secret(
                    data=rclone_fields["data"],
                    string_data=rclone_fields["stringData"],
                )
                core_api.patch_namespaced_secret(secret_name, namespace, patch)
                print(f" Upgraded secret {secret_name}.")
# Keys that must be present in the ``data`` of a secret attached to a
# Service Account for it to be treated as the S3 credentials secret.
AWS_SA_SECRET_REQUIRED_FIELDS = ["awsAccessKeyID", "awsSecretAccessKey"]
# Annotations that the same secret must carry; they supply the http/https
# switch and the S3 endpoint used to build the rclone endpoint URL.
AWS_SA_SECRET_REQUIRED_ANNOTATIONS = [
    "machinelearning.seldon.io/s3-usehttps",
    "machinelearning.seldon.io/s3-endpoint",
]
def get_sdeps_with_service_accounts(namespace: str) -> List[Tuple[dict, List[str]]]:
    """Pair each Seldon Deployment in a namespace with its Service Account names.

    Parameters
    ----------
    namespace: str
        Namespace in which to look for Seldon Deployments.

    Returns
    -------
    output: List[Tuple[dict, List[str]]]
        One tuple per Seldon Deployment: the deployment itself (dict) and a
        list with one entry per predictor. Each entry is the
        ``serviceAccountName`` read from the predictor's top-level ``graph``
        node, or None when the predictor does not define one.
    """
    custom_api = client.CustomObjectsApi()
    listing = custom_api.list_namespaced_custom_object(
        "machinelearning.seldon.io",
        "v1",
        namespace,
        "seldondeployments",
    )

    pairs = []
    for deployment in listing.get("items", []):
        predictors = deployment.get("spec", {}).get("predictors", [])
        # Keep the None placeholders so list positions match predictor indices.
        account_names = [
            predictor.get("graph", {}).get("serviceAccountName", None)
            for predictor in predictors
        ]
        pairs.append((deployment, account_names))
    return pairs
def find_sa_related_secret(sa_name, namespace) -> Union[client.V1Secret, None]:
    """Find the AWS secret related to the specified Service Account.

    Reads every secret listed on the Service Account and returns the first one
    that holds all keys in AWS_SA_SECRET_REQUIRED_FIELDS and carries all
    annotations in AWS_SA_SECRET_REQUIRED_ANNOTATIONS.

    Parameters
    ----------
    sa_name: str
        Name of Service Account
    namespace: str
        Name of namespace that contains the SA.

    Returns
    -------
    secret: client.V1Secret or None
        The matching secret, or None if the Service Account has no such secret.
    """
    v1 = client.CoreV1Api()
    service_account = v1.read_namespaced_service_account(sa_name, namespace)

    # ``secrets``, ``data`` and ``annotations`` are all optional on the API
    # objects and come back as None when unset; fall back to empty containers
    # so an unrelated or empty secret is skipped instead of raising TypeError.
    for secret_ref in service_account.secrets or []:
        secret = v1.read_namespaced_secret(secret_ref.name, namespace)
        data = secret.data or {}
        annotations = secret.metadata.annotations or {}

        if not all(key in data for key in AWS_SA_SECRET_REQUIRED_FIELDS):
            continue
        if not all(key in annotations for key in AWS_SA_SECRET_REQUIRED_ANNOTATIONS):
            continue
        return secret

    return None
def new_field_for_sa_secret(secret: client.V1Secret, provider: str):
    """Build the rclone fields for a secret attached via a Service Account.

    The endpoint URL is assembled from two annotations on the secret:
    ``machinelearning.seldon.io/s3-usehttps`` ("0" -> http, "1" -> https) and
    ``machinelearning.seldon.io/s3-endpoint``.

    Parameters
    ----------
    secret: client.V1Secret
        Kubernetes secret that needs to be updated
    provider: str
        S3 provider: must be minio or aws

    Returns
    -------
    new_fields: dict
        New fields for the secret partitioned into 'data' and 'stringData' fields

    Raises
    ------
    ValueError
        If a key from AWS_SA_SECRET_REQUIRED_FIELDS is missing, if the
        use-https annotation is neither "0" nor "1", or if the endpoint
        annotation is absent.
    """
    # Both attributes are None when unset; treat them as empty so that the
    # checks below raise the documented ValueError rather than a
    # TypeError/AttributeError.
    data = secret.data or {}
    annotations = secret.metadata.annotations or {}

    for key in AWS_SA_SECRET_REQUIRED_FIELDS:
        if key not in data:
            raise ValueError(
                f"Secret '{secret.metadata.name}' does not contain '{key}' field."
            )

    use_https = annotations.get("machinelearning.seldon.io/s3-usehttps", None)
    if use_https == "0":
        protocol = "http"
    elif use_https == "1":
        protocol = "https"
    else:
        raise ValueError(
            f"Cannot determine http(s) protocol for {secret.metadata.name}."
        )

    s3_endpoint = annotations.get("machinelearning.seldon.io/s3-endpoint", None)
    if s3_endpoint is None:
        raise ValueError(f"Cannot determine S3 endpoint for {secret.metadata.name}.")

    endpoint = f"{protocol}://{s3_endpoint}"

    return {
        # Credentials are copied as-is from the secret's existing ``data``.
        "data": {
            "RCLONE_CONFIG_S3_ACCESS_KEY_ID": data.get("awsAccessKeyID"),
            "RCLONE_CONFIG_S3_SECRET_ACCESS_KEY": data.get("awsSecretAccessKey"),
        },
        # Plain-text values, including the endpoint built above, go through
        # ``stringData``.
        "stringData": {
            "RCLONE_CONFIG_S3_TYPE": "s3",
            "RCLONE_CONFIG_S3_PROVIDER": provider,
            "RCLONE_CONFIG_S3_ENV_AUTH": "false",
            "RCLONE_CONFIG_S3_ENDPOINT": endpoint,
        },
    }
def update_aws_sa_resources(namespaces, provider):
    """Update AWS secrets used by Seldon Deployments via related Service Accounts.

    For every predictor that names a Service Account, the related S3 secret is
    located, patched with the rclone fields, and the predictor's ``graph`` is
    given an ``envSecretRefName`` pointing at that secret. The Seldon
    Deployment is then patched with the modified spec.

    Parameters
    ----------
    namespaces: List[str]
        List of namespaces in which to look for Seldon Deployments
    provider: str
        S3 provider: must be minio or aws

    Raises
    ------
    ValueError
        If ``provider`` is neither 'minio' nor 'aws'.
    """
    # Same validation as update_aws_secrets: the value is written verbatim
    # into RCLONE_CONFIG_S3_PROVIDER.
    if provider not in ["minio", "aws"]:
        raise ValueError("Provider must be 'minio' or 'aws'")

    v1 = client.CoreV1Api()
    api_instance = client.CustomObjectsApi()

    for namespace in namespaces:
        print(f"Upgrading namespace {namespace}")
        for sdep, sa_names_per_predictor in get_sdeps_with_service_accounts(namespace):
            if all(sa_name is None for sa_name in sa_names_per_predictor):
                continue

            update_body = {"spec": sdep["spec"]}
            spec_changed = False

            for n, sa_name in enumerate(sa_names_per_predictor):
                if sa_name is None:
                    continue

                secret = find_sa_related_secret(sa_name, namespace)
                if secret is None:
                    print(f"Couldn't find secret with S3 credentials for {sa_name}")
                    continue

                try:
                    # Honour the caller's provider rather than a fixed "minio".
                    new_fields = new_field_for_sa_secret(secret, provider)
                except ValueError as e:
                    # A malformed secret must not abort the whole migration.
                    print(f" Couldn't upgrade a secret: {e}.")
                    continue

                v1.patch_namespaced_secret(
                    secret.metadata.name,
                    namespace,
                    client.V1Secret(
                        data=new_fields["data"], string_data=new_fields["stringData"]
                    ),
                )
                print(f" Upgraded secret {secret.metadata.name}")

                predictor_graph = update_body["spec"]["predictors"][n]["graph"]
                predictor_graph["envSecretRefName"] = secret.metadata.name
                spec_changed = True

            # Nothing was rewritten for this deployment: skip the no-op patch
            # and do not report it as upgraded.
            if not spec_changed:
                continue

            api_instance.patch_namespaced_custom_object(
                "machinelearning.seldon.io",
                "v1",
                namespace,
                "seldondeployments",
                sdep["metadata"]["name"],
                update_body,
            )
            print(f" Upgrade sdep {sdep['metadata']['name']}")