SkyPilot API Server Helm Chart Values#
The SkyPilot API server helm chart provides typical helm values as configuration entries. Configuration values can be passed in two ways when installing the chart:
--values (or -f): Specify a YAML file with overrides.
cat <<EOF > values.yaml
apiService:
  image: berkeleyskypilot/skypilot:0.9.2
EOF
helm install $RELEASE_NAME skypilot/skypilot-nightly --devel --values values.yaml
--set: Specify overrides on the command line.
helm install $RELEASE_NAME skypilot/skypilot-nightly --set apiService.image="berkeleyskypilot/skypilot:0.9.2"
Values#
Below are the available Helm value keys and the default value of each key:
apiService:
  image: berkeleyskypilot/skypilot-nightly:latest
  upgradeStrategy: Recreate
  replicas: 1
  enableUserManagement: false
  initialBasicAuthCredentials: "skypilot:$apr1$c1h4rNxt$2NnL7dIDUV0tWsnuNMGSr/"
  initialBasicAuthSecret: null
  authUserHeaderName: null
  preDeployHook: |-
    # Run commands before deploying the API server, e.g. installing an admin
    # policy. Remember to set the admin policy in the config section below.
    echo "Pre-deploy hook"
    # Uncomment the following lines to install the admin policy
    # echo "Installing admin policy"
    # pip install git+https://github.com/michaelvll/admin-policy-examples
  config: null
  dbConnectionSecretName: null
  dbConnectionString: null
  sshNodePools: null
  sshKeySecret: null
  skipResourceCheck: false
  resources:
    requests:
      cpu: "4"
      memory: "8Gi"
    limits:
      cpu: "4"
      memory: "8Gi"
  skypilotDev: false
  metrics:
    enabled: false
    port: 9090
  terminationGracePeriodSeconds: 60
  annotations: null
  extraEnvs: null
  extraVolumes: null
  extraVolumeMounts: null
auth:
  oauth:
    enabled: false
    oidc-issuer-url: null
    client-id: ""
    client-secret: ""
    client-details-from-secret: ""
    email-domain: "*"
    session-store-type: "redis"
    redis-url: null
    redis-secret: null
    cookie-refresh: null
    cookie-expire: null
  serviceAccount:
    enabled: null
storage:
  enabled: true
  storageClassName: ""
  accessMode: ReadWriteOnce
  size: 10Gi
  selector: {}
  volumeName: ""
  annotations: {}
ingress:
  enabled: true
  authSecret: null
  authCredentials: null
  host: null
  path: '/'
  ingressClassName: nginx
  nodePortEnabled: null
  httpNodePort: 30050
  httpsNodePort: 30051
  annotations: null
  # Deprecated: use auth.oauth instead.
  oauth2-proxy:
    enabled: false
    oidc-issuer-url: null
    client-id: ""
    client-secret: ""
    client-details-from-secret: ""
    image: "quay.io/oauth2-proxy/oauth2-proxy:v7.9.0"
    use-https: false
    email-domain: "*"
    session-store-type: "redis"
    redis-url: null
    cookie-refresh: null
    cookie-expire: null
ingress-nginx:
  enabled: true
  controller:
    service:
      type: LoadBalancer
      annotations:
        service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
        cloud.google.com/l4-rbs: "enabled"
        service.beta.kubernetes.io/port_443_health-probe_protocol: "TCP"
        service.beta.kubernetes.io/port_80_health-probe_protocol: "TCP"
    config:
      http-snippet: |
        map $http_upgrade $connection_upgrade {
          default upgrade;
          '' close;
        }
rbac:
  create: true
  serviceAccountName: ""
  namespaceRules:
    - apiGroups: [ "" ]
      resources: [ "pods", "pods/status", "pods/exec", "pods/portforward" ]
      verbs: [ "*" ]
    - apiGroups: [ "" ]
      resources: [ "services" ]
      verbs: [ "*" ]
    - apiGroups: [ "" ]
      resources: [ "secrets" ]
      verbs: [ "*" ]
    - apiGroups: [ "" ]
      resources: [ "events" ]
      verbs: [ "get", "list", "watch" ]
    - apiGroups: [ "" ]
      resources: [ "configmaps" ]
      verbs: [ "get", "patch" ]
    - apiGroups: ["apps"]
      resources: ["deployments", "deployments/status"]
      verbs: ["*"]
    - apiGroups: [""]
      resources: ["persistentvolumeclaims"]
      verbs: ["*"]
  clusterRules:
    - apiGroups: [ "" ]
      resources: [ "nodes" ]
      verbs: [ "get", "list", "watch" ]
    - apiGroups: [ "" ]
      resources: [ "pods" ]
      verbs: [ "get", "list", "watch" ]
    - apiGroups: [ "node.k8s.io" ]
      resources: [ "runtimeclasses" ]
      verbs: [ "get", "list", "watch" ]
    - apiGroups: [ "networking.k8s.io" ]
      resources: [ "ingressclasses" ]
      verbs: [ "get", "list", "watch" ]
    - apiGroups: [""]
      resources: ["services"]
      verbs: ["list", "get"]
  manageRbacPolicies: true
  manageSystemComponents: true
  serviceAccountAnnotations: null
kubernetesCredentials:
  useApiServerCluster: true
  useKubeconfig: false
  kubeconfigSecretName: kube-credentials
  inclusterNamespace: null
awsCredentials:
  enabled: false
  awsSecretName: aws-credentials
  accessKeyIdKeyName: aws_access_key_id
  secretAccessKeyKeyName: aws_secret_access_key
gcpCredentials:
  enabled: false
  projectId: null
  gcpSecretName: gcp-credentials
r2Credentials:
  enabled: false
  r2SecretName: r2-credentials
runpodCredentials:
  enabled: false
  runpodSecretName: runpod-credentials
lambdaCredentials:
  enabled: false
  lambdaSecretName: lambda-credentials
vastCredentials:
  enabled: false
  vastSecretName: vast-credentials
nebiusCredentials:
  enabled: false
  tenantId: null
  nebiusSecretName: nebius-credentials
extraInitContainers: null
podSecurityContext: {}
securityContext:
  capabilities:
    drop:
      - ALL
  allowPrivilegeEscalation: false
runtimeClassName: null
prometheus:
  enabled: false
grafana:
  enabled: false
Fields#
apiService
#
Configuration for the SkyPilot API server deployment.
apiService.image
#
Docker image to use for the API server. The default value depends on the chart you are using:
Stable release of the chart (skypilot/skypilot): the same stable release of SkyPilot will be used by default, i.e. berkeleyskypilot/skypilot:$CHART_VERSION.
Nightly release of the chart (skypilot/skypilot-nightly): the same nightly build of SkyPilot will be used by default, i.e. berkeleyskypilot/skypilot-nightly:$CHART_VERSION.
Installing from source: the latest nightly build of SkyPilot will be used by default, i.e. berkeleyskypilot/skypilot-nightly:latest.
To use a specific release version, set the image
value to the desired version:
apiService:
image: berkeleyskypilot/skypilot:0.10.0
To use a nightly build, find the desired nightly version on PyPI and update the image
value:
apiService:
# Replace 1.0.0.devYYYYMMDD with the desired nightly version
image: berkeleyskypilot/skypilot-nightly:1.0.0.devYYYYMMDD
apiService.upgradeStrategy
#
Upgrade strategy for the API server deployment. Available options are:
Recreate: Delete the old pod first and create a new one (has downtime).
RollingUpdate: Create a new pod first, wait for it to be ready, then delete the old one (zero downtime).
When set to RollingUpdate
, an external database must be configured via apiService.dbConnectionSecretName or apiService.dbConnectionString.
Default: "Recreate"
apiService:
upgradeStrategy: Recreate
apiService.replicas
#
Number of replicas to deploy for the API server. Replicas > 1 is not well tested and requires a PVC that supports ReadWriteMany.
Default: 1
apiService:
replicas: 1
apiService.enableUserManagement
#
Enable basic auth and user management in the API server. This is ignored if ingress.oauth2-proxy.enabled
is true
.
If enabled, users can be created, updated, and deleted in the Dashboard, and basic auth is performed by the API server instead of the ingress controller. In this case, the basic auth configuration ingress.authCredentials
and ingress.authSecret
in the ingress will be ignored.
Default: false
apiService:
enableUserManagement: false
apiService.initialBasicAuthCredentials
#
Initial basic auth credentials for the API server.
The user in the credentials will be used to create a new admin user in the API server, and the password can be updated by the user in the Dashboard.
If both initialBasicAuthCredentials
and initialBasicAuthSecret
are set, initialBasicAuthSecret
will be used. They are only used when enableUserManagement
is true.
Default: "skypilot:$apr1$c1h4rNxt$2NnL7dIDUV0tWsnuNMGSr/"
apiService:
initialBasicAuthCredentials: "skypilot:$apr1$c1h4rNxt$2NnL7dIDUV0tWsnuNMGSr/"
apiService.initialBasicAuthSecret
#
Initial basic auth secret for the API server. If not specified, a new secret will be created using initialBasicAuthCredentials
.
To create a new secret, you can use the following command:
WEB_USERNAME=skypilot
WEB_PASSWORD=skypilot
AUTH_STRING=$(htpasswd -nb $WEB_USERNAME $WEB_PASSWORD)
NAMESPACE=skypilot
kubectl create secret generic initial-basic-auth \
--from-literal=auth=$AUTH_STRING \
-n $NAMESPACE
Default: null
apiService:
initialBasicAuthSecret: null
apiService.authUserHeaderName
#
Custom header name for user authentication with auth proxies. This overrides the default X-Auth-Request-Email
header.
This setting is useful when integrating with auth proxies that use different header names for user identification, such as X-Remote-User
, X-Auth-User
, or custom headers specific to your organization’s auth infrastructure.
Default: null
(uses X-Auth-Request-Email
)
apiService:
authUserHeaderName: X-Custom-User-Header
apiService.preDeployHook
#
Commands to run before deploying the API server (e.g., install admin policy).
Default: see the yaml below.
apiService:
preDeployHook: |-
# Run commands before deploying the API server, e.g. installing an admin
# policy. Remember to set the admin policy in the config section below.
echo "Pre-deploy hook"
# Uncomment the following lines to install the admin policy
# echo "Installing admin policy"
# pip install git+https://github.com/michaelvll/admin-policy-examples
apiService.config
#
Content of the SkyPilot config.yaml to set on the API server. Set to null
to use an empty config. Refer to setting the SkyPilot config for more details.
Default: null
apiService:
config: |-
allowed_clouds:
- aws
- gcp
apiService.dbConnectionSecretName
#
Name of the secret containing the database connection string for the API server. This is used to configure an external database for the API server.
If either this field or apiService.dbConnectionString is set, apiService.config must be null. Refer to the API server deployment guide for more details on configuring an external database.
Default: null
apiService:
dbConnectionSecretName: my-db-connection-secret
apiService.dbConnectionString
#
Database connection string for the API server. This is a shortcut for setting the database connection string directly instead of using a secret.
If either this field or apiService.dbConnectionSecretName is set, apiService.config must be null
. Refer to the API server deployment guide for more details on configuring an external database.
Default: null
apiService:
dbConnectionString: "postgresql://user:password@host:port/database"
apiService.enableServiceAccounts
#
Enable service accounts in the API server.
Deprecated: use auth.serviceAccount.enabled instead.
Default: true
apiService.sshNodePools
#
Content of the ~/.sky/ssh_node_pools.yaml
to set on the API server. Set to null
to use an empty SSH node pools configuration. Refer to Deploy SkyPilot on existing machines for more details.
Default: null
apiService:
sshNodePools: |-
my-cluster:
hosts:
- 1.2.3.4
- 1.2.3.5
my-box:
hosts:
- hostname_in_ssh_config
apiService.sshKeySecret
#
Optional secret that contains SSH identity files for the API server to use. All entries in the secret will be mounted to the ~/.ssh/
directory in the API server. Refer to Deploy SkyPilot on existing machines for more details.
Default: null
apiService:
sshKeySecret: my-ssh-key-secret
The content of the secret should be like:
apiVersion: v1
kind: Secret
metadata:
name: my-ssh-key-secret
data:
id_rsa: <secret-content>
apiService.skipResourceCheck
#
Skip the resource check for the API server (not recommended for production). Refer to tuning API server resources for more details.
Default: false
apiService:
skipResourceCheck: false
apiService.resources
#
Resource requests and limits for the API server container. Refer to tuning API server resources for how to tune the resources.
Default: see the yaml below.
apiService:
resources:
requests:
cpu: "4"
memory: "8Gi"
limits:
cpu: "4"
memory: "8Gi"
apiService.skypilotDev
#
Enable developer mode for SkyPilot.
Default: false
apiService:
skypilotDev: false
apiService.metrics
#
Configuration for metrics collection on the API server.
Default: see the yaml below.
apiService:
metrics:
enabled: false
port: 9090
apiService.metrics.enabled
#
Enable exposing API metrics from the API server (see docs/source/reference/api-server/examples/api-server-metrics-setup.rst). If this is enabled and the API server image does not support metrics, the deployment will fail.
Default: false
apiService:
metrics:
enabled: true
apiService.metrics.port
#
The port to expose the metrics on.
Default: 9090
apiService:
metrics:
port: 9090
apiService.terminationGracePeriodSeconds
#
The number of seconds to wait for the API server to finish processing the request before shutting down. Refer to Graceful upgrade for more details.
Default: 60
apiService:
terminationGracePeriodSeconds: 300
apiService.annotations
#
Custom annotations for the API server deployment.
Default: null
apiService:
annotations:
my-annotation: "my-value"
apiService.extraEnvs
#
Extra environment variables to set before starting the API server.
Default: null
apiService:
extraEnvs:
- name: MY_ADDITIONAL_ENV_VAR
value: "my_value"
apiService.extraVolumes
#
Extra volumes to mount to the API server.
Default: null
apiService:
extraVolumes:
- name: my-volume
secret:
secretName: my-secret
apiService.extraVolumeMounts
#
Extra volume mounts to mount to the API server.
Default: null
apiService:
extraVolumeMounts:
- name: my-volume
mountPath: /my-path
subPath: my-file
auth
#
Authentication configuration for the API server.
auth.oauth
#
OAuth2 Proxy based authentication configuration for the API server.
Default: see the yaml below.
auth:
oauth:
enabled: false
oidc-issuer-url: null
client-id: ""
client-secret: ""
client-details-from-secret: ""
email-domain: "*"
session-store-type: "redis"
redis-url: null
redis-secret: null
cookie-refresh: null
cookie-expire: null
auth.oauth.enabled
#
Enable/disable OAuth2 Proxy based authentication on the API server. This is mutually exclusive with authentications on ingress, including basic auth and OAuth2 Proxy on ingress.
Default: false
auth:
oauth:
enabled: true
auth.oauth.oidc-issuer-url
#
The URL of the OIDC issuer (e.g., your Okta domain). Required when oauth is enabled.
Default: null
auth:
oauth:
oidc-issuer-url: "https://mycompany.okta.com"
auth.oauth.client-id
#
The OAuth client ID from your OIDC provider (e.g., Okta). Required when oauth is enabled.
Default: ""
auth:
oauth:
client-id: "0abc123def456"
auth.oauth.client-secret
#
The OAuth client secret from your OIDC provider (e.g., Okta). Required when oauth is enabled.
Default: ""
auth:
oauth:
client-secret: "abcdef123456"
auth.oauth.client-details-from-secret
#
Alternative way to get both client ID and client secret from a Kubernetes secret. If set to a secret name, both client-id
and client-secret
values above are ignored. The secret must contain keys named either client-id
and client-secret
OR client_id
and client_secret
. Both dash and underscore formats are supported for compatibility with different secret managers (e.g., HashiCorp Vault requires underscore format due to key naming constraints).
Default: ""
auth:
oauth:
client-details-from-secret: "oauth-client-credentials"
auth.oauth.email-domain
#
Email domains to allow for authentication. Use "*"
to allow all email domains.
Default: "*"
auth:
oauth:
email-domain: "mycompany.com"
auth.oauth.session-store-type
#
Session storage type for OAuth2 Proxy. Can be set to "cookie"
or "redis"
. Using Redis as a session store results in smaller cookies and better performance for large-scale deployments.
Default: "redis"
auth:
oauth:
session-store-type: "redis"
auth.oauth.redis-url
#
URL to connect to an external Redis instance for session storage. If set to null
and session-store-type
is "redis"
, a Redis instance will be automatically deployed. Format: redis://host[:port][/db-number]
Default: null
auth:
oauth:
redis-url: "redis://redis-host:6379/0"
auth.oauth.redis-secret
#
Alternative way to specify Redis connection URL using a Kubernetes secret. The secret must contain a key named redis_url
with the Redis connection URL in the format redis://host[:port][/db-number]
.
This field is mutually exclusive with redis-url.
Default: null
auth:
oauth:
redis-secret: "my-redis-credentials"
auth.serviceAccount
#
Service account token based authentication configuration for the API server.
auth:
serviceAccount:
enabled: null
auth.serviceAccount.enabled
#
Enable service account tokens for automated API access. If enabled, users can create bearer tokens to bypass SSO authentication for automated systems.
JWT secrets are automatically stored in the database for persistence across restarts. This setting defaults to the value of .apiService.enableServiceAccounts (which is true
by default) for backward compatibility. Setting this field will override the default value.
Default: null
auth:
serviceAccount:
enabled: true
storage
#
storage.enabled
#
Enable persistent storage for the API server. Setting this to false
is prone to data loss and should only be used for testing.
Default: true
storage:
enabled: true
storage.storageClassName
#
Storage class to use for the API server. Leave empty to use the default storage class of the hosting Kubernetes cluster.
Default: ""
storage:
storageClassName: gp2
storage.accessMode
#
Access mode for the persistent storage volume. Can be set to ReadWriteOnce
or ReadWriteMany
depending on what is supported by the storage class.
Default: ReadWriteOnce
storage:
accessMode: ReadWriteOnce
storage.size
#
Size of the persistent storage volume for the API server.
Default: 10Gi
storage:
size: 10Gi
storage.selector
#
Selector for matching specific PersistentVolumes. Usually left empty.
Default: {}
storage:
selector: {}
storage.volumeName
#
Name of the PersistentVolume to bind to. Usually left empty to let Kubernetes select and bind the volume automatically.
Default: ""
storage:
volumeName: ""
storage.annotations
#
Annotations to add to the PersistentVolumeClaim.
Default: {}
storage:
annotations: {}
ingress
#
ingress.enabled
#
Enable ingress for the API server. Set to true
to expose the API server via an ingress controller.
Default: true
ingress:
enabled: true
ingress.authSecret
#
Name of the Kubernetes secret containing basic auth credentials for ingress. If not specified, a new secret will be created using authCredentials
. This is ignored if ingress.oauth2-proxy.enabled
is true
.
One of ingress.authSecret
or ingress.authCredentials
must be set, unless ingress.oauth2-proxy.enabled
is true
.
Default: null
ingress:
authSecret: null
ingress.authCredentials
#
Basic auth credentials in the format username:encrypted_password
. Used only if authSecret
is not set. This is ignored if ingress.oauth2-proxy.enabled
is true
.
One of ingress.authSecret
or ingress.authCredentials
must be set, unless ingress.oauth2-proxy.enabled
is true
.
Default: null
ingress:
authCredentials: "username:$apr1$encrypted_password"
ingress.path
#
The base path of the API server. You may use different paths to expose multiple API servers through a unified ingress controller.
Default: '/'
ingress:
path: '/'
ingress.host
#
Host to exclusively accept traffic from (optional). Will respond to all host requests if not set.
Default: null
ingress:
host: api.mycompany.com
ingress.ingressClassName
#
Ingress class name for newer Kubernetes versions.
Default: nginx
ingress:
ingressClassName: nginx
ingress.nodePortEnabled
#
Whether to enable an additional NodePort service for the ingress controller. Deprecated: use ingress-nginx.controller.service.type=NodePort
instead.
Default: null
ingress:
nodePortEnabled: false
ingress.httpNodePort
#
Specific nodePort to use for HTTP traffic. Deprecated: use ingress-nginx.controller.service.nodePorts.http
instead.
Default: 30050
ingress:
httpNodePort: 30050
ingress.httpsNodePort
#
Specific nodePort to use for HTTPS traffic. Deprecated: use ingress-nginx.controller.service.nodePorts.https
instead.
Default: 30051
ingress:
httpsNodePort: 30051
ingress.annotations
#
Custom annotations for the ingress controller.
Default: null
ingress:
annotations:
my-annotation: "my-value"
ingress.oauth2-proxy
#
Configuration for the OAuth2 Proxy authentication for the API server.
Deprecated: use auth.oauth instead.
Default: see the yaml below.
ingress:
oauth2-proxy:
enabled: false
# Required when enabled:
oidc-issuer-url: null
client-id: ""
client-secret: ""
client-details-from-secret: ""
# Optional settings:
image: "quay.io/oauth2-proxy/oauth2-proxy:v7.9.0"
use-https: false
email-domain: "*"
session-store-type: "redis"
redis-url: null
cookie-refresh: null
cookie-expire: null
ingress.oauth2-proxy.enabled
#
Enable OAuth2 Proxy for authentication. When enabled, this will deploy an OAuth2 Proxy component and configure the ingress to use it for authentication instead of basic auth.
Default: false
ingress:
oauth2-proxy:
enabled: true
ingress.oauth2-proxy.oidc-issuer-url
#
The URL of the OIDC issuer (e.g., your Okta domain). Required when oauth2-proxy is enabled.
Default: null
ingress:
oauth2-proxy:
oidc-issuer-url: "https://mycompany.okta.com"
ingress.oauth2-proxy.client-id
#
The OAuth client ID from your OIDC provider (e.g., Okta). Required when oauth2-proxy is enabled.
Default: ""
ingress:
oauth2-proxy:
client-id: "0abc123def456"
ingress.oauth2-proxy.client-secret
#
The OAuth client secret from your OIDC provider (e.g., Okta). Required when oauth2-proxy is enabled.
Default: ""
ingress:
oauth2-proxy:
client-secret: "abcdef123456"
ingress.oauth2-proxy.client-details-from-secret
#
Alternative way to get both client ID and client secret from a Kubernetes secret. If set to a secret name, both client-id
and client-secret
values above are ignored. The secret must contain keys named client-id
and client-secret
.
Default: ""
ingress:
oauth2-proxy:
client-details-from-secret: "oauth-client-credentials"
ingress.oauth2-proxy.image
#
Docker image for the OAuth2 Proxy component.
Default: "quay.io/oauth2-proxy/oauth2-proxy:v7.9.0"
ingress:
oauth2-proxy:
image: "quay.io/oauth2-proxy/oauth2-proxy:v7.9.0"
ingress.oauth2-proxy.use-https
#
Set to true
when using HTTPS for the API server endpoint. When set to false
, secure cookies are disabled, which is required for HTTP endpoints.
Default: false
ingress:
oauth2-proxy:
use-https: true
ingress.oauth2-proxy.email-domain
#
Email domains to allow for authentication. Use "*"
to allow all email domains.
Default: "*"
ingress:
oauth2-proxy:
email-domain: "mycompany.com"
ingress.oauth2-proxy.session-store-type
#
Session storage type for OAuth2 Proxy. Can be set to "cookie"
or "redis"
. Using Redis as a session store results in smaller cookies and better performance for large-scale deployments.
Default: "redis"
ingress:
oauth2-proxy:
session-store-type: "redis"
ingress.oauth2-proxy.redis-url
#
URL to connect to an external Redis instance for session storage. If set to null
and session-store-type
is "redis"
, a Redis instance will be automatically deployed. Format: redis://host[:port][/db-number]
Default: null
ingress:
oauth2-proxy:
redis-url: "redis://redis-host:6379/0"
ingress-nginx
#
ingress-nginx.enabled
#
Enable the ingress-nginx controller for the API server. If you have an existing ingress-nginx controller, you have to set this to false
to avoid conflict.
Default: true
ingress-nginx:
enabled: true
ingress-nginx.controller
#
Fields under ingress-nginx.controller
will be mapped to controller
values for the ingress-nginx controller sub-chart. Refer to the ingress-nginx chart documentation for more details.
Default: see the yaml below.
ingress-nginx:
controller:
service:
# Service type of the ingress controller.
type: LoadBalancer
# Annotations for the ingress controller service.
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
cloud.google.com/l4-rbs: "enabled"
service.beta.kubernetes.io/port_443_health-probe_protocol: "TCP"
service.beta.kubernetes.io/port_80_health-probe_protocol: "TCP"
config:
# Custom HTTP snippet to inject into the ingress-nginx configuration.
http-snippet: |
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
rbac
#
rbac.create
#
Whether to create the service account and RBAC policies for the API server. If false, an external service account is expected.
Default: true
rbac:
create: true
rbac.serviceAccountName
#
Name of the service account to use. Leave empty to let the chart generate one.
Default: ""
rbac:
serviceAccountName: ""
rbac.namespaceRules
#
Namespace-scoped RBAC rules granted to the namespace where the SkyPilot tasks will be launched.
Note
Modifying the rules may break functionalities of SkyPilot API server. Refer to setting minimum permissions in helm deployment for how to modify the rules based on your use case.
Default: see the yaml below.
rbac:
namespaceRules:
- apiGroups: [ "" ]
resources: [ "pods", "pods/status", "pods/exec", "pods/portforward" ]
verbs: [ "*" ]
- apiGroups: [ "" ]
resources: [ "services" ]
verbs: [ "*" ]
- apiGroups: [ "" ]
resources: [ "secrets" ]
verbs: [ "*" ]
- apiGroups: [ "" ]
resources: [ "events" ]
verbs: [ "get", "list", "watch" ]
- apiGroups: [ "" ]
resources: [ "configmaps" ]
verbs: [ "get", "patch" ]
- apiGroups: ["apps"]
resources: ["deployments", "deployments/status"]
verbs: ["*"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["*"]
rbac.clusterRules
#
Cluster-scoped RBAC rules for the API server.
Note
Modifying the rules may break functionalities of SkyPilot API server. Refer to setting minimum permissions in helm deployment for how to modify the rules based on your use case.
Default: see the yaml below.
rbac:
clusterRules:
- apiGroups: [ "" ]
resources: [ "nodes" ]
verbs: [ "get", "list", "watch" ]
- apiGroups: [ "" ]
resources: [ "pods" ]
verbs: [ "get", "list", "watch" ]
- apiGroups: [ "node.k8s.io" ]
resources: [ "runtimeclasses" ]
verbs: [ "get", "list", "watch" ]
- apiGroups: [ "networking.k8s.io" ]
resources: [ "ingressclasses" ]
verbs: [ "get", "list", "watch" ]
- apiGroups: [""]
resources: ["services"]
verbs: ["list", "get"]
rbac.manageRbacPolicies
#
Allow the API server to grant permissions to SkyPilot Pods and system components. Refer to setting minimum permissions in helm deployment for more details.
Default: true
rbac:
manageRbacPolicies: true
rbac.manageSystemComponents
#
Allow the API server to manage system components in the skypilot-system namespace. Required for object store mounting.
Default: true
rbac:
manageSystemComponents: true
rbac.serviceAccountAnnotations
#
Custom annotations for the API server service account. This is useful for cloud provider integrations that require specific annotations on service accounts, such as AWS IAM roles for service accounts (IRSA) or GCP Workload Identity.
Default: null
rbac:
serviceAccountAnnotations:
eks.amazonaws.com/role-arn: "arn:aws:iam::123456789012:role/MyServiceAccountRole"
iam.gke.io/gcp-service-account: "[email protected]"
kubernetesCredentials
#
kubernetesCredentials.useApiServerCluster
#
Enable using the API server’s cluster for workloads.
Default: true
kubernetesCredentials:
useApiServerCluster: true
kubernetesCredentials.useKubeconfig
#
Use the kube-credentials secret containing the kubeconfig to authenticate to Kubernetes.
Default: false
kubernetesCredentials:
useKubeconfig: false
kubernetesCredentials.kubeconfigSecretName
#
Name of the secret containing the kubeconfig file. Only used if useKubeconfig is true.
Default: kube-credentials
kubernetesCredentials:
kubeconfigSecretName: kube-credentials
kubernetesCredentials.inclusterNamespace
#
Namespace to use for in-cluster resources.
Default: null
kubernetesCredentials:
inclusterNamespace: null
awsCredentials
#
awsCredentials.enabled
#
Enable AWS credentials for the API server.
Default: false
awsCredentials:
enabled: false
awsCredentials.awsSecretName
#
Name of the secret containing the AWS credentials. Only used if enabled is true.
Default: aws-credentials
awsCredentials:
awsSecretName: aws-credentials
awsCredentials.accessKeyIdKeyName
#
Key name used to set AWS_ACCESS_KEY_ID.
Default: aws_access_key_id
awsCredentials:
accessKeyIdKeyName: aws_access_key_id
awsCredentials.secretAccessKeyKeyName
#
Key name used to set AWS_SECRET_ACCESS_KEY.
Default: aws_secret_access_key
awsCredentials:
secretAccessKeyKeyName: aws_secret_access_key
gcpCredentials
#
gcpCredentials.enabled
#
Enable GCP credentials for the API server.
Default: false
gcpCredentials:
enabled: false
gcpCredentials.projectId
#
GCP project ID. Only used if enabled is true.
Default: null
gcpCredentials:
projectId: null
gcpCredentials.gcpSecretName
#
Name of the secret containing the GCP credentials. Only used if enabled is true.
Default: gcp-credentials
gcpCredentials:
gcpSecretName: gcp-credentials
r2Credentials
#
r2Credentials.enabled
#
Enable R2 credentials for the API server.
Default: false
r2Credentials:
enabled: true
r2Credentials.r2SecretName
#
Name of the secret containing the R2 credentials. Only used if enabled is true. The secret should contain the following keys:
r2.credentials: R2 credentials file
accountid: R2 account ID file
Refer to Cloudflare R2 installation for more details.
Default: r2-credentials
r2Credentials:
r2SecretName: your-r2-credentials-secret-name
runpodCredentials
#
runpodCredentials.enabled
#
Enable RunPod credentials for the API server.
Default: false
runpodCredentials:
enabled: false
runpodCredentials.runpodSecretName
#
Name of the secret containing the RunPod credentials. Only used if enabled is true.
Default: runpod-credentials
runpodCredentials:
runpodSecretName: runpod-credentials
lambdaCredentials
#
lambdaCredentials.enabled
#
Enable Lambda credentials for the API server.
Default: false
lambdaCredentials:
enabled: false
lambdaCredentials.lambdaSecretName
#
Name of the secret containing the Lambda credentials. Only used if enabled is true.
Default: lambda-credentials
lambdaCredentials:
lambdaSecretName: lambda-credentials
vastCredentials
#
vastCredentials.enabled
#
Enable Vast credentials for the API server.
Default: false
vastCredentials:
enabled: false
vastCredentials.vastSecretName
#
Name of the secret containing the Vast credentials. Only used if enabled is true.
Default: vast-credentials
vastCredentials:
vastSecretName: vast-credentials
nebiusCredentials
#
nebiusCredentials.enabled
#
Enable Nebius credentials for the API server.
Default: false
nebiusCredentials:
enabled: false
nebiusCredentials.tenantId
#
Nebius tenant ID. Only used if enabled is true.
Default: null
nebiusCredentials:
tenantId: null
nebiusCredentials.nebiusSecretName
#
Name of the secret containing the Nebius credentials. Only used if enabled is true.
Default: nebius-credentials
nebiusCredentials:
nebiusSecretName: nebius-credentials
extraInitContainers
#
Additional init containers to add to the API server pod.
Default: null
extraInitContainers:
- name: my-init-container
image: my-image:latest
command: ["/bin/sh", "-c", "echo 'Hello from init container'"]
podSecurityContext
#
Security context for the API server pod. Usually left empty to use defaults. Refer to set the security context for Pod for more details.
Default: {}
podSecurityContext:
runAsUser: 1000
runAsGroup: 3000
fsGroup: 2000
securityContext
#
securityContext.capabilities
#
Linux capabilities to drop for the API server container.
Default: drop all capabilities.
securityContext:
capabilities:
drop:
- ALL
securityContext.allowPrivilegeEscalation
#
Whether to allow privilege escalation in the API server container.
Default: false
securityContext:
allowPrivilegeEscalation: false
runtimeClassName
#
The runtime class to use for the API server pod. Usually left empty to use the default runtime class.
Default: null
runtimeClassName: null
prometheus
#
Configuration for Prometheus helm chart. Refer to the Prometheus helm chart repository for available values.
SkyPilot provides a minimal Prometheus configuration by default. If you want to monitor more resources other than the API server, it is recommended to install and manage Prometheus separately.
prometheus:
enabled: true
server:
persistentVolume:
enabled: true
size: 10Gi
extraScrapeConfigs: |
# Static scrape target for SkyPilot API server GPU metrics
- job_name: 'skypilot-api-server-gpu-metrics'
static_configs:
- targets: ['{{ .Release.Name }}-api-service.{{ .Release.Namespace }}.svc.cluster.local:80']
metrics_path: '/gpu-metrics'
scrape_interval: 15s
scrape_timeout: 10s
kube-state-metrics:
enabled: true
metricLabelsAllowlist:
- pods=[skypilot-cluster]
prometheus-node-exporter:
enabled: false
prometheus-pushgateway:
enabled: false
alertmanager:
enabled: false
prometheus.enabled
#
Enable prometheus for the API server.
Default: false
prometheus:
enabled: false
grafana
#
Configuration for Grafana helm chart. Refer to the Grafana helm chart documentation for available values.
By default, Grafana is configured to work with the ingress controller and auth proxy for seamless authentication.
grafana:
enabled: true
persistence:
enabled: true
size: 10Gi
ingress:
enabled: false
enableAuthedIngress: true
path: "/grafana"
ingressClassName: nginx
hosts: null
grafana.ini:
server:
domain: localhost
root_url: "%(protocol)s://%(domain)s/grafana"
enforce_domain: false
serve_from_sub_path: true
security:
allow_embedding: true
auth.proxy:
enabled: true
header_name: "X-WEBAUTH-USER"
header_property: "username"
auto_sign_up: true
auth:
disable_login_form: true
disable_signout_menu: true
auth.anonymous:
enabled: false
auth.basic:
enabled: false
sidecar:
datasources:
enabled: true
dashboards:
enabled: true
dashboardProviders:
dashboardproviders.yaml:
apiVersion: 1
providers:
- name: 'default'
orgId: 1
folder: ''
type: file
disableDeletion: false
allowUiUpdates: false
updateIntervalSeconds: 30
options:
path: /var/lib/grafana/dashboards/default
grafana.enabled
#
Enable grafana for the API server.
Default: false
grafana:
enabled: false