Mirror of https://github.com/michaelthomson0797/fleet-infra.git (synced 2026-02-04 04:59:54 +00:00)
nvidia update values
@@ -16,7 +16,161 @@ spec:
   timeout: 5m
   releaseName: nvidia-device-plugin
   values:
+    # Plugin configuration
+    # Only one of "name" or "map" should ever be set for a given deployment.
+    # Use "name" to point to an external ConfigMap with a list of configurations.
+    # Use "map" to build an integrated ConfigMap from a set of configurations as
+    # part of this helm chart. An example of setting "map" might be:
+    # config:
+    #   map:
+    #     default: |-
+    #       version: v1
+    #       flags:
+    #         migStrategy: none
+    #     mig-single: |-
+    #       version: v1
+    #       flags:
+    #         migStrategy: single
+    #     mig-mixed: |-
+    #       version: v1
+    #       flags:
+    #         migStrategy: mixed
+    config:
+      # ConfigMap name if pulling from an external ConfigMap
+      name: ""
+      # Set of named configs to build an integrated ConfigMap from
+      map: {}
+      # Default config name within the ConfigMap
+      default: ""
+      # List of fallback strategies to attempt if no config is selected and no default is provided
+      fallbackStrategies: ["named", "single"]
+
+    compatWithCPUManager: null
+    migStrategy: null
+    failOnInitError: null
+    deviceListStrategy: null
+    deviceIDStrategy: null
+    nvidiaDriverRoot: null
+    gdsEnabled: null
+    mofedEnabled: null
+    deviceDiscoveryStrategy: null
+
+    nameOverride: ""
+    fullnameOverride: ""
+    namespaceOverride: ""
+    selectorLabelsOverride: {}
+
+    allowDefaultNamespace: false
+
+    imagePullSecrets: []
+    image:
+      repository: nvcr.io/nvidia/k8s-device-plugin
+      pullPolicy: IfNotPresent
+      # Overrides the image tag whose default is the chart appVersion.
+      tag: ""
+
+    updateStrategy:
+      type: RollingUpdate
+
+    podAnnotations: {}
+    podSecurityContext: {}
+    securityContext: {}
+
+    resources: {}
+    nodeSelector: {}
+    affinity:
+      nodeAffinity:
+        requiredDuringSchedulingIgnoredDuringExecution:
+          nodeSelectorTerms:
+            - matchExpressions:
+                # On discrete-GPU based systems NFD adds the following label where 10de is the NVIDIA PCI vendor ID
+                - key: feature.node.kubernetes.io/pci-10de.present
+                  operator: In
+                  values:
+                    - "true"
+            - matchExpressions:
+                # On some Tegra-based systems NFD detects the CPU vendor ID as NVIDIA
+                - key: feature.node.kubernetes.io/cpu-model.vendor_id
+                  operator: In
+                  values:
+                    - "NVIDIA"
+            - matchExpressions:
+                # We allow a GPU deployment to be forced by setting the following label to "true"
+                - key: "nvidia.com/gpu.present"
+                  operator: In
+                  values:
+                    - "true"
+    tolerations:
+      # This toleration is deprecated. Kept here for backward compatibility
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+      - key: CriticalAddonsOnly
+        operator: Exists
+      - key: nvidia.com/gpu
+        operator: Exists
+        effect: NoSchedule
+
+    # Mark this pod as a critical add-on; when enabled, the critical add-on
+    # scheduler reserves resources for critical add-on pods so that they can
+    # be rescheduled after a failure.
+    # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+    priorityClassName: "system-node-critical"
+
     runtimeClassName: nvidia
+
+    devicePlugin:
+      enabled: true
+
     gfd:
       enabled: true
+      nameOverride: gpu-feature-discovery
+      namespaceOverride: ""
+      noTimestamp: null
+      sleepInterval: null
+      securityContext:
+        # privileged access is required for the gpu-feature-discovery to access the
+        # vgpu info on a host.
+        # TODO: This should be optional and detected automatically.
+        privileged: true
+
+    # Helm dependency
+    nfd:
+      nameOverride: node-feature-discovery
+      enableNodeFeatureApi: false
+      master:
+        serviceAccount:
+          name: node-feature-discovery
+          create: true
+        config:
+          extraLabelNs: ["nvidia.com"]
+
+      worker:
+        tolerations:
+          - key: "node-role.kubernetes.io/master"
+            operator: "Equal"
+            value: ""
+            effect: "NoSchedule"
+          - key: "nvidia.com/gpu"
+            operator: "Equal"
+            value: "present"
+            effect: "NoSchedule"
+        config:
+          sources:
+            pci:
+              deviceClassWhitelist:
+                - "02"
+                - "03"
+              deviceLabelFields:
+                - vendor
+
+    mps:
+      # root specifies the location where files and folders for managing MPS will
+      # be created. This includes a daemon-specific /dev/shm and pipe and log
+      # directories.
+      # Pipe directories will be created at {{ mps.root }}/{{ .ResourceName }}
+      root: "/run/nvidia/mps"
+
+
+    cdi:
+      # nvidiaHookPath specifies the path to the nvidia-cdi-hook or nvidia-ctk executables on the host.
+      # This is required to ensure that the generated CDI specification refers to the correct CDI hooks.
+      nvidiaHookPath: null
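
The config block above is left empty, but the chart comments describe two ways to feed plugin configurations in. As a minimal sketch of the external-ConfigMap route (the ConfigMap name and namespace below are assumptions for illustration, not part of this commit), config.name would point at an object whose data keys are the selectable configurations, in the same version: v1 format as the commented map example:

apiVersion: v1
kind: ConfigMap
metadata:
  name: nvidia-plugin-configs        # assumed name; config.name would be set to this
  namespace: nvidia-device-plugin    # assumed namespace
data:
  # one selectable configuration per key, same format as the "map" example
  default: |-
    version: v1
    flags:
      migStrategy: none
  mig-single: |-
    version: v1
    flags:
      migStrategy: single

Individual nodes can then opt into a named entry via the nvidia.com/device-plugin.config node label, with fallbackStrategies deciding what happens when no label and no default match.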
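
The nodeAffinity terms schedule the plugin onto nodes that NFD has labelled as NVIDIA hardware (PCI vendor 10de, or an NVIDIA CPU vendor ID on Tegra), or onto nodes where nvidia.com/gpu.present is set by hand, and the nfd.worker tolerations expect an nvidia.com/gpu=present:NoSchedule taint. A sketch of a node that satisfies both, with a hypothetical node name:

apiVersion: v1
kind: Node
metadata:
  name: gpu-node-01                  # hypothetical node name
  labels:
    # set by NFD on discovery, or applied manually to force a GPU deployment
    feature.node.kubernetes.io/pci-10de.present: "true"
    nvidia.com/gpu.present: "true"
spec:
  taints:
    # tolerated by the device plugin (Exists) and by the nfd worker (Equal/present)
    - key: nvidia.com/gpu
      value: present
      effect: NoSchedule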
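
Per the mps comment, pipe directories are created at {{ mps.root }}/{{ .ResourceName }}; with root set to /run/nvidia/mps, a resource named nvidia.com/gpu (assumed here purely for illustration) would get its pipe directory at:

/run/nvidia/mps/nvidia.com/gpu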
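
cdi.nvidiaHookPath is left at null, so the generated CDI specification uses the chart's default hook location. If the NVIDIA Container Toolkit binaries live elsewhere on the host, the value would be set explicitly; as a sketch, assuming a package install that places the hook at /usr/bin/nvidia-cdi-hook:

cdi:
  nvidiaHookPath: /usr/bin/nvidia-cdi-hook   # assumed host path, verify on the node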