diff --git a/infrastructure/controllers/gpu-operator/release.yaml b/infrastructure/controllers/gpu-operator/release.yaml
deleted file mode 100644
index eeefd1e..0000000
--- a/infrastructure/controllers/gpu-operator/release.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-apiVersion: helm.toolkit.fluxcd.io/v2
-kind: HelmRelease
-metadata:
-  name: gpu-operator
-  namespace: gpu-operator
-spec:
-  chart:
-    spec:
-      chart: gpu-operator
-      version: v26.3.x
-      sourceRef:
-        kind: HelmRepository
-        name: nvidia
-  interval: 15m
-  releaseName: gpu-operator
-  values:
-    driver:
-      enabled: false
-    toolkit:
-      enabled: false
-    hostPaths:
-      driverInstallDir: /usr/local
-    devicePlugin:
-      config:
-        name: time-slicing-config
-        default: time-slicing
diff --git a/infrastructure/controllers/gpu-operator/repository.yaml b/infrastructure/controllers/gpu-operator/repository.yaml
deleted file mode 100644
index bbe46b3..0000000
--- a/infrastructure/controllers/gpu-operator/repository.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-apiVersion: source.toolkit.fluxcd.io/v1
-kind: HelmRepository
-metadata:
-  name: nvidia
-  namespace: gpu-operator
-spec:
-  interval: 15m
-  url: https://helm.ngc.nvidia.com/nvidia
diff --git a/infrastructure/controllers/gpu-operator/time-slicing-config.yaml b/infrastructure/controllers/gpu-operator/time-slicing-config.yaml
deleted file mode 100644
index d9a4a39..0000000
--- a/infrastructure/controllers/gpu-operator/time-slicing-config.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: time-slicing-config
-  namespace: gpu-operator
-data:
-  time-slicing: |
-    version: v1
-    sharing:
-      timeSlicing:
-        resources:
-          - name: nvidia.com/gpu
-            replicas: 5
diff --git a/infrastructure/controllers/nvidia-device-plugin/release.yaml b/infrastructure/controllers/nvidia-device-plugin/release.yaml
new file mode 100644
index 0000000..4c8dc2e
--- /dev/null
+++ b/infrastructure/controllers/nvidia-device-plugin/release.yaml
@@ -0,0 +1,29 @@
+apiVersion: helm.toolkit.fluxcd.io/v2
+kind: HelmRelease
+metadata:
+  name: nvidia-device-plugin
+  namespace: nvidia-device-plugin
+spec:
+  chart:
+    spec:
+      chart: nvidia-device-plugin
+      version: 0.17.x
+      sourceRef:
+        kind: HelmRepository
+        name: nvdp
+  interval: 15m
+  releaseName: nvidia-device-plugin
+  values:
+    # Matches the RuntimeClass named "nvidia" declared in runtime-class.yaml.
+    runtimeClassName: nvidia
+    # Inline plugin config; "time-slicing" is both the only entry and the default.
+    config:
+      default: time-slicing
+      map:
+        time-slicing: |
+          version: v1
+          sharing:
+            timeSlicing:
+              resources:
+                - name: nvidia.com/gpu
+                  replicas: 5
diff --git a/infrastructure/controllers/nvidia-device-plugin/repository.yaml b/infrastructure/controllers/nvidia-device-plugin/repository.yaml
new file mode 100644
index 0000000..4735cb9
--- /dev/null
+++ b/infrastructure/controllers/nvidia-device-plugin/repository.yaml
@@ -0,0 +1,8 @@
+apiVersion: source.toolkit.fluxcd.io/v1
+kind: HelmRepository
+metadata:
+  name: nvdp
+  namespace: nvidia-device-plugin
+spec:
+  interval: 15m
+  url: https://nvidia.github.io/k8s-device-plugin
diff --git a/infrastructure/controllers/nvidia-device-plugin/runtime-class.yaml b/infrastructure/controllers/nvidia-device-plugin/runtime-class.yaml
new file mode 100644
index 0000000..c26bd6d
--- /dev/null
+++ b/infrastructure/controllers/nvidia-device-plugin/runtime-class.yaml
@@ -0,0 +1,5 @@
+apiVersion: node.k8s.io/v1
+kind: RuntimeClass
+metadata:
+  name: nvidia
+handler: nvidia
diff --git a/infrastructure/namespaces/namespace-gpu-operator.yaml b/infrastructure/namespaces/namespace-gpu-operator.yaml
deleted file mode 100644
index c4104d2..0000000
--- a/infrastructure/namespaces/namespace-gpu-operator.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: gpu-operator
-  labels:
-    pod-security.kubernetes.io/enforce: privileged
-    pod-security.kubernetes.io/audit: privileged
-    pod-security.kubernetes.io/warn: privileged
diff --git a/infrastructure/namespaces/namespace-nvidia-device-plugin.yaml b/infrastructure/namespaces/namespace-nvidia-device-plugin.yaml
new file mode 100644
index 0000000..4a6b3ab
--- /dev/null
+++ b/infrastructure/namespaces/namespace-nvidia-device-plugin.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: nvidia-device-plugin
+  labels:
+    # Same Pod Security labels the removed gpu-operator namespace carried.
+    # NOTE(review): the device-plugin pods use hostPath mounts; confirm chart.
+    pod-security.kubernetes.io/enforce: privileged
+    pod-security.kubernetes.io/audit: privileged
+    pod-security.kubernetes.io/warn: privileged