mirror of
https://github.com/michaelthomson0797/fleet-infra.git
synced 2026-05-13 19:11:41 +00:00
nvidia gpu operator
This commit is contained in:
@@ -1,47 +0,0 @@
|
||||
---
# DaemonSet that runs the generic-cdi-plugin container on every node labelled
# nixos-nvidia-cdi=enabled (see the nodeAffinity at the bottom).
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: generic-cdi-plugin-daemonset
  namespace: generic-cdi-plugin
spec:
  selector:
    matchLabels:
      name: generic-cdi-plugin
  template:
    metadata:
      labels:
        name: generic-cdi-plugin
        app.kubernetes.io/component: generic-cdi-plugin
        app.kubernetes.io/name: generic-cdi-plugin
    spec:
      # Hard scheduling requirement: only nodes carrying the label
      # nixos-nvidia-cdi=enabled are eligible.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: 'nixos-nvidia-cdi'
                    operator: In
                    values:
                      - 'enabled'
      containers:
        - name: generic-cdi-plugin
          # NOTE(review): ":main" is a mutable tag; together with
          # imagePullPolicy Always the running image can change on any restart.
          image: ghcr.io/olfillasodikno/generic-cdi-plugin:main
          imagePullPolicy: Always
          # The JSON file passed as the only argument is the same path that is
          # mounted from the host below.
          command:
            - /generic-cdi-plugin
            - /var/run/cdi/nvidia-container-toolkit.json
          # NOTE(review): runs privileged; presumably needed to reach the
          # kubelet directory on the host; confirm before reuse.
          securityContext:
            privileged: true
          tty: true
          volumeMounts:
            - name: kubelet
              mountPath: /var/lib/kubelet
            - name: nvidia-container-toolkit
              mountPath: /var/run/cdi/nvidia-container-toolkit.json
      # Both volumes are hostPath mounts exposed at the same path inside the
      # container as on the node.
      volumes:
        - name: kubelet
          hostPath:
            path: /var/lib/kubelet
        - name: nvidia-container-toolkit
          hostPath:
            path: /var/run/cdi/nvidia-container-toolkit.json
|
||||
24
infrastructure/controllers/gpu-operator/release.yaml
Normal file
24
infrastructure/controllers/gpu-operator/release.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
---
# Flux HelmRelease installing the "gpu-operator" chart from the "nvidia"
# HelmRepository, reconciled every 15 minutes.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: gpu-operator
  namespace: gpu-operator
spec:
  interval: 15m
  releaseName: gpu-operator
  chart:
    spec:
      chart: gpu-operator
      # Wildcard range: any patch release of the 26.3 line.
      version: v26.3.x
      sourceRef:
        kind: HelmRepository
        name: nvidia
  values:
    # The chart's driver and toolkit components are switched off.
    # NOTE(review): presumably the host already provides the NVIDIA driver and
    # container toolkit; confirm against the node configuration.
    driver:
      enabled: false
    toolkit:
      enabled: false
    # Device plugin configuration comes from the ConfigMap
    # "time-slicing-config"; "time-slicing" is the key used by default.
    devicePlugin:
      config:
        name: time-slicing-config
        default: time-slicing
|
||||
8
infrastructure/controllers/gpu-operator/repository.yaml
Normal file
8
infrastructure/controllers/gpu-operator/repository.yaml
Normal file
@@ -0,0 +1,8 @@
|
||||
---
# Flux HelmRepository named "nvidia" pointing at the NVIDIA NGC Helm index;
# the index is re-fetched every 15 minutes.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: nvidia
  namespace: gpu-operator
spec:
  url: https://helm.ngc.nvidia.com/nvidia
  interval: 15m
|
||||
@@ -0,0 +1,13 @@
|
||||
---
# ConfigMap holding one device-plugin configuration under the key
# "time-slicing". The value is an embedded YAML document that lists the
# nvidia.com/gpu resource with replicas set to 5.
# No comments are placed inside the block scalar so the stored string carries
# only the configuration itself.
apiVersion: v1
kind: ConfigMap
metadata:
  name: time-slicing-config
  namespace: gpu-operator
data:
  time-slicing: |
    version: v1
    sharing:
      timeSlicing:
        resources:
        - name: nvidia.com/gpu
          replicas: 5
|
||||
Reference in New Issue
Block a user