# values.yaml (forked from NVIDIA/k8s-device-plugin)

# Plugin configuration
# Only one of "name" or "map" should ever be set for a given deployment.
# Use "name" to point to an external ConfigMap with a list of configurations, or to make the Chart
# create a ConfigMap for you if "create" is True.
# Use "map" to build an integrated ConfigMap from a set of configurations as
# part of this helm chart. An example of setting "map" might be:
# config:
#   map:
#     default: |-
#       version: v1
#       flags:
#         migStrategy: none
#     mig-single: |-
#       version: v1
#       flags:
#         migStrategy: single
#     mig-mixed: |-
#       version: v1
#       flags:
#         migStrategy: mixed
config:
  # ConfigMap name if pulling from an external ConfigMap
  name: "nos-device-plugin-configs"
  # If true, the ConfigMap containing the plugin configuration files is created by the Chart,
  # initialized with an empty default configuration.
  # Otherwise, the Chart expects a ConfigMap named .Values.config.name to already exist.
  create: true
  # Set of named configs to build an integrated ConfigMap from
  map: {}
  # Default config name within the ConfigMap
  default: ""
  # List of fallback strategies to attempt if no config is selected and no default is provided
  fallbackStrategies: ["named", "single"]
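
# Illustrative only: a minimal sketch of the external ConfigMap that
# .Values.config.name could point at when "create" is false. The namespace is
# an assumption, and the config payload simply reuses the schema shown in the
# example above; adjust both to your installation.
#
# apiVersion: v1
# kind: ConfigMap
# metadata:
#   name: nos-device-plugin-configs
#   namespace: nebuly-nos   # assumed namespace
# data:
#   default: |-
#     version: v1
#     flags:
#       migStrategy: none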
legacyDaemonsetAPI: null
compatWithCPUManager: null
migStrategy: null
failOnInitError: null
deviceListStrategy: null
deviceIDStrategy: null
nvidiaDriverRoot: null
gdsEnabled: null
mofedEnabled: null
fullnameOverride: ""
namespaceOverride: ""
selectorLabelsOverride: {}
allowDefaultNamespace: false
imagePullSecrets: []
image:
  repository: ghcr.io/nebuly-ai/k8s-device-plugin
  pullPolicy: IfNotPresent
  # Overrides the image tag whose default is the chart appVersion.
  tag: ""
mps:
  enabled: true
  # The ID of the user used to run the MPS server.
  # All the containers requesting GPU resources must run as this user.
  userID: 1000
  image:
    repository: ghcr.io/nebuly-ai/nvidia-mps-server
    pullPolicy: IfNotPresent
    tag: "0.0.1"
updateStrategy:
  type: RollingUpdate
podAnnotations: {}
podSecurityContext: {}
securityContext: {}
resources: {}
nodeSelector:
  nos.nebuly.com/gpu-partitioning: "mps"
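
# Illustrative only: the device plugin pods are scheduled only on nodes that
# carry the label selected above. On a nos-managed cluster the label is set on
# the node object; the node name below is a placeholder:
#
# apiVersion: v1
# kind: Node
# metadata:
#   name: gpu-node-1
#   labels:
#     nos.nebuly.com/gpu-partitioning: "mps"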
affinity: {}
tolerations:
  # This toleration is deprecated. Kept here for backward compatibility
  # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
  - key: CriticalAddonsOnly
    operator: Exists
  - key: nvidia.com/gpu
    operator: Exists
    effect: NoSchedule
  - key: "kubernetes.azure.com/scalesetpriority"
    operator: "Equal"
    value: "spot"
    effect: "NoSchedule"
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"
runtimeClassName: null
# Subcharts
nfd:
  nameOverride: node-feature-discovery
  enableNodeFeatureApi: false
  master:
    serviceAccount:
      name: node-feature-discovery
      create: true
    config:
      extraLabelNs: ["nvidia.com"]
  worker:
    tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Equal"
        value: ""
        effect: "NoSchedule"
      - key: "nvidia.com/gpu"
        operator: "Equal"
        value: "present"
        effect: "NoSchedule"
    config:
      sources:
        pci:
          # PCI device classes to consider: 02 = network controller,
          # 0200 = Ethernet, 0207 = InfiniBand, 0300 = VGA-compatible
          # display controller, 0302 = 3D controller (most datacenter GPUs).
          deviceClassWhitelist:
            - "02"
            - "0200"
            - "0207"
            - "0300"
            - "0302"
          deviceLabelFields:
            - vendor
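
# Illustrative only: with deviceLabelFields restricted to "vendor", NFD
# publishes per-vendor PCI labels on matching nodes, e.g. for NVIDIA devices
# (vendor ID 10de):
#
#   feature.node.kubernetes.io/pci-10de.present: "true"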
gfd:
  enabled: false
  nameOverride: gpu-feature-discovery
  namespaceOverride: ""