From 944a364fe4623674a6b2da9914139f5dfa89fa09 Mon Sep 17 00:00:00 2001 From: tizhou86 Date: Mon, 8 Jun 2020 19:49:40 +0800 Subject: [PATCH] Add gpu sharing in predicate logic. --- go.mod | 2 +- pkg/scheduler/api/device_info.go | 63 +++++++++++++ pkg/scheduler/api/node_info.go | 89 +++++++++++++++++++ .../plugins/predicates/predicates.go | 43 ++++++++- vendor/modules.txt | 47 ++++++++++ 5 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 pkg/scheduler/api/device_info.go diff --git a/go.mod b/go.mod index f5cf0cb02d2..2e4c8c61e66 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module volcano.sh/volcano -go 1.13 +go 1.14 replace ( k8s.io/api => k8s.io/api v0.0.0-20200131112707-d64dbec685a4 diff --git a/pkg/scheduler/api/device_info.go b/pkg/scheduler/api/device_info.go new file mode 100644 index 00000000000..1c15a563903 --- /dev/null +++ b/pkg/scheduler/api/device_info.go @@ -0,0 +1,63 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package api + +import ( + "strconv" + + v1 "k8s.io/api/core/v1" +) + +type DeviceInfo struct { + Id int + PodMap map[string]*v1.Pod + GPUTotalMemory uint +} + +func (di *DeviceInfo) GetPods() []*v1.Pod { + pods := []*v1.Pod{} + for _, pod := range di.PodMap { + pods = append(pods, pod) + } + return pods +} + +func NewDeviceInfo(id int, mem uint) *DeviceInfo { + return &DeviceInfo{ + Id: id, + GPUTotalMemory: mem, + PodMap: map[string]*v1.Pod{}, + } +} + +func (di *DeviceInfo) GetUsedGPUMemory() uint { + res := uint(0) + for _, pod := range di.PodMap { + if pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed { + continue + } else { + if len(pod.ObjectMeta.Annotations) > 0 { + mem, found := pod.ObjectMeta.Annotations["volcano.sh/pod-gpu-memory"] + if found { + m, _ := strconv.Atoi(mem) + res += uint(m) + } + } + } + } + return res +} diff --git a/pkg/scheduler/api/node_info.go b/pkg/scheduler/api/node_info.go index 94bc7d5d6a9..7b035022d7b 100644 --- a/pkg/scheduler/api/node_info.go +++ b/pkg/scheduler/api/node_info.go @@ -18,6 +18,7 @@ package api import ( "fmt" + "strconv" v1 "k8s.io/api/core/v1" "k8s.io/klog" @@ -46,6 +47,10 @@ type NodeInfo struct { Tasks map[TaskID]*TaskInfo + Devices map[int]*DeviceInfo + GPUTotalCore int + GPUTotalMemory int + // Used to store custom information Others map[string]interface{} } @@ -78,6 +83,10 @@ func NewNodeInfo(node *v1.Node) *NodeInfo { Capability: EmptyResource(), Tasks: make(map[TaskID]*TaskInfo), + + Devices: make(map[int]*DeviceInfo), + GPUTotalCore: 0, + GPUTotalMemory: 0, } } else { ni = &NodeInfo{ @@ -93,9 +102,14 @@ func NewNodeInfo(node *v1.Node) *NodeInfo { Capability: NewResource(node.Status.Capacity), Tasks: make(map[TaskID]*TaskInfo), + + Devices: make(map[int]*DeviceInfo), + GPUTotalCore: 0, + GPUTotalMemory: 0, } } + ni.SetNodeGPUInfo(node) ni.setNodeState(node) return ni @@ -154,6 +168,24 @@ func (ni *NodeInfo) setNodeState(node *v1.Node) { } } +func (ni *NodeInfo) SetNodeGPUInfo(node *v1.Node) { + + core, ok := node.Status.Capacity["volcano.sh/node-gpu-core"] + if ok { + ni.GPUTotalCore = int(core.Value()) + } + + mem, ok := node.Status.Capacity["volcano.sh/node-gpu-memory"] + if ok { + ni.GPUTotalMemory = int(mem.Value()) + } + + for i := 0; i < int(core.Value()); i++ { + ni.Devices[i] = NewDeviceInfo(i, uint(int(mem.Value())/int(core.Value()))) + } + +} + // SetNode sets kubernetes node object to nodeInfo object func (ni *NodeInfo) SetNode(node *v1.Node) { ni.setNodeState(node) @@ -314,3 +346,60 @@ func (ni *NodeInfo) Pods() (pods []*v1.Pod) { return } + +func (ni *NodeInfo) CheckPredicatePodOnGPUNode(pod *v1.Pod) bool { + res := false + memReq := uint(0) + + remainMems := ni.GetDevicesRemainGPUMemory() + if len(pod.ObjectMeta.Annotations) > 0 { + mem, found := pod.ObjectMeta.Annotations["volcano.sh/pod-gpu-memory"] + if found { + m, _ := strconv.Atoi(mem) + memReq = uint(m) + } + } + + if len(remainMems) > 0 { + for devID := 0; devID < len(ni.Devices); devID++ { + availableGPU, ok := remainMems[devID] + if ok { + if availableGPU >= memReq { + res = true + break + } + } + } + } + + return res + +} + +func (ni *NodeInfo) GetDevicesRemainGPUMemory() map[int]uint { + devicesAllGPUMemory := ni.GetDevicesAllGPUMemory() + devicesUsedGPUMemory := ni.GetDevicesUsedGPUMemory() + res := map[int]uint{} + for id, allMemory := range devicesAllGPUMemory { + if usedMemory, found := devicesUsedGPUMemory[id]; found { + res[id] = allMemory - usedMemory + } + } + return res +} + +func (ni *NodeInfo) GetDevicesUsedGPUMemory() map[int]uint { + res := map[int]uint{} + for _, device := range ni.Devices { + res[device.Id] = device.GetUsedGPUMemory() + } + return res +} + +func (ni *NodeInfo) GetDevicesAllGPUMemory() map[int]uint { + res := map[int]uint{} + for _, device := range ni.Devices { + res[device.Id] = device.GPUTotalMemory + } + return res +} diff --git a/pkg/scheduler/plugins/predicates/predicates.go b/pkg/scheduler/plugins/predicates/predicates.go index cb08695416d..e5cdda25d6f 100644 --- a/pkg/scheduler/plugins/predicates/predicates.go +++ b/pkg/scheduler/plugins/predicates/predicates.go @@ -17,6 +17,9 @@ limitations under the License. package predicates import ( + "fmt" + + "k8s.io/api/core/v1" "k8s.io/klog" "k8s.io/kubernetes/pkg/scheduler/algorithm" "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates" @@ -37,6 +40,8 @@ const ( DiskPressurePredicate = "predicate.DiskPressureEnable" // PIDPressurePredicate is the key for enabling PID Pressure Predicate in YAML PIDPressurePredicate = "predicate.PIDPressureEnable" + // GPUSharingPredicate is the key for enabling GPU Sharing Predicate in YAML + GPUSharingPredicate = "predicate.GPUSharingEnable" ) type predicatesPlugin struct { @@ -57,12 +62,13 @@ type predicateEnable struct { memoryPressureEnable bool diskPressureEnable bool pidPressureEnable bool + gpuSharingEnable bool } func enablePredicate(args framework.Arguments) predicateEnable { /* - User Should give predicatesEnable in this format(predicate.MemoryPressureEnable, predicate.DiskPressureEnable, predicate.PIDPressureEnable. + User Should give predicatesEnable in this format(predicate.MemoryPressureEnable, predicate.DiskPressureEnable, predicate.PIDPressureEnable, predicate.GPUSharingEnable. Currently supported only for MemoryPressure, DiskPressure, PIDPressure predicate checks. actions: "reclaim, allocate, backfill, preempt" @@ -78,6 +84,7 @@ func enablePredicate(args framework.Arguments) predicateEnable { predicate.MemoryPressureEnable: true predicate.DiskPressureEnable: true predicate.PIDPressureEnable: true + predicate.GPUSharingEnable: true - name: proportion - name: nodeorder */ @@ -86,6 +93,7 @@ func enablePredicate(args framework.Arguments) predicateEnable { memoryPressureEnable: false, diskPressureEnable: false, pidPressureEnable: false, + gpuSharingEnable: false, } // Checks whether predicate.MemoryPressureEnable is provided or not, if given, modifies the value in predicateEnable struct. @@ -97,6 +105,9 @@ func enablePredicate(args framework.Arguments) predicateEnable { // Checks whether predicate.PIDPressureEnable is provided or not, if given, modifies the value in predicateEnable struct. args.GetBool(&predicate.pidPressureEnable, PIDPressurePredicate) + // Checks whether predicate.GPUSharingEnable is provided or not, if given, modifies the value in predicateEnable struct. + args.GetBool(&predicate.gpuSharingEnable, GPUSharingPredicate) + return predicate } @@ -269,6 +280,21 @@ func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) { } } + if predicate.gpuSharingEnable { + // CheckGPUSharingPredicate + fit, reasons, err = CheckNodeGPUSharingPredicate(task.Pod, node) + if err != nil { + return err + } + + klog.V(4).Infof("CheckNodeGPUSharingPredicate predicates Task <%s/%s> on Node <%s>: fit %t, err %v", + task.Namespace, task.Name, node.Name, fit, err) + + if !fit { + return api.NewFitErrorByReasons(task, node, reasons...) + } + } + var lister algorithm.PodLister lister = pl if !util.HaveAffinity(task.Pod) { @@ -294,3 +320,18 @@ func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) { } func (pp *predicatesPlugin) OnSessionClose(ssn *framework.Session) {} + +// CheckNodeGPUSharingPredicate checks if a gpu sharing pod can be scheduled on a node. +func CheckNodeGPUSharingPredicate(pod *v1.Pod, nodeInfo *api.NodeInfo) (bool, []predicates.PredicateFailureReason, error) { + + _, ok := nodeInfo.Node.Status.Capacity["volcano.sh/node-gpu-core"] + if !ok { + return false, nil, fmt.Errorf("node is not gpu sharing") + } else { + isEnoughGPUMemoryOnNode := nodeInfo.CheckPredicatePodOnGPUNode(pod) + if !isEnoughGPUMemoryOnNode { + return false, nil, fmt.Errorf("no enough gpu memory on single device") + } + } + return true, nil, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 7bd111334c9..ff65851d91e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -54,6 +54,7 @@ github.com/google/go-cmp/cmp/internal/value # github.com/google/gofuzz v1.0.0 github.com/google/gofuzz # github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 +## explicit github.com/google/shlex # github.com/google/uuid v1.1.1 github.com/google/uuid @@ -64,6 +65,7 @@ github.com/googleapis/gnostic/extensions # github.com/hashicorp/errwrap v1.0.0 github.com/hashicorp/errwrap # github.com/hashicorp/go-multierror v1.0.0 +## explicit github.com/hashicorp/go-multierror # github.com/hashicorp/golang-lru v0.5.1 github.com/hashicorp/golang-lru @@ -87,6 +89,7 @@ github.com/modern-go/concurrent # github.com/modern-go/reflect2 v1.0.1 github.com/modern-go/reflect2 # github.com/onsi/ginkgo v1.10.1 +## explicit github.com/onsi/ginkgo github.com/onsi/ginkgo/config github.com/onsi/ginkgo/internal/codelocation @@ -106,6 +109,7 @@ github.com/onsi/ginkgo/reporters/stenographer/support/go-colorable github.com/onsi/ginkgo/reporters/stenographer/support/go-isatty github.com/onsi/ginkgo/types # github.com/onsi/gomega v1.7.0 +## explicit github.com/onsi/gomega github.com/onsi/gomega/format github.com/onsi/gomega/internal/assertion @@ -121,6 +125,7 @@ github.com/onsi/gomega/types # github.com/opencontainers/go-digest v1.0.0-rc1 github.com/opencontainers/go-digest # github.com/prometheus/client_golang v0.9.2 +## explicit github.com/prometheus/client_golang/prometheus github.com/prometheus/client_golang/prometheus/internal github.com/prometheus/client_golang/prometheus/promauto @@ -137,8 +142,10 @@ github.com/prometheus/procfs/internal/util github.com/prometheus/procfs/nfs github.com/prometheus/procfs/xfs # github.com/spf13/cobra v0.0.5 +## explicit github.com/spf13/cobra # github.com/spf13/pflag v1.0.5 +## explicit github.com/spf13/pflag # go.uber.org/atomic v0.0.0-20181018215023-8dc6146f7569 go.uber.org/atomic @@ -152,6 +159,7 @@ go.uber.org/zap/internal/color go.uber.org/zap/internal/exit go.uber.org/zap/zapcore # golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975 +## explicit golang.org/x/crypto/blowfish golang.org/x/crypto/chacha20 golang.org/x/crypto/curve25519 @@ -206,6 +214,7 @@ golang.org/x/text/transform golang.org/x/text/unicode/bidi golang.org/x/text/unicode/norm # golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 +## explicit golang.org/x/time/rate # google.golang.org/appengine v1.5.0 google.golang.org/appengine @@ -262,8 +271,10 @@ gopkg.in/inf.v0 # gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 gopkg.in/tomb.v1 # gopkg.in/yaml.v2 v2.2.8 +## explicit gopkg.in/yaml.v2 # k8s.io/api v0.0.0 => k8s.io/api v0.0.0-20200131112707-d64dbec685a4 +## explicit k8s.io/api/admission/v1beta1 k8s.io/api/admissionregistration/v1 k8s.io/api/admissionregistration/v1beta1 @@ -306,6 +317,7 @@ k8s.io/api/storage/v1beta1 # k8s.io/apiextensions-apiserver v0.0.0 => k8s.io/apiextensions-apiserver v0.0.0-20200318010308-c96942bf1b43 k8s.io/apiextensions-apiserver/pkg/features # k8s.io/apimachinery v0.16.9-beta.0 => k8s.io/apimachinery v0.16.9-beta.0 +## explicit k8s.io/apimachinery/pkg/api/equality k8s.io/apimachinery/pkg/api/errors k8s.io/apimachinery/pkg/api/meta @@ -355,6 +367,7 @@ k8s.io/apimachinery/pkg/watch k8s.io/apimachinery/third_party/forked/golang/json k8s.io/apimachinery/third_party/forked/golang/reflect # k8s.io/apiserver v0.0.0 => k8s.io/apiserver v0.0.0-20200317213526-62c18ca8f66b +## explicit k8s.io/apiserver/pkg/features k8s.io/apiserver/pkg/server/healthz k8s.io/apiserver/pkg/server/httplog @@ -365,6 +378,7 @@ k8s.io/apiserver/pkg/storage/etcd3/metrics k8s.io/apiserver/pkg/storage/value k8s.io/apiserver/pkg/util/feature # k8s.io/client-go v0.16.9-beta.0 => k8s.io/client-go v0.0.0-20191016111102-bec269661e48 +## explicit k8s.io/client-go/discovery k8s.io/client-go/discovery/fake k8s.io/client-go/informers @@ -571,6 +585,7 @@ k8s.io/cloud-provider k8s.io/cloud-provider/volume k8s.io/cloud-provider/volume/helpers # k8s.io/component-base v0.0.0 => k8s.io/component-base v0.16.9-beta.0 +## explicit k8s.io/component-base/cli/flag k8s.io/component-base/featuregate k8s.io/component-base/metrics @@ -580,10 +595,12 @@ k8s.io/component-base/version k8s.io/csi-translation-lib k8s.io/csi-translation-lib/plugins # k8s.io/klog v1.0.0 => k8s.io/klog v0.4.0 +## explicit k8s.io/klog # k8s.io/kube-openapi v0.0.0-20190816220812-743ec37842bf => k8s.io/kube-openapi v0.0.0-20190816220812-743ec37842bf k8s.io/kube-openapi/pkg/util/proto # k8s.io/kubernetes v1.16.2 +## explicit k8s.io/kubernetes/pkg/api/legacyscheme k8s.io/kubernetes/pkg/api/service k8s.io/kubernetes/pkg/api/v1/pod @@ -641,4 +658,34 @@ k8s.io/utils/pointer k8s.io/utils/strings k8s.io/utils/trace # sigs.k8s.io/yaml v1.1.0 +## explicit sigs.k8s.io/yaml +# k8s.io/api => k8s.io/api v0.0.0-20200131112707-d64dbec685a4 +# k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.0.0-20200318010308-c96942bf1b43 +# k8s.io/apimachinery => k8s.io/apimachinery v0.16.9-beta.0 +# k8s.io/apiserver => k8s.io/apiserver v0.0.0-20200317213526-62c18ca8f66b +# k8s.io/cli-runtime => k8s.io/cli-runtime v0.0.0-20200228051546-a99b7f303206 +# k8s.io/client-go => k8s.io/client-go v0.0.0-20191016111102-bec269661e48 +# k8s.io/cloud-provider => k8s.io/cloud-provider v0.0.0-20200228053305-d368059ead1c +# k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.0.0-20200228053038-8bbe56c26119 +# k8s.io/code-generator => k8s.io/code-generator v0.16.9-beta.0 +# k8s.io/component-base => k8s.io/component-base v0.16.9-beta.0 +# k8s.io/cri-api => k8s.io/cri-api v0.16.9-beta.0 +# k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.0.0-20200228053533-d83d9540380e +# k8s.io/gengo => k8s.io/gengo v0.0.0-20190822140433-26a664648505 +# k8s.io/heapster => k8s.io/heapster v1.2.0-beta.1 +# k8s.io/klog => k8s.io/klog v0.4.0 +# k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.0.0-20200228045702-699a8508e8d2 +# k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.0.0-20200228052810-d1f697fa5379 +# k8s.io/kube-openapi => k8s.io/kube-openapi v0.0.0-20190816220812-743ec37842bf +# k8s.io/kube-proxy => k8s.io/kube-proxy v0.0.0-20200228052049-a6119a74791a +# k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.0.0-20200228052541-0195892e09a0 +# k8s.io/kubectl => k8s.io/kubectl v0.0.0-20200228054512-419760c9116d +# k8s.io/kubelet => k8s.io/kubelet v0.0.0-20200131120825-905bd8eea4c4 +# k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.0.0-20200317194326-32f35d4b17b4 +# k8s.io/metrics => k8s.io/metrics v0.0.0-20200228051301-c298383a72cb +# k8s.io/node-api => k8s.io/node-api v0.0.0-20200228054017-076d0760c5f1 +# k8s.io/repo-infra => k8s.io/repo-infra v0.0.0-20181204233714-00fe14e3d1a3 +# k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.0.0-20200228050048-9291aec797aa +# k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.0.0-20200228051820-6fc9c909879b +# k8s.io/sample-controller => k8s.io/sample-controller v0.0.0-20200228050512-c37f031c4cf9