diff --git a/.github/actions/minikube-setup/action.yml b/.github/actions/minikube-setup/action.yml index f8ed073551e..2b7ea1adcb8 100644 --- a/.github/actions/minikube-setup/action.yml +++ b/.github/actions/minikube-setup/action.yml @@ -12,7 +12,7 @@ runs: - name: Setup Minikube uses: medyagh/setup-minikube@latest with: - minikube-version: '1.31.1' + minikube-version: '1.32.0' kubernetes-version: 'v1.27.4' driver: 'none' network-plugin: 'cni' diff --git a/README.md b/README.md index 432a5e324cb..bb4d3ad1230 100644 --- a/README.md +++ b/README.md @@ -5,20 +5,19 @@ [![LICENSE](https://img.shields.io/github/license/opendatahub-io/kserve.svg)](https://github.com/opendatahub-io/kserve/blob/master/LICENSE) [![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://github.com/kserve/community/blob/main/README.md#questions-and-issues) -KServe provides a Kubernetes [Custom Resource Definition](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) for serving machine learning (ML) models on arbitrary frameworks. It aims to solve production model serving use cases by providing performant, high abstraction interfaces for common ML frameworks like Tensorflow, XGBoost, ScikitLearn, PyTorch, and ONNX. +KServe provides a Kubernetes [Custom Resource Definition](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) for serving predictive and generative machine learning (ML) models. It aims to solve production model serving use cases by providing high abstraction interfaces for Tensorflow, XGBoost, ScikitLearn, PyTorch, Huggingface Transformer/LLM models using standardized data plane protocols. It encapsulates the complexity of autoscaling, networking, health checking, and server configuration to bring cutting edge serving features like GPU Autoscaling, Scale to Zero, and Canary Rollouts to your ML deployments. It enables a simple, pluggable, and complete story for Production ML Serving including prediction, pre-processing, post-processing and explainability. KServe is being [used across various organizations.](https://kserve.github.io/website/master/community/adopters/) For more details, visit the [KServe website](https://kserve.github.io/website/). -![KServe](/docs/diagrams/kserve.png) +![KServe](/docs/diagrams/kserve_new.png) -_Since 0.7 [KFServing is rebranded to KServe](https://blog.kubeflow.org/release/official/2021/09/27/kfserving-transition.html), we still support the RTS release -[0.6.x](https://github.com/kserve/kserve/tree/release-0.6), please refer to corresponding release branch for docs_. +_Since 0.7 [KFServing is rebranded to KServe](https://blog.kubeflow.org/release/official/2021/09/27/kfserving-transition.html). ### Why KServe? -- KServe is a standard, cloud agnostic **Model Inference Platform** on Kubernetes, built for highly scalable use cases. -- Provides performant, **standardized inference protocol** across ML frameworks. +- KServe is a standard, cloud agnostic **Model Inference Platform** for serving predictive and generative AI models on Kubernetes, built for highly scalable use cases. +- Provides performant, **standardized inference protocol** across ML frameworks including OpenAI specification for generative models. - Support modern **serverless inference workload** with **request based autoscaling including scale-to-zero** on **CPU and GPU**. - Provides **high scalability, density packing and intelligent routing** using **ModelMesh**. 
- **Simple and pluggable production serving** for **inference**, **pre/post processing**, **monitoring** and **explainability**. diff --git a/charts/kserve-resources/templates/clusterrole.yaml b/charts/kserve-resources/templates/clusterrole.yaml index 46f82a08840..b55328a8277 100644 --- a/charts/kserve-resources/templates/clusterrole.yaml +++ b/charts/kserve-resources/templates/clusterrole.yaml @@ -63,9 +63,7 @@ rules: verbs: - create - get - - list - update - - watch - apiGroups: - "" resources: @@ -102,18 +100,14 @@ rules: - create - delete - get - - list - patch - update - - watch - apiGroups: - "" resources: - serviceaccounts verbs: - get - - list - - watch - apiGroups: - "" resources: diff --git a/cmd/manager/main.go b/cmd/manager/main.go index 688b2f467ab..95e4d9a003b 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -23,15 +23,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - graphcontroller "github.com/kserve/kserve/pkg/controller/v1alpha1/inferencegraph" - trainedmodelcontroller "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel" - "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig" - v1beta1controller "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice" - "github.com/kserve/kserve/pkg/webhook/admission/pod" - "github.com/kserve/kserve/pkg/webhook/admission/servingruntime" istio_networking "istio.io/api/networking/v1beta1" istioclientv1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1" v1 "k8s.io/api/core/v1" @@ -42,13 +33,22 @@ import ( "k8s.io/client-go/tools/record" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" "sigs.k8s.io/controller-runtime/pkg/log/zap" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/manager/signals" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" + + "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + graphcontroller "github.com/kserve/kserve/pkg/controller/v1alpha1/inferencegraph" + trainedmodelcontroller "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel" + "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig" + v1beta1controller "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice" + "github.com/kserve/kserve/pkg/webhook/admission/pod" + "github.com/kserve/kserve/pkg/webhook/admission/servingruntime" ) var ( @@ -112,6 +112,13 @@ func main() { os.Exit(1) } + // Setup clientset to directly talk to the api server + clientSet, err := kubernetes.NewForConfig(cfg) + if err != nil { + setupLog.Error(err, "unable to create clientSet") + os.Exit(1) + } + // Create a new Cmd to provide shared dependencies and start components setupLog.Info("Setting up manager") mgr, err := manager.New(cfg, manager.Options{ @@ -142,17 +149,12 @@ func main() { os.Exit(1) } - kubeClient, err := client.New(mgr.GetConfig(), client.Options{Scheme: mgr.GetScheme()}) - if err != nil { - setupLog.Error(err, "unable to create new client.") - } - - deployConfig, err := v1beta1.NewDeployConfig(kubeClient) + deployConfig, err := 
v1beta1.NewDeployConfig(clientSet) if err != nil { setupLog.Error(err, "unable to get deploy config.") os.Exit(1) } - ingressConfig, err := v1beta1.NewIngressConfig(kubeClient) + ingressConfig, err := v1beta1.NewIngressConfig(clientSet) if err != nil { setupLog.Error(err, "unable to get ingress config.") os.Exit(1) @@ -181,16 +183,12 @@ func main() { // Setup all Controllers setupLog.Info("Setting up v1beta1 controller") eventBroadcaster := record.NewBroadcaster() - clientSet, err := kubernetes.NewForConfig(mgr.GetConfig()) - if err != nil { - setupLog.Error(err, "unable to create clientSet") - os.Exit(1) - } eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: clientSet.CoreV1().Events("")}) if err = (&v1beta1controller.InferenceServiceReconciler{ - Client: mgr.GetClient(), - Log: ctrl.Log.WithName("v1beta1Controllers").WithName("InferenceService"), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Clientset: clientSet, + Log: ctrl.Log.WithName("v1beta1Controllers").WithName("InferenceService"), + Scheme: mgr.GetScheme(), Recorder: eventBroadcaster.NewRecorder( mgr.GetScheme(), v1.EventSource{Component: "v1beta1Controllers"}), }).SetupWithManager(mgr, deployConfig, ingressConfig); err != nil { @@ -207,7 +205,7 @@ func main() { Log: ctrl.Log.WithName("v1beta1Controllers").WithName("TrainedModel"), Scheme: mgr.GetScheme(), Recorder: eventBroadcaster.NewRecorder(mgr.GetScheme(), v1.EventSource{Component: "v1beta1Controllers"}), - ModelConfigReconciler: modelconfig.NewModelConfigReconciler(mgr.GetClient(), mgr.GetScheme()), + ModelConfigReconciler: modelconfig.NewModelConfigReconciler(mgr.GetClient(), clientSet, mgr.GetScheme()), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "v1beta1Controllers", "TrainedModel") os.Exit(1) @@ -218,10 +216,11 @@ func main() { setupLog.Info("Setting up InferenceGraph controller") inferenceGraphEventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: clientSet.CoreV1().Events("")}) if err = (&graphcontroller.InferenceGraphReconciler{ - Client: mgr.GetClient(), - Log: ctrl.Log.WithName("v1alpha1Controllers").WithName("InferenceGraph"), - Scheme: mgr.GetScheme(), - Recorder: eventBroadcaster.NewRecorder(mgr.GetScheme(), v1.EventSource{Component: "InferenceGraphController"}), + Client: mgr.GetClient(), + Clientset: clientSet, + Log: ctrl.Log.WithName("v1alpha1Controllers").WithName("InferenceGraph"), + Scheme: mgr.GetScheme(), + Recorder: eventBroadcaster.NewRecorder(mgr.GetScheme(), v1.EventSource{Component: "InferenceGraphController"}), }).SetupWithManager(mgr, deployConfig); err != nil { setupLog.Error(err, "unable to create controller", "v1alpha1Controllers", "InferenceGraph") os.Exit(1) @@ -232,7 +231,7 @@ func main() { setupLog.Info("registering webhooks to the webhook server") hookServer.Register("/mutate-pods", &webhook.Admission{ - Handler: &pod.Mutator{Client: mgr.GetClient(), Decoder: admission.NewDecoder(mgr.GetScheme())}, + Handler: &pod.Mutator{Client: mgr.GetClient(), Clientset: clientSet, Decoder: admission.NewDecoder(mgr.GetScheme())}, }) //log.Info("registering cluster serving runtime validator webhook to the webhook server") diff --git a/config/default/inferenceservice_conversion_webhook.yaml b/config/default/inferenceservice_conversion_webhook.yaml deleted file mode 100644 index 7f6916b8871..00000000000 --- a/config/default/inferenceservice_conversion_webhook.yaml +++ /dev/null @@ -1,21 +0,0 @@ -# The following patch enables conversion webhook for CRD -# 
CRD conversion requires k8s 1.16 or later. -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: inferenceservices.serving.kserve.io - creationTimestamp: null -spec: - preserveUnknownFields: false - conversion: - strategy: Webhook - webhook: - conversionReviewVersions: ["v1beta1"] - clientConfig: - # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, - # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) - caBundle: Cg== - service: - namespace: $(kserveNamespace) - name: $(webhookServiceName) - path: /convert diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 3e660044d6a..4b419f7569a 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -32,11 +32,6 @@ replacements: name: kserve-webhook-server-service version: v1 targets: -# - fieldPaths: -# - spec.conversion.webhook.clientConfig.service.name -# select: -# kind: CustomResourceDefinition -# name: inferenceservices.serving.kserve.io - fieldPaths: - webhooks.*.clientConfig.service.name select: @@ -83,11 +78,6 @@ replacements: name: kserve-controller-manager version: v1 targets: -# - fieldPaths: -# - spec.conversion.webhook.clientConfig.service.namespace -# select: -# kind: CustomResourceDefinition -# name: inferenceservices.serving.kserve.io - fieldPaths: - webhooks.*.clientConfig.service.namespace select: @@ -205,7 +195,6 @@ patches: - path: servingruntime_validationwebhook_cainjection_patch.yaml - path: svc_webhook_cainjection_patch.yaml - path: manager_resources_patch.yaml -#- path: inferenceservice_conversion_webhook.yaml - path: cainjection_conversion_webhook.yaml # Since OpenShift serving-certificates are being used, # remove CA bundle placeholders diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 9e2c6226146..1974fe1278f 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -48,9 +48,7 @@ rules: verbs: - create - get - - list - update - - watch - apiGroups: - "" resources: @@ -87,18 +85,14 @@ rules: - create - delete - get - - list - patch - update - - watch - apiGroups: - "" resources: - serviceaccounts verbs: - get - - list - - watch - apiGroups: - "" resources: diff --git a/docs/diagrams/kserve_new.png b/docs/diagrams/kserve_new.png new file mode 100644 index 00000000000..49a05f64b1d Binary files /dev/null and b/docs/diagrams/kserve_new.png differ diff --git a/go.mod b/go.mod index c0924dc4d94..beb96248709 100644 --- a/go.mod +++ b/go.mod @@ -5,13 +5,13 @@ go 1.21 require ( cloud.google.com/go/storage v1.35.1 github.com/aws/aws-sdk-go v1.48.0 - github.com/cloudevents/sdk-go/v2 v2.14.0 + github.com/cloudevents/sdk-go/v2 v2.15.2 github.com/fsnotify/fsnotify v1.7.0 github.com/getkin/kin-openapi v0.120.0 github.com/go-logr/logr v1.3.0 github.com/gofrs/uuid/v5 v5.0.0 github.com/google/go-cmp v0.6.0 - github.com/google/uuid v1.4.0 + github.com/google/uuid v1.6.0 github.com/googleapis/google-cloud-go-testing v0.0.0-20210719221736-1c9a4c676720 github.com/json-iterator/go v1.1.12 github.com/kelseyhightower/envconfig v1.4.0 @@ -22,7 +22,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 github.com/tidwall/gjson v1.17.0 - go.uber.org/zap v1.26.0 + go.uber.org/zap v1.27.0 gomodules.xyz/jsonpatch/v2 v2.4.0 google.golang.org/api v0.151.0 google.golang.org/protobuf v1.32.0 diff --git a/go.sum b/go.sum index afcc4d6bd51..45cf9d3c617 100644 --- a/go.sum +++ b/go.sum @@ -76,6 +76,8 @@ 
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMn github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cloudevents/sdk-go/v2 v2.14.0 h1:Nrob4FwVgi5L4tV9lhjzZcjYqFVyJzsA56CwPaPfv6s= github.com/cloudevents/sdk-go/v2 v2.14.0/go.mod h1:xDmKfzNjM8gBvjaF8ijFjM1VYOVUEeUfapHMUX1T5To= +github.com/cloudevents/sdk-go/v2 v2.15.2 h1:54+I5xQEnI73RBhWHxbI1XJcqOFOVJN85vb41+8mHUc= +github.com/cloudevents/sdk-go/v2 v2.15.2/go.mod h1:lL7kSWAE/V8VI4Wh0jbL2v/jvqsm6tjmaQBSvxcv4uE= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -211,6 +213,8 @@ github.com/google/s2a-go v0.1.7/go.mod h1:50CgR4k1jNlWBu4UfS4AcfhVe1r6pdZPygJ3R8 github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfFxPRy3Bf7vr3h0cechB90XaQs= github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= @@ -397,6 +401,8 @@ go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN8 go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= diff --git a/hack/quick_install.sh b/hack/quick_install.sh index d787ce2a147..54728e73833 100755 --- a/hack/quick_install.sh +++ b/hack/quick_install.sh @@ -30,10 +30,10 @@ while getopts ":hsr" option; do esac done -export ISTIO_VERSION=1.19.4 +export ISTIO_VERSION=1.20.4 export ISTIO_DIR=istio-${ISTIO_VERSION} -export KNATIVE_SERVING_VERSION=knative-v1.10.1 -export KNATIVE_ISTIO_VERSION=knative-v1.10.0 +export KNATIVE_SERVING_VERSION=knative-v1.13.1 +export KNATIVE_ISTIO_VERSION=knative-v1.13.1 export KSERVE_VERSION=v0.12.0 export CERT_MANAGER_VERSION=v1.9.0 export SCRIPT_DIR="$( dirname -- "${BASH_SOURCE[0]}" )" diff --git a/pkg/apis/serving/v1beta1/configmap.go b/pkg/apis/serving/v1beta1/configmap.go index 557a7f8a6cc..71182f53ee6 100644 --- a/pkg/apis/serving/v1beta1/configmap.go +++ b/pkg/apis/serving/v1beta1/configmap.go @@ -22,10 +22,11 @@ import ( "fmt" "text/template" - "github.com/kserve/kserve/pkg/constants" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + + 
"github.com/kserve/kserve/pkg/constants" ) // ConfigMap Keys @@ -83,9 +84,8 @@ type DeployConfig struct { DefaultDeploymentMode string `json:"defaultDeploymentMode,omitempty"` } -func NewInferenceServicesConfig(cli client.Client) (*InferenceServicesConfig, error) { - configMap := &v1.ConfigMap{} - err := cli.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, configMap) +func NewInferenceServicesConfig(clientset kubernetes.Interface) (*InferenceServicesConfig, error) { + configMap, err := clientset.CoreV1().ConfigMaps(constants.KServeNamespace).Get(context.TODO(), constants.InferenceServiceConfigMapName, metav1.GetOptions{}) if err != nil { return nil, err } @@ -100,9 +100,8 @@ func NewInferenceServicesConfig(cli client.Client) (*InferenceServicesConfig, er return icfg, nil } -func NewIngressConfig(cli client.Client) (*IngressConfig, error) { - configMap := &v1.ConfigMap{} - err := cli.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, configMap) +func NewIngressConfig(clientset kubernetes.Interface) (*IngressConfig, error) { + configMap, err := clientset.CoreV1().ConfigMaps(constants.KServeNamespace).Get(context.TODO(), constants.InferenceServiceConfigMapName, metav1.GetOptions{}) if err != nil { return nil, err } @@ -123,10 +122,10 @@ func NewIngressConfig(cli client.Client) (*IngressConfig, error) { // For now simply check that this is a valid template. _, err := template.New("path-template").Parse(ingressConfig.PathTemplate) if err != nil { - return nil, fmt.Errorf("Invalid ingress config, unable to parse pathTemplate: %w", err) + return nil, fmt.Errorf("invalid ingress config, unable to parse pathTemplate: %w", err) } if ingressConfig.IngressDomain == "" { - return nil, fmt.Errorf("Invalid ingress config - igressDomain is required if pathTemplate is given") + return nil, fmt.Errorf("invalid ingress config - ingressDomain is required if pathTemplate is given") } } } @@ -150,15 +149,14 @@ func getComponentConfig(key string, configMap *v1.ConfigMap, componentConfig int if data, ok := configMap.Data[key]; ok { err := json.Unmarshal([]byte(data), componentConfig) if err != nil { - return fmt.Errorf("Unable to unmarshall %v json string due to %w ", key, err) + return fmt.Errorf("unable to unmarshall %v json string due to %w ", key, err) } } return nil } -func NewDeployConfig(cli client.Client) (*DeployConfig, error) { - configMap := &v1.ConfigMap{} - err := cli.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, configMap) +func NewDeployConfig(clientset kubernetes.Interface) (*DeployConfig, error) { + configMap, err := clientset.CoreV1().ConfigMaps(constants.KServeNamespace).Get(context.TODO(), constants.InferenceServiceConfigMapName, metav1.GetOptions{}) if err != nil { return nil, err } @@ -166,17 +164,17 @@ func NewDeployConfig(cli client.Client) (*DeployConfig, error) { if deploy, ok := configMap.Data[DeployConfigName]; ok { err := json.Unmarshal([]byte(deploy), &deployConfig) if err != nil { - return nil, fmt.Errorf("Unable to parse deploy config json: %w", err) + return nil, fmt.Errorf("unable to parse deploy config json: %w", err) } if deployConfig.DefaultDeploymentMode == "" { - return nil, fmt.Errorf("Invalid deploy config, defaultDeploymentMode is required.") + return nil, fmt.Errorf("invalid deploy config, defaultDeploymentMode is required") } if 
deployConfig.DefaultDeploymentMode != string(constants.Serverless) && deployConfig.DefaultDeploymentMode != string(constants.RawDeployment) && deployConfig.DefaultDeploymentMode != string(constants.ModelMeshDeployment) { - return nil, fmt.Errorf("Invalid deployment mode. Supported modes are Serverless," + + return nil, fmt.Errorf("invalid deployment mode. Supported modes are Serverless," + " RawDeployment and ModelMesh") } } diff --git a/pkg/apis/serving/v1beta1/configmap_test.go b/pkg/apis/serving/v1beta1/configmap_test.go index e0e7e1ac248..1410eeee8ff 100644 --- a/pkg/apis/serving/v1beta1/configmap_test.go +++ b/pkg/apis/serving/v1beta1/configmap_test.go @@ -25,6 +25,7 @@ import ( "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + fakeclientset "k8s.io/client-go/kubernetes/fake" "sigs.k8s.io/controller-runtime/pkg/client" fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -78,27 +79,30 @@ func createFakeClient() client.WithWatch { func TestNewInferenceServiceConfig(t *testing.T) { g := gomega.NewGomegaWithT(t) - fakeClient := createFakeClient() - - isvcConfig, err := NewInferenceServicesConfig(fakeClient) + clientset := fakeclientset.NewSimpleClientset(&v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, + }) + isvcConfig, err := NewInferenceServicesConfig(clientset) g.Expect(err).Should(gomega.BeNil()) g.Expect(isvcConfig).ShouldNot(gomega.BeNil()) } func TestNewIngressConfig(t *testing.T) { g := gomega.NewGomegaWithT(t) - fakeClient := createFakeClient() - - ingressCfg, err := NewIngressConfig(fakeClient) + clientset := fakeclientset.NewSimpleClientset(&v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, + }) + ingressCfg, err := NewIngressConfig(clientset) g.Expect(err).Should(gomega.BeNil()) g.Expect(ingressCfg).ShouldNot(gomega.BeNil()) } func TestNewDeployConfig(t *testing.T) { g := gomega.NewGomegaWithT(t) - fakeClient := createFakeClient() - - deployConfig, err := NewDeployConfig(fakeClient) + clientset := fakeclientset.NewSimpleClientset(&v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, + }) + deployConfig, err := NewDeployConfig(clientset) g.Expect(err).Should(gomega.BeNil()) g.Expect(deployConfig).ShouldNot(gomega.BeNil()) } diff --git a/pkg/apis/serving/v1beta1/inference_service_defaults.go b/pkg/apis/serving/v1beta1/inference_service_defaults.go index 25cd18395aa..5193eb51cb2 100644 --- a/pkg/apis/serving/v1beta1/inference_service_defaults.go +++ b/pkg/apis/serving/v1beta1/inference_service_defaults.go @@ -21,14 +21,15 @@ import ( "reflect" "strconv" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/utils" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "sigs.k8s.io/controller-runtime/pkg/client" + "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client/config" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook" + + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/utils" ) var ( @@ -65,15 +66,21 @@ func setResourceRequirementDefaults(requirements *v1.ResourceRequirements) { func (isvc *InferenceService) Default() { mutatorLogger.Info("Defaulting InferenceService", "namespace", isvc.Namespace, "isvc", isvc.Spec.Predictor) - cli, err := 
client.New(config.GetConfigOrDie(), client.Options{}) + cfg, err := config.GetConfig() if err != nil { - panic("Failed to create client in defaulter") + mutatorLogger.Error(err, "unable to set up client config") + panic(err) + } + clientSet, err := kubernetes.NewForConfig(cfg) + if err != nil { + mutatorLogger.Error(err, "unable to create clientSet") + panic(err) } - configMap, err := NewInferenceServicesConfig(cli) + configMap, err := NewInferenceServicesConfig(clientSet) if err != nil { panic(err) } - deployConfig, err := NewDeployConfig(cli) + deployConfig, err := NewDeployConfig(clientSet) if err != nil { panic(err) } diff --git a/pkg/controller/v1alpha1/inferencegraph/controller.go b/pkg/controller/v1alpha1/inferencegraph/controller.go index a84e43c7e9b..987621a1232 100644 --- a/pkg/controller/v1alpha1/inferencegraph/controller.go +++ b/pkg/controller/v1alpha1/inferencegraph/controller.go @@ -25,20 +25,18 @@ import ( "context" "encoding/json" "fmt" + "github.com/go-logr/logr" - v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierr "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "knative.dev/pkg/apis" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" @@ -46,14 +44,21 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/reconcile" + + v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" ) // InferenceGraphReconciler reconciles a InferenceGraph object type InferenceGraphReconciler struct { client.Client - Log logr.Logger - Scheme *runtime.Scheme - Recorder record.EventRecorder + Clientset kubernetes.Interface + Log logr.Logger + Scheme *runtime.Scheme + Recorder record.EventRecorder } // InferenceGraphState describes the Readiness of the InferenceGraph @@ -126,8 +131,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque } r.Log.Info("Reconciling inference graph", "apiVersion", graph.APIVersion, "graph", graph.Name) - configMap := &v1.ConfigMap{} - err := r.Client.Get(ctx, types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, configMap) + configMap, err := r.Clientset.CoreV1().ConfigMaps(constants.KServeNamespace).Get(context.TODO(), constants.InferenceServiceConfigMapName, metav1.GetOptions{}) if err != nil { r.Log.Error(err, "Failed to find config map", "name", constants.InferenceServiceConfigMapName) return reconcile.Result{}, err @@ -160,7 +164,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque } } } - deployConfig, err := v1beta1api.NewDeployConfig(r.Client) + deployConfig, err := v1beta1api.NewDeployConfig(r.Clientset) if err != nil 
{ return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig") } @@ -169,7 +173,7 @@ func (r *InferenceGraphReconciler) Reconcile(ctx context.Context, req ctrl.Reque r.Log.Info("Inference graph deployment ", "deployment mode ", deploymentMode) if deploymentMode == constants.RawDeployment { //Create inference graph resources such as deployment, service, hpa in raw deployment mode - deployment, url, err := handleInferenceGraphRawDeployment(r.Client, r.Scheme, graph, routerConfig) + deployment, url, err := handleInferenceGraphRawDeployment(r.Client, r.Clientset, r.Scheme, graph, routerConfig) if err != nil { return ctrl.Result{}, errors.Wrapf(err, "fails to reconcile inference graph raw deployment") diff --git a/pkg/controller/v1alpha1/inferencegraph/raw_ig.go b/pkg/controller/v1alpha1/inferencegraph/raw_ig.go index 32014bd9585..cfb93ae4507 100644 --- a/pkg/controller/v1alpha1/inferencegraph/raw_ig.go +++ b/pkg/controller/v1alpha1/inferencegraph/raw_ig.go @@ -18,21 +18,24 @@ package inferencegraph import ( "encoding/json" - v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" + "strings" + "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" "knative.dev/pkg/apis" knapis "knative.dev/pkg/apis" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logf "sigs.k8s.io/controller-runtime/pkg/log" - "strings" + + v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" ) var logger = logf.Log.WithName("InferenceGraphRawDeployer") @@ -124,14 +127,15 @@ Handles bulk of raw deployment logic for Inference graph controller 4. Set controller referneces 5. Finally reconcile */ -func handleInferenceGraphRawDeployment(cl client.Client, scheme *runtime.Scheme, graph *v1alpha1api.InferenceGraph, routerConfig *RouterConfig) (*appsv1.Deployment, *knapis.URL, error) { +func handleInferenceGraphRawDeployment(cl client.Client, clientset kubernetes.Interface, scheme *runtime.Scheme, + graph *v1alpha1api.InferenceGraph, routerConfig *RouterConfig) (*appsv1.Deployment, *knapis.URL, error) { // create desired service object. 
desiredSvc := createInferenceGraphPodSpec(graph, routerConfig) objectMeta, componentExtSpec := constructForRawDeployment(graph) //create the reconciler - reconciler, err := raw.NewRawKubeReconciler(cl, scheme, objectMeta, &componentExtSpec, desiredSvc) + reconciler, err := raw.NewRawKubeReconciler(cl, clientset, scheme, objectMeta, &componentExtSpec, desiredSvc) if err != nil { return nil, reconciler.URL, errors.Wrapf(err, "fails to create NewRawKubeReconciler for inference graph") diff --git a/pkg/controller/v1alpha1/inferencegraph/suite_test.go b/pkg/controller/v1alpha1/inferencegraph/suite_test.go index 7a920d380df..30f0a941806 100644 --- a/pkg/controller/v1alpha1/inferencegraph/suite_test.go +++ b/pkg/controller/v1alpha1/inferencegraph/suite_test.go @@ -18,26 +18,28 @@ package inferencegraph import ( "context" - kfservingv1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - pkgtest "github.com/kserve/kserve/pkg/testing" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" v1 "k8s.io/api/core/v1" netv1 "k8s.io/api/networking/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - "testing" - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "sigs.k8s.io/controller-runtime/pkg/client" + kfservingv1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + pkgtest "github.com/kserve/kserve/pkg/testing" ) // These tests use Ginkgo (BDD-style Go testing framework). Refer to @@ -49,6 +51,7 @@ var ( testEnv *envtest.Environment cancel context.CancelFunc ctx context.Context + clientset kubernetes.Interface ) func TestAPIs(t *testing.T) { @@ -78,6 +81,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) Expect(k8sClient).ToNot(BeNil()) + clientset, err = kubernetes.NewForConfig(cfg) + Expect(err).ToNot(HaveOccurred()) + Expect(clientset).ToNot(BeNil()) + //Create namespace kfservingNamespaceObj := &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{ @@ -97,10 +104,11 @@ var _ = BeforeSuite(func() { deployConfig := &v1beta1.DeployConfig{DefaultDeploymentMode: "Serverless"} err = (&InferenceGraphReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), - Log: ctrl.Log.WithName("V1alpha1InferenceGraphController"), - Recorder: k8sManager.GetEventRecorderFor("V1alpha1InferenceGraphController"), + Client: k8sClient, + Clientset: clientset, + Scheme: k8sClient.Scheme(), + Log: ctrl.Log.WithName("V1alpha1InferenceGraphController"), + Recorder: k8sManager.GetEventRecorderFor("V1alpha1InferenceGraphController"), }).SetupWithManager(k8sManager, deployConfig) Expect(err).ToNot(HaveOccurred()) diff --git a/pkg/controller/v1alpha1/trainedmodel/controller.go b/pkg/controller/v1alpha1/trainedmodel/controller.go index f9430a8a3fb..86ccc8743ea 100644 --- a/pkg/controller/v1alpha1/trainedmodel/controller.go +++ b/pkg/controller/v1alpha1/trainedmodel/controller.go @@ -18,9 +18,9 @@ limitations under the License. 
// +kubebuilder:rbac:groups=serving.kserve.io,resources=trainedmodels/status,verbs=get;update;patch // +kubebuilder:rbac:groups=serving.knative.dev,resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=serving.knative.dev,resources=services/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch -// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;update -// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get +// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;update +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=namespaces,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete package trainedmodel @@ -30,17 +30,12 @@ import ( "fmt" "github.com/go-logr/logr" - v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig" - v1beta1utils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" - "github.com/kserve/kserve/pkg/utils" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "knative.dev/pkg/apis" duckv1 "knative.dev/pkg/apis/duck/v1" @@ -48,6 +43,13 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" + + v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig" + v1beta1utils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" + "github.com/kserve/kserve/pkg/utils" ) const ( @@ -62,6 +64,7 @@ var log = logf.Log.WithName("TrainedModel controller") // TrainedModelReconciler reconciles a TrainedModel object type TrainedModelReconciler struct { client.Client + Clientset kubernetes.Interface Log logr.Logger Scheme *runtime.Scheme Recorder record.EventRecorder diff --git a/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig/modelconfig_reconciler.go b/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig/modelconfig_reconciler.go index 372448d391d..231119f19bd 100644 --- a/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig/modelconfig_reconciler.go +++ b/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig/modelconfig_reconciler.go @@ -20,29 +20,32 @@ import ( "context" "fmt" - v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/sharding/memory" - "github.com/kserve/kserve/pkg/modelconfig" - corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" 
logf "sigs.k8s.io/controller-runtime/pkg/log" + + v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/sharding/memory" + "github.com/kserve/kserve/pkg/modelconfig" ) var log = logf.Log.WithName("Reconciler") type ModelConfigReconciler struct { - client client.Client - scheme *runtime.Scheme + client client.Client + clientset kubernetes.Interface + scheme *runtime.Scheme } -func NewModelConfigReconciler(client client.Client, scheme *runtime.Scheme) *ModelConfigReconciler { +func NewModelConfigReconciler(client client.Client, clientset kubernetes.Interface, scheme *runtime.Scheme) *ModelConfigReconciler { return &ModelConfigReconciler{ - client: client, - scheme: scheme, + client: client, + clientset: clientset, + scheme: scheme, } } @@ -52,9 +55,9 @@ func (c *ModelConfigReconciler) Reconcile(req ctrl.Request, tm *v1alpha1api.Trai shardId := shardStrategy.GetOrAssignShard(tm) // Use tm's parent InferenceService field to get the model modelConfig modelConfigName := constants.ModelConfigName(tm.Spec.InferenceService, shardId) - desiredModelConfig := &corev1.ConfigMap{} - log.Info("Reconciling modelConfig", "modelConfigName", modelConfigName) - if err := c.client.Get(context.TODO(), types.NamespacedName{Name: modelConfigName, Namespace: req.Namespace}, desiredModelConfig); err != nil { + log.Info("Reconciling modelConfig", "modelConfigName", modelConfigName, "namespace", req.Namespace) + desiredModelConfig, err := c.clientset.CoreV1().ConfigMaps(req.Namespace).Get(context.TODO(), modelConfigName, metav1.GetOptions{}) + if err != nil { log.Error(err, "Failed to find model ConfigMap to reconcile for InferenceService", "name", tm.Spec.Model, "namespace", req.Namespace) // Error reading the object - requeue the request. return err diff --git a/pkg/controller/v1alpha1/trainedmodel/suite_test.go b/pkg/controller/v1alpha1/trainedmodel/suite_test.go index 5f7fb259328..5f44b0f43f7 100644 --- a/pkg/controller/v1alpha1/trainedmodel/suite_test.go +++ b/pkg/controller/v1alpha1/trainedmodel/suite_test.go @@ -20,28 +20,27 @@ import ( "context" "testing" - pkgtest "github.com/kserve/kserve/pkg/testing" - "k8s.io/client-go/tools/record" - - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - ctrl "sigs.k8s.io/controller-runtime" - - kfservingv1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - kfservingv1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" + "k8s.io/client-go/tools/record" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + kfservingv1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + kfservingv1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/reconcilers/modelconfig" + pkgtest "github.com/kserve/kserve/pkg/testing" // +kubebuilder:scaffold:imports ) @@ -54,6 +53,7 @@ var ( testEnv *envtest.Environment cancel context.CancelFunc ctx context.Context + clientset kubernetes.Interface ) func TestAPIs(t *testing.T) { @@ -84,6 +84,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) Expect(k8sClient).ToNot(BeNil()) + clientset, err = kubernetes.NewForConfig(cfg) + Expect(err).ToNot(HaveOccurred()) + Expect(clientset).ToNot(BeNil()) + //Create namespace kfservingNamespaceObj := &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{ @@ -101,10 +105,11 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) err = (&TrainedModelReconciler{ Client: k8sManager.GetClient(), + Clientset: clientset, Scheme: scheme.Scheme, Log: ctrl.Log.WithName("v1beta1TrainedModelController"), Recorder: record.NewBroadcaster().NewRecorder(scheme.Scheme, v1.EventSource{Component: "v1betaController"}), - ModelConfigReconciler: modelconfig.NewModelConfigReconciler(k8sManager.GetClient(), scheme.Scheme), + ModelConfigReconciler: modelconfig.NewModelConfigReconciler(k8sManager.GetClient(), clientset, scheme.Scheme), }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) defer GinkgoRecover() diff --git a/pkg/controller/v1beta1/inferenceservice/components/explainer.go b/pkg/controller/v1beta1/inferenceservice/components/explainer.go index 4996718ddc3..f000010f5f6 100644 --- a/pkg/controller/v1beta1/inferenceservice/components/explainer.go +++ b/pkg/controller/v1beta1/inferenceservice/components/explainer.go @@ -21,22 +21,24 @@ import ( "fmt" "github.com/go-logr/logr" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/knative" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" - isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" - "github.com/kserve/kserve/pkg/credentials" - "github.com/kserve/kserve/pkg/utils" "github.com/pkg/errors" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/knative" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" + isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" + 
"github.com/kserve/kserve/pkg/credentials" + "github.com/kserve/kserve/pkg/utils" + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" ) @@ -45,6 +47,7 @@ var _ Component = &Explainer{} // Explainer reconciles resources for this component. type Explainer struct { client client.Client + clientset kubernetes.Interface scheme *runtime.Scheme inferenceServiceConfig *v1beta1.InferenceServicesConfig credentialBuilder *credentials.CredentialBuilder //nolint: unused @@ -52,10 +55,11 @@ type Explainer struct { Log logr.Logger } -func NewExplainer(client client.Client, scheme *runtime.Scheme, inferenceServiceConfig *v1beta1.InferenceServicesConfig, - deploymentMode constants.DeploymentModeType) Component { +func NewExplainer(client client.Client, clientset kubernetes.Interface, scheme *runtime.Scheme, + inferenceServiceConfig *v1beta1.InferenceServicesConfig, deploymentMode constants.DeploymentModeType) Component { return &Explainer{ client: client, + clientset: clientset, scheme: scheme, inferenceServiceConfig: inferenceServiceConfig, deploymentMode: deploymentMode, @@ -137,8 +141,8 @@ func (e *Explainer) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro // Here we allow switch between knative and vanilla deployment if e.deploymentMode == constants.RawDeployment { - r, err := raw.NewRawKubeReconciler(e.client, e.scheme, objectMeta, &isvc.Spec.Explainer.ComponentExtensionSpec, - &podSpec) + r, err := raw.NewRawKubeReconciler(e.client, e.clientset, e.scheme, objectMeta, + &isvc.Spec.Explainer.ComponentExtensionSpec, &podSpec) if err != nil { return ctrl.Result{}, errors.Wrapf(err, "fails to create NewRawKubeReconciler for explainer") } diff --git a/pkg/controller/v1beta1/inferenceservice/components/predictor.go b/pkg/controller/v1beta1/inferenceservice/components/predictor.go index c1abccd4293..57df0991439 100644 --- a/pkg/controller/v1beta1/inferenceservice/components/predictor.go +++ b/pkg/controller/v1beta1/inferenceservice/components/predictor.go @@ -17,23 +17,16 @@ limitations under the License. 
package components import ( - "fmt" - "context" + "fmt" "github.com/go-logr/logr" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/knative" - modelconfig "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig" - raw "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" - isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" - "github.com/kserve/kserve/pkg/credentials" - "github.com/kserve/kserve/pkg/utils" "github.com/pkg/errors" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -41,6 +34,13 @@ import ( "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/knative" + modelconfig "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" + isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" + "github.com/kserve/kserve/pkg/credentials" + "github.com/kserve/kserve/pkg/utils" ) var _ Component = &Predictor{} @@ -48,6 +48,7 @@ var _ Component = &Predictor{} // Predictor reconciles resources for this component. type Predictor struct { client client.Client + clientset kubernetes.Interface scheme *runtime.Scheme inferenceServiceConfig *v1beta1.InferenceServicesConfig credentialBuilder *credentials.CredentialBuilder //nolint: unused @@ -55,10 +56,11 @@ type Predictor struct { Log logr.Logger } -func NewPredictor(client client.Client, scheme *runtime.Scheme, inferenceServiceConfig *v1beta1.InferenceServicesConfig, - deploymentMode constants.DeploymentModeType) Component { +func NewPredictor(client client.Client, clientset kubernetes.Interface, scheme *runtime.Scheme, + inferenceServiceConfig *v1beta1.InferenceServicesConfig, deploymentMode constants.DeploymentModeType) Component { return &Predictor{ client: client, + clientset: clientset, scheme: scheme, inferenceServiceConfig: inferenceServiceConfig, deploymentMode: deploymentMode, @@ -85,7 +87,7 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro addAgentAnnotations(isvc, annotations) // Reconcile modelConfig - configMapReconciler := modelconfig.NewModelConfigReconciler(p.client, p.scheme) + configMapReconciler := modelconfig.NewModelConfigReconciler(p.client, p.clientset, p.scheme) if err := configMapReconciler.Reconcile(isvc); err != nil { return ctrl.Result{}, err } @@ -305,7 +307,7 @@ func (p *Predictor) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, erro if p.deploymentMode == constants.RawDeployment { rawDeployment = true podLabelKey = constants.RawDeploymentAppLabel - r, err := raw.NewRawKubeReconciler(p.client, p.scheme, objectMeta, &isvc.Spec.Predictor.ComponentExtensionSpec, + r, err := raw.NewRawKubeReconciler(p.client, p.clientset, p.scheme, objectMeta, &isvc.Spec.Predictor.ComponentExtensionSpec, &podSpec) if err != nil { return ctrl.Result{}, errors.Wrapf(err, "fails to create NewRawKubeReconciler for predictor") diff --git 
a/pkg/controller/v1beta1/inferenceservice/components/transformer.go b/pkg/controller/v1beta1/inferenceservice/components/transformer.go index 586744fab1f..f0a5b25c2be 100644 --- a/pkg/controller/v1beta1/inferenceservice/components/transformer.go +++ b/pkg/controller/v1beta1/inferenceservice/components/transformer.go @@ -17,30 +17,30 @@ limitations under the License. package components import ( + "context" "fmt" "net/url" "time" - "context" - "github.com/go-logr/logr" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/knative" - raw "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" - isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" - "github.com/kserve/kserve/pkg/credentials" - "github.com/kserve/kserve/pkg/utils" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/knative" + raw "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/raw" + isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" + "github.com/kserve/kserve/pkg/credentials" + "github.com/kserve/kserve/pkg/utils" ) var _ Component = &Transformer{} @@ -48,6 +48,7 @@ var _ Component = &Transformer{} // Transformer reconciles resources for this component. 
type Transformer struct { client client.Client + clientset kubernetes.Interface scheme *runtime.Scheme inferenceServiceConfig *v1beta1.InferenceServicesConfig credentialBuilder *credentials.CredentialBuilder //nolint: unused @@ -55,10 +56,11 @@ type Transformer struct { Log logr.Logger } -func NewTransformer(client client.Client, scheme *runtime.Scheme, inferenceServiceConfig *v1beta1.InferenceServicesConfig, - deploymentMode constants.DeploymentModeType) Component { +func NewTransformer(client client.Client, clientset kubernetes.Interface, scheme *runtime.Scheme, + inferenceServiceConfig *v1beta1.InferenceServicesConfig, deploymentMode constants.DeploymentModeType) Component { return &Transformer{ client: client, + clientset: clientset, scheme: scheme, inferenceServiceConfig: inferenceServiceConfig, deploymentMode: deploymentMode, @@ -167,8 +169,8 @@ func (p *Transformer) Reconcile(isvc *v1beta1.InferenceService) (ctrl.Result, er // Here we allow switch between knative and vanilla deployment if p.deploymentMode == constants.RawDeployment { - r, err := raw.NewRawKubeReconciler(p.client, p.scheme, objectMeta, &isvc.Spec.Transformer.ComponentExtensionSpec, - &podSpec) + r, err := raw.NewRawKubeReconciler(p.client, p.clientset, p.scheme, objectMeta, + &isvc.Spec.Transformer.ComponentExtensionSpec, &podSpec) if err != nil { return ctrl.Result{}, errors.Wrapf(err, "fails to create NewRawKubeReconciler for transformer") } diff --git a/pkg/controller/v1beta1/inferenceservice/controller.go b/pkg/controller/v1beta1/inferenceservice/controller.go index 6f3ab5b37b2..3c78f38fa13 100644 --- a/pkg/controller/v1beta1/inferenceservice/controller.go +++ b/pkg/controller/v1beta1/inferenceservice/controller.go @@ -22,15 +22,6 @@ import ( "reflect" "github.com/go-logr/logr" - v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/components" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/cabundleconfigmap" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/ingress" - modelconfig "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig" - isvcutils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" - "github.com/kserve/kserve/pkg/utils" "github.com/pkg/errors" istioclientv1beta1 "istio.io/client-go/pkg/apis/networking/v1beta1" appsv1 "k8s.io/api/apps/v1" @@ -40,12 +31,23 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/record" "knative.dev/pkg/apis" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + + v1alpha1api "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/components" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/cabundleconfigmap" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/ingress" + modelconfig "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig" + isvcutils 
"github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" + "github.com/kserve/kserve/pkg/utils" ) // +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices;inferenceservices/finalizers,verbs=get;list;watch;create;update;patch;delete @@ -66,9 +68,9 @@ import ( // +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=mutatingwebhookconfigurations;validatingwebhookconfigurations,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get;list;watch -// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create -// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=core,resources=serviceaccounts,verbs=get +// +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;create +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=namespaces,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch @@ -85,9 +87,10 @@ const ( // InferenceServiceReconciler reconciles a InferenceService object type InferenceServiceReconciler struct { client.Client - Log logr.Logger - Scheme *runtime.Scheme - Recorder record.EventRecorder + Clientset kubernetes.Interface + Log logr.Logger + Scheme *runtime.Scheme + Recorder record.EventRecorder } func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -108,7 +111,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req return !utils.Includes(constants.ServiceAnnotationDisallowedList, key) }) - deployConfig, err := v1beta1api.NewDeployConfig(r.Client) + deployConfig, err := v1beta1api.NewDeployConfig(r.Clientset) if err != nil { return reconcile.Result{}, errors.Wrapf(err, "fails to create DeployConfig") } @@ -163,26 +166,26 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req } r.Log.Info("Reconciling inference service", "apiVersion", isvc.APIVersion, "isvc", isvc.Name) - isvcConfig, err := v1beta1api.NewInferenceServicesConfig(r.Client) + isvcConfig, err := v1beta1api.NewInferenceServicesConfig(r.Clientset) if err != nil { return reconcile.Result{}, errors.Wrapf(err, "fails to create InferenceServicesConfig") } // Reconcile cabundleConfigMap - caBundleConfigMapReconciler := cabundleconfigmap.NewCaBundleConfigMapReconciler(r.Client, r.Scheme) + caBundleConfigMapReconciler := cabundleconfigmap.NewCaBundleConfigMapReconciler(r.Client, r.Clientset, r.Scheme) if err := caBundleConfigMapReconciler.Reconcile(isvc); err != nil { return reconcile.Result{}, err } reconcilers := []components.Component{} if deploymentMode != constants.ModelMeshDeployment { - reconcilers = append(reconcilers, components.NewPredictor(r.Client, r.Scheme, isvcConfig, deploymentMode)) + reconcilers = append(reconcilers, components.NewPredictor(r.Client, r.Clientset, r.Scheme, isvcConfig, deploymentMode)) } if isvc.Spec.Transformer != nil { - reconcilers = append(reconcilers, components.NewTransformer(r.Client, r.Scheme, isvcConfig, deploymentMode)) + reconcilers = 
append(reconcilers, components.NewTransformer(r.Client, r.Clientset, r.Scheme, isvcConfig, deploymentMode)) } if isvc.Spec.Explainer != nil { - reconcilers = append(reconcilers, components.NewExplainer(r.Client, r.Scheme, isvcConfig, deploymentMode)) + reconcilers = append(reconcilers, components.NewExplainer(r.Client, r.Clientset, r.Scheme, isvcConfig, deploymentMode)) } for _, reconciler := range reconcilers { result, err := reconciler.Reconcile(isvc) @@ -212,7 +215,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req isvc.Status.PropagateCrossComponentStatus(componentList, v1beta1api.LatestDeploymentReady) } //Reconcile ingress - ingressConfig, err := v1beta1api.NewIngressConfig(r.Client) + ingressConfig, err := v1beta1api.NewIngressConfig(r.Clientset) if err != nil { return reconcile.Result{}, errors.Wrapf(err, "fails to create IngressConfig") } @@ -235,7 +238,7 @@ func (r *InferenceServiceReconciler) Reconcile(ctx context.Context, req ctrl.Req } // Reconcile modelConfig - configMapReconciler := modelconfig.NewModelConfigReconciler(r.Client, r.Scheme) + configMapReconciler := modelconfig.NewModelConfigReconciler(r.Client, r.Clientset, r.Scheme) if err := configMapReconciler.Reconcile(isvc); err != nil { return reconcile.Result{}, err } diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/cabundleconfigmap/cabundle_configmap_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/cabundleconfigmap/cabundle_configmap_reconciler.go index 4f32de2c72b..a39526aca3e 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/cabundleconfigmap/cabundle_configmap_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/cabundleconfigmap/cabundle_configmap_reconciler.go @@ -21,41 +21,41 @@ import ( "encoding/json" "fmt" - kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/webhook/admission/pod" - + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/api/equality" apierr "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" "knative.dev/pkg/kmp" "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" + + kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/webhook/admission/pod" ) var log = logf.Log.WithName("CaBundleConfigMapReconciler") type CaBundleConfigMapReconciler struct { - client client.Client - scheme *runtime.Scheme + client client.Client + clientset kubernetes.Interface + scheme *runtime.Scheme } -func NewCaBundleConfigMapReconciler(client client.Client, scheme *runtime.Scheme) *CaBundleConfigMapReconciler { +func NewCaBundleConfigMapReconciler(client client.Client, clientset kubernetes.Interface, scheme *runtime.Scheme) *CaBundleConfigMapReconciler { return &CaBundleConfigMapReconciler{ - client: client, - scheme: scheme, + client: client, + clientset: clientset, + scheme: scheme, } } func (c *CaBundleConfigMapReconciler) Reconcile(isvc *kservev1beta1.InferenceService) error { log.Info("Reconciling CaBundleConfigMap", "namespace", isvc.Namespace) - - isvcConfigMap := &corev1.ConfigMap{} - err := c.client.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, isvcConfigMap) + 
isvcConfigMap, err := c.clientset.CoreV1().ConfigMaps(constants.KServeNamespace).Get(context.TODO(), constants.InferenceServiceConfigMapName, metav1.GetOptions{}) if err != nil { log.Error(err, "failed to find config map", "name", constants.InferenceServiceConfigMapName) return err @@ -91,10 +91,9 @@ func (c *CaBundleConfigMapReconciler) getCabundleConfigMapForUserNS(caBundleName // Check if cabundle configmap exist & the cabundle.crt exist in the data in controller namespace // If it does not exist, return error - caBundleConfigMap := &corev1.ConfigMap{} - if err := c.client.Get(context.TODO(), - types.NamespacedName{Name: caBundleNameInConfig, Namespace: kserveNamespace}, caBundleConfigMap); err == nil { + caBundleConfigMap, err := c.clientset.CoreV1().ConfigMaps(kserveNamespace).Get(context.TODO(), caBundleNameInConfig, metav1.GetOptions{}) + if err == nil { if caBundleConfigMapData := caBundleConfigMap.Data[constants.DefaultCaBundleFileName]; caBundleConfigMapData == "" { return nil, fmt.Errorf("specified cabundle file %s not found in cabundle configmap %s", constants.DefaultCaBundleFileName, caBundleNameInConfig) @@ -105,7 +104,7 @@ func (c *CaBundleConfigMapReconciler) getCabundleConfigMapForUserNS(caBundleName newCaBundleConfigMap = getDesiredCaBundleConfigMapForUserNS(constants.DefaultGlobalCaBundleConfigMapName, isvcNamespace, configData) } } else { - return nil, fmt.Errorf("can't read cabundle configmap %s: %w", constants.DefaultCaBundleFileName, err) + return nil, errors.Wrapf(err, "failed to get configmap %s from the cluster", caBundleNameInConfig) } return newCaBundleConfigMap, nil @@ -127,8 +126,7 @@ func getDesiredCaBundleConfigMapForUserNS(configmapName string, namespace string func (c *CaBundleConfigMapReconciler) ReconcileCaBundleConfigMap(desiredConfigMap *corev1.ConfigMap) error { // Create ConfigMap if does not exist - existingConfigMap := &corev1.ConfigMap{} - err := c.client.Get(context.TODO(), types.NamespacedName{Name: desiredConfigMap.Name, Namespace: desiredConfigMap.Namespace}, existingConfigMap) + existingConfigMap, err := c.clientset.CoreV1().ConfigMaps(desiredConfigMap.Namespace).Get(context.TODO(), desiredConfigMap.Name, metav1.GetOptions{}) if err != nil { if apierr.IsNotFound(err) { log.Info("Creating cabundle configmap", "namespace", desiredConfigMap.Namespace, "name", desiredConfigMap.Name) diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig/modelconfig_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig/modelconfig_reconciler.go index 455d328ccb7..1a2a66a79bd 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig/modelconfig_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/modelconfig/modelconfig_reconciler.go @@ -19,31 +19,34 @@ package multimodelconfig import ( "context" - v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/sharding/memory" - v1beta1utils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" - "github.com/kserve/kserve/pkg/modelconfig" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logf "sigs.k8s.io/controller-runtime/pkg/log" + + 
v1beta1api "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/controller/v1alpha1/trainedmodel/sharding/memory" + v1beta1utils "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/utils" + "github.com/kserve/kserve/pkg/modelconfig" ) var log = logf.Log.WithName("Reconciler") type ModelConfigReconciler struct { - client client.Client - scheme *runtime.Scheme + client client.Client + clientset kubernetes.Interface + scheme *runtime.Scheme } -func NewModelConfigReconciler(client client.Client, scheme *runtime.Scheme) *ModelConfigReconciler { +func NewModelConfigReconciler(client client.Client, clientset kubernetes.Interface, scheme *runtime.Scheme) *ModelConfigReconciler { return &ModelConfigReconciler{ - client: client, - scheme: scheme, + client: client, + clientset: clientset, + scheme: scheme, } } @@ -54,9 +57,9 @@ func (c *ModelConfigReconciler) Reconcile(isvc *v1beta1api.InferenceService) err // An InferenceService with storageUri is considered as multi-model InferenceService with only one model, a modelConfig configmap should be created as well shardStrategy := memory.MemoryStrategy{} for _, id := range shardStrategy.GetShard(isvc) { - modelConfig := corev1.ConfigMap{} - modelConfigName := types.NamespacedName{Name: constants.ModelConfigName(isvc.Name, id), Namespace: isvc.Namespace} - if err := c.client.Get(context.TODO(), modelConfigName, &modelConfig); err != nil { + modelConfigName := constants.ModelConfigName(isvc.Name, id) + _, err := c.clientset.CoreV1().ConfigMaps(isvc.Namespace).Get(context.TODO(), modelConfigName, metav1.GetOptions{}) + if err != nil { if errors.IsNotFound(err) { // If the modelConfig does not exist for an InferenceService without storageUri, create an empty modelConfig log.Info("Creating modelConfig", "configmap", modelConfigName, "inferenceservice", isvc.Name, "namespace", isvc.Namespace) diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go index a0ae5e3cdc3..45c9d220511 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/raw/raw_kube_reconciler.go @@ -19,17 +19,19 @@ package raw import ( "fmt" - "github.com/kserve/kserve/pkg/apis/serving/v1beta1" - autoscaler "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler" - deployment "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment" - "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/ingress" - service "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/service" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes" knapis "knative.dev/pkg/apis" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + autoscaler "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/autoscaler" + deployment "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment" + "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/ingress" + service "github.com/kserve/kserve/pkg/controller/v1beta1/inferenceservice/reconcilers/service" ) // RawKubeReconciler reconciles the Native K8S 
Resources @@ -44,6 +46,7 @@ type RawKubeReconciler struct { // NewRawKubeReconciler creates raw kubernetes resource reconciler. func NewRawKubeReconciler(client client.Client, + clientset kubernetes.Interface, scheme *runtime.Scheme, componentMeta metav1.ObjectMeta, componentExt *v1beta1.ComponentExtensionSpec, @@ -53,7 +56,7 @@ func NewRawKubeReconciler(client client.Client, return nil, err } - url, err := createRawURL(client, componentMeta) + url, err := createRawURL(clientset, componentMeta) if err != nil { return nil, err } @@ -68,8 +71,8 @@ func NewRawKubeReconciler(client client.Client, }, nil } -func createRawURL(client client.Client, metadata metav1.ObjectMeta) (*knapis.URL, error) { - ingressConfig, err := v1beta1.NewIngressConfig(client) +func createRawURL(clientset kubernetes.Interface, metadata metav1.ObjectMeta) (*knapis.URL, error) { + ingressConfig, err := v1beta1.NewIngressConfig(clientset) if err != nil { return nil, err } diff --git a/pkg/controller/v1beta1/inferenceservice/suite_test.go b/pkg/controller/v1beta1/inferenceservice/suite_test.go index 96014c82ad1..1bb29c5427e 100644 --- a/pkg/controller/v1beta1/inferenceservice/suite_test.go +++ b/pkg/controller/v1beta1/inferenceservice/suite_test.go @@ -20,26 +20,26 @@ import ( "context" "testing" - netv1 "k8s.io/api/networking/v1" - - kfservingv1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - "github.com/kserve/kserve/pkg/constants" - pkgtest "github.com/kserve/kserve/pkg/testing" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - ctrl "sigs.k8s.io/controller-runtime" - - "github.com/kserve/kserve/pkg/apis/serving/v1beta1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + netv1 "k8s.io/api/networking/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/envtest" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + kfservingv1alpha1 "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + "github.com/kserve/kserve/pkg/apis/serving/v1beta1" + "github.com/kserve/kserve/pkg/constants" + pkgtest "github.com/kserve/kserve/pkg/testing" ) // These tests use Ginkgo (BDD-style Go testing framework). 
Refer to @@ -51,6 +51,7 @@ var ( testEnv *envtest.Environment cancel context.CancelFunc ctx context.Context + clientset kubernetes.Interface ) func TestV1beta1APIs(t *testing.T) { @@ -81,6 +82,10 @@ var _ = BeforeSuite(func() { Expect(err).ToNot(HaveOccurred()) Expect(k8sClient).ToNot(BeNil()) + clientset, err = kubernetes.NewForConfig(cfg) + Expect(err).ToNot(HaveOccurred()) + Expect(clientset).ToNot(BeNil()) + //Create namespace kfservingNamespaceObj := &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{ @@ -106,10 +111,11 @@ var _ = BeforeSuite(func() { DisableIstioVirtualHost: false, } err = (&InferenceServiceReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), - Log: ctrl.Log.WithName("V1beta1InferenceServiceController"), - Recorder: k8sManager.GetEventRecorderFor("V1beta1InferenceServiceController"), + Client: k8sClient, + Clientset: clientset, + Scheme: k8sClient.Scheme(), + Log: ctrl.Log.WithName("V1beta1InferenceServiceController"), + Recorder: k8sManager.GetEventRecorderFor("V1beta1InferenceServiceController"), }).SetupWithManager(k8sManager, deployConfig, ingressConfig) Expect(err).ToNot(HaveOccurred()) diff --git a/pkg/credentials/credentials_suite_test.go b/pkg/credentials/credentials_suite_test.go index ae7125d16a0..2bd5228e2e8 100644 --- a/pkg/credentials/credentials_suite_test.go +++ b/pkg/credentials/credentials_suite_test.go @@ -20,14 +20,17 @@ import ( "os" "testing" - pkgtest "github.com/kserve/kserve/pkg/testing" + "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client" + + pkgtest "github.com/kserve/kserve/pkg/testing" ) var cfg *rest.Config var c client.Client +var clientset kubernetes.Interface func TestMain(m *testing.M) { t := pkgtest.SetupEnvTest() @@ -39,6 +42,10 @@ func TestMain(m *testing.M) { if c, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}); err != nil { log.Error(err, "Failed to start client") } + if clientset, err = kubernetes.NewForConfig(cfg); err != nil { + log.Error(err, "Failed to create clientset") + } + code := m.Run() t.Stop() os.Exit(code) diff --git a/pkg/credentials/service_account_credentials.go b/pkg/credentials/service_account_credentials.go index 13f9eecd2ef..086d6a75524 100644 --- a/pkg/credentials/service_account_credentials.go +++ b/pkg/credentials/service_account_credentials.go @@ -23,19 +23,18 @@ import ( "fmt" "strings" - "github.com/kserve/kserve/pkg/constants" - - "github.com/kserve/kserve/pkg/credentials/https" - v1 "k8s.io/api/core/v1" apierr "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/types" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" + "github.com/kserve/kserve/pkg/constants" "github.com/kserve/kserve/pkg/credentials/azure" "github.com/kserve/kserve/pkg/credentials/gcs" "github.com/kserve/kserve/pkg/credentials/hdfs" + "github.com/kserve/kserve/pkg/credentials/https" "github.com/kserve/kserve/pkg/credentials/s3" "github.com/kserve/kserve/pkg/utils" ) @@ -64,13 +63,14 @@ type CredentialConfig struct { } type CredentialBuilder struct { - client client.Client - config CredentialConfig + client client.Client + clientset kubernetes.Interface + config CredentialConfig } var log = logf.Log.WithName("CredentialBuilder") -func NewCredentialBuilder(client client.Client, config *v1.ConfigMap) *CredentialBuilder { +func NewCredentialBuilder(client client.Client, clientset kubernetes.Interface, config 
*v1.ConfigMap) *CredentialBuilder { credentialConfig := CredentialConfig{} if credential, ok := config.Data[CredentialConfigKeyName]; ok { err := json.Unmarshal([]byte(credential), &credentialConfig) @@ -80,8 +80,9 @@ func NewCredentialBuilder(client client.Client, config *v1.ConfigMap) *Credentia } return &CredentialBuilder{ - client: client, - config: credentialConfig, + client: client, + clientset: clientset, + config: credentialConfig, } } @@ -101,11 +102,10 @@ func (c *CredentialBuilder) CreateStorageSpecSecretEnvs(namespace string, annota storageSecretName = secretName } } + secret, err := c.clientset.CoreV1().Secrets(namespace).Get(context.TODO(), storageSecretName, metav1.GetOptions{}) - secret := &v1.Secret{} var storageData []byte - if err := c.client.Get(context.TODO(), - types.NamespacedName{Name: storageSecretName, Namespace: namespace}, secret); err == nil { + if err == nil { if storageKey != "" { if storageData = secret.Data[storageKey]; storageData == nil { return fmt.Errorf("specified storage key %s not found in storage secret %s", @@ -199,10 +199,7 @@ func (c *CredentialBuilder) CreateSecretVolumeAndEnv(namespace string, annotatio if serviceAccountName == "" { serviceAccountName = "default" } - - serviceAccount := &v1.ServiceAccount{} - err := c.client.Get(context.TODO(), types.NamespacedName{Name: serviceAccountName, - Namespace: namespace}, serviceAccount) + serviceAccount, err := c.clientset.CoreV1().ServiceAccounts(namespace).Get(context.TODO(), serviceAccountName, metav1.GetOptions{}) if err != nil { log.Error(err, "Failed to find service account", "ServiceAccountName", serviceAccountName, "Namespace", namespace) @@ -243,9 +240,7 @@ func (c *CredentialBuilder) CreateSecretVolumeAndEnv(namespace string, annotatio func (c *CredentialBuilder) mountSecretCredential(secretName string, namespace string, container *v1.Container, volumes *[]v1.Volume) error { - secret := &v1.Secret{} - err := c.client.Get(context.TODO(), types.NamespacedName{Name: secretName, - Namespace: namespace}, secret) + secret, err := c.clientset.CoreV1().Secrets(namespace).Get(context.TODO(), secretName, metav1.GetOptions{}) if err != nil { log.Error(err, "Failed to find secret", "SecretName", secretName) return err diff --git a/pkg/credentials/service_account_credentials_test.go b/pkg/credentials/service_account_credentials_test.go index 866ef8a831a..e305b7920df 100644 --- a/pkg/credentials/service_account_credentials_test.go +++ b/pkg/credentials/service_account_credentials_test.go @@ -165,7 +165,7 @@ func TestS3CredentialBuilder(t *testing.T) { }, } - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { g.Expect(c.Create(context.TODO(), existingServiceAccount)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), existingS3Secret)).NotTo(gomega.HaveOccurred()) @@ -295,7 +295,7 @@ func TestS3CredentialBuilderWithStorageSecret(t *testing.T) { }, } - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { g.Expect(c.Create(context.TODO(), existingServiceAccount)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), existingS3Secret)).NotTo(gomega.HaveOccurred()) @@ -399,7 +399,7 @@ func TestS3ServiceAccountCredentialBuilder(t *testing.T) { }, } - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { 
g.Expect(c.Create(context.TODO(), existingServiceAccount)).NotTo(gomega.HaveOccurred()) @@ -509,7 +509,7 @@ func TestGCSCredentialBuilder(t *testing.T) { }, } - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { g.Expect(c.Create(context.TODO(), existingServiceAccount)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), existingGCSSecret)).NotTo(gomega.HaveOccurred()) @@ -651,7 +651,7 @@ func TestLegacyAzureCredentialBuilder(t *testing.T) { g.Expect(c.Create(context.TODO(), customAzureSecret)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), customOnlyServiceAccount)).NotTo(gomega.HaveOccurred()) - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { err := builder.CreateSecretVolumeAndEnv(scenario.serviceAccount.Namespace, nil, scenario.serviceAccount.Name, @@ -762,7 +762,7 @@ func TestHdfsCredentialBuilder(t *testing.T) { g.Expect(c.Create(context.TODO(), customHdfsSecret)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), customOnlyServiceAccount)).NotTo(gomega.HaveOccurred()) - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { err := builder.CreateSecretVolumeAndEnv(scenario.serviceAccount.Namespace, nil, scenario.serviceAccount.Name, @@ -902,7 +902,7 @@ func TestAzureCredentialBuilder(t *testing.T) { g.Expect(c.Create(context.TODO(), customAzureSecret)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), customOnlyServiceAccount)).NotTo(gomega.HaveOccurred()) - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { err := builder.CreateSecretVolumeAndEnv(scenario.serviceAccount.Namespace, nil, scenario.serviceAccount.Name, @@ -1006,7 +1006,7 @@ func TestAzureStorageAccessKeyCredentialBuilder(t *testing.T) { g.Expect(c.Create(context.TODO(), customAzureSecret)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), customOnlyServiceAccount)).NotTo(gomega.HaveOccurred()) - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { err := builder.CreateSecretVolumeAndEnv(scenario.serviceAccount.Namespace, nil, scenario.serviceAccount.Name, @@ -1034,7 +1034,7 @@ func TestAzureStorageAccessKeyCredentialBuilder(t *testing.T) { func TestCredentialBuilder_CreateStorageSpecSecretEnvs(t *testing.T) { g := gomega.NewGomegaWithT(t) namespace := "default" - builder := NewCredentialBuilder(c, configMap) + builder := NewCredentialBuilder(c, clientset, configMap) scenarios := map[string]struct { secret *v1.Secret diff --git a/pkg/webhook/admission/pod/agent_injector_test.go b/pkg/webhook/admission/pod/agent_injector_test.go index cd8e4e0d2f5..83fd529a87e 100644 --- a/pkg/webhook/admission/pod/agent_injector_test.go +++ b/pkg/webhook/admission/pod/agent_injector_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package pod import ( + fakeclientset "k8s.io/client-go/kubernetes/fake" "testing" "github.com/kserve/kserve/pkg/apis/serving/v1beta1" @@ -1100,8 +1101,8 @@ func TestAgentInjector(t *testing.T) { }, }, } - - credentialBuilder := credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + clientset := fakeclientset.NewSimpleClientset() + credentialBuilder := credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }) diff --git a/pkg/webhook/admission/pod/batcher_injector_test.go b/pkg/webhook/admission/pod/batcher_injector_test.go index a6c986c9c18..3b3f97ca6e5 100644 --- a/pkg/webhook/admission/pod/batcher_injector_test.go +++ b/pkg/webhook/admission/pod/batcher_injector_test.go @@ -19,14 +19,14 @@ package pod import ( "testing" - "k8s.io/apimachinery/pkg/api/resource" - "knative.dev/pkg/kmp" - - "github.com/kserve/kserve/pkg/constants" "github.com/onsi/gomega" "github.com/onsi/gomega/types" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "knative.dev/pkg/kmp" + + "github.com/kserve/kserve/pkg/constants" ) const ( diff --git a/pkg/webhook/admission/pod/mutator.go b/pkg/webhook/admission/pod/mutator.go index 35b49433372..3bfaf798412 100644 --- a/pkg/webhook/admission/pod/mutator.go +++ b/pkg/webhook/admission/pod/mutator.go @@ -21,13 +21,14 @@ import ( "net/http" v1 "k8s.io/api/core/v1" - k8types "k8s.io/apimachinery/pkg/types" - - "github.com/kserve/kserve/pkg/constants" - "github.com/kserve/kserve/pkg/credentials" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + + "github.com/kserve/kserve/pkg/constants" + "github.com/kserve/kserve/pkg/credentials" ) // +kubebuilder:webhook:path=/mutate-pods,mutating=true,failurePolicy=fail,groups="",resources=pods,verbs=create,versions=v1,name=inferenceservice.kserve-webhook-server.pod-mutator,reinvocationPolicy=IfNeeded @@ -35,8 +36,9 @@ var log = logf.Log.WithName(constants.PodMutatorWebhookName) // Mutator is a webhook that injects incoming pods type Mutator struct { - Client client.Client - Decoder *admission.Decoder + Client client.Client + Clientset kubernetes.Interface + Decoder *admission.Decoder } // Handle decodes the incoming Pod and executes mutation logic. 
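The hunks above and below replace cached controller-runtime reads of the inferenceservice ConfigMap with direct GETs through a kubernetes.Interface. A minimal, self-contained sketch of that pattern follows, assuming only the standard client-go and controller-runtime APIs; the function name, namespace, and ConfigMap name are illustrative rather than the repository's exact wiring:

// Sketch: read a ConfigMap with a typed clientset instead of the cached controller-runtime client.
package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/controller-runtime/pkg/client/config"
)

// readInferenceServiceConfig issues a single GET against the API server; no informer cache
// is involved, so only "get" RBAC is needed for this read.
func readInferenceServiceConfig(clientset kubernetes.Interface, namespace, name string) (*v1.ConfigMap, error) {
	return clientset.CoreV1().ConfigMaps(namespace).Get(context.TODO(), name, metav1.GetOptions{})
}

func main() {
	cfg, err := config.GetConfig() // resolves in-cluster config or a local kubeconfig
	if err != nil {
		panic(err)
	}
	clientset, err := kubernetes.NewForConfig(cfg) // same constructor the test suites in this PR use
	if err != nil {
		panic(err)
	}
	cm, err := readInferenceServiceConfig(clientset, "kserve", "inferenceservice-config")
	if err != nil {
		panic(err)
	}
	fmt.Printf("loaded %q with %d keys\n", cm.Name, len(cm.Data))
}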
@@ -52,8 +54,8 @@ func (mutator *Mutator) Handle(ctx context.Context, req admission.Request) admis return admission.ValidationResponse(true, "") } - configMap := &v1.ConfigMap{} - err := mutator.Client.Get(context.TODO(), k8types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: constants.KServeNamespace}, configMap) + configMap, err := mutator.Clientset.CoreV1().ConfigMaps(constants.KServeNamespace).Get(context.TODO(), + constants.InferenceServiceConfigMapName, metav1.GetOptions{}) if err != nil { log.Error(err, "Failed to find config map", "name", constants.InferenceServiceConfigMapName) return admission.Errored(http.StatusInternalServerError, err) @@ -77,7 +79,7 @@ func (mutator *Mutator) Handle(ctx context.Context, req admission.Request) admis } func (mutator *Mutator) mutate(pod *v1.Pod, configMap *v1.ConfigMap) error { - credentialBuilder := credentials.NewCredentialBuilder(mutator.Client, configMap) + credentialBuilder := credentials.NewCredentialBuilder(mutator.Client, mutator.Clientset, configMap) storageInitializerConfig, err := getStorageInitializerConfigs(configMap) if err != nil { diff --git a/pkg/webhook/admission/pod/mutator_test.go b/pkg/webhook/admission/pod/mutator_test.go index ba552af80c7..4d7084f7698 100644 --- a/pkg/webhook/admission/pod/mutator_test.go +++ b/pkg/webhook/admission/pod/mutator_test.go @@ -52,7 +52,7 @@ func TestMutator_Handle(t *testing.T) { if err := c.Create(context.TODO(), &kserveNamespace); err != nil { t.Errorf("failed to create namespace: %v", err) } - mutator := Mutator{Client: c, Decoder: admission.NewDecoder(c.Scheme())} + mutator := Mutator{Client: c, Clientset: clientset, Decoder: admission.NewDecoder(c.Scheme())} cases := map[string]struct { configMap v1.ConfigMap diff --git a/pkg/webhook/admission/pod/storage_initializer_injector_test.go b/pkg/webhook/admission/pod/storage_initializer_injector_test.go index 8113871369c..df2db9e973a 100644 --- a/pkg/webhook/admission/pod/storage_initializer_injector_test.go +++ b/pkg/webhook/admission/pod/storage_initializer_injector_test.go @@ -21,22 +21,20 @@ import ( "strings" "testing" + "github.com/onsi/gomega" + "github.com/onsi/gomega/types" + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/apimachinery/pkg/runtime" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "knative.dev/pkg/kmp" "knative.dev/pkg/ptr" - "sigs.k8s.io/controller-runtime/pkg/client/fake" "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" "github.com/kserve/kserve/pkg/constants" "github.com/kserve/kserve/pkg/credentials" "github.com/kserve/kserve/pkg/credentials/gcs" "github.com/kserve/kserve/pkg/credentials/s3" - "github.com/onsi/gomega" - "github.com/onsi/gomega/types" - "github.com/stretchr/testify/assert" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const ( @@ -357,7 +355,7 @@ func TestStorageInitializerInjector(t *testing.T) { for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: storageInitializerConfig, @@ -398,7 +396,7 @@ func TestStorageInitializerFailureCases(t *testing.T) { for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, 
clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: storageInitializerConfig, @@ -498,7 +496,7 @@ func TestCustomSpecStorageUriInjection(t *testing.T) { for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: storageInitializerConfig, @@ -946,7 +944,7 @@ func TestCredentialInjection(t *testing.T) { }, } - builder := credentials.NewCredentialBuilder(c, configMap) + builder := credentials.NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { g.Expect(c.Create(context.TODO(), scenario.sa)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), scenario.secret)).NotTo(gomega.HaveOccurred()) @@ -1037,7 +1035,7 @@ func TestStorageInitializerConfigmap(t *testing.T) { for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: &StorageInitializerConfig{ @@ -1874,7 +1872,7 @@ func TestCaBundleConfigMapVolumeMountInStorageInitializer(t *testing.T) { }, } - builder := credentials.NewCredentialBuilder(c, configMap) + builder := credentials.NewCredentialBuilder(c, clientset, configMap) for name, scenario := range scenarios { g.Expect(c.Create(context.TODO(), scenario.sa)).NotTo(gomega.HaveOccurred()) g.Expect(c.Create(context.TODO(), scenario.secret)).NotTo(gomega.HaveOccurred()) @@ -2004,7 +2002,7 @@ func TestDirectVolumeMountForPvc(t *testing.T) { for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: &StorageInitializerConfig{ @@ -2309,7 +2307,7 @@ func TestTransformerCollocation(t *testing.T) { for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: scenario.storageConfig, @@ -2344,7 +2342,7 @@ func TestGetStorageContainerSpec(t *testing.T) { } s3AzureSpec := v1alpha1.ClusterStorageContainer{ ObjectMeta: metav1.ObjectMeta{ - Name: "s3Azure", + Name: "s3-azure", }, Spec: v1alpha1.StorageContainerSpec{ Container: v1.Container{ @@ -2358,16 +2356,21 @@ func TestGetStorageContainerSpec(t *testing.T) { SupportedUriFormats: []v1alpha1.SupportedUriFormat{{Prefix: "s3://"}, {Regex: "https://(.+?).blob.core.windows.net/(.+)"}}, }, } - storageContainerSpecs := &v1alpha1.ClusterStorageContainerList{ - Items: []v1alpha1.ClusterStorageContainer{customSpec, s3AzureSpec}, - } - s := runtime.NewScheme() - err := v1alpha1.AddToScheme(s) - if err != nil { - t.Errorf("unable to add scheme : %v", err) + if err := c.Create(context.TODO(), &s3AzureSpec); err != nil { + t.Fatalf("unable to create cluster storage container: %v", err) + } + if err := c.Create(context.TODO(), &customSpec); err != nil { + t.Fatalf("unable to create cluster storage container: %v", err) } - mockClient := fake.NewClientBuilder().WithLists(storageContainerSpecs).WithScheme(s).Build() + defer func() { + if err := c.Delete(context.TODO(), 
&s3AzureSpec); err != nil { + t.Errorf("unable to delete cluster storage container: %v", err) + } + if err := c.Delete(context.TODO(), &customSpec); err != nil { + t.Errorf("unable to delete cluster storage container: %v", err) + } + }() scenarios := map[string]struct { storageUri string expectedSpec *v1.Container @@ -2387,8 +2390,9 @@ func TestGetStorageContainerSpec(t *testing.T) { } for name, scenario := range scenarios { var container *v1.Container + var err error - if container, err = GetContainerSpecForStorageUri(scenario.storageUri, mockClient); err != nil { + if container, err = GetContainerSpecForStorageUri(scenario.storageUri, c); err != nil { t.Errorf("Test %q unexpected result: %s", name, err) } g.Expect(container).To(gomega.Equal(scenario.expectedSpec)) @@ -2414,7 +2418,7 @@ func TestStorageContainerCRDInjection(t *testing.T) { } s3AzureSpec := v1alpha1.ClusterStorageContainer{ ObjectMeta: metav1.ObjectMeta{ - Name: "s3Azure", + Name: "s3-azure", }, Spec: v1alpha1.StorageContainerSpec{ Container: v1.Container{ @@ -2431,16 +2435,21 @@ func TestStorageContainerCRDInjection(t *testing.T) { SupportedUriFormats: []v1alpha1.SupportedUriFormat{{Prefix: "s3://"}, {Regex: "https://(.+?).blob.core.windows.net/(.+)"}}, }, } - storageContainerSpecs := &v1alpha1.ClusterStorageContainerList{ - Items: []v1alpha1.ClusterStorageContainer{customSpec, s3AzureSpec}, + if err := c.Create(context.TODO(), &s3AzureSpec); err != nil { + t.Fatalf("unable to create cluster storage container: %v", err) } - - s := runtime.NewScheme() - err := v1alpha1.AddToScheme(s) - if err != nil { - t.Errorf("unable to add scheme : %v", err) + if err := c.Create(context.TODO(), &customSpec); err != nil { + t.Fatalf("unable to create cluster storage container: %v", err) } - mockClient := fake.NewClientBuilder().WithLists(storageContainerSpecs).WithScheme(s).Build() + defer func() { + if err := c.Delete(context.TODO(), &s3AzureSpec); err != nil { + t.Errorf("unable to delete cluster storage container: %v", err) + } + if err := c.Delete(context.TODO(), &customSpec); err != nil { + t.Errorf("unable to delete cluster storage container: %v", err) + } + }() + scenarios := map[string]struct { original *v1.Pod expected *v1.Pod @@ -2580,14 +2589,14 @@ func TestStorageContainerCRDInjection(t *testing.T) { } for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(mockClient, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: storageInitializerConfig, - client: mockClient, + client: c, } - if err = injector.InjectStorageInitializer(scenario.original); err != nil { + if err := injector.InjectStorageInitializer(scenario.original); err != nil { t.Errorf("Test %q unexpected result: %s", name, err) } if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" { @@ -3629,7 +3638,7 @@ func TestStorageInitializerUIDForIstioCNI(t *testing.T) { for name, scenario := range scenarios { injector := &StorageInitializerInjector{ - credentialBuilder: credentials.NewCredentialBuilder(c, &v1.ConfigMap{ + credentialBuilder: credentials.NewCredentialBuilder(c, clientset, &v1.ConfigMap{ Data: map[string]string{}, }), config: storageInitializerConfig, diff --git a/pkg/webhook/admission/pod/suite_test.go b/pkg/webhook/admission/pod/suite_test.go index 12804546910..81ee1fe0464 100644 --- a/pkg/webhook/admission/pod/suite_test.go +++ 
b/pkg/webhook/admission/pod/suite_test.go @@ -20,17 +20,19 @@ import ( "os" "testing" - "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" - pkgtest "github.com/kserve/kserve/pkg/testing" - + "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" "k8s.io/klog" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/kserve/kserve/pkg/apis/serving/v1alpha1" + pkgtest "github.com/kserve/kserve/pkg/testing" ) var cfg *rest.Config var c client.Client +var clientset kubernetes.Interface func TestMain(m *testing.M) { t := pkgtest.SetupEnvTest() @@ -47,6 +49,11 @@ func TestMain(m *testing.M) { if c, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}); err != nil { klog.Error(err, "Failed to start client") } + + if clientset, err = kubernetes.NewForConfig(cfg); err != nil { + klog.Error(err, "Failed to create clientset") + } + code := m.Run() t.Stop() os.Exit(code) diff --git a/python/huggingfaceserver/huggingfaceserver/__main__.py b/python/huggingfaceserver/huggingfaceserver/__main__.py index 1fccea2a110..15679bf936f 100644 --- a/python/huggingfaceserver/huggingfaceserver/__main__.py +++ b/python/huggingfaceserver/huggingfaceserver/__main__.py @@ -67,11 +67,9 @@ def list_of_strings(arg): kwargs=vars(args)) try: model.load() + kserve.ModelServer().start([model] if model.ready else []) except ModelMissingError: logging.error(f"fail to locate model file for model {args.model_name} under dir {args.model_dir}," f"trying loading from model repository.") - if not args.model_id: kserve.ModelServer(registered_models=HuggingfaceModelRepository(args.model_dir)).start( [model] if model.ready else []) - else: - kserve.ModelServer().start([model] if model.ready else []) diff --git a/python/huggingfaceserver/huggingfaceserver/model.py b/python/huggingfaceserver/huggingfaceserver/model.py index ea6a18e022c..b70cac0e30c 100644 --- a/python/huggingfaceserver/huggingfaceserver/model.py +++ b/python/huggingfaceserver/huggingfaceserver/model.py @@ -32,6 +32,7 @@ from kserve.utils.utils import get_predict_response, get_predict_input, from_np_dtype from kserve import Model import torch +from accelerate import init_empty_weights try: from vllm.sampling_params import SamplingParams @@ -42,7 +43,7 @@ _vllm = False from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, \ - AutoConfig, \ + AutoConfig, AutoModel, \ AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoModelForQuestionAnswering, \ AutoModelForMaskedLM, BatchEncoding, TensorType @@ -59,6 +60,7 @@ def __init__(self, model_name: str, kwargs, self.device = torch.device( "cuda" if torch.cuda.is_available() else "cpu" ) + self.device_map = "cuda" if torch.cuda.is_available() else "cpu" self.model_id = kwargs.get('model_id', None) self.model_dir = kwargs.get('model_dir', None) if not self.model_id and not self.model_dir: @@ -78,8 +80,7 @@ def __init__(self, model_name: str, kwargs, self.ready = False @staticmethod - def infer_task_from_model_architecture(model_config_path: str): - model_config = AutoConfig.from_pretrained(model_config_path) + def infer_task_from_model_architecture(model_config: str): architecture = model_config.architectures[0] task = None for arch_options in ARCHITECTURES_2_TASK: @@ -110,31 +111,46 @@ def load(self) -> bool: self.ready = True return self.ready + model_config = AutoConfig.from_pretrained(model_id_or_path) + if not self.task: - self.task = self.infer_task_from_model_architecture(model_id_or_path) + self.task = 
self.infer_task_from_model_architecture(model_config) + + # device_map = "auto" enables model parallelism, but not all model architectures support it. + # As a pre-check, we initialize the model class without weights to inspect `_no_split_modules` + # and use device_map = "auto" for models that support it; otherwise keep cuda/cpu. + with init_empty_weights(): + self.model = AutoModel.from_config(model_config) + + if self.model._no_split_modules: + self.device_map = "auto" # load huggingface tokenizer self.tokenizer = AutoTokenizer.from_pretrained( - model_id_or_path, do_lower_case=self.do_lower_case) + model_id_or_path, do_lower_case=self.do_lower_case, device_map=self.device_map) + if not self.tokenizer.pad_token: + self.tokenizer.add_special_tokens({'pad_token': '[PAD]'}) logger.info(f"successfully loaded tokenizer for task: {self.task}") + # load huggingface model using from_pretrained for inference mode if not self.predictor_host: if self.task == MLTask.sequence_classification.value: self.model = AutoModelForSequenceClassification.from_pretrained( - model_id_or_path) + model_id_or_path, device_map=self.device_map) elif self.task == MLTask.question_answering.value: - self.model = AutoModelForQuestionAnswering.from_pretrained(model_id_or_path) + self.model = AutoModelForQuestionAnswering.from_pretrained( + model_id_or_path, device_map=self.device_map) elif self.task == MLTask.token_classification.value: - self.model = AutoModelForTokenClassification.from_pretrained(model_id_or_path) + self.model = AutoModelForTokenClassification.from_pretrained( + model_id_or_path, device_map=self.device_map) elif self.task == MLTask.fill_mask.value: - self.model = AutoModelForMaskedLM.from_pretrained(model_id_or_path) + self.model = AutoModelForMaskedLM.from_pretrained(model_id_or_path, device_map=self.device_map) elif self.task == MLTask.text_generation.value: - self.model = AutoModelForCausalLM.from_pretrained(model_id_or_path) + self.model = AutoModelForCausalLM.from_pretrained(model_id_or_path, device_map=self.device_map) elif self.task == MLTask.text2text_generation.value: - self.model = AutoModelForSeq2SeqLM.from_pretrained(model_id_or_path) + self.model = AutoModelForSeq2SeqLM.from_pretrained(model_id_or_path, device_map=self.device_map) else: raise ValueError(f"Unsupported task {self.task}. Please check the supported `task` option.") self.model.eval() - self.model.to(self.device) logger.info(f"successfully loaded huggingface model from path {model_id_or_path}") self.ready = True return self.ready diff --git a/python/huggingfaceserver/poetry.lock b/python/huggingfaceserver/poetry.lock index fa118102457..7eb52446438 100644 --- a/python/huggingfaceserver/poetry.lock +++ b/python/huggingfaceserver/poetry.lock @@ -1,5 +1,35 @@ # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+[[package]] +name = "accelerate" +version = "0.27.2" +description = "Accelerate" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "accelerate-0.27.2-py3-none-any.whl", hash = "sha256:a818dd27b9ba24e9eb5030d1b285cf4cdd1b41bbfa675fb4eb2477ddfc097074"}, + {file = "accelerate-0.27.2.tar.gz", hash = "sha256:cc715fe9a8bc7a286259bfb6d65fb78363badd3371e7cbda4e4a4ef34a0010aa"}, +] + +[package.dependencies] +huggingface-hub = "*" +numpy = ">=1.17" +packaging = ">=20.0" +psutil = "*" +pyyaml = "*" +safetensors = ">=0.3.1" +torch = ">=1.10.0" + +[package.extras] +dev = ["bitsandbytes", "black (>=23.1,<24.0)", "datasets", "deepspeed (<0.13.0)", "evaluate", "hf-doc-builder (>=0.3.0)", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "rich", "ruff (>=0.1.15,<0.2.0)", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +quality = ["black (>=23.1,<24.0)", "hf-doc-builder (>=0.3.0)", "ruff (>=0.1.15,<0.2.0)"] +rich = ["rich"] +sagemaker = ["sagemaker"] +test-dev = ["bitsandbytes", "datasets", "deepspeed (<0.13.0)", "evaluate", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] +test-prod = ["parameterized", "pytest", "pytest-subtests", "pytest-xdist"] +test-trackers = ["comet-ml", "dvclive", "tensorboard", "wandb"] +testing = ["bitsandbytes", "datasets", "deepspeed (<0.13.0)", "evaluate", "parameterized", "pytest", "pytest-subtests", "pytest-xdist", "scikit-learn", "scipy", "timm", "torchpippy (>=0.2.0)", "tqdm", "transformers"] + [[package]] name = "aiohttp" version = "3.9.3" @@ -4274,4 +4304,4 @@ vllm = ["vllm"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "d707cb9cfa8bdc420eaa108304bcd1bee96d6771f63db8b4dedda56ffdc13e0f" +content-hash = "1f579b720a6ddda1c26a3790267156c6c040bc039aca7484883def6035318cfc" diff --git a/python/huggingfaceserver/pyproject.toml b/python/huggingfaceserver/pyproject.toml index 99dc1f97f91..b3df54454e0 100644 --- a/python/huggingfaceserver/pyproject.toml +++ b/python/huggingfaceserver/pyproject.toml @@ -13,6 +13,7 @@ packages = [ python = ">=3.9,<3.12" kserve = { path = "../kserve", extras = ["storage"], develop = true } transformers = "~4.37.0" +accelerate = "~0.27.2" torch = "~2.1.2" vllm = { version = "^0.2.7", optional = true } diff --git a/python/kserve/kserve/storage/storage.py b/python/kserve/kserve/storage/storage.py index 32777962bae..51c88e22220 100644 --- a/python/kserve/kserve/storage/storage.py +++ b/python/kserve/kserve/storage/storage.py @@ -438,7 +438,10 @@ def _download_azure_blob(uri, out_dir: str): # pylint: disable=too-many-locals blobs += container_client.list_blobs(name_starts_with=item.name, include=['snapshots']) for blob in blobs: - dest_path = os.path.join(out_dir, blob.name.replace(prefix, "", 1).lstrip("/")) + file_name = blob.name.replace(prefix, "", 1).lstrip("/") + if not file_name: + file_name = os.path.basename(prefix) + dest_path = os.path.join(out_dir, file_name) Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True) logging.info("Downloading: %s to %s", blob.name, dest_path) downloader = container_client.download_blob(blob.name) diff --git a/python/kserve/kserve/storage/test/test_azure_storage.py b/python/kserve/kserve/storage/test/test_azure_storage.py index ff837fd4ba2..56730fbf16f 100644 --- a/python/kserve/kserve/storage/test/test_azure_storage.py +++ b/python/kserve/kserve/storage/test/test_azure_storage.py @@ -105,6 +105,25 @@ def test_blob(mock_storage, mock_makedirs): # pylint: 
disable=unused-argument credential=None) +@mock.patch(STORAGE_MODULE + '.os.makedirs') +@mock.patch(STORAGE_MODULE + '.BlobServiceClient') +def test_blob_file_direct(mock_storage, mock_makedirs): # pylint: disable=unused-argument + + # given + blob_path = 'https://accountname.blob.core.windows.net/container/somefile.text' + paths = ['somefile.text'] + mock_blob, mock_container = create_mock_blob(mock_storage, paths) + + # when + Storage._download_azure_blob(blob_path, "dest_path") + + # then + arg_list = get_call_args(mock_container.download_blob.call_args_list) + assert arg_list == [('somefile.text',)] + mock_storage.assert_called_with('https://accountname.blob.core.windows.net', + credential=None) + + @mock.patch(STORAGE_MODULE + '.os.makedirs') @mock.patch(STORAGE_MODULE + '.Storage._get_azure_storage_token') @mock.patch(STORAGE_MODULE + '.BlobServiceClient') diff --git a/test/overlays/knative/knative-serving-istio.yaml b/test/overlays/knative/knative-serving-istio.yaml index f1bcc747bf3..8dc6d08802d 100644 --- a/test/overlays/knative/knative-serving-istio.yaml +++ b/test/overlays/knative/knative-serving-istio.yaml @@ -9,7 +9,7 @@ metadata: name: knative-serving namespace: knative-serving spec: - version: "1.10.2" + version: "1.13.1" config: deployment: # Skip tag resolution for certain domains @@ -89,4 +89,4 @@ spec: memory: 32Mi limits: cpu: 100m - memory: 128Mi \ No newline at end of file + memory: 128Mi diff --git a/test/overlays/knative/knative-serving-kourier.yaml b/test/overlays/knative/knative-serving-kourier.yaml index 8535ea0b01c..97268aba452 100644 --- a/test/overlays/knative/knative-serving-kourier.yaml +++ b/test/overlays/knative/knative-serving-kourier.yaml @@ -9,7 +9,7 @@ metadata: name: knative-serving namespace: knative-serving spec: - version: "1.10.2" + version: "1.13.1" ingress: kourier: enabled: true @@ -94,4 +94,4 @@ spec: memory: 32Mi limits: cpu: 100m - memory: 128Mi \ No newline at end of file + memory: 128Mi diff --git a/test/scripts/gh-actions/install-knative-operator.sh b/test/scripts/gh-actions/install-knative-operator.sh index e2440e97d7b..ab866f3ab9e 100755 --- a/test/scripts/gh-actions/install-knative-operator.sh +++ b/test/scripts/gh-actions/install-knative-operator.sh @@ -20,9 +20,9 @@ set -o errexit set -o nounset set -o pipefail -KNATIVE_OPERATOR_PLUGIN_VERSION="knative-v1.11.0" -KNATIVE_OPERATOR_VERSION="1.11.2" -KNATIVE_CLI_VERSION="knative-v1.11.0" +KNATIVE_OPERATOR_PLUGIN_VERSION="knative-v1.13.0" +KNATIVE_OPERATOR_VERSION="1.13.1" +KNATIVE_CLI_VERSION="knative-v1.13.0" echo "Installing Knative cli ..." 
wget https://github.com/knative/client/releases/download/"${KNATIVE_CLI_VERSION}"/kn-linux-amd64 -O /usr/local/bin/kn && chmod +x /usr/local/bin/kn diff --git a/test/scripts/gh-actions/setup-deps.sh b/test/scripts/gh-actions/setup-deps.sh index 5acd8cbd75f..fbf5e018d48 100755 --- a/test/scripts/gh-actions/setup-deps.sh +++ b/test/scripts/gh-actions/setup-deps.sh @@ -24,7 +24,7 @@ set -o pipefail SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}"; )" &> /dev/null && pwd 2> /dev/null; )"; DEPLOYMENT_MODE="${1:-'serverless'}" -ISTIO_VERSION="1.19.4" +ISTIO_VERSION="1.20.4" CERT_MANAGER_VERSION="v1.5.0" YQ_VERSION="v4.28.1" diff --git a/tools/tf2openapi/Dockerfile b/tools/tf2openapi/Dockerfile index e070eada3cb..2202d798f93 100644 --- a/tools/tf2openapi/Dockerfile +++ b/tools/tf2openapi/Dockerfile @@ -9,7 +9,7 @@ COPY go.mod go.mod COPY go.sum go.sum # Build -RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o tf2openapi ./tools/tf2openapi/cmd +RUN CGO_ENABLED=0 GOOS=linux go build -a -o tf2openapi ./tools/tf2openapi/cmd # Copy tf2openapi into a thin image FROM gcr.io/distroless/static:nonroot
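The test changes above thread the same kubernetes.Interface through the suites, either by building a real clientset with kubernetes.NewForConfig against the envtest config or by passing the fake clientset from k8s.io/client-go/kubernetes/fake, as agent_injector_test.go does. A minimal sketch of the fake-clientset pattern follows, assuming only client-go; the object names and data keys are illustrative, and the lookup merely mirrors the ServiceAccount-to-Secret walk the credential builder now performs through the clientset:

// Sketch: exercise clientset-based code in a unit test by pre-seeding a fake clientset.
package main

import (
	"context"
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	fakeclientset "k8s.io/client-go/kubernetes/fake"
)

func main() {
	// Seed the fake clientset with a ServiceAccount that references a credential Secret.
	sa := &v1.ServiceAccount{
		ObjectMeta: metav1.ObjectMeta{Name: "default", Namespace: "default"},
		Secrets:    []v1.ObjectReference{{Name: "storage-credentials", Namespace: "default"}},
	}
	secret := &v1.Secret{
		ObjectMeta: metav1.ObjectMeta{Name: "storage-credentials", Namespace: "default"},
		Data:       map[string][]byte{"accessKey": []byte("test"), "secretKey": []byte("test")},
	}
	clientset := fakeclientset.NewSimpleClientset(sa, secret)

	// Direct GETs through the typed clientset, the same call shape used in the production code.
	gotSA, err := clientset.CoreV1().ServiceAccounts("default").Get(context.TODO(), "default", metav1.GetOptions{})
	if err != nil {
		panic(err)
	}
	for _, ref := range gotSA.Secrets {
		s, err := clientset.CoreV1().Secrets("default").Get(context.TODO(), ref.Name, metav1.GetOptions{})
		if err != nil {
			panic(err)
		}
		fmt.Println("found credential secret:", s.Name, "with", len(s.Data), "keys")
	}
}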