diff --git a/.gitignore b/.gitignore index 961d4cd660d..588e3ed7547 100644 --- a/.gitignore +++ b/.gitignore @@ -74,3 +74,7 @@ docker-compose.yml # Istio installer downloaded by quick install istio-* + + +# Ignore DevSpace cache and log folder +.devspace/ diff --git a/dev_tools/devspace.yaml b/dev_tools/devspace.yaml new file mode 100644 index 00000000000..af3c3b82afe --- /dev/null +++ b/dev_tools/devspace.yaml @@ -0,0 +1,102 @@ +version: v2beta1 +name: kserve-controllergit + +# This is a list of `pipelines` that DevSpace can execute (you can define your own) +pipelines: + # This is the pipeline for the main command: `devspace dev` (or `devspace run-pipeline dev`) + dev: + run: |- + run_dependencies --all # 1. Deploy any projects this project needs (see "dependencies") + ensure_pull_secrets --all # 2. Ensure pull secrets + create_deployments --all # 3. Deploy Helm charts and manifests specfied as "deployments" + start_dev app # 4. Start dev mode "app" (see "dev" section) + # You can run this pipeline via `devspace deploy` (or `devspace run-pipeline deploy`) + deploy: + run: |- + run_dependencies --all # 1. Deploy any projects this project needs (see "dependencies") + ensure_pull_secrets --all # 2. Ensure pull secrets + build_images --all -t $(git describe --always) # 3. Build, tag (git commit hash) and push all images (see "images") + create_deployments --all # 4. 
Deploy Helm charts and manifests specified as "deployments"
+  # You can run this pipeline via `devspace run-pipeline debug`
+  debug:
+    run: |-
+      run_pipelines dev
+      code --folder-uri vscode-remote://ssh-remote+app.kserve-controllergit.devspace/app
+
+# This is a list of `images` that DevSpace can build for this project
+# We recommend to skip image building during development (devspace dev) as much as possible
+images:
+  app:
+    image: quay.io/rh-ee-allausas/kserve-controller:latest-0.0.1
+    dockerfile: ./Dockerfile
+# This is a list of `deployments` that DevSpace can create for this project
+deployments:
+  app:
+    # This deployment uses `kubectl` but you can also define `helm` deployments
+    kubectl:
+      manifests:
+        - ../config/manager
+      kustomize: true
+
+vars:
+  GOCACHE:
+    source: env
+    default: /tmp/.cache
+  GOPROXY:
+    source: env
+    default: https://proxy.golang.org,direct
+
+# This is a list of `dev` containers that are based on the containers created by your deployments
+dev:
+  app:
+    namespace: opendatahub
+    workingDir: /app
+    # Search for the container that runs this image
+    labelSelector:
+      control-plane: kserve-controller-manager
+    # Replace the container image with this dev-optimized image (allows to skip image building during development)
+    devImage: quay.io/rh-ee-allausas/golang:1.22-odh-devspace-debug
+    # Sync files between the local filesystem and the development container
+    sync:
+      - path: ../:/app
+    # Open a terminal and use the following command to start it
+    terminal:
+      command: dev_tools/devspace_start.sh
+    # Inject a lightweight SSH server into the container (so your IDE can connect to the remote dev env)
+    ssh:
+      enabled: true
+    # Make the following commands from my local machine available inside the dev container
+    proxyCommands:
+      - command: devspace
+      - command: kubectl
+      - command: helm
+      - command: git
+    # Forward the following ports to be able to access your application via localhost
+    ports:
+      - port: "2345"
+    resources:
+      requests:
+        cpu: "1.5"
+ memory: 2Gi + limits: + cpu: "2.5" + memory: 4Gi + patches: + - op: replace + path: spec.securityContext.runAsNonRoot + value: false + +# Use the `commands` section to define repeatable dev workflows for this project +commands: + migrate-db: + command: |- + echo 'This is a cross-platform, shared command that can be used to codify any kind of dev task.' + echo 'Anyone using this project can invoke it via "devspace run migrate-db"' + +# Define dependencies to other projects with a devspace.yaml +# dependencies: +# api: +# git: https://... # Git-based dependencies +# tag: v1.0.0 +# ui: +# path: ./ui # Path-based dependencies (for monorepos) diff --git a/dev_tools/devspace_start.sh b/dev_tools/devspace_start.sh new file mode 100755 index 00000000000..cebf9e346e2 --- /dev/null +++ b/dev_tools/devspace_start.sh @@ -0,0 +1,36 @@ +#!/bin/bash +set +e # Continue on errors + +COLOR_BLUE="\033[0;94m" +COLOR_GREEN="\033[0;92m" +COLOR_RESET="\033[0m" + +# Print useful output for user +echo -e "${COLOR_BLUE} + %########% + %###########% ____ _____ + %#########% | _ \ ___ __ __ / ___/ ____ ____ ____ ___ + %#########% | | | | / _ \\\\\ \ / / \___ \ | _ \ / _ | / __// _ \\ + %#############% | |_| |( __/ \ V / ____) )| |_) )( (_| |( (__( __/ + %#############% |____/ \___| \_/ \____/ | __/ \__,_| \___\\\\\___| + %###############% |_| + %###########%${COLOR_RESET} + + +Welcome to your development container! 
+ +This is how you can work with it: +- Files will be synchronized between your local machine and this container +- Some ports will be forwarded, so you can access this container via localhost +- Run \`${COLOR_GREEN}go run main.go${COLOR_RESET}\` to start the application +" + +# Set terminal prompt +export PS1="\[${COLOR_BLUE}\]devspace\[${COLOR_RESET}\] ./\W \[${COLOR_BLUE}\]\\$\[${COLOR_RESET}\] " +if [ -z "$BASH" ]; then export PS1="$ "; fi + +# Include project's bin/ folder in PATH +export PATH="./bin:$PATH" + +# Open shell +bash --norc diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index 3b8f2369dee..730bc7a4cab 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -100,6 +100,7 @@ var ( PrometheusPortAnnotationKey = "prometheus.io/port" PrometheusPathAnnotationKey = "prometheus.io/path" StorageReadonlyAnnotationKey = "storage.kserve.io/readonly" + AutoUpdateAnnotationKey = KServeAPIGroupName + "/auto-update" DefaultPrometheusPath = "/metrics" QueueProxyAggregatePrometheusMetricsPort = 9088 DefaultPodPrometheusPort = "9091" diff --git a/pkg/controller/v1beta1/inferenceservice/controller.go b/pkg/controller/v1beta1/inferenceservice/controller.go index 9bb7c1b9071..112546ea857 100644 --- a/pkg/controller/v1beta1/inferenceservice/controller.go +++ b/pkg/controller/v1beta1/inferenceservice/controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "reflect" + "slices" "strings" "github.com/go-logr/logr" @@ -38,8 +39,12 @@ import ( "knative.dev/pkg/apis" knservingv1 "knative.dev/serving/pkg/apis/serving/v1" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/yaml" @@ -330,8 +335,48 @@ func 
inferenceServiceStatusEqual(s1, s2 v1beta1api.InferenceServiceStatus, deplo return equality.Semantic.DeepEqual(s1, s2) } +func (r *InferenceServiceReconciler) servingRuntimeFunc(ctx context.Context, obj client.Object) []reconcile.Request { + runtimeObj, ok := obj.(*v1alpha1api.ServingRuntime) + if !ok || runtimeObj == nil || runtimeObj.Spec.SupportedModelFormats == nil { + return nil + } + + var isvcList v1beta1api.InferenceServiceList + // List all InferenceServices in the same namespace. + if err := r.Client.List(ctx, &isvcList, client.InNamespace(runtimeObj.Namespace)); err != nil { + r.Log.Error(err, "unable to list InferenceServices", "runtime", runtimeObj.Name) + return nil + } + + var requests []reconcile.Request + supportedModelFormatNames := []string{} + for _, supportedModelFormat := range runtimeObj.Spec.SupportedModelFormats { + supportedModelFormatNames = append(supportedModelFormatNames, supportedModelFormat.Name) + } + for _, isvc := range isvcList.Items { + // Filter out if auto-update is explicitly disabled. 
+ annotations := isvc.GetAnnotations() + if annotations != nil { + if autoUpdate, found := annotations[constants.AutoUpdateAnnotationKey]; found && autoUpdate == "false" { + r.Log.Info("Auto-update is disabled for InferenceService", "InferenceService", isvc.Name) + continue + } + } + if slices.Contains(supportedModelFormatNames, isvc.Spec.Predictor.Model.ModelFormat.Name) { + requests = append(requests, reconcile.Request{ + NamespacedName: types.NamespacedName{ + Namespace: isvc.Namespace, + Name: isvc.Name, + }, + }) + } + } + return requests +} + func (r *InferenceServiceReconciler) SetupWithManager(mgr ctrl.Manager, deployConfig *v1beta1api.DeployConfig, ingressConfig *v1beta1api.IngressConfig) error { r.ClientConfig = mgr.GetConfig() + ctx := context.Background() ksvcFound, err := utils.IsCrdAvailable(r.ClientConfig, knservingv1.SchemeGroupVersion.String(), constants.KnativeServiceKind) if err != nil { @@ -343,6 +388,33 @@ func (r *InferenceServiceReconciler) SetupWithManager(mgr ctrl.Manager, deployCo return err } + if err := mgr.GetFieldIndexer().IndexField(ctx, &v1beta1api.InferenceService{}, "spec.predictor.model.runtime", func(rawObj client.Object) []string { + isvc, ok := rawObj.(*v1beta1api.InferenceService) + if !ok { + return nil + } + if isvc.Spec.Predictor.Model == nil || isvc.Spec.Predictor.Model.Runtime == nil { + return nil + } + if *isvc.Spec.Predictor.Model.Runtime != "" { + return []string{*isvc.Spec.Predictor.Model.Runtime} + } + return nil + }); err != nil { + return err + } + + servingRuntimesPredicate := predicate.Funcs{ + UpdateFunc: func(e event.UpdateEvent) bool { + oldServingRuntime := e.ObjectOld.(*v1alpha1api.ServingRuntime) + newServingRuntime := e.ObjectNew.(*v1alpha1api.ServingRuntime) + return !reflect.DeepEqual(oldServingRuntime.Spec, newServingRuntime.Spec) + }, + CreateFunc: func(e event.CreateEvent) bool { return false }, + DeleteFunc: func(e event.DeleteEvent) bool { return false }, + GenericFunc: func(e event.GenericEvent) 
bool { return false }, + } + ctrlBuilder := ctrl.NewControllerManagedBy(mgr). For(&v1beta1api.InferenceService{}). Owns(&appsv1.Deployment{}) @@ -359,7 +431,7 @@ func (r *InferenceServiceReconciler) SetupWithManager(mgr ctrl.Manager, deployCo r.Log.Info("The InferenceService controller won't watch networking.istio.io/v1beta1/VirtualService resources because the CRD is not available.") } - return ctrlBuilder.Complete(r) + return ctrlBuilder.Watches(&v1alpha1api.ServingRuntime{}, handler.EnqueueRequestsFromMapFunc(r.servingRuntimeFunc), builder.WithPredicates(servingRuntimesPredicate)).Complete(r) } func (r *InferenceServiceReconciler) deleteExternalResources(isvc *v1beta1api.InferenceService) error { diff --git a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go index 090989f5bb4..da8a2539983 100644 --- a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go +++ b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go @@ -1575,6 +1575,653 @@ var _ = Describe("v1beta1 inference service controller", func() { }) }) + Context("When Updating a Serving Runtime", func() { + It("InferenceService should reconcile the deployment if auto-update annotation is not present", func() { + // Create configmap + isvcNamespace := constants.KServeNamespace + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + Eventually(func() error { + cm := &v1.ConfigMap{} + return k8sClient.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: isvcNamespace}, cm) + }, timeout, interval).Should(Succeed()) + isvcName := "isvc-enable-auto-update-missing" + serviceKey := types.NamespacedName{Name: 
isvcName, Namespace: isvcNamespace}
+			storageUri := "s3://test/mnist/export"
+			servingRuntimeName := "pytorch-serving-auto-update-missing"
+			servingRuntime := &v1alpha1.ServingRuntime{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      servingRuntimeName,
+					Namespace: isvcNamespace,
+				},
+				Spec: v1alpha1.ServingRuntimeSpec{
+					SupportedModelFormats: []v1alpha1.SupportedModelFormat{
+						{
+							Name:       "pytorch",
+							Version:    proto.String("1"),
+							AutoSelect: proto.Bool(true),
+						},
+					},
+					ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{
+						Labels: map[string]string{
+							"key1": "val1FromSR",
+							"key2": "val2FromSR",
+							"key3": "val3FromSR",
+						},
+						Annotations: map[string]string{
+							"key1": "val1FromSR",
+							"key2": "val2FromSR",
+							"key3": "val3FromSR",
+						},
+						Containers: []v1.Container{
+							{
+								Name:    constants.InferenceServiceContainerName,
+								Image:   "pytorch/serving:1.14.0",
+								Command: []string{"/usr/bin/pytorch_model_server"},
+								Args: []string{
+									"--port=9000",
+									"--rest_api_port=8080",
+									"--model_base_path=/mnt/models",
+									"--rest_api_timeout_in_ms=60000",
+								},
+								Resources: defaultResource,
+							},
+						},
+						ImagePullSecrets: []v1.LocalObjectReference{
+							{Name: "sr-image-pull-secret"},
+						},
+					},
+					Disabled: proto.Bool(false),
+				},
+			}
+			Expect(k8sClient.Create(ctx, servingRuntime)).Should(Succeed())
+			Eventually(func() error {
+				return k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, &v1alpha1.ServingRuntime{})
+			}, timeout, interval).Should(Succeed())
+			defer k8sClient.Delete(ctx, servingRuntime)
+
+			// Define InferenceService without the auto-update annotation.
+ isvc := &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: isvcNamespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "hpa", + "serving.kserve.io/metrics": "cpu", + "serving.kserve.io/targetUtilizationPercentage": "75", + }, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + PyTorch: &v1beta1.TorchServeSpec{ + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: &storageUri, + RuntimeVersion: proto.String("1.14.0"), + Container: v1.Container{ + Name: constants.InferenceServiceContainerName, + Resources: defaultResource, + }, + }, + }, + }, + }, + } + + createdConfigMap := &v1.ConfigMap{} + Eventually(func() error { + return k8sClient.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: isvcNamespace}, createdConfigMap) + }, timeout, interval).Should(Succeed()) + isvc.DefaultInferenceService(nil, nil, &v1beta1.SecurityConfig{AutoMountServiceAccountToken: false}, nil) + Expect(k8sClient.Create(ctx, isvc)).Should(Succeed()) + inferenceService := &v1beta1.InferenceService{} + + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKey, inferenceService) + return err == nil + }, timeout, interval).Should(BeTrue()) + defer k8sClient.Delete(ctx, isvc) + + // Update the ServingRuntime spec + servingRuntimeToUpdate := &v1alpha1.ServingRuntime{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, servingRuntimeToUpdate)).Should(Succeed()) + servingRuntimeToUpdate.Spec.ServingRuntimePodSpec.Labels["key1"] = "updatedServingRuntime" + Eventually(func() error { + return k8sClient.Update(ctx, servingRuntimeToUpdate) + }, timeout, interval).Should(Succeed()) + + // Wait until the ServingRuntime reflects the updated spec. 
+ servingRuntimeAfterUpdate := &v1alpha1.ServingRuntime{} + Eventually(func() (string, error) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, servingRuntimeAfterUpdate) + if err != nil { + return "", err + } + return servingRuntimeAfterUpdate.Spec.Labels["key1"], nil + }, timeout, interval).Should(Equal("updatedServingRuntime")) + deploymentAfterUpdate := &appsv1.Deployment{} + deploymentName := constants.PredictorServiceName(serviceKey.Name) + Eventually(func() (string, error) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: deploymentName, Namespace: serviceKey.Namespace}, deploymentAfterUpdate) + if err != nil { + return "", err + } + return deploymentAfterUpdate.Spec.Template.Labels["key1"], nil + }, timeout, interval).Should(Equal("updatedServingRuntime")) + }) + + It("InferenceService should reconcile the deployment if auto-update is enabled ", func() { + // Create configmap + isvcNamespace := constants.KServeNamespace + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: isvcNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + Eventually(func() error { + cm := &v1.ConfigMap{} + return k8sClient.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: isvcNamespace}, cm) + }, timeout, interval).Should(Succeed()) + isvcName := "isvc-enable-auto-update-true" + serviceKey := types.NamespacedName{Name: isvcName, Namespace: isvcNamespace} + storageUri := "s3://test/mnist/export" + servingRuntimeName := "pytorch-serving-auto-update-true" + servingRuntime := &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: isvcNamespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + 
Name:       "pytorch",
+							Version:    proto.String("1"),
+							AutoSelect: proto.Bool(true),
+						},
+					},
+					ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{
+						Labels: map[string]string{
+							"key1": "val1FromSR",
+							"key2": "val2FromSR",
+							"key3": "val3FromSR",
+						},
+						Annotations: map[string]string{
+							"key1": "val1FromSR",
+							"key2": "val2FromSR",
+							"key3": "val3FromSR",
+						},
+						Containers: []v1.Container{
+							{
+								Name:    constants.InferenceServiceContainerName,
+								Image:   "pytorch/serving:1.14.0",
+								Command: []string{"/usr/bin/pytorch_model_server"},
+								Args: []string{
+									"--port=9000",
+									"--rest_api_port=8080",
+									"--model_base_path=/mnt/models",
+									"--rest_api_timeout_in_ms=60000",
+								},
+								Resources: defaultResource,
+							},
+						},
+						ImagePullSecrets: []v1.LocalObjectReference{
+							{Name: "sr-image-pull-secret"},
+						},
+					},
+					Disabled: proto.Bool(false),
+				},
+			}
+			Expect(k8sClient.Create(ctx, servingRuntime)).Should(Succeed())
+			Eventually(func() error {
+				return k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, &v1alpha1.ServingRuntime{})
+			}, timeout, interval).Should(Succeed())
+			defer k8sClient.Delete(ctx, servingRuntime)
+			// Define InferenceService with auto-update enabled.
+ isvc := &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: isvcNamespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "external", + constants.AutoUpdateAnnotationKey: "true", + }, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + PyTorch: &v1beta1.TorchServeSpec{ + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: &storageUri, + RuntimeVersion: proto.String("1.14.0"), + Container: v1.Container{ + Name: constants.InferenceServiceContainerName, + Resources: defaultResource, + }, + }, + }, + }, + }, + } + isvc.DefaultInferenceService(nil, nil, &v1beta1.SecurityConfig{AutoMountServiceAccountToken: false}, nil) + Expect(k8sClient.Create(ctx, isvc)).Should(Succeed()) + + inferenceService := &v1beta1.InferenceService{} + + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKey, inferenceService) + return err == nil + }, timeout, interval).Should(BeTrue()) + defer k8sClient.Delete(ctx, isvc) + + // Update the ServingRuntime spec + servingRuntimeToUpdate := &v1alpha1.ServingRuntime{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, servingRuntimeToUpdate)).Should(Succeed()) + servingRuntimeToUpdate.Spec.ServingRuntimePodSpec.Labels["key1"] = "updatedServingRuntime" + Eventually(func() error { + return k8sClient.Update(ctx, servingRuntimeToUpdate) + }, timeout, interval).Should(Succeed()) + + // Wait until the ServingRuntime reflects the updated spec. 
+ servingRuntimeAfterUpdate := &v1alpha1.ServingRuntime{} + Eventually(func() (string, error) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, servingRuntimeAfterUpdate) + if err != nil { + return "", err + } + return servingRuntimeAfterUpdate.Spec.Labels["key1"], nil + }, timeout, interval).Should(Equal("updatedServingRuntime")) + // Wait until the Deployment reflects the update + deploymentAfterUpdate := &appsv1.Deployment{} + deploymentName := constants.PredictorServiceName(serviceKey.Name) + Eventually(func() (string, error) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: deploymentName, Namespace: serviceKey.Namespace}, deploymentAfterUpdate) + if err != nil { + return "", err + } + return deploymentAfterUpdate.Spec.Template.Labels["key1"], nil + }, timeout, interval).Should(Equal("updatedServingRuntime")) + }) + + It("InferenceService should not reconcile the deployment if auto-update is disabled", func() { + // Create configmap + isvcNamespace := constants.KServeNamespace + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: isvcNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + Eventually(func() error { + cm := &v1.ConfigMap{} + return k8sClient.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: isvcNamespace}, cm) + }, timeout, interval).Should(Succeed()) + isvcName := "isvc-enable-auto-update-false" + serviceKey := types.NamespacedName{Name: isvcName, Namespace: isvcNamespace} + storageUri := "s3://test/mnist/export" + servingRuntimeName := "pytorch-serving-auto-update-false" + servingRuntime := &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: servingRuntimeName, + Namespace: isvcNamespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + 
SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + Name: "pytorch", + Version: proto.String("1"), + AutoSelect: proto.Bool(true), + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Labels: map[string]string{ + "key1": "val1FromSR", + "key2": "val2FromSR", + "key3": "val3FromSR", + }, + Annotations: map[string]string{ + "key1": "val1FromSR", + "key2": "val2FromSR", + "key3": "val3FromSR", + }, + Containers: []v1.Container{ + { + Name: constants.InferenceServiceContainerName, + Image: "pytorch/serving:1.14.0", + Command: []string{"/usr/bin/pytorch_model_server"}, + Args: []string{ + "--port=9000", + "--rest_api_port=8080", + "--model_base_path=/mnt/models", + "--rest_api_timeout_in_ms=60000", + }, + Resources: defaultResource, + }, + }, + ImagePullSecrets: []v1.LocalObjectReference{ + {Name: "sr-image-pull-secret"}, + }, + }, + Disabled: proto.Bool(false), + }, + } + Expect(k8sClient.Create(ctx, servingRuntime)).Should(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, &v1alpha1.ServingRuntime{}) + }, timeout, interval).Should(Succeed()) + defer k8sClient.Delete(ctx, servingRuntime) + + // Define InferenceService with auto-update disabled. 
+ isvc := &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: isvcNamespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "external", + constants.AutoUpdateAnnotationKey: "false", + }, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + PyTorch: &v1beta1.TorchServeSpec{ + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: &storageUri, + RuntimeVersion: proto.String("1.14.0"), + Container: v1.Container{ + Name: constants.InferenceServiceContainerName, + Resources: defaultResource, + }, + }, + }, + }, + }, + } + isvc.DefaultInferenceService(nil, nil, &v1beta1.SecurityConfig{AutoMountServiceAccountToken: false}, nil) + Expect(k8sClient.Create(ctx, isvc)).Should(Succeed()) + + inferenceService := &v1beta1.InferenceService{} + + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKey, inferenceService) + return err == nil + }, timeout, interval).Should(BeTrue()) + defer k8sClient.Delete(ctx, isvc) + + // Update the ServingRuntime spec + servingRuntimeToUpdate := &v1alpha1.ServingRuntime{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, servingRuntimeToUpdate)).Should(Succeed()) + servingRuntimeToUpdate.Spec.ServingRuntimePodSpec.Labels["key1"] = "updatedServingRuntime" + Eventually(func() error { + return k8sClient.Update(ctx, servingRuntimeToUpdate) + }, timeout, interval).Should(Succeed()) + + // Wait until the ServingRuntime reflects the updated spec. 
+			servingRuntimeAfterUpdate := &v1alpha1.ServingRuntime{}
+			Eventually(func() (string, error) {
+				err := k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeName, Namespace: isvcNamespace}, servingRuntimeAfterUpdate)
+				if err != nil {
+					return "", err
+				}
+				return servingRuntimeAfterUpdate.Spec.Labels["key1"], nil
+			}, timeout, interval).Should(Equal("updatedServingRuntime"))
+			// Check to make sure deployment didn't update
+			deploymentAfterUpdate := &appsv1.Deployment{}
+			deploymentName := constants.PredictorServiceName(serviceKey.Name)
+			Eventually(func() (string, error) {
+				err := k8sClient.Get(ctx, types.NamespacedName{Name: deploymentName, Namespace: serviceKey.Namespace}, deploymentAfterUpdate)
+				if err != nil {
+					return "", err
+				}
+				return deploymentAfterUpdate.Spec.Template.Labels["key1"], nil
+			}, timeout, interval).Should(Equal("val1FromSR"))
+		})
+		It("InferenceService should reconcile only if the matching serving runtime was updated even if multiple exist", func() {
+			// Create configmap
+			isvcNamespace := constants.KServeNamespace
+			var configMap = &v1.ConfigMap{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      constants.InferenceServiceConfigMapName,
+					Namespace: isvcNamespace,
+				},
+				Data: configs,
+			}
+			Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred())
+			defer k8sClient.Delete(context.TODO(), configMap)
+			Eventually(func() error {
+				cm := &v1.ConfigMap{}
+				return k8sClient.Get(context.TODO(), types.NamespacedName{Name: constants.InferenceServiceConfigMapName, Namespace: isvcNamespace}, cm)
+			}, timeout, interval).Should(Succeed())
+			isvcNamePytorch := "isvc-enable-auto-update-multiple-pytorch"
+			serviceKeyPytorch := types.NamespacedName{Name: isvcNamePytorch, Namespace: isvcNamespace}
+			isvcNameTensorflow := "isvc-enable-auto-update-multiple-tensorflow"
+			serviceKeyTensorflow := types.NamespacedName{Name: isvcNameTensorflow, Namespace: isvcNamespace}
+			storageUri := "s3://test/mnist/export"
+			servingRuntimePytorchName := 
"pytorch-serving-auto-update-true-multiple" + servingRuntimeTensorflowName := "tensorflow-serving-auto-update-true-multiple" + pytorchServingRuntime := &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: servingRuntimePytorchName, + Namespace: isvcNamespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + Name: "pytorch", + Version: proto.String("1"), + AutoSelect: proto.Bool(true), + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Labels: map[string]string{ + "key1": "val1FromSR", + "key2": "val2FromSR", + "key3": "val3FromSR", + }, + Annotations: map[string]string{ + "key1": "val1FromSR", + "key2": "val2FromSR", + "key3": "val3FromSR", + }, + Containers: []v1.Container{ + { + Name: constants.InferenceServiceContainerName, + Image: "pytorch/serving:1.14.0", + Command: []string{"/usr/bin/pytorch_model_server"}, + Args: []string{ + "--port=9000", + "--rest_api_port=8080", + "--model_base_path=/mnt/models", + "--rest_api_timeout_in_ms=60000", + }, + Resources: defaultResource, + }, + }, + ImagePullSecrets: []v1.LocalObjectReference{ + {Name: "sr-image-pull-secret"}, + }, + }, + Disabled: proto.Bool(false), + }, + } + Expect(k8sClient.Create(ctx, pytorchServingRuntime)).Should(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimePytorchName, Namespace: isvcNamespace}, &v1alpha1.ServingRuntime{}) + }, timeout, interval).Should(Succeed()) + defer k8sClient.Delete(ctx, pytorchServingRuntime) + + tensorflowServingRuntime := &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: servingRuntimeTensorflowName, + Namespace: isvcNamespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + Name: "tensorflow", + Version: proto.String("1"), + AutoSelect: proto.Bool(true), + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Labels: map[string]string{ + 
"key1": "val1FromSR",
+							"key2": "val2FromSR",
+							"key3": "val3FromSR",
+						},
+						Annotations: map[string]string{
+							"key1": "val1FromSR",
+							"key2": "val2FromSR",
+							"key3": "val3FromSR",
+						},
+						Containers: []v1.Container{
+							{
+								Name:    constants.InferenceServiceContainerName,
+								Image:   "tensorflow/serving:1.14.0",
+								Command: []string{"/usr/bin/tensorflow_server_model"},
+								Args: []string{
+									"--port=9000",
+									"--rest_api_port=8080",
+									"--model_base_path=/mnt/models",
+									"--rest_api_timeout_in_ms=60000",
+								},
+								Resources: defaultResource,
+							},
+						},
+						ImagePullSecrets: []v1.LocalObjectReference{
+							{Name: "sr-image-pull-secret"},
+						},
+					},
+					Disabled: proto.Bool(false),
+				},
+			}
+
+			Expect(k8sClient.Create(ctx, tensorflowServingRuntime)).Should(Succeed())
+			Eventually(func() error {
+				return k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimeTensorflowName, Namespace: isvcNamespace}, &v1alpha1.ServingRuntime{})
+			}, timeout, interval).Should(Succeed())
+			defer k8sClient.Delete(ctx, tensorflowServingRuntime)
+			// Define InferenceService with auto-update enabled.
+ pytorchIsvc := &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKeyPytorch.Name, + Namespace: isvcNamespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "external", + constants.AutoUpdateAnnotationKey: "true", + }, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + PyTorch: &v1beta1.TorchServeSpec{ + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: &storageUri, + RuntimeVersion: proto.String("1.14.0"), + Container: v1.Container{ + Name: constants.InferenceServiceContainerName, + Resources: defaultResource, + }, + }, + }, + }, + }, + } + pytorchIsvc.DefaultInferenceService(nil, nil, &v1beta1.SecurityConfig{AutoMountServiceAccountToken: false}, nil) + Expect(k8sClient.Create(ctx, pytorchIsvc)).Should(Succeed()) + + inferenceService := &v1beta1.InferenceService{} + + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKeyPytorch, inferenceService) + return err == nil + }, timeout, interval).Should(BeTrue()) + defer k8sClient.Delete(ctx, pytorchIsvc) + + tensorflowIsvc := &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKeyTensorflow.Name, + Namespace: isvcNamespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "external", + constants.AutoUpdateAnnotationKey: "true", + }, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + Tensorflow: &v1beta1.TFServingSpec{ + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: &storageUri, + RuntimeVersion: proto.String("1.14.0"), + Container: v1.Container{ + Name: constants.InferenceServiceContainerName, + Resources: defaultResource, + }, + }, + }, + }, + }, + } + tensorflowIsvc.DefaultInferenceService(nil, nil, &v1beta1.SecurityConfig{AutoMountServiceAccountToken: false}, nil) + Expect(k8sClient.Create(ctx, 
tensorflowIsvc)).Should(Succeed()) + + inferenceServiceTensorflow := &v1beta1.InferenceService{} + + Eventually(func() bool { + err := k8sClient.Get(ctx, serviceKeyTensorflow, inferenceServiceTensorflow) + return err == nil + }, timeout, interval).Should(BeTrue()) + defer k8sClient.Delete(ctx, tensorflowIsvc) + + // Update the ServingRuntime spec + servingRuntimeToUpdate := &v1alpha1.ServingRuntime{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimePytorchName, Namespace: isvcNamespace}, servingRuntimeToUpdate)).Should(Succeed()) + servingRuntimeToUpdate.Spec.ServingRuntimePodSpec.Labels["key1"] = "updatedServingRuntime" + Eventually(func() error { + return k8sClient.Update(ctx, servingRuntimeToUpdate) + }, timeout, interval).Should(Succeed()) + + // Wait until the ServingRuntime reflects the updated spec. + pytorchServingRuntimeAfterUpdate := &v1alpha1.ServingRuntime{} + Eventually(func() (string, error) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: servingRuntimePytorchName, Namespace: isvcNamespace}, pytorchServingRuntimeAfterUpdate) + if err != nil { + return "", err + } + return pytorchServingRuntimeAfterUpdate.Spec.Labels["key1"], nil + }, timeout, interval).Should(Equal("updatedServingRuntime")) + // Wait until the Deployment reflects the update + pytorchDeploymentAfterUpdate := &appsv1.Deployment{} + deploymentName := constants.PredictorServiceName(serviceKeyPytorch.Name) + Eventually(func() (string, error) { + err := k8sClient.Get(ctx, types.NamespacedName{Name: deploymentName, Namespace: serviceKeyPytorch.Namespace}, pytorchDeploymentAfterUpdate) + if err != nil { + return "", err + } + return pytorchDeploymentAfterUpdate.Spec.Template.Labels["key1"], nil + }, timeout, interval).Should(Equal("updatedServingRuntime")) + + tensorFlowDeploymentAfterUpdate := &appsv1.Deployment{} + tensorflowDeploymentName := constants.PredictorServiceName(serviceKeyTensorflow.Name) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: 
tensorflowDeploymentName, Namespace: serviceKeyTensorflow.Namespace}, tensorFlowDeploymentAfterUpdate)).Should(Succeed()) + Expect(tensorFlowDeploymentAfterUpdate.Spec.Template.Labels["key1"]).Should(Equal("val1FromSR")) + + }) + }) Context("When creating inference service with raw kube predictor and empty ingressClassName", func() { configs := map[string]string{ "explainers": `{ @@ -3182,6 +3829,7 @@ var _ = Describe("v1beta1 inference service controller", func() { verifyTensorParallelSizeDeployments(actualDefaultDeployment, actualWorkerDeployment, "3", constants.NvidiaGPUResourceType) }) }) + }) func verifyPipelineParallelSizeDeployments(actualDefaultDeployment *appsv1.Deployment, actualWorkerDeployment *appsv1.Deployment, pipelineParallelSize string, replicas *int32) {