diff --git a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go index 090989f5bb4..e441be8fed2 100644 --- a/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go +++ b/pkg/controller/v1beta1/inferenceservice/rawkube_controller_test.go @@ -19,6 +19,7 @@ package inferenceservice import ( "context" "fmt" + "reflect" "time" apierr "k8s.io/apimachinery/pkg/api/errors" @@ -3182,6 +3183,143 @@ var _ = Describe("v1beta1 inference service controller", func() { verifyTensorParallelSizeDeployments(actualDefaultDeployment, actualWorkerDeployment, "3", constants.NvidiaGPUResourceType) }) }) + Context("When creating an inference service with modelcar and raw deployment", func() { + It("Should only have the ImagePullSecrets that are specified in the InferenceService", func() { + By("Updating an InferenceService with a new ImagePullSecret and checking the deployment") + var configMap = &v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: constants.InferenceServiceConfigMapName, + Namespace: constants.KServeNamespace, + }, + Data: configs, + } + Expect(k8sClient.Create(context.TODO(), configMap)).NotTo(HaveOccurred()) + defer k8sClient.Delete(context.TODO(), configMap) + + servingRuntime := &v1alpha1.ServingRuntime{ + ObjectMeta: metav1.ObjectMeta{ + Name: "vllm-runtime", + Namespace: constants.KServeNamespace, + }, + Spec: v1alpha1.ServingRuntimeSpec{ + SupportedModelFormats: []v1alpha1.SupportedModelFormat{ + { + AutoSelect: proto.Bool(true), + Name: "vLLM", + }, + }, + ServingRuntimePodSpec: v1alpha1.ServingRuntimePodSpec{ + Containers: []v1.Container{ + { + Name: constants.InferenceServiceContainerName, + Image: "kserve/vllm:latest", + Command: []string{"bash", "-c"}, + Args: []string{ + "python2 -m vllm --model_name=${MODEL_NAME} --model_dir=${MODEL} --tensor-parallel-size=${TENSOR_PARALLEL_SIZE} --pipeline-parallel-size=${PIPELINE_PARALLEL_SIZE}", + }, + Resources: defaultResource, + }, + }, + }, + Disabled: proto.Bool(false), + }, + } + + k8sClient.Create(context.TODO(), servingRuntime) + defer k8sClient.Delete(context.TODO(), servingRuntime) + serviceName := "modelcar-raw-deployment" + var expectedRequest = reconcile.Request{NamespacedName: types.NamespacedName{Name: serviceName, Namespace: constants.KServeNamespace}} + var serviceKey = expectedRequest.NamespacedName + var storageUri = "oci://test/mnist/export" + ctx := context.Background() + isvc := &v1beta1.InferenceService{ + ObjectMeta: metav1.ObjectMeta{ + Name: serviceKey.Name, + Namespace: serviceKey.Namespace, + Annotations: map[string]string{ + "serving.kserve.io/deploymentMode": "RawDeployment", + "serving.kserve.io/autoscalerClass": "hpa", + "serving.kserve.io/metrics": "cpu", + "serving.kserve.io/targetUtilizationPercentage": "75", + }, + }, + Spec: v1beta1.InferenceServiceSpec{ + Predictor: v1beta1.PredictorSpec{ + ComponentExtensionSpec: v1beta1.ComponentExtensionSpec{ + MinReplicas: v1beta1.GetIntReference(1), + MaxReplicas: 2, + }, + PodSpec: v1beta1.PodSpec{ + ImagePullSecrets: []v1.LocalObjectReference{ + {Name: "isvc-image-pull-secret"}, + }, + }, + Model: &v1beta1.ModelSpec{ + ModelFormat: v1beta1.ModelFormat{ + Name: "vLLM", + }, + PredictorExtensionSpec: v1beta1.PredictorExtensionSpec{ + StorageURI: &storageUri, + RuntimeVersion: proto.String("0.14.0"), + Container: v1.Container{ + Name: constants.InferenceServiceContainerName, + Resources: v1.ResourceRequirements{ + Limits: v1.ResourceList{ + constants.NvidiaGPUResourceType: resource.MustParse("1"), + }, + Requests: v1.ResourceList{ + constants.NvidiaGPUResourceType: resource.MustParse("1"), + }, + }, + }, + }, + }, + }, + }, + } + + isvc.DefaultInferenceService(nil, nil, &v1beta1.SecurityConfig{AutoMountServiceAccountToken: false}, nil) + Expect(k8sClient.Create(ctx, isvc)).Should(Succeed()) + defer k8sClient.Delete(ctx, isvc) + + inferenceService := &v1beta1.InferenceService{} + + Eventually(func() bool { + return k8sClient.Get(ctx, serviceKey, inferenceService) == nil + }, timeout, interval).Should(BeTrue()) + + actualDeployment := &appsv1.Deployment{} + predictorDeploymentKey := types.NamespacedName{Name: constants.PredictorServiceName(serviceKey.Name), + Namespace: serviceKey.Namespace} + Eventually(func() error { return k8sClient.Get(context.TODO(), predictorDeploymentKey, actualDeployment) }, timeout, interval). + Should(Succeed()) + + Expect(actualDeployment.Spec.Template.Spec.ImagePullSecrets).To(HaveLen(1)) + Expect(actualDeployment.Spec.Template.Spec.ImagePullSecrets[0].Name).To(Equal("isvc-image-pull-secret")) + + Expect(k8sClient.Get(ctx, serviceKey, inferenceService)).Should(Succeed()) + updateForInferenceService := inferenceService.DeepCopy() + updateForInferenceService.Spec.Predictor.PodSpec.ImagePullSecrets = []v1.LocalObjectReference{ + {Name: "new-image-pull-secret"}, + } + expectedImagePullSecrets := updateForInferenceService.Spec.Predictor.PodSpec.ImagePullSecrets + Eventually(func() error { + return k8sClient.Update(ctx, updateForInferenceService) + }, timeout, interval).Should(Succeed()) + + updatedDeployment := &appsv1.Deployment{} + Eventually(func() (bool, error) { + if err := k8sClient.Get(ctx, predictorDeploymentKey, updatedDeployment); err != nil { + return false, err + } + if len(updatedDeployment.Spec.Template.Spec.ImagePullSecrets) != 1 { + return false, nil + } + return reflect.DeepEqual(updatedDeployment.Spec.Template.Spec.ImagePullSecrets, expectedImagePullSecrets), nil + }, timeout, interval).Should(BeTrue()) + + }) + }) }) func verifyPipelineParallelSizeDeployments(actualDefaultDeployment *appsv1.Deployment, actualWorkerDeployment *appsv1.Deployment, pipelineParallelSize string, replicas *int32) { diff --git a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go index 7be9bf9ad6b..057a36c59a0 100644 --- a/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go +++ b/pkg/controller/v1beta1/inferenceservice/reconcilers/deployment/deployment_reconciler.go @@ -21,6 +21,7 @@ import ( "crypto/rand" "encoding/base64" "encoding/json" + "errors" "fmt" "strconv" "strings" @@ -571,11 +572,6 @@ func (r *DeploymentReconciler) Reconcile() ([]*appsv1.Deployment, error) { // get the current deployment _ = r.client.Get(context.TODO(), types.NamespacedName{Name: deployment.Name, Namespace: deployment.Namespace}, originalDeployment) // we need to remove the Replicas field from the deployment spec - originalDeployment.Spec.Replicas = nil - curJson, err := json.Marshal(originalDeployment) - if err != nil { - return nil, err - } // Check if there are any envs to remove // If there, its value will be set to "delete" so we can update the patchBytes with @@ -602,10 +598,31 @@ func (r *DeploymentReconciler) Reconcile() ([]*appsv1.Deployment, error) { deployment.Spec.Template.Spec.Containers[i].Env = envs } + originalDeployment.Spec.Replicas = nil + curJson, err := json.Marshal(originalDeployment) + if err != nil { + return nil, err + } // To avoid the conflict between HPA and Deployment, // we need to remove the Replicas field from the deployment spec deployment.Spec.Replicas = nil + imagePullSecretsDesired := deployment.Spec.Template.Spec.ImagePullSecrets + originalDeploymentPullSecrets := originalDeployment.Spec.Template.Spec.ImagePullSecrets + imagePullSecretsToRemove := []string{} + for _, secret := range originalDeploymentPullSecrets { + found := false + for _, desiredSecret := range imagePullSecretsDesired { + if secret.Name == desiredSecret.Name { + found = true + break + } + } + if !found { + imagePullSecretsToRemove = append(imagePullSecretsToRemove, secret.Name) + } + } + modJson, err := json.Marshal(deployment) if err != nil { return nil, err @@ -617,10 +634,58 @@ func (r *DeploymentReconciler) Reconcile() ([]*appsv1.Deployment, error) { return nil, err } - // override the envs that needs to be removed with "$patch": "delete" + // Patch the deployment object with the strategic merge patch patchByte = []byte(strings.ReplaceAll(string(patchByte), "\"value\":\""+utils.PLACEHOLDER_FOR_DELETION+"\"", "\"$patch\":\"delete\"")) - // Patch the deployment object with the strategic merge patch + // The strategic merge patch does not remove items from list just by removing it from the patch, + // to delete lists items using strategic merge patch, the $patch delete pattern is used. + // Example: + // imagePullSecrets: + // - "name": "pull-secret-1", + // "$patch": "delete" + patchJson := map[string]interface{}{} + err = json.Unmarshal(patchByte, &patchJson) + if err != nil { + return nil, err + } + spec, ok := patchJson["spec"].(map[string]interface{}) + if !ok { + return nil, errors.New("spec not found") + } + template, ok := spec["template"].(map[string]interface{}) + if !ok { + return nil, errors.New("template not found") + } + specTemplate, ok := template["spec"].(map[string]interface{}) + if !ok { + return nil, errors.New("template.spec not found") + } + + // Ensure imagePullSecrets is a slice, defaulting to an empty slice if nil. + ipsField, exists := specTemplate["imagePullSecrets"] + var imagePullSecrets []interface{} + if exists && ipsField != nil { + var ok bool + imagePullSecrets, ok = ipsField.([]interface{}) + if !ok { + return nil, errors.New("imagePullSecrets is not the expected type") + } + } else { + imagePullSecrets = []interface{}{} + } + + for _, secret := range imagePullSecretsToRemove { + for _, secretMap := range imagePullSecrets { + if secretMap.(map[string]interface{})["name"] == secret { + secretMap.(map[string]interface{})["$patch"] = "delete" + } + } + } + patchJson["spec"].(map[string]interface{})["template"].(map[string]interface{})["spec"].(map[string]interface{})["imagePullSecrets"] = imagePullSecrets + patchByte, err = json.Marshal(patchJson) + if err != nil { + return nil, err + } opErr = r.client.Patch(context.TODO(), deployment, kclient.RawPatch(types.StrategicMergePatchType, patchByte)) }