From f86bc3d307ce3d9d8e5f6a17a8092d13273ca97d Mon Sep 17 00:00:00 2001 From: Adam Stewart Date: Sat, 24 Feb 2024 14:18:53 +0000 Subject: [PATCH 1/3] fix: Add 'model_version' to InferResponse in python library (#3466) * Add model_version to InferResponse class Signed-off-by: Adam Stewart * Update infer_type tests Signed-off-by: Adam Stewart * Updated from_grpc class method argument ordering Signed-off-by: Adam Stewart --------- Signed-off-by: Adam Stewart --- python/kserve/kserve/protocol/infer_type.py | 18 +++++++++++++----- python/kserve/test/test_infer_type.py | 11 ++++++----- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/python/kserve/kserve/protocol/infer_type.py b/python/kserve/kserve/protocol/infer_type.py index b5da94df013..1298827ff27 100644 --- a/python/kserve/kserve/protocol/infer_type.py +++ b/python/kserve/kserve/protocol/infer_type.py @@ -621,12 +621,13 @@ def __eq__(self, other): class InferResponse: id: str model_name: str + model_version: Optional[str] parameters: Optional[Dict] outputs: List[InferOutput] from_grpc: bool def __init__(self, response_id: str, model_name: str, infer_outputs: List[InferOutput], - raw_outputs=None, from_grpc: Optional[bool] = False, + model_version: Optional[str] = None, raw_outputs=None, from_grpc: Optional[bool] = False, parameters: Optional[Union[Dict, MessageMap[str, InferParameter]]] = None): """The InferResponse Data Model @@ -634,6 +635,7 @@ def __init__(self, response_id: str, model_name: str, infer_outputs: List[InferO response_id: The id of the inference response. model_name: The name of the model. infer_outputs: The inference outputs of the inference response. + model_version: The version of the model. raw_outputs: The raw binary data of the inference outputs. from_grpc: Indicate if the InferResponse is constructed from a gRPC response. parameters: The additional inference parameters. 
@@ -641,6 +643,7 @@ def __init__(self, response_id: str, model_name: str, infer_outputs: List[InferO self.id = response_id self.model_name = model_name + self.model_version = model_version self.outputs = infer_outputs self.parameters = parameters self.from_grpc = from_grpc @@ -657,8 +660,9 @@ def from_grpc(cls, response: ModelInferResponse) -> 'InferResponse': data=get_content(output.datatype, output.contents), parameters=output.parameters) for output in response.outputs] - return cls(model_name=response.model_name, response_id=response.id, parameters=response.parameters, - infer_outputs=infer_outputs, raw_outputs=response.raw_output_contents, from_grpc=True) + return cls(model_name=response.model_name, model_version=response.model_version, response_id=response.id, + parameters=response.parameters, infer_outputs=infer_outputs, + raw_outputs=response.raw_output_contents, from_grpc=True) @classmethod def from_rest(cls, model_name: str, response: Dict) -> 'InferResponse': @@ -672,6 +676,7 @@ def from_rest(cls, model_name: str, response: Dict) -> 'InferResponse': parameters=output.get('parameters', None)) for output in response['outputs']] return cls(model_name=model_name, + model_version=response.get('model_version', None), response_id=response.get('id', None), parameters=response.get('parameters', None), infer_outputs=infer_outputs) @@ -702,6 +707,7 @@ def to_rest(self) -> Dict: res = { 'id': self.id, 'model_name': self.model_name, + 'model_version': self.model_version, 'outputs': infer_outputs } if self.parameters: @@ -742,8 +748,8 @@ def to_grpc(self) -> ModelInferResponse: raise InvalidInput("to_grpc: invalid output datatype") infer_outputs.append(infer_output_dict) - return ModelInferResponse(id=self.id, model_name=self.model_name, outputs=infer_outputs, - raw_output_contents=raw_output_contents, + return ModelInferResponse(id=self.id, model_name=self.model_name, model_version=self.model_version, + outputs=infer_outputs, raw_output_contents=raw_output_contents, 
parameters=to_grpc_parameters(self.parameters) if self.parameters else None) def __eq__(self, other): @@ -751,6 +757,8 @@ def __eq__(self, other): return False if self.model_name != other.model_name: return False + if self.model_version != other.model_version: + return False if self.id != other.id: return False if self.from_grpc != other.from_grpc: diff --git a/python/kserve/test/test_infer_type.py b/python/kserve/test/test_infer_type.py index 43a4010dfbe..ff7d2931dca 100644 --- a/python/kserve/test/test_infer_type.py +++ b/python/kserve/test/test_infer_type.py @@ -139,7 +139,7 @@ def test_from_grpc(self): class TestInferResponse: def test_to_rest(self): - infer_res = InferResponse(model_name="TestModel", response_id="123", + infer_res = InferResponse(model_name="TestModel", response_id="123", model_version="v1", parameters={ "test-str": InferParameter(string_param="dummy"), "test-bool": InferParameter(bool_param=True), @@ -156,6 +156,7 @@ def test_to_rest(self): expected = { "id": "123", "model_name": "TestModel", + "model_version": "v1", "outputs": [ { "name": "output-0", @@ -179,7 +180,7 @@ def test_to_rest(self): assert res == expected def test_to_grpc(self): - infer_res = InferResponse(model_name="TestModel", response_id="123", + infer_res = InferResponse(model_name="TestModel", response_id="123", model_version="v1", parameters={ "test-str": "dummy", "test-bool": True, @@ -193,7 +194,7 @@ def test_to_grpc(self): "test-int": 100 })] ) - expected = ModelInferResponse(model_name="TestModel", id="123", + expected = ModelInferResponse(model_name="TestModel", id="123", model_version="v1", parameters={ "test-str": InferParameter(string_param="dummy"), "test-bool": InferParameter(bool_param=True), @@ -218,7 +219,7 @@ def test_to_grpc(self): assert res == expected def test_from_grpc(self): - infer_res = ModelInferResponse(model_name="TestModel", id="123", + infer_res = ModelInferResponse(model_name="TestModel", id="123", model_version="v1", parameters={ "test-str": 
InferParameter(string_param="dummy"), "test-bool": InferParameter(bool_param=True), @@ -239,7 +240,7 @@ def test_from_grpc(self): }, }] ) - expected = InferResponse(model_name="TestModel", response_id="123", + expected = InferResponse(model_name="TestModel", response_id="123", model_version="v1", parameters={ "test-str": InferParameter(string_param="dummy"), "test-bool": InferParameter(bool_param=True), From 98fe9a1d13190477cd1b2346ce86082905de39ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roland=20Hu=C3=9F?= Date: Wed, 20 Dec 2023 12:44:55 +0100 Subject: [PATCH 2/3] Add new storageUri schema "oci" that points to a OCI image For that a new injector that adds a so-called "modelcar" container to "kserve-container" as a sidecar has been added. This sets up a pod for sharing the process namespace (shareProcessNamespace = true). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following configuration options have been added: * `enableModelcar` to switch on this feature (default: false) * `cpuModelcar` and `memoryModelcar` to set the resources for the modelcar container * `uidModelcar` for the UID to use for the user-container *and* the modelcar container See https://github.com/kserve/kserve/pull/3110 for more information on this feature and its architecture. Signed-off-by: Roland Huß --- config/configmap/inferenceservice.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config/configmap/inferenceservice.yaml b/config/configmap/inferenceservice.yaml index f79aa0039ac..a495978cb05 100644 --- a/config/configmap/inferenceservice.yaml +++ b/config/configmap/inferenceservice.yaml @@ -96,7 +96,11 @@ data: # enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly. # if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container. # rather than symlink it to a shared volume. 
For more info see https://github.com/kserve/kserve/issues/2737 +<<<<<<< HEAD "enableDirectPvcVolumeMount": true, +======= + "enableDirectPvcVolumeMount": false, +>>>>>>> ab4a91cc (Add new storageUri schema "oci" that points to a OCI image) # enableModelcar enabled allows you to directly access an OCI container image by # using a source URL with an "oci://" schema. @@ -476,7 +480,12 @@ data: "cpuLimit": "1", "caBundleConfigMapName": "", "caBundleVolumeMountPath": "/etc/ssl/custom-certs", +<<<<<<< HEAD "enableDirectPvcVolumeMount": true, +======= + "enableDirectPvcVolumeMount": false + "enableDirectPvcVolumeMount": false, +>>>>>>> ab4a91cc (Add new storageUri schema "oci" that points to a OCI image) "enableModelcar": false, "cpuModelcar": "10m", "memoryModelcar": "15Mi" From c9570d65153387456dd44f3872268a2513f4dba3 Mon Sep 17 00:00:00 2001 From: Dan Sun Date: Sun, 24 Dec 2023 05:28:41 -0500 Subject: [PATCH 3/3] Allow oci prefix Signed-off-by: Dan Sun --- config/configmap/inferenceservice.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/config/configmap/inferenceservice.yaml b/config/configmap/inferenceservice.yaml index a495978cb05..f79aa0039ac 100644 --- a/config/configmap/inferenceservice.yaml +++ b/config/configmap/inferenceservice.yaml @@ -96,11 +96,7 @@ data: # enableDirectPvcVolumeMount controls whether users can mount pvc volumes directly. # if pvc volume is provided in storageuri then the pvc volume is directly mounted to /mnt/models in the user container. # rather than symlink it to a shared volume. For more info see https://github.com/kserve/kserve/issues/2737 -<<<<<<< HEAD "enableDirectPvcVolumeMount": true, -======= - "enableDirectPvcVolumeMount": false, ->>>>>>> ab4a91cc (Add new storageUri schema "oci" that points to a OCI image) # enableModelcar enabled allows you to directly access an OCI container image by # using a source URL with an "oci://" schema. 
@@ -480,12 +476,7 @@ data: "cpuLimit": "1", "caBundleConfigMapName": "", "caBundleVolumeMountPath": "/etc/ssl/custom-certs", -<<<<<<< HEAD "enableDirectPvcVolumeMount": true, -======= - "enableDirectPvcVolumeMount": false - "enableDirectPvcVolumeMount": false, ->>>>>>> ab4a91cc (Add new storageUri schema "oci" that points to a OCI image) "enableModelcar": false, "cpuModelcar": "10m", "memoryModelcar": "15Mi"