Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-redhat committed Mar 8, 2025
1 parent cf0b14b commit 0db214e
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 14 deletions.
1 change: 0 additions & 1 deletion vllm/platforms/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def inference_mode(cls):
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
import vllm.envs as envs
from vllm.utils import GiB_bytes

model_config = vllm_config.model_config
# Reminder: Please update docs/source/features/compatibility_matrix.md
# If the feature combo becomes valid
Expand Down
3 changes: 3 additions & 0 deletions vllm/platforms/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ def get_device_total_memory(cls, device_id: int = 0) -> int:

@classmethod
def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
if envs.VLLM_USE_V1:
return False

if enforce_eager:
logger.warning(
"To see benefits of async output processing, enable CUDA "
Expand Down
3 changes: 0 additions & 3 deletions vllm/platforms/hpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ def inference_mode():

@classmethod
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
if envs.VLLM_USE_V1:
raise NotImplementedError(
"V1 is not supported on HPU. Set VLLM_USE_V1=0")

scheduler_config = vllm_config.scheduler_config
if scheduler_config.is_multi_step:
Expand Down
5 changes: 3 additions & 2 deletions vllm/platforms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,11 +335,12 @@ def use_all_gather(cls) -> bool:
"""
Whether to use allgather in LogitsProcessor to gather the logits.
"""
import vllm.envs as envs
from vllm.config import get_current_vllm_config

parallel_config = get_current_vllm_config().parallel_config
use_v1 = get_current_vllm_config().use_v1
return (use_v1 or parallel_config.distributed_executor_backend
return (envs.VLLM_USE_V1
or parallel_config.distributed_executor_backend
== "external_launcher")


Expand Down
4 changes: 0 additions & 4 deletions vllm/platforms/neuron.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from typing import TYPE_CHECKING, Optional

import vllm.envs as envs
from vllm.logger import init_logger

from .interface import Platform, PlatformEnum
Expand Down Expand Up @@ -33,9 +32,6 @@ def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:

@classmethod
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
if envs.VLLM_USE_V1:
raise NotImplementedError(
"V1 is not supported on Neuron. Set VLLM_USE_V1=0")

parallel_config = vllm_config.parallel_config
if parallel_config.worker_cls == "auto":
Expand Down
3 changes: 0 additions & 3 deletions vllm/platforms/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ def is_pin_memory_available(cls) -> bool:
@classmethod
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
from vllm.utils import GiB_bytes
if envs.VLLM_USE_V1:
raise NotImplementedError(
"V1 is not supported on OpenVino. Set VLLM_USE_V1=0")

parallel_config = vllm_config.parallel_config
assert (parallel_config.world_size == 1
Expand Down
10 changes: 9 additions & 1 deletion vllm/platforms/rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,15 @@ def get_device_total_memory(cls, device_id: int = 0) -> int:

@classmethod
def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
    """Return whether async output processing may be used on ROCm.

    Args:
        enforce_eager: Whether eager mode is forced (CUDA/HIP graphs
            disabled). May be ``None`` when the caller has not decided yet.

    Returns:
        ``False`` when the V1 engine is active or eager mode is enforced;
        ``True`` otherwise.
    """
    # NOTE: the stale early `return not enforce_eager` that preceded this
    # logic was dead-code-producing and has been removed — it made the
    # V1 check and the warning below unreachable.
    if envs.VLLM_USE_V1:
        # V1 engine has its own output-processing path; async output
        # processing does not apply.
        return False
    if enforce_eager:
        # Async output processing only pays off with graph capture;
        # warn so users understand why it is disabled.
        logger.warning(
            "To see benefits of async output processing, enable CUDA "
            "graph. Since, enforce-eager is enabled, async output "
            "processor cannot be used")
        return False
    return True

@classmethod
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
Expand Down

0 comments on commit 0db214e

Please sign in to comment.