Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
robertgshaw2-redhat committed Mar 8, 2025
1 parent 143ab7e commit 5e51220
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
4 changes: 4 additions & 0 deletions vllm/attention/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ def get_attn_backend(
use_mla: bool = False,
) -> Type[AttentionBackend]:
"""Selects which attention backend to use and lazily imports it."""
# Accessing envs.* behind an @lru_cache decorator can cause the wrong
# value to be returned from the cache if the value changes between calls.
# To avoid this, we read envs.VLLM_USE_V1 here and pass it explicitly to the
# private function.
return _cached_get_attn_backend(
head_size=head_size,
dtype=dtype,
Expand Down
7 changes: 4 additions & 3 deletions vllm/executor/ray_distributed_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def _init_executor(self) -> None:
self.input_encoder = msgspec.msgpack.Encoder(enc_hook=encode_hook)
self.output_decoder = msgspec.msgpack.Decoder(
Optional[List[SamplerOutput]])
self.use_v1 = envs.VLLM_USE_V1

self.pp_locks: Optional[List[asyncio.Lock]] = None
if not self.use_ray_compiled_dag:
Expand Down Expand Up @@ -442,12 +443,12 @@ def execute_model(
if self.forward_dag is None:
self.forward_dag = self._compiled_ray_dag(enable_asyncio=False)

if envs.VLLM_USE_V1:
if self.use_v1:
serialized_data = execute_model_req
else:
serialized_data = self.input_encoder.encode(execute_model_req)
outputs = ray.get(self.forward_dag.execute(serialized_data))
if envs.VLLM_USE_V1:
if self.use_v1:
output = outputs[0]
else:
output = self.output_decoder.decode(outputs[0])
Expand Down Expand Up @@ -574,7 +575,7 @@ def _compiled_ray_dag(self, enable_asyncio: bool):
for pp_rank, tp_group in enumerate(self.pp_tp_workers):
# Each PP worker takes in the output of the previous PP worker,
# and the TP group executes in SPMD fashion.
if envs.VLLM_USE_V1:
if self.use_v1:
outputs = [
worker.execute_model_ray.
bind( # type: ignore[attr-defined]
Expand Down

0 comments on commit 5e51220

Please sign in to comment.