Skip to content

Commit

Permalink
rebase fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexm-redhat committed Aug 23, 2024
1 parent cdd9203 commit 0cd8996
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
4 changes: 3 additions & 1 deletion examples/offline_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Create an LLM.
- llm = LLM(model="facebook/opt-125m")
+ llm = LLM(model="facebook/opt-125m",
+           num_scheduler_steps=8,
+           use_v2_block_manager=True)
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs = llm.generate(prompts, sampling_params)
Expand Down
7 changes: 7 additions & 0 deletions vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ async def step_async(
cached_outputs = self.cached_scheduler_outputs[virtual_engine]
seq_group_metadata_list = cached_outputs.seq_group_metadata_list
scheduler_outputs = cached_outputs.scheduler_outputs
scheduled_ids = cached_outputs.scheduled_ids
allow_async_output_proc = cached_outputs.allow_async_output_proc

# skip the scheduler if there are any remaining steps in the seq groups.
# This ensures that the scheduler is only called again when the current
Expand All @@ -302,6 +304,10 @@ async def step_async(

assert seq_group_metadata_list is not None
assert scheduler_outputs is not None
assert scheduled_ids is not None

if self.scheduler_config.is_multi_step:
assert not allow_async_output_proc

if not scheduler_outputs.is_empty():
finished_requests_ids = self.scheduler[
Expand Down Expand Up @@ -340,6 +346,7 @@ async def step_async(
self._update_cached_scheduler_output(virtual_engine, output)
else:
if len(self.output_queue) > 0:
assert not self.scheduler_config.is_multi_step
self._process_model_outputs(is_async=True)
output = []

Expand Down
8 changes: 8 additions & 0 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1402,6 +1402,8 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
cached_outputs = self.cached_scheduler_outputs[0]
seq_group_metadata_list = cached_outputs.seq_group_metadata_list
scheduler_outputs = cached_outputs.scheduler_outputs
scheduled_ids = cached_outputs.scheduled_ids
allow_async_output_proc = cached_outputs.allow_async_output_proc

# Skip the scheduler if there are any remaining steps in the seq groups.
# This ensures that the scheduler is only called again when the current
Expand All @@ -1423,6 +1425,10 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:

assert seq_group_metadata_list is not None
assert scheduler_outputs is not None
assert scheduled_ids is not None

if self.scheduler_config.is_multi_step:
assert not allow_async_output_proc

if not scheduler_outputs.is_empty():
finished_requests_ids = self.scheduler[
Expand Down Expand Up @@ -1460,6 +1466,7 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
self._update_cached_scheduler_output(0, output)
else:
if len(self.output_queue) > 0:
assert not self.scheduler_config.is_multi_step
self._process_model_outputs(is_async=True)
output = []

Expand Down Expand Up @@ -1500,6 +1507,7 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]:
if not self.has_unfinished_requests():
# Drain async postprocessor
if len(self.output_queue) > 0:
assert not self.scheduler_config.is_multi_step
self._process_model_outputs(is_async=True, clear_outputs=False)
assert len(self.output_queue) == 0

Expand Down

0 comments on commit 0cd8996

Please sign in to comment.