From 4dd96bb0f6c396a19b6d88807dc78c9d1cfd733e Mon Sep 17 00:00:00 2001 From: Kamil Kaczor Date: Tue, 25 Feb 2025 12:34:00 +0100 Subject: [PATCH] [SW-212036] Change gc thr multiplier to 16 (#832) https://docs.python.org/3/library/gc.html#gc.set_threshold Every X calls we see a gap of 100 ms to 1 s (depending on the benchmark) when the garbage collector runs; increasing the default value of this multiplier fixes the issue. --- vllm/worker/hpu_model_runner.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/vllm/worker/hpu_model_runner.py b/vllm/worker/hpu_model_runner.py index a543cd709a9e2..6d917cb724311 100755 --- a/vllm/worker/hpu_model_runner.py +++ b/vllm/worker/hpu_model_runner.py @@ -713,19 +713,26 @@ def __init__( self.is_pooler = False def _set_gc_threshold(self) -> None: - # Read https://docs.python.org/3/library/gc.html#gc.set_threshold - # for comprehensive description of gc generations. - # We can either use VLLM_GC_THR_GEN[0-2] (this has higher priority) - # to set particular generation threshold or use simpler - # VLLM_GC_THR_MULTIPLIER to multiply default values. - default_gc_thrs = list(gc.get_threshold()) + """ + Read https://docs.python.org/3/library/gc.html#gc.set_threshold + for comprehensive description of gc generations. + We can either use VLLM_GC_THR_GEN[0-2] (this has higher priority) + to set particular generation threshold or use simpler + VLLM_GC_THR_MULTIPLIER to multiply default values. 
+ """ + + # gc.get_threshold default, avoiding potential overflow due to + # multiplier and set later (get->mult->set->repeat->...->overflow) + default_gc_thrs = [700, 10, 10] + requested_gc_thrs = [0] * len(default_gc_thrs) for i in range(len(default_gc_thrs)): requested_gc_thrs[i] = int( os.environ.get(f'VLLM_GC_THR_GEN{i}', default_gc_thrs[i])) if requested_gc_thrs == default_gc_thrs: - gc_thr_multiplier = int(os.environ.get('VLLM_GC_THR_MULTIPLIER', - 2)) + # 16*threshold is rare enough for gc to not cause perf issues + gc_thr_multiplier = int( + os.environ.get('VLLM_GC_THR_MULTIPLIER', 16)) requested_gc_thrs = [ t * gc_thr_multiplier for t in default_gc_thrs ]