Commit 6c5ab72

[BugFix] fix num_requests_running after clear_data (#4927)
* [BugFix] fix num_requests_running after clear_data
* [fix] fix tasks_list and stop flags not cleared when _free_blocks failed
1 parent 5b24013
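Reviewer note: the second bullet describes cleanup that was silently skipped when `_free_blocks` raised part-way through `finish_requests`, leaving `stop_flags` and `tasks_list` stale. Below is a minimal, self-contained sketch of the pattern the fix adopts; the function and variable names are hypothetical stand-ins, not FastDeploy's API:

```python
# Sketch of the cleanup-ordering bug this commit guards against.
# If free_blocks() raises and is not caught, the stop flag and task
# slot below it are never reset, so the slot stays "busy" forever.

def finish_request(slot: int, tasks_list: list, stop_flags: list, free_blocks) -> None:
    try:
        free_blocks(tasks_list[slot])  # may fail, e.g. blocks already recycled
    except Exception as e:
        print(f"release block failed for slot {slot}: {e}")
    # With the try/except above, these lines always run, so the slot
    # becomes reusable even when block release fails.
    stop_flags[slot] = True
    tasks_list[slot] = None

def failing_free(_task):
    raise RuntimeError("blocks already recycled")

tasks = ["req-0"]
flags = [False]
finish_request(0, tasks, flags, failing_free)
assert flags[0] and tasks[0] is None  # cleanup still completed
```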

2 files changed: +18 -10 lines

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 17 additions & 9 deletions
```diff
@@ -728,14 +728,7 @@ def _allocate_decode_and_extend():
         if scheduled_reqs:
             llm_logger.debug(f"schedued_reqs: {scheduled_reqs}")

-        # Update metrics
-        num_tasks = sum([1 if task else 0 for task in self.tasks_list])
-        num_blocks_used_by_tasks = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
-        main_process_metrics.available_gpu_block_num.set(self.total_block_number() - num_blocks_used_by_tasks)
-        main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
-        main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
-        main_process_metrics.num_requests_running.set(len(self.running))
-        main_process_metrics.num_requests_waiting.set(num_tasks - len(self.running))
+        self.update_metrics()

         return scheduled_reqs

@@ -962,7 +955,10 @@ def finish_requests(self, request_ids: Union[str, Iterable[str]]):
                 if request in self.running:  # normally run and finished
                     self.running.remove(request)
                     request.status = RequestStatus.FINISHED
-                    self._free_blocks(request)
+                    try:
+                        self._free_blocks(request)
+                    except Exception as e:
+                        llm_logger.warning(f"release block failed {req_id}: {e}")
                 if (
                     request.request_id in self.to_be_rescheduled_request_id_set
                 ):  # finished after preempted, blocks have been recycled.
@@ -981,7 +977,19 @@ def finish_requests(self, request_ids: Union[str, Iterable[str]]):
                 del self.req_dict[req_id]
         except Exception as e:
             llm_logger.error(f"finish_request err: {e}, {str(traceback.format_exc())}")
+        finally:
+            self.update_metrics()

     def clear_data(self):
         self.waiting: deque[Request] = deque()
         self.to_be_rescheduled_request_id_set = set()
+
+    def update_metrics(self):
+        # Update metrics
+        num_tasks = sum([1 if task else 0 for task in self.tasks_list])
+        num_blocks_used_by_tasks = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
+        main_process_metrics.available_gpu_block_num.set(self.total_block_number() - num_blocks_used_by_tasks)
+        main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
+        main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
+        main_process_metrics.num_requests_running.set(len(self.running))
+        main_process_metrics.num_requests_waiting.set(num_tasks - len(self.running))
```
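The key structural change above is that the gauge updates now live in a single `update_metrics()` method which `finish_requests` calls from a `finally` block, so the gauges are recomputed on every exit path, including the error path. A minimal sketch of why that matters, using a hypothetical stand-in `Gauge` rather than FastDeploy's `main_process_metrics`:

```python
# Minimal sketch: metrics refreshed in `finally` survive exceptions.
class Gauge:
    """Stand-in for a Prometheus-style gauge (hypothetical)."""
    def __init__(self) -> None:
        self.value = 0.0
    def set(self, v: float) -> None:
        self.value = v

num_requests_running = Gauge()

class Manager:
    def __init__(self) -> None:
        self.running: list[str] = []

    def update_metrics(self) -> None:
        # Single place that recomputes every gauge from current state.
        num_requests_running.set(len(self.running))

    def finish_requests(self, req_ids: list[str]) -> None:
        try:
            for rid in req_ids:
                self.running.remove(rid)  # raises ValueError if unknown
        except Exception as e:
            print(f"finish_request err: {e}")
        finally:
            self.update_metrics()  # runs on success *and* failure

m = Manager()
m.running = ["a", "b"]
m.finish_requests(["a", "missing"])    # second id raises inside the loop
assert num_requests_running.value == 1  # gauge still reflects reality
```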

fastdeploy/output/token_processor.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -847,7 +847,7 @@ def _record_speculative_decoding_mertics(self, accept_num):
     def clear_data(self):
         if envs.ENABLE_V1_KVCACHE_SCHEDULER:
             self.resource_manager.clear_data()
-            for i in range(self.cfg.max_num_seqs):
+            for i in range(self.resource_manager.max_num_seqs):
                 if self.resource_manager.stop_flags[i]:
                     continue
                 task = self.resource_manager.tasks_list[i]
```
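This one-line change switches the loop bound to the value that actually sized `tasks_list` and `stop_flags`. Presumably `self.cfg.max_num_seqs` and `self.resource_manager.max_num_seqs` can disagree, in which case the old bound could index past the end of those lists or skip slots. A small illustrative sketch, with all names hypothetical:

```python
# Sketch: iterate with the bound that actually sized the lists.
class ResourceManager:
    def __init__(self, max_num_seqs: int) -> None:
        self.max_num_seqs = max_num_seqs
        self.tasks_list = [None] * max_num_seqs   # sized by the manager
        self.stop_flags = [True] * max_num_seqs

rm = ResourceManager(max_num_seqs=4)
cfg_max_num_seqs = 8  # a config value that drifted from the manager's

# Buggy bound: IndexError once i reaches len(rm.tasks_list).
# for i in range(cfg_max_num_seqs):
#     _ = rm.tasks_list[i]

# Fixed bound: always matches the lists being indexed.
for i in range(rm.max_num_seqs):
    if rm.stop_flags[i]:
        continue
    task = rm.tasks_list[i]
```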
