diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py index e12e47d3fd0..c41a6109029 100644 --- a/fastdeploy/cache_manager/prefix_cache_manager.py +++ b/fastdeploy/cache_manager/prefix_cache_manager.py @@ -731,6 +731,8 @@ def update_cache_blocks(self, task, block_size, num_computed_tokens): req_id = task.request_id last_node, num_cached_tokens = self.req_to_radix_tree_info[req_id] can_cache_computed_tokens = num_computed_tokens - num_computed_tokens % block_size + if can_cache_computed_tokens <= num_cached_tokens: + return if req_id in self.leaf_req_map[last_node]: # delete old leaf record, update later self.leaf_req_map[last_node].remove(req_id) logger.debug(