From 4ed21230e0afa6bd66788711e3adcd1a6cb455ba Mon Sep 17 00:00:00 2001 From: kevincheng2 Date: Tue, 12 May 2026 20:58:45 +0800 Subject: [PATCH] fix(PrefixCache): fix garbled text in PD disaggregation by returning early when there are no new tokens to cache - Add an early-return check for when can_cache_computed_tokens <= num_cached_tokens - Avoid unnecessary cache insertion operations that cause garbled output - Only affects PD disaggregation scenarios --- fastdeploy/cache_manager/prefix_cache_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py index b1e79834d92..d28c1e6f6b0 100644 --- a/fastdeploy/cache_manager/prefix_cache_manager.py +++ b/fastdeploy/cache_manager/prefix_cache_manager.py @@ -701,6 +701,8 @@ def update_cache_blocks(self, task, block_size, num_computed_tokens): req_id = task.request_id last_node, num_cached_tokens = self.req_to_radix_tree_info[req_id] can_cache_computed_tokens = num_computed_tokens - num_computed_tokens % block_size + if can_cache_computed_tokens <= num_cached_tokens: + return if req_id in self.leaf_req_map[last_node]: # delete old leaf record, update later self.leaf_req_map[last_node].remove(req_id) logger.debug(