From 27a4c46e08fbe902e3c412e2afab2c1962eae6a8 Mon Sep 17 00:00:00 2001 From: kevin Date: Wed, 13 May 2026 14:55:19 +0800 Subject: [PATCH] fix(PrefixCache): fix garbled text in PD disaggregation by returning early when there are no new tokens to cache (#7797) - Add early return check when can_cache_computed_tokens <= num_cached_tokens - Avoid unnecessary cache insertion operations that cause garbled output - Only affects PD disaggregation scenarios --- fastdeploy/cache_manager/prefix_cache_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py index e12e47d3fd0..c41a6109029 100644 --- a/fastdeploy/cache_manager/prefix_cache_manager.py +++ b/fastdeploy/cache_manager/prefix_cache_manager.py @@ -731,6 +731,8 @@ def update_cache_blocks(self, task, block_size, num_computed_tokens): req_id = task.request_id last_node, num_cached_tokens = self.req_to_radix_tree_info[req_id] can_cache_computed_tokens = num_computed_tokens - num_computed_tokens % block_size + if can_cache_computed_tokens <= num_cached_tokens: + return if req_id in self.leaf_req_map[last_node]: # delete old leaf record, update later self.leaf_req_map[last_node].remove(req_id) logger.debug(