@@ -1189,7 +1189,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
11891189
11901190 self .device_kv_caches = kv_caches
11911191 self .dst_num_blocks [self .engine_id ] = self .num_blocks
1192- if self .kv_topo .is_kv_layout_blocks_first ::
1192+ if self .kv_topo .is_kv_layout_blocks_first :
11931193 for i in range (len (self .slot_size_per_layer )):
11941194 assert self .slot_size_per_layer [i ] % 2 == 0
11951195 self .slot_size_per_layer [i ] //= 2
@@ -1217,7 +1217,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
12171217 # (addr, len, device id)
12181218 blocks_data .append ((addr , kv_block_len , self .device_id ))
12191219
1220- if self .kv_topo .is_kv_layout_blocks_first ::
1220+ if self .kv_topo .is_kv_layout_blocks_first :
12211221 # Separate and interleave K/V regions to maintain the same
12221222 # descs ordering. This is needed for selecting contiguous heads
12231223 # when split across TP ranks.
@@ -1379,7 +1379,7 @@ def add_remote_agent(
13791379 # (addr, len, device id)
13801380 blocks_data .append ((addr , kv_block_len , nixl_agent_meta .device_id ))
13811381
1382- if self .kv_topo .is_kv_layout_blocks_first ::
1382+ if self .kv_topo .is_kv_layout_blocks_first :
13831383 # With FlashInfer index V separately to allow head splitting.
13841384 for block_id in range (nixl_agent_meta .num_blocks ):
13851385 block_offset = block_id * nixl_agent_meta .block_lens [i ]
@@ -1462,7 +1462,7 @@ def _validate_remote_agent_handshake(
14621462 remote_block_size = remote_block_len // (
14631463 self .slot_size_per_layer [0 ] * tp_ratio
14641464 )
1465- if self .kv_topo .is_kv_layout_blocks_first ::
1465+ if self .kv_topo .is_kv_layout_blocks_first :
14661466 # With flashinfer, KV are sent in the same message.
14671467 remote_block_size //= 2
14681468
@@ -1948,7 +1948,7 @@ def get_backend_aware_kv_block_len(self, layer_idx: int):
19481948 For FlashInfer, this is half the length of the whole block, as K and V
19491949 share the same region.
19501950 """
1951- if self .kv_topo .is_kv_layout_blocks_first ::
1951+ if self .kv_topo .is_kv_layout_blocks_first :
19521952 # For indexing only half (either just the K or V part).
19531953 block_len = self .block_len_per_layer [layer_idx ] // 2
19541954 else :
0 commit comments