typo fix:(

khluu · khluu · commit 46cc89081758 · 2025-11-17T11:48:12.000-08:00
Signed-off-by: Kevin H. Luu <khluu000@gmail.com>
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
@@ -1189,7 +1189,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
 
         self.device_kv_caches = kv_caches
         self.dst_num_blocks[self.engine_id] = self.num_blocks
-        if self.kv_topo.is_kv_layout_blocks_first::
+        if self.kv_topo.is_kv_layout_blocks_first:
             for i in range(len(self.slot_size_per_layer)):
                 assert self.slot_size_per_layer[i] % 2 == 0
                 self.slot_size_per_layer[i] //= 2
@@ -1217,7 +1217,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]):
                 # (addr, len, device id)
                 blocks_data.append((addr, kv_block_len, self.device_id))
 
-            if self.kv_topo.is_kv_layout_blocks_first::
+            if self.kv_topo.is_kv_layout_blocks_first:
                 # Separate and interleave K/V regions to maintain the same
                 # descs ordering. This is needed for selecting contiguous heads
                 # when split across TP ranks.
@@ -1379,7 +1379,7 @@ def add_remote_agent(
                 # (addr, len, device id)
                 blocks_data.append((addr, kv_block_len, nixl_agent_meta.device_id))
 
-            if self.kv_topo.is_kv_layout_blocks_first::
+            if self.kv_topo.is_kv_layout_blocks_first:
                 # With FlashInfer index V separately to allow head splitting.
                 for block_id in range(nixl_agent_meta.num_blocks):
                     block_offset = block_id * nixl_agent_meta.block_lens[i]
@@ -1462,7 +1462,7 @@ def _validate_remote_agent_handshake(
             remote_block_size = remote_block_len // (
                 self.slot_size_per_layer[0] * tp_ratio
             )
-            if self.kv_topo.is_kv_layout_blocks_first::
+            if self.kv_topo.is_kv_layout_blocks_first:
                 # With flashinfer, KV are sent in the same message.
                 remote_block_size //= 2
 
@@ -1948,7 +1948,7 @@ def get_backend_aware_kv_block_len(self, layer_idx: int):
         For FlashInfer, this is half the length of the whole block, as K and V
         share the same region.
         """
-        if self.kv_topo.is_kv_layout_blocks_first::
+        if self.kv_topo.is_kv_layout_blocks_first:
             # For indexing only half (either just the K or V part).
             block_len = self.block_len_per_layer[layer_idx] // 2
         else: