1 parent 65c4e40 commit 8ea69b8
src/maxdiffusion/models/attention_flax.py
@@ -240,7 +240,7 @@ def _tpu_flash_attention(
       block_q_dkv=min(q_max_block_size, query.shape[2]),
       block_kv_dkv=min(kv_max_block_size, key.shape[2]),
       block_kv_dkv_compute=min(kv_max_block_size, query.shape[2]),
-      block_q_dq=None if attention_kernel == "tokamax_flash" else block_sizes.block_q_dq,
+      block_q_dq=None if attention_kernel == "tokamax_flash" else min(q_max_block_size, query.shape[2]),
       block_kv_dq=None if attention_kernel == "tokamax_flash" else min(kv_max_block_size, query.shape[2]),
       use_fused_bwd_kernel=True if attention_kernel == "tokamax_flash" else False,
   )
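For context, below is a minimal, self-contained sketch (not the repository's code) of the clamping pattern this commit applies to `block_q_dq`: each forward/backward block size is capped at the actual sequence length, and the dq block sizes are left unset when the `tokamax_flash` fused backward kernel is selected. The function name `pick_block_sizes`, the default maxima of 512/1024, and the plain dict return are illustrative assumptions, not the signature used in `attention_flax.py`.

```python
# Illustrative sketch of the block-size selection pattern in the diff.
# Assumptions: q_max_block_size / kv_max_block_size defaults and the dict
# return shape are hypothetical; only the min()-clamping and the
# tokamax_flash special case mirror the committed change.

def pick_block_sizes(query_seq_len: int, kv_seq_len: int,
                     q_max_block_size: int = 512,
                     kv_max_block_size: int = 1024,
                     attention_kernel: str = "flash") -> dict:
    """Clamp backward-pass block sizes to the sequence lengths."""
    tokamax = attention_kernel == "tokamax_flash"
    return {
        "block_q_dkv": min(q_max_block_size, query_seq_len),
        "block_kv_dkv": min(kv_max_block_size, kv_seq_len),
        "block_kv_dkv_compute": min(kv_max_block_size, query_seq_len),
        # The fused backward kernel used by tokamax_flash manages its own
        # dq blocking, so these stay unset (None) in that case.
        "block_q_dq": None if tokamax else min(q_max_block_size, query_seq_len),
        "block_kv_dq": None if tokamax else min(kv_max_block_size, query_seq_len),
        "use_fused_bwd_kernel": tokamax,
    }

if __name__ == "__main__":
    # A 256-token sequence gets 256-sized blocks rather than the 512/1024 maxima.
    print(pick_block_sizes(query_seq_len=256, kv_seq_len=256))
```

The point of the fix itself is that `block_q_dq` is now clamped with `min(q_max_block_size, query.shape[2])`, matching the other block-size arguments, instead of reading `block_sizes.block_q_dq`.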