Skip to content

Commit 2916163

Browse files
henrylhtsang authored and meta-codesync[bot] committed
Allow some other version (#5134)
Summary:
Pull Request resolved: #5134
X-link: https://github.com/facebookresearch/FBGEMM/pull/2136
NA
Reviewed By: Aya-ZIbra
Differential Revision: D87104671
fbshipit-source-id: cff2f82455f240dc0a6b94d2615d370a0d0e3e51
1 parent 373d798 commit 2916163

File tree

5 files changed

+12
-7
lines changed

5 files changed

+12
-7
lines changed

fbgemm_gpu/experimental/gen_ai/src/attention/cuda/cutlass_blackwell_fmha/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1649,7 +1649,8 @@ struct Sm100FmhaBwdKernelTmaWarpSpecialized {
16491649

16501650

16511651
CUTLASS_DEVICE void operator()(Params const& params, char* smem) {
1652-
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED))
1652+
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && \
1653+
! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM103A_ENABLED))
16531654
printf("ERROR : Arch conditional MMA instruction used without targeting appropriate compute capability. Aborting.\n");
16541655
#else
16551656
int warp_idx = cutlass::canonical_warp_idx_sync();

fbgemm_gpu/experimental/gen_ai/src/attention/cuda/cutlass_blackwell_fmha/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,9 +1481,10 @@ struct Sm100FmhaBwdMlaKernelTmaWarpSpecialized {
14811481

14821482

14831483
CUTLASS_DEVICE void operator()(Params const& params, char* smem) {
1484-
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED))
1484+
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && \
1485+
! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM103A_ENABLED))
14851486
printf("ERROR : Arch conditional MMA instruction used without targeting appropriate compute capability. Aborting.\n");
1486-
#else
1487+
#else
14871488
int warp_idx = cutlass::canonical_warp_idx_sync();
14881489
auto role = warp_idx_to_role(warp_idx);
14891490
uint32_t lane_predicate = cute::elect_one_sync();

fbgemm_gpu/experimental/gen_ai/src/attention/cuda/cutlass_blackwell_fmha/kernel/sm100_fmha_fwd_kernel_tma_warpspecialized.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@ struct Sm100FmhaFwdKernelTmaWarpspecialized {
265265
}
266266

267267
CUTLASS_DEVICE void operator()(const Params &params, char* smem) {
268-
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED))
268+
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && \
269+
! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM103A_ENABLED))
269270
printf("ERROR : Arch conditional MMA instruction used without targeting appropriate compute capability. Aborting.\n");
270271
#else
271272

fbgemm_gpu/experimental/gen_ai/src/attention/cuda/cutlass_blackwell_fmha/kernel/sm100_fmha_gen_kernel_warpspecialized.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,8 @@ struct Sm100FmhaGenKernelWarpspecialized {
248248
}
249249

250250
CUTLASS_DEVICE void operator()(const Params &params, char* smem) {
251-
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED))
251+
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && \
252+
! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM103A_ENABLED))
252253
printf("ERROR : Arch conditional MMA instruction used without targeting appropriate compute capability. Aborting.\n");
253254
#else
254255

@@ -280,7 +281,7 @@ struct Sm100FmhaGenKernelWarpspecialized {
280281
shared_storage.pipelines.load_q,
281282
pipeline_load_q_params,
282283
ClusterShape{}, cute::true_type{}, /*mask calc*/cute::false_type{});
283-
284+
284285
typename CollectiveMainloop::PipelineKV::Params pipeline_load_kv_params;
285286
if (role == WarpRole::Load) {
286287
pipeline_load_kv_params.role = CollectiveMainloop::PipelineKV::ThreadCategory::Producer;

fbgemm_gpu/experimental/gen_ai/src/attention/cuda/cutlass_blackwell_fmha/kernel/sm100_fmha_mla_tma_warpspecialized.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,8 @@ struct Sm100FmhaMlaKernelTmaWarpspecialized {
508508

509509

510510
CUTLASS_DEVICE void operator()(Params const& params, char* smem_raw) {
511-
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED))
511+
#if (! defined(CUTLASS_ARCH_MMA_SM100A_ENABLED) && \
512+
! defined(CUTLASS_ARCH_MMA_SM100F_ENABLED) && ! defined(CUTLASS_ARCH_MMA_SM103A_ENABLED))
512513
printf("ERROR : Arch conditional MMA instruction used without targeting appropriate compute capability. Aborting.\n");
513514
#else
514515

0 commit comments

Comments
 (0)