@@ -422,6 +422,46 @@ uint8 __builtin_IB_subgroup_block_read_flat_transpose_u32_k8(long baseoffset, in
422422ulong4 __builtin_IB_subgroup_block_read_flat_transpose_u64_k4 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord );
423423#endif // cl_intel_subgroup_extended_block_read
424424
425+ #ifdef cl_intel_subgroup_extended_block_read_cacheopts
426+ // 2d block read cacheopts: every read below takes a trailing `enum LSC_LDCC cacheOpt` argument; the returned vector type differs per variant
427+ ushort2 __builtin_IB_subgroup_block_read_cacheopts_u8_m1k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
428+ ushort4 __builtin_IB_subgroup_block_read_cacheopts_u8_m2k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
429+ ushort8 __builtin_IB_subgroup_block_read_cacheopts_u8_m4k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
430+ ushort16 __builtin_IB_subgroup_block_read_cacheopts_u8_m8k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
431+ ushort2 __builtin_IB_subgroup_block_read_cacheopts_u16_m1k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
432+ ushort4 __builtin_IB_subgroup_block_read_cacheopts_u16_m2k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
433+ ushort8 __builtin_IB_subgroup_block_read_cacheopts_u16_m4k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
434+ ushort16 __builtin_IB_subgroup_block_read_cacheopts_u16_m8k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
435+ uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u8_k32 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
436+ uint8 __builtin_IB_subgroup_block_read_cacheopts_transform_u16_k16 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
437+ // 2d block write cacheopts: these return void and take a `val` argument followed by a trailing `enum LSC_STCC cache_control`; note the first parameter is named base_address here (baseoffset in the reads)
438+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m1k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort val , enum LSC_STCC cache_control );
439+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m2k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort2 val , enum LSC_STCC cache_control );
440+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m4k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort4 val , enum LSC_STCC cache_control );
441+ void __builtin_IB_subgroup_block_write_cacheopts_u8_m8k32v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort8 val , enum LSC_STCC cache_control );
442+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m1k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort val , enum LSC_STCC cache_control );
443+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m2k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort2 val , enum LSC_STCC cache_control );
444+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m4k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort4 val , enum LSC_STCC cache_control );
445+ void __builtin_IB_subgroup_block_write_cacheopts_u16_m8k16v1 (long base_address , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , ushort8 val , enum LSC_STCC cache_control );
446+ // transpose reads with cacheopts (declared after the write group); equivalent to transpose_transform_u8_k32 and transpose_transform_u16_k16 -- NOTE(review): confirm which of the two declarations below this equivalence covers
447+ uint8 __builtin_IB_subgroup_block_read_cacheopts_transpose_u32_k8 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
448+ ulong4 __builtin_IB_subgroup_block_read_cacheopts_transpose_u64_k4 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
449+
450+ // 2d block read prefetch: one declaration per cacheopts read above, with "prefetch" in place of "cacheopts" in the name, the same parameter list, and a void return type
451+ void __builtin_IB_subgroup_block_read_prefetch_u8_m1k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
452+ void __builtin_IB_subgroup_block_read_prefetch_u8_m2k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
453+ void __builtin_IB_subgroup_block_read_prefetch_u8_m4k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
454+ void __builtin_IB_subgroup_block_read_prefetch_u8_m8k32v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
455+ void __builtin_IB_subgroup_block_read_prefetch_u16_m1k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
456+ void __builtin_IB_subgroup_block_read_prefetch_u16_m2k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
457+ void __builtin_IB_subgroup_block_read_prefetch_u16_m4k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
458+ void __builtin_IB_subgroup_block_read_prefetch_u16_m8k16v2 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
459+ void __builtin_IB_subgroup_block_read_prefetch_transform_u8_k32 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
460+ void __builtin_IB_subgroup_block_read_prefetch_transform_u16_k16 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
461+ // prefetch counterparts of the transpose reads; equivalent to transpose_transform_u8_k32 and transpose_transform_u16_k16 -- NOTE(review): confirm which of the two declarations below this equivalence covers
462+ void __builtin_IB_subgroup_block_read_prefetch_transpose_u32_k8 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
463+ void __builtin_IB_subgroup_block_read_prefetch_transpose_u64_k4 (long baseoffset , int width_minus_one , int height_minus_one , int pitch_minus_one , int2 coord , enum LSC_LDCC cacheOpt );
464+ #endif // cl_intel_subgroup_extended_block_read_cacheopts
425465
426466// experimental
427467#ifdef cl_intel_subgroup_extended_block_read
0 commit comments