================ @@ -2601,67 +2601,73 @@ def int_amdgcn_ds_bvh_stack_rtn : [ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; +def int_amdgcn_s_wait_event_export_ready : + ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">, + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn] +>; + // WMMA (Wave Matrix Multiply-Accumulate) intrinsics // // These operations perform a matrix multiplication and accumulation of // the form: D = A * B + C . class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ AB, // %A - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C ], [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ AB, // %A - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C - llvm_i1_ty, // %high + llvm_i1_ty, // %high (op_sel) for GFX11, 0 for GFX12 ], [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ llvm_i1_ty, // %A_sign AB, // %A llvm_i1_ty, // %B_sign - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C llvm_i1_ty, // %clamp ], [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_v16i16_ty, llvm_anyfloat_ty>; -// The regular, untied f16/bf16 wmma intrinsics only write to one half -// of the registers (set via the op_sel bit). -// The content of the other 16-bit of the registers is undefined. 
-def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>; -// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix -// registers to the input accumulator registers. -// Essentially, the content of the other 16-bit is preserved from the input. -def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>; -def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>; -def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>; +// WMMA GFX11Only -def int_amdgcn_s_wait_event_export_ready : - ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn] ->; +// The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit. +// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix registers to the input accumulator registers. +// The content of the other 16-bit half is preserved from the input. 
+def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_any_ty, llvm_any_ty>; + +// WMMA GFX11Plus + +def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_any_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; + +// GFX11: The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit. +// The content of the other 16-bit half is undefined. +// GFX12: The op_sel bit must be 0. +def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_any_ty, llvm_any_ty>; ---------------- arsenm wrote:
Why is this using llvm_any_ty? Shouldn't it just be the one type? https://github.com/llvm/llvm-project/pull/77795 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits