Author: Changpeng Fang Date: 2025-07-11T15:07:21-07:00 New Revision: 8c1b5169484533a41d6a05603315a092c364975d
URL: https://github.com/llvm/llvm-project/commit/8c1b5169484533a41d6a05603315a092c364975d DIFF: https://github.com/llvm/llvm-project/commit/8c1b5169484533a41d6a05603315a092c364975d.diff LOG: AMDGPU: Implement s_wait_asynccnt and s_wait_tensorcnt for gfx1250 (#148292) Co-authored-by: Stanislav Mekhanoshin <stanislav.mekhanos...@amd.com> Co-authored-by: Vang Thao <vang.t...@amd.com> Added: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/lib/Target/AMDGPU/SOPInstructions.td llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index a5ee8013adff6..4d371a9f7d6db 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -665,6 +665,9 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "n TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst") TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep, "vIs", "n", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_wait_asynccnt, "vIUs", "n", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_s_wait_tensorcnt, "vIUs", "n", "gfx1250-insts") + TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts") diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 421099d3876e3..a1b91d0cc38dc 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -24,6 +24,24 @@ void test_s_monitor_sleep() { __builtin_amdgcn_s_monitor_sleep(10); } +// CHECK-LABEL: @test_s_wait_asynccnt( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.amdgcn.s.wait.asynccnt(i16 0) +// CHECK-NEXT: ret void +// +void test_s_wait_asynccnt() { + __builtin_amdgcn_s_wait_asynccnt(0); +} + +// CHECK-LABEL: @test_s_wait_tensorcnt( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.amdgcn.s.wait.tensorcnt(i16 0) +// CHECK-NEXT: ret void +// +void test_s_wait_tensorcnt() { + __builtin_amdgcn_s_wait_tensorcnt(0); +} + // CHECK-LABEL: @test_cvt_f16_fp8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl index 7494c4f984353..9711b3bdded6b 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl @@ -12,6 +12,14 @@ void test_s_monitor_sleep(short a) { __builtin_amdgcn_s_monitor_sleep(a); // expected-error {{'__builtin_amdgcn_s_monitor_sleep' must be a constant integer}} } +void test_s_wait_asynccnt(short a) { + __builtin_amdgcn_s_wait_asynccnt(a); // expected-error {{'__builtin_amdgcn_s_wait_asynccnt' must be a constant integer}} +} + +void test_s_wait_tensorcnt(short a) { + __builtin_amdgcn_s_wait_tensorcnt(a); // expected-error {{'__builtin_amdgcn_s_wait_tensorcnt' must be a constant integer}} +} + void test__builtin_amdgcn_cvt_f16_fp8(int a, int b) { __builtin_amdgcn_cvt_f16_fp8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_fp8' must be a constant integer}} } diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 16885f331e9dd..8016757cf0f3c 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3510,6 +3510,18 @@ def int_amdgcn_ashr_pk_u8_i32 : ClangBuiltin<"__builtin_amdgcn_ashr_pk_u8_i32">, // gfx1250 intrinsics // ===----------------------------------------------------------------------===// +// Async waits decrement ASYNCcnt and tensor waits decrement TENSORcnt which is +// modeled as InaccessibleMem. +class AMDGPUWaitAsyncIntrinsic : + Intrinsic<[], [llvm_i16_ty], + [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>, IntrWillReturn, IntrNoCallback, + IntrNoFree]>; + +def int_amdgcn_s_wait_asynccnt : + ClangBuiltin<"__builtin_amdgcn_s_wait_asynccnt">, AMDGPUWaitAsyncIntrinsic; +def int_amdgcn_s_wait_tensorcnt : + ClangBuiltin<"__builtin_amdgcn_s_wait_tensorcnt">, AMDGPUWaitAsyncIntrinsic; + def int_amdgcn_ds_atomic_async_barrier_arrive_b64 : ClangBuiltin<"__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64">, Intrinsic<[], [local_ptr_ty], diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index c7c4276e0e252..2472b76fcf02c 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1764,6 +1764,27 @@ let OtherPredicates = [HasExportInsts] in [(int_amdgcn_s_wait_kmcnt timm:$simm16)]>; } // End SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 +let SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1 in { + def S_WAIT_ASYNCCNT : + SOPP_Pseudo<"s_wait_asynccnt", (ins s16imm:$simm16), "$simm16", + [(int_amdgcn_s_wait_asynccnt timm:$simm16)]> { + let mayLoad = 1; + let mayStore = 1; + let maybeAtomic = 0; + let Uses = [ASYNCcnt]; + let Defs = [ASYNCcnt]; + } + def S_WAIT_TENSORCNT : + SOPP_Pseudo<"s_wait_tensorcnt", (ins s16imm:$simm16), "$simm16", + [(int_amdgcn_s_wait_tensorcnt timm:$simm16)]> { + let mayLoad = 1; + let mayStore = 1; + let maybeAtomic = 0; + let Uses = [TENSORcnt]; + let Defs = [TENSORcnt]; + } +} // End SubtargetPredicate = isGFX1250Plus, hasSideEffects = 1 + let SubtargetPredicate = HasWaitXcnt, hasSideEffects = 1 in { def S_WAIT_XCNT : SOPP_Pseudo<"s_wait_xcnt", (ins s16imm:$simm16), "$simm16">; @@ -2609,6 +2630,8 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>; //===----------------------------------------------------------------------===// defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>; defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>; +defm S_WAIT_ASYNCCNT : SOPP_Real_32_gfx12<0x04a>; +defm S_WAIT_TENSORCNT : SOPP_Real_32_gfx12<0x04b>; //===----------------------------------------------------------------------===// // SOPP - GFX11, GFX12. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll new file mode 100644 index 0000000000000..2173d07baa57e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx1250.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s -check-prefix=GFX12 + +define amdgpu_ps void @test_asynccnt() { +; GFX12-LABEL: test_asynccnt: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_asynccnt 0x0 +; GFX12-NEXT: s_endpgm + call void @llvm.amdgcn.s.wait.asynccnt(i16 0) + ret void +} + +define amdgpu_ps void @test_tensorcnt() { +; GFX12-LABEL: test_tensorcnt: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_tensorcnt 0x0 +; GFX12-NEXT: s_endpgm + call void @llvm.amdgcn.s.wait.tensorcnt(i16 0) + ret void +} + +declare void @llvm.amdgcn.s.wait.asynccnt(i16) +declare void @llvm.amdgcn.s.wait.tensorcnt(i16) diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s index 6ebc17468eed6..234c2ed0de793 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s @@ -1,6 +1,26 @@ // RUN: llvm-mc -triple=amdgcn -show-encoding -mcpu=gfx1250 %s | FileCheck --check-prefix=GFX1250 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR --implicit-check-not=error: -strict-whitespace %s +s_wait_asynccnt 0x1234 +// GFX1250: [0x34,0x12,0xca,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_asynccnt 0xc1d1 +// GFX1250: [0xd1,0xc1,0xca,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_tensorcnt 0x0 +// GFX1250: [0x00,0x00,0xcb,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_tensorcnt 0x1 +// GFX1250: [0x01,0x00,0xcb,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_wait_tensorcnt 0x3 +// GFX1250: [0x03,0x00,0xcb,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + s_wait_xcnt 0x0 // GFX1250: [0x00,0x00,0xc5,0xbf] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt index 220f9e5084f0e..e7026df3c0e2b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt @@ -1,5 +1,20 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1250 %s +# GFX1250: s_wait_asynccnt 0x1234 ; encoding: [0x34,0x12,0xca,0xbf] +0x34,0x12,0xca,0xbf + +# GFX1250: s_wait_asynccnt 0xc1d1 ; encoding: [0xd1,0xc1,0xca,0xbf] +0xd1,0xc1,0xca,0xbf + +# GFX1250: s_wait_tensorcnt 0x0 ; encoding: [0x00,0x00,0xcb,0xbf] +0x00,0x00,0xcb,0xbf + +# GFX1250: s_wait_tensorcnt 0x1 ; encoding: [0x01,0x00,0xcb,0xbf] +0x01,0x00,0xcb,0xbf + +# GFX1250: s_wait_tensorcnt 0x3 ; encoding: [0x03,0x00,0xcb,0xbf] +0x03,0x00,0xcb,0xbf + # GFX1250: s_wait_xcnt 0x0 ; encoding: [0x00,0x00,0xc5,0xbf] 0x00,0x00,0xc5,0xbf _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits