gandhi21299 created this revision. gandhi21299 added reviewers: foad, arsenm, rampitec, Joe_Nash. Herald added subscribers: kosarev, StephenFan, kerbowa, tpr, dstuttard, yaxunl, jvesely, kzhuravl. Herald added a project: All. gandhi21299 requested review of this revision. Herald added subscribers: cfe-commits, wdng. Herald added a project: clang.
Change target feature of __builtin_amdgcn_global_atomic_fadd_f32 to atomic-fadd-rtn-insts. Enable atomic-fadd-rtn-insts for gfx90a, gfx908 and gfx1100 as they all support global_atomic_add_f32. Fixes https://github.com/llvm/llvm-project/issues/61331. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D146840 Files: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/lib/Basic/Targets/AMDGPU.cpp clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl @@ -0,0 +1,11 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=IR +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -o - %s | FileCheck %s --check-prefix=GFX908 + +// IR-LABEL: @test_global_add_f32 +// IR: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +// GFX908-LABEL: test_global_add_f32 +// GFX908: global_atomic_add_f32 +void test_global_add_f32(float *rtn, global float *addr, float x) { + *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); +} Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -43,3 +43,9 @@ void test_s_wait_event_export_ready() { __builtin_amdgcn_s_wait_event_export_ready(); } + +// CHECK-LABEL: @test_global_add_f32 +// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +void test_global_add_f32(float *rtn, global float *addr, float x) { + *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); +} Index: clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl @@ -11,7 +11,6 @@ float *fp_rtn; double *rtn; *half_rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}} - *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmax_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmin_f64' needs target feature gfx90a-insts}} Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -206,6 +206,7 @@ Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; break; case GK_GFX1036: case GK_GFX1035: @@ -264,6 +265,7 @@ case GK_GFX90A: Features["gfx90a-insts"] = true; Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; [[fallthrough]]; case GK_GFX908: Features["dot3-insts"] = true; @@ -271,6 +273,7 @@ Features["dot5-insts"] = true; Features["dot6-insts"] = true; Features["mai-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; [[fallthrough]]; case GK_GFX906: Features["dl-insts"] = true; Index: clang/include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- clang/include/clang/Basic/BuiltinsAMDGPU.def +++ clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -214,7 +214,7 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts")
Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx908.cl @@ -0,0 +1,11 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=IR +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -S -o - %s | FileCheck %s --check-prefix=GFX908 + +// IR-LABEL: @test_global_add_f32 +// IR: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +// GFX908-LABEL: test_global_add_f32 +// GFX908: global_atomic_add_f32 +void test_global_add_f32(float *rtn, global float *addr, float x) { + *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); +} Index: clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -43,3 +43,9 @@ void test_s_wait_event_export_ready() { __builtin_amdgcn_s_wait_event_export_ready(); } + +// CHECK-LABEL: @test_global_add_f32 +// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}}) +void test_global_add_f32(float *rtn, global float *addr, float x) { + *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); +} Index: clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl =================================================================== --- clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl +++ clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx908-err.cl @@ -11,7 +11,6 @@ float *fp_rtn; double *rtn; *half_rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2f16' needs target feature atomic-buffer-global-pk-add-f16-insts}} - *fp_rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f32' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmax_f64' needs target feature gfx90a-insts}} *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // expected-error{{'__builtin_amdgcn_global_atomic_fmin_f64' needs target feature gfx90a-insts}} Index: clang/lib/Basic/Targets/AMDGPU.cpp =================================================================== --- clang/lib/Basic/Targets/AMDGPU.cpp +++ clang/lib/Basic/Targets/AMDGPU.cpp @@ -206,6 +206,7 @@ Features["gfx10-insts"] = true; Features["gfx10-3-insts"] = true; Features["gfx11-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; break; case GK_GFX1036: case GK_GFX1035: @@ -264,6 +265,7 @@ case GK_GFX90A: Features["gfx90a-insts"] = true; Features["atomic-buffer-global-pk-add-f16-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; [[fallthrough]]; case GK_GFX908: Features["dot3-insts"] = true; @@ -271,6 +273,7 @@ Features["dot5-insts"] = true; Features["dot6-insts"] = true; Features["mai-insts"] = true; + Features["atomic-fadd-rtn-insts"] = true; [[fallthrough]]; case GK_GFX906: Features["dl-insts"] = true; Index: clang/include/clang/Basic/BuiltinsAMDGPU.def =================================================================== --- clang/include/clang/Basic/BuiltinsAMDGPU.def +++ clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -214,7 +214,7 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f64, "dd*1d", "t", "gfx90a-insts") -TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "gfx90a-insts") +TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_f32, "ff*1f", "t", "atomic-fadd-rtn-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2f16, "V2hV2h*1V2h", "t", "atomic-buffer-global-pk-add-f16-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmin_f64, "dd*1d", "t", "gfx90a-insts") TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fmax_f64, "dd*1d", "t", "gfx90a-insts")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits