saiislam updated this revision to Diff 269013.
saiislam added a comment.
1. Added 64-bit variants of the inc/dec builtins as well.
2. Added test cases for the 64-bit variants.
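For reference, a minimal device-side usage sketch of the new builtins (illustrative only, not part of the patch; the function name and argument choices below are assumptions based on the signatures in BuiltinsAMDGPU.def and the CodeGen tests):

  __attribute__((device)) void example_usage(int *p32, __INT64_TYPE__ *p64) {
    // Arguments: pointer, value, C++11 memory-order constant, sync-scope string.
    int r32 = __builtin_amdgcn_atomic_inc32(p32, *p32, __ATOMIC_SEQ_CST, "workgroup");
    r32 = __builtin_amdgcn_atomic_dec32(p32, *p32, __ATOMIC_SEQ_CST, "workgroup");
    __INT64_TYPE__ r64 = __builtin_amdgcn_atomic_inc64(p64, *p64, __ATOMIC_SEQ_CST, "workgroup");
    r64 = __builtin_amdgcn_atomic_dec64(p64, *p64, __ATOMIC_SEQ_CST, "agent");
  }

Each call lowers to the corresponding llvm.amdgcn.atomic.inc/dec intrinsic, with the ordering, scope, and volatility of the pointee encoded as extra operands, as shown in the CodeGen test below.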
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D80804/new/
https://reviews.llvm.org/D80804
Files:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
clang/test/SemaOpenCL/builtins-amdgcn-error.cl
Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===================================================================
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -144,3 +144,51 @@
__builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
__builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
}
+
+void test_atomic_inc32() {
+ int val = 17;
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc32(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_inc64() {
+ __INT64_TYPE__ val = 17;
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_inc64(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_dec32() {
+ int val = 17;
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec32(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
+
+void test_atomic_dec64() {
+ __INT64_TYPE__ val = 17;
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ val = __builtin_amdgcn_atomic_dec64(4); // expected-error {{too few arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, 4, 4, 4, 4); // expected-error {{too many arguments to function call, expected 4}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, 3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, 5); // expected-warning {{incompatible integer to pointer conversion passing 'int' to parameter of type 'const char *'}}
+ const char ptr[] = "workgroup";
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
+}
Index: clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
===================================================================
--- /dev/null
+++ clang/test/Sema/builtin-amdgcn-atomic-inc-dec-failure.cpp
@@ -0,0 +1,18 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -x hip -fcuda-is-device -o - -emit-llvm -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_host() {
+ int val;
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: error: reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "");
+}
Index: clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-atomic-inc-dec.cpp
@@ -0,0 +1,253 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 %s -x hip -fcuda-is-device -emit-llvm -O0 -o - \
+// RUN: -triple=amdgcn-amd-amdhsa | opt -S | FileCheck %s
+
+__attribute__((device)) void test_non_volatile_parameter32(int *ptr) {
+ // CHECK-LABEL: test_non_volatile_parameter32
+ int res;
+ // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
+ // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
+ // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load i32, i32* %1, align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load i32, i32* %5, align 4
+ // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_non_volatile_parameter64(__INT64_TYPE__ *ptr) {
+ // CHECK-LABEL: test_non_volatile_parameter64
+ __INT64_TYPE__ res;
+ // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
+ // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
+ // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load i64, i64* %1, align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load i64, i64* %5, align 8
+ // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_volatile_parameter32(volatile int *ptr) {
+ // CHECK-LABEL: test_volatile_parameter32
+ int res;
+ // CHECK: %ptr.addr = alloca i32*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i32* addrspace(5)* %ptr.addr to i32**
+ // CHECK-NEXT: %res = alloca i32, align 4, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i32 addrspace(5)* %res to i32*
+ // CHECK-NEXT: store i32* %ptr, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load volatile i32, i32* %1, align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %0, i32 %2, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i32 %3, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_inc32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i32*, i32** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load volatile i32, i32* %5, align 4
+ // CHECK-NEXT: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %4, i32 %6, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i32 %7, i32* %res.ascast, align 4
+ res = __builtin_amdgcn_atomic_dec32(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_volatile_parameter64(volatile __INT64_TYPE__ *ptr) {
+ // CHECK-LABEL: test_volatile_parameter64
+ __INT64_TYPE__ res;
+ // CHECK: %ptr.addr = alloca i64*, align 8, addrspace(5)
+ // CHECK-NEXT: %ptr.addr.ascast = addrspacecast i64* addrspace(5)* %ptr.addr to i64**
+ // CHECK-NEXT: %res = alloca i64, align 8, addrspace(5)
+ // CHECK-NEXT: %res.ascast = addrspacecast i64 addrspace(5)* %res to i64*
+ // CHECK-NEXT: store i64* %ptr, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %0 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %1 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %2 = load volatile i64, i64* %1, align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %0, i64 %2, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i64 %3, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_inc64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %4 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %5 = load i64*, i64** %ptr.addr.ascast, align 8
+ // CHECK-NEXT: %6 = load volatile i64, i64* %5, align 8
+ // CHECK-NEXT: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %4, i64 %6, i32 7, i32 2, i1 true)
+ // CHECK-NEXT: store i64 %7, i64* %res.ascast, align 8
+ res = __builtin_amdgcn_atomic_dec64(ptr, *ptr, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_shared32() {
+ // CHECK-LABEL: test_shared32
+ __attribute__((shared)) int val;
+
+ // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(3)* @_ZZ13test_shared32vE3val to i32*), align 4
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_shared64() {
+ // CHECK-LABEL: test_shared64
+ __attribute__((shared)) __INT64_TYPE__ val;
+
+ // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(3)* @_ZZ13test_shared64vE3val to i64*), align 8
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+int global_val32;
+__attribute__((device)) void test_global32() {
+ // CHECK-LABEL: test_global32
+ // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %1, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ global_val32 = __builtin_amdgcn_atomic_inc32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* addrspacecast (i32 addrspace(1)* @global_val32 to i32*), align 4
+ global_val32 = __builtin_amdgcn_atomic_dec32(&global_val32, global_val32, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__INT64_TYPE__ global_val64;
+__attribute__((device)) void test_global64() {
+ // CHECK-LABEL: test_global64
+ // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %1, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ global_val64 = __builtin_amdgcn_atomic_inc64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* addrspacecast (i64 addrspace(1)* @global_val64 to i64*), align 8
+ global_val64 = __builtin_amdgcn_atomic_dec64(&global_val64, global_val64, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((constant)) int cval32;
+__attribute__((device)) void test_constant32() {
+ // CHECK-LABEL: test_constant32
+ int local_val;
+
+ // CHECK: %0 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
+ // CHECK-NEXT: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %1, i32* %local_val.ascast, align 4
+ local_val = __builtin_amdgcn_atomic_inc32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i32, i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), align 4
+ // CHECK-NEXT: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(4)* @cval32 to i32*), i32 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i32 %3, i32* %local_val.ascast, align 4
+ local_val = __builtin_amdgcn_atomic_dec32(&cval32, cval32, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((constant)) __INT64_TYPE__ cval64;
+__attribute__((device)) void test_constant64() {
+ // CHECK-LABEL: test_constant64
+ __INT64_TYPE__ local_val;
+
+ // CHECK: %0 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
+ // CHECK-NEXT: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %0, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %1, i64* %local_val.ascast, align 8
+ local_val = __builtin_amdgcn_atomic_inc64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %2 = load i64, i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), align 8
+ // CHECK-NEXT: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(4)* @cval64 to i64*), i64 %2, i32 7, i32 2, i1 false)
+ // CHECK-NEXT: store i64 %3, i64* %local_val.ascast, align 8
+ local_val = __builtin_amdgcn_atomic_dec64(&cval64, cval64, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_order32() {
+ // CHECK-LABEL: test_order32
+ __attribute__((shared)) int val;
+
+ // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %0, i32 4, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, "workgroup");
+
+ // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %2, i32 5, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELEASE, "workgroup");
+
+ // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %4, i32 6, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQ_REL, "workgroup");
+
+ // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_order32vE3val to i32*), i32 %6, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_order64() {
+ // CHECK-LABEL: test_order64
+ __attribute__((shared)) __INT64_TYPE__ val;
+
+ // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %0, i32 4, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, "workgroup");
+
+ // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %2, i32 5, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELEASE, "workgroup");
+
+ // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %4, i32 6, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQ_REL, "workgroup");
+
+ // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_order64vE3val to i64*), i64 %6, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+}
+
+__attribute__((device)) void test_scope32() {
+ // CHECK-LABEL: test_scope32
+ __attribute__((shared)) int val;
+
+ // CHECK: %1 = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %0, i32 7, i32 1, i1 false)
+ val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: %3 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %2, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %5 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %4, i32 7, i32 3, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "agent");
+
+ // CHECK: %7 = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* addrspacecast (i32 addrspace(3)* @_ZZ12test_scope32vE3val to i32*), i32 %6, i32 7, i32 4, i1 false)
+ val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST, "wavefront");
+}
+
+__attribute__((device)) void test_scope64() {
+ // CHECK-LABEL: test_scope64
+ __attribute__((shared)) __INT64_TYPE__ val;
+
+ // CHECK: %1 = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %0, i32 7, i32 1, i1 false)
+ val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST, "");
+
+ // CHECK: %3 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %2, i32 7, i32 2, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "workgroup");
+
+ // CHECK: %5 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %4, i32 7, i32 3, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "agent");
+
+ // CHECK: %7 = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* addrspacecast (i64 addrspace(3)* @_ZZ12test_scope64vE3val to i64*), i64 %6, i32 7, i32 4, i1 false)
+ val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST, "wavefront");
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3066,41 +3066,56 @@
bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *TheCall) {
+ // Position of the memory order and scope arguments in the builtin call.
+ unsigned OrderIndex, ScopeIndex;
switch (BuiltinID) {
- case AMDGPU::BI__builtin_amdgcn_fence: {
- ExprResult Arg = TheCall->getArg(0);
- auto ArgExpr = Arg.get();
- Expr::EvalResult ArgResult;
-
- if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
- return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
- << ArgExpr->getType();
- int ord = ArgResult.Val.getInt().getZExtValue();
-
- // Check valididty of memory ordering as per C11 / C++11's memody model.
- switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
- case llvm::AtomicOrderingCABI::acquire:
- case llvm::AtomicOrderingCABI::release:
- case llvm::AtomicOrderingCABI::acq_rel:
- case llvm::AtomicOrderingCABI::seq_cst:
- break;
- default: {
- return Diag(ArgExpr->getBeginLoc(),
- diag::warn_atomic_op_has_invalid_memory_order)
- << ArgExpr->getSourceRange();
- }
- }
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+ OrderIndex = 2;
+ ScopeIndex = 3;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_fence:
+ OrderIndex = 0;
+ ScopeIndex = 1;
+ break;
+ default:
+ return false;
+ }
- Arg = TheCall->getArg(1);
- ArgExpr = Arg.get();
- Expr::EvalResult ArgResult1;
- // Check that sync scope is a constant literal
- if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
- Context))
- return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
- << ArgExpr->getType();
- } break;
+ ExprResult Arg = TheCall->getArg(OrderIndex);
+ auto ArgExpr = Arg.get();
+ Expr::EvalResult ArgResult;
+
+ if (!ArgExpr->EvaluateAsInt(ArgResult, Context))
+ return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+ << ArgExpr->getType();
+ int ord = ArgResult.Val.getInt().getZExtValue();
+
+ // Check validity of memory ordering as per C11 / C++11's memory model.
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+ case llvm::AtomicOrderingCABI::acquire:
+ case llvm::AtomicOrderingCABI::release:
+ case llvm::AtomicOrderingCABI::acq_rel:
+ case llvm::AtomicOrderingCABI::seq_cst:
+ break;
+ default: {
+ return Diag(ArgExpr->getBeginLoc(),
+ diag::warn_atomic_op_has_invalid_memory_order)
+ << ArgExpr->getSourceRange();
+ }
}
+
+ Arg = TheCall->getArg(ScopeIndex);
+ ArgExpr = Arg.get();
+ Expr::EvalResult ArgResult1;
+ // Check that sync scope is a constant literal
+ if (!ArgExpr->EvaluateAsConstantExpr(ArgResult1, Expr::EvaluateForCodeGen,
+ Context))
+ return Diag(ArgExpr->getExprLoc(), diag::err_expr_not_string_literal)
+ << ArgExpr->getType();
+
return false;
}
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -3988,6 +3988,9 @@
llvm::Value *EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
const CallExpr *E);
llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+ bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
+ llvm::AtomicOrdering &AO,
+ llvm::SyncScope::ID &SSID);
private:
enum class MSVCIntrin;
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14289,8 +14289,49 @@
}
} // namespace
+// Process the memory ordering and memory scope arguments of various
+// AMDGCN builtins.
+// \p Order takes a C++11-compatible memory-ordering specifier and converts
+// it into LLVM's memory ordering specifier using the atomic C ABI, writing
+// the result to \p AO. \p Scope takes a const char * and converts it into an
+// AMDGCN-specific SyncScope::ID, writing the result to \p SSID.
+bool CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
+ llvm::AtomicOrdering &AO,
+ llvm::SyncScope::ID &SSID) {
+ if (isa<llvm::ConstantInt>(Order)) {
+ int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+
+ // Map C11/C++11 memory ordering to LLVM memory ordering
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+ case llvm::AtomicOrderingCABI::acquire:
+ AO = llvm::AtomicOrdering::Acquire;
+ break;
+ case llvm::AtomicOrderingCABI::release:
+ AO = llvm::AtomicOrdering::Release;
+ break;
+ case llvm::AtomicOrderingCABI::acq_rel:
+ AO = llvm::AtomicOrdering::AcquireRelease;
+ break;
+ case llvm::AtomicOrderingCABI::seq_cst:
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ break;
+ case llvm::AtomicOrderingCABI::consume:
+ case llvm::AtomicOrderingCABI::relaxed:
+ break;
+ }
+
+ StringRef scp;
+ llvm::getConstantStringInfo(Scope, scp);
+ SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+ return true;
+ }
+ return false;
+}
+
Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
const CallExpr *E) {
+ llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ llvm::SyncScope::ID SSID;
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_div_scale:
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
@@ -14495,38 +14536,57 @@
}
case AMDGPU::BI__builtin_amdgcn_fence: {
- llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
- llvm::SyncScope::ID SSID;
- Value *Order = EmitScalarExpr(E->getArg(0));
- Value *Scope = EmitScalarExpr(E->getArg(1));
+ if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),
+ EmitScalarExpr(E->getArg(1)), AO, SSID))
+ return Builder.CreateFence(AO, SSID);
+ LLVM_FALLTHROUGH;
+ }
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
+ unsigned BuiltinAtomicOp;
+ llvm::IntegerType *IntType;
- if (isa<llvm::ConstantInt>(Order)) {
- int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
+ IntType = Int32Ty;
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
+ IntType = Int64Ty;
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_inc;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
+ IntType = Int32Ty;
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+ IntType = Int64Ty;
+ BuiltinAtomicOp = Intrinsic::amdgcn_atomic_dec;
+ break;
+ }
- // Map C11/C++11 memory ordering to LLVM memory ordering
- switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
- case llvm::AtomicOrderingCABI::acquire:
- AO = llvm::AtomicOrdering::Acquire;
- break;
- case llvm::AtomicOrderingCABI::release:
- AO = llvm::AtomicOrdering::Release;
- break;
- case llvm::AtomicOrderingCABI::acq_rel:
- AO = llvm::AtomicOrdering::AcquireRelease;
- break;
- case llvm::AtomicOrderingCABI::seq_cst:
- AO = llvm::AtomicOrdering::SequentiallyConsistent;
- break;
- case llvm::AtomicOrderingCABI::consume: // not supported by LLVM fence
- case llvm::AtomicOrderingCABI::relaxed: // not supported by LLVM fence
- break;
- }
+ Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Value *Val = EmitScalarExpr(E->getArg(1));
- StringRef scp;
- llvm::getConstantStringInfo(Scope, scp);
- SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+ llvm::Function *F =
+ CGM.getIntrinsic(BuiltinAtomicOp, {IntType, Ptr->getType()});
- return Builder.CreateFence(AO, SSID);
+ if (ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+ EmitScalarExpr(E->getArg(3)), AO, SSID)) {
+
+ // llvm.amdgcn.atomic.inc and llvm.amdgcn.atomic.dec expect the ordering and
+ // scope as unsigned values.
+ Value *MemOrder = Builder.getInt32(static_cast<int>(AO));
+ Value *MemScope = Builder.getInt32(static_cast<int>(SSID));
+
+ QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
+ bool Volatile =
+ PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+ Value *IsVolatile = Builder.getInt1(static_cast<bool>(Volatile));
+
+ return Builder.CreateCall(F, {Ptr, Val, MemOrder, MemScope, IsVolatile});
}
LLVM_FALLTHROUGH;
}
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -60,6 +60,12 @@
BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_inc32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_inc64, "WiWiD*WiUicC*", "n")
+
+BUILTIN(__builtin_amdgcn_atomic_dec32, "ZiZiD*ZiUicC*", "n")
+BUILTIN(__builtin_amdgcn_atomic_dec64, "WiWiD*WiUicC*", "n")
+
// FIXME: Need to disallow constant address space.
BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n")