saiislam updated this revision to Diff 259238.
saiislam added a comment.
Herald added subscribers: kerbowa, nhaehnle, jvesely.
Changed the builtin to be AMDGCN-specific.
It is now named __builtin_amdgcn_fence(order, scope).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D75917/new/
https://reviews.llvm.org/D75917
Files:
clang/docs/LanguageExtensions.rst
clang/include/clang/Basic/Builtins.def
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/include/clang/Sema/Sema.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaChecking.cpp
clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
clang/test/CodeGenHIP/builtin_memory_fence.cpp
clang/test/Sema/builtins.c
clang/test/SemaOpenCL/builtins-amdgcn-error.cl
Index: clang/test/SemaOpenCL/builtins-amdgcn-error.cl
===================================================================
--- clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -128,3 +128,11 @@
*out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
*out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a); // expected-error {{argument to '__builtin_amdgcn_ds_fmaxf' must be a constant integer}}
}
+
+void test_fence() {
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
+ __builtin_amdgcn_fence(4); // expected-error {{too few arguments to function call, expected 2}}
+ __builtin_amdgcn_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
+ __builtin_amdgcn_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
+}
Index: clang/test/Sema/builtins.c
===================================================================
--- clang/test/Sema/builtins.c
+++ clang/test/Sema/builtins.c
@@ -320,15 +320,3 @@
// expected-error@+1 {{use of unknown builtin '__builtin_is_constant_evaluated'}}
return __builtin_is_constant_evaluated();
}
-
-void test_memory_fence_errors() {
- __builtin_memory_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
- __builtin_memory_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // expected-warning {{memory order argument to atomic operation is invalid}}
-
- __builtin_memory_fence(4); // expected-error {{too few arguments to function call, expected 2}}
-
- __builtin_memory_fence(4, 4, 4); // expected-error {{too many arguments to function call, expected 2}}
-
- __builtin_memory_fence(3.14, ""); // expected-warning {{implicit conversion from 'double' to 'unsigned int' changes value from 3.14 to 3}}
-}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
===================================================================
--- clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -1,25 +1,22 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 %s -x hip -emit-llvm -O0 -o - \
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - \
// RUN: -triple=amdgcn-amd-amdhsa | opt -instnamer -S | FileCheck %s
void test_memory_fence_success() {
// CHECK-LABEL: test_memory_fence_success
// CHECK: fence syncscope("workgroup") seq_cst
- __builtin_memory_fence(__ATOMIC_SEQ_CST, "workgroup");
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
// CHECK: fence syncscope("agent") acquire
- __builtin_memory_fence(__ATOMIC_ACQUIRE, "agent");
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
// CHECK: fence seq_cst
- __builtin_memory_fence(__ATOMIC_SEQ_CST, "");
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
// CHECK: fence syncscope("agent") acq_rel
- __builtin_memory_fence(4, "agent");
+ __builtin_amdgcn_fence(4, "agent");
// CHECK: fence syncscope("workgroup") release
- __builtin_memory_fence(3, "workgroup");
-
- // CHECK: fence syncscope("foobar") release
- __builtin_memory_fence(3, "foobar");
-}
\ No newline at end of file
+ __builtin_amdgcn_fence(3, "workgroup");
+}
Index: clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCXX/builtin-amdgcn-fence-failure.cpp
@@ -0,0 +1,9 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: not %clang_cc1 %s -S \
+// RUN: -triple=amdgcn-amd-amdhsa 2>&1 | FileCheck %s
+
+void test_amdgcn_fence_failure() {
+
+ // CHECK: error: Unsupported atomic synchronization scope
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "foobar");
+}
\ No newline at end of file
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1871,7 +1871,7 @@
<< TheCall->getSourceRange();
} break;
- case Builtin::BI__builtin_memory_fence: {
+ /*case AMDGPU::BI__builtin_amdgcn_fence: {
ExprResult Arg = TheCall->getArg(0);
auto ArgExpr = Arg.get();
Expr::EvalResult ArgResult;
@@ -1897,7 +1897,7 @@
return ExprError();
}
}
- } break;
+ } break;*/
}
// Since the target specific builtins for each arch overlap, only check those
@@ -1948,6 +1948,10 @@
if (CheckPPCBuiltinFunctionCall(BuiltinID, TheCall))
return ExprError();
break;
+ case llvm::Triple::amdgcn:
+ if (CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall))
+ return ExprError();
+ break;
default:
break;
}
@@ -2949,6 +2953,37 @@
return SemaBuiltinConstantArgRange(TheCall, i, l, u);
}
+bool Sema::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_fence: {
+ ExprResult Arg = TheCall->getArg(0);
+ auto ArgExpr = Arg.get();
+ Expr::EvalResult ArgResult;
+
+ if(!ArgExpr->EvaluateAsInt(ArgResult, Context)) {
+ return Diag(ArgExpr->getExprLoc(), diag::err_typecheck_expect_int)
+ << ArgExpr->getType();
+ }
+ int ord = ArgResult.Val.getInt().getZExtValue();
+
+ // Check validity of memory ordering as per C11 / C++11's memory model.
+ switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {
+ case llvm::AtomicOrderingCABI::acquire:
+ case llvm::AtomicOrderingCABI::release:
+ case llvm::AtomicOrderingCABI::acq_rel:
+ case llvm::AtomicOrderingCABI::seq_cst:
+ break;
+ default: {
+ return Diag(ArgExpr->getBeginLoc(),
+ diag::warn_atomic_op_has_invalid_memory_order)
+ << ArgExpr->getSourceRange();
+ }
+ }
+ } break;
+ }
+ return false;
+}
+
bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *TheCall) {
if (BuiltinID == SystemZ::BI__builtin_tabort) {
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -13618,7 +13618,7 @@
return Builder.CreateCall(F, { Src0, Src1, Src2 });
}
- case Builtin::BI__builtin_memory_fence: {
+ case AMDGPU::BI__builtin_amdgcn_fence: {
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
Value *Order = EmitScalarExpr(E->getArg(0));
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -11896,6 +11896,7 @@
bool CheckX86BuiltinGatherScatterScale(unsigned BuiltinID, CallExpr *TheCall);
bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool SemaBuiltinVAStart(unsigned BuiltinID, CallExpr *TheCall);
bool SemaBuiltinVAStartARMMicrosoft(CallExpr *Call);
Index: clang/include/clang/Basic/BuiltinsAMDGPU.def
===================================================================
--- clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -53,6 +53,7 @@
BUILTIN(__builtin_amdgcn_ds_gws_sema_v, "vUi", "n")
BUILTIN(__builtin_amdgcn_ds_gws_sema_br, "vUiUi", "n")
BUILTIN(__builtin_amdgcn_ds_gws_sema_p, "vUi", "n")
+BUILTIN(__builtin_amdgcn_fence, "vUicC*", "n")
// FIXME: Need to disallow constant address space.
BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n")
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -785,11 +785,6 @@
BUILTIN(__sync_fetch_and_umin, "UiUiD*Ui", "n")
BUILTIN(__sync_fetch_and_umax, "UiUiD*Ui", "n")
-// clang builtin to expose llvm fence instruction
-// First argument : uint in range [2, 5] i.e. [acquire, seq_cst]
-// Second argument : target specific sync scope string
-BUILTIN(__builtin_memory_fence, "vUicC*", "n")
-
// Random libc builtins.
BUILTIN(__builtin_abort, "v", "Fnr")
BUILTIN(__builtin_index, "c*cC*i", "Fn")
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -2455,6 +2455,63 @@
and ``__OPENCL_MEMORY_SCOPE_SUB_GROUP`` are provided, with values
corresponding to the enumerators of OpenCL's ``memory_scope`` enumeration.)
+AMDGCN specific builtins
+-------------------------
+
+``__builtin_amdgcn_fence``
+--------------------------
+
+``__builtin_amdgcn_fence`` allows using the LLVM `fence instruction <https://llvm.org/docs/LangRef.html#fence-instruction>`_
+from clang. It takes a C++11-compatible memory ordering and an AMDGCN-specific
+sync scope as arguments, and generates a fence instruction in the IR.
+
+**Syntax**:
+
+.. code-block:: c++
+
+ __builtin_amdgcn_fence(unsigned int memory_ordering, const char *sync_scope)
+
+**Example of use**:
+
+.. code-block:: c++
+
+ void my_fence(int i) {
+ i++;
+ __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
+ i--;
+ __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+ }
+
+**Description**:
+
+The first argument of the ``__builtin_amdgcn_fence()`` builtin is one of the
+memory-ordering specifiers ``__ATOMIC_ACQUIRE``, ``__ATOMIC_RELEASE``,
+``__ATOMIC_ACQ_REL``, or ``__ATOMIC_SEQ_CST`` following C++11 memory model
+semantics. The equivalent enum values of these memory orderings can also be
+specified. The builtin maps these C++ memory orderings to the corresponding
+LLVM atomic memory orderings for the fence instruction using the LLVM atomic C
+ABI, as given in the table below. The second argument is an AMDGCN-specific
+synchronization scope defined as a string. It can take any of the sync scopes
+defined for `AMDHSA LLVM Sync Scopes <https://llvm.org/docs/AMDGPUUsage.html#memory-scopes>`_.
+This builtin transparently passes the second argument to the fence instruction
+and relies on the AMDGCN implementation for the validity check.
+
++------------------------------+--------------------------------+
+| Input in clang | Output in IR |
+| (C++11 Memory-ordering) | (LLVM Atomic Memory-ordering) |
++======================+=======+========================+=======+
+| Enum | Value | Enum | Value |
++----------------------+-------+------------------------+-------+
+| ``__ATOMIC_ACQUIRE`` | 2 | Acquire | 4 |
++----------------------+-------+------------------------+-------+
+| ``__ATOMIC_RELEASE`` | 3 | Release | 5 |
++----------------------+-------+------------------------+-------+
+| ``__ATOMIC_ACQ_REL`` | 4 | AcquireRelease | 6 |
++----------------------+-------+------------------------+-------+
+| ``__ATOMIC_SEQ_CST`` | 5 | SequentiallyConsistent | 7 |
++----------------------+-------+------------------------+-------+
+
+
Low-level ARM exclusive memory builtins
---------------------------------------
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits