skc7 created this revision.
Herald added subscribers: jdoerfert, hiraditya.
Herald added a reviewer: aaron.ballman.
Herald added a project: All.
skc7 requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.
This change introduces shuffle as function attribute in clang and llvm IR. It
is used to identify __shfl_sync like cross-lane APIs [allows exchange of
variable across all active threads]. At clang codegen, noundef attribute is
skipped to arguments and return types for functions with shuffle attribute.
Shuffle attribute has been added as per suggestions/comments from review:
D124158 <https://reviews.llvm.org/D124158>
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D125378
Files:
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/lib/CodeGen/CGCall.cpp
clang/lib/Headers/__clang_cuda_intrinsics.h
clang/lib/Sema/SemaDeclAttr.cpp
clang/test/CodeGenHIP/shuffle-attr-verify.hip
clang/test/CodeGenHIP/shuffle-noundef-attr.hip
clang/test/Misc/pragma-attribute-supported-attributes-list.test
llvm/include/llvm/Bitcode/LLVMBitCodes.h
llvm/include/llvm/IR/Attributes.td
llvm/include/llvm/IR/Function.h
llvm/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/lib/IR/Attributes.cpp
llvm/lib/Transforms/Utils/CodeExtractor.cpp
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -960,6 +960,7 @@
case Attribute::NoCfCheck:
case Attribute::MustProgress:
case Attribute::NoProfile:
+ case Attribute::Shuffle:
break;
// These attributes cannot be applied to functions.
case Attribute::Alignment:
Index: llvm/lib/IR/Attributes.cpp
===================================================================
--- llvm/lib/IR/Attributes.cpp
+++ llvm/lib/IR/Attributes.cpp
@@ -1804,7 +1804,8 @@
.addAttribute(Attribute::StructRet)
.addAttribute(Attribute::ByRef)
.addAttribute(Attribute::ElementType)
- .addAttribute(Attribute::AllocatedPointer);
+ .addAttribute(Attribute::AllocatedPointer)
+ .addAttribute(Attribute::Shuffle);
}
// Attributes that only apply to pointers or vectors of pointers.
Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -778,6 +778,8 @@
case Attribute::EmptyKey:
case Attribute::TombstoneKey:
llvm_unreachable("Trying to encode EmptyKey/TombstoneKey");
+ case Attribute::Shuffle:
+ return bitc::ATTR_KIND_SHUFFLE;
}
llvm_unreachable("Trying to encode unknown attribute");
Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1632,6 +1632,8 @@
return Attribute::MustProgress;
case bitc::ATTR_KIND_HOT:
return Attribute::Hot;
+ case bitc::ATTR_KIND_SHUFFLE:
+ return Attribute::Shuffle;
}
}
Index: llvm/include/llvm/IR/Function.h
===================================================================
--- llvm/include/llvm/IR/Function.h
+++ llvm/include/llvm/IR/Function.h
@@ -626,6 +626,12 @@
return AttributeSets.getUWTableKind();
}
+ /// Determine if the function is __shfl_sync like.
+ bool isShuffle() const {
+ return hasFnAttribute(Attribute::Shuffle);
+ }
+ void setShuffle() { addFnAttr(Attribute::Shuffle); }
+
/// True if the ABI mandates (or the user requested) that this
/// function be in a unwind table.
bool hasUWTable() const {
Index: llvm/include/llvm/IR/Attributes.td
===================================================================
--- llvm/include/llvm/IR/Attributes.td
+++ llvm/include/llvm/IR/Attributes.td
@@ -300,6 +300,9 @@
/// Function is required to make Forward Progress.
def MustProgress : EnumAttr<"mustprogress", [FnAttr]>;
+/// Function is a __shfl_sync like API.
+def Shuffle : EnumAttr<"shuffle", [FnAttr]>;
+
/// Target-independent string attributes.
def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">;
def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">;
Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -684,6 +684,7 @@
ATTR_KIND_NO_SANITIZE_BOUNDS = 79,
ATTR_KIND_ALLOC_ALIGN = 80,
ATTR_KIND_ALLOCATED_POINTER = 81,
+ ATTR_KIND_SHUFFLE = 82,
};
enum ComdatSelectionKindCodes {
Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===================================================================
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -161,6 +161,7 @@
// CHECK-NEXT: ScopedLockable (SubjectMatchRule_record)
// CHECK-NEXT: Section (SubjectMatchRule_function, SubjectMatchRule_variable_is_global, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property)
// CHECK-NEXT: SetTypestate (SubjectMatchRule_function_is_member)
+// CHECK-NEXT: Shuffle (SubjectMatchRule_function)
// CHECK-NEXT: SpeculativeLoadHardening (SubjectMatchRule_function, SubjectMatchRule_objc_method)
// CHECK-NEXT: StandaloneDebug (SubjectMatchRule_record)
// CHECK-NEXT: SwiftAsync (SubjectMatchRule_function, SubjectMatchRule_objc_method)
Index: clang/test/CodeGenHIP/shuffle-noundef-attr.hip
===================================================================
--- /dev/null
+++ clang/test/CodeGenHIP/shuffle-noundef-attr.hip
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \
+// RUN: -o - | FileCheck %s
+
+#define __global__ __attribute__((global))
+#define __device__ __attribute__((device))
+#define __shuffle __attribute__((shuffle))
+#define HYPRE_WARP_SIZE 64
+
+static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
+
+__device__ static inline unsigned int __lane_id() {
+ return __builtin_amdgcn_mbcnt_hi(
+ -1, __builtin_amdgcn_mbcnt_lo(-1, 0));
+}
+
+__device__
+inline
+int __shfl(int var, int src_lane, int width = warpSize) {
+ int self = __lane_id();
+ int index = src_lane + (self & ~(width-1));
+ return __builtin_amdgcn_ds_bpermute(index<<2, var);
+}
+
+template <typename T>
+static __device__
+T __shuffle __shfl_sync(unsigned mask, T val, int src_line, int width=HYPRE_WARP_SIZE)
+{
+ return __shfl(val, src_line, width);
+}
+
+template <typename T>
+static __device__
+T __shfl_sync_1(unsigned mask, T val, int src_line, int width=HYPRE_WARP_SIZE)
+{
+ return __shfl(val, src_line, width);
+}
+
+// CHECK-LABEL: @_Z13shufflekernelv(
+// CHECK: call i32 @_ZL11__shfl_syncIiET_jS0_ii(i32 64, i32 %0, i32 0, i32 64)
+// CHECK: call noundef i32 @_ZL13__shfl_sync_1IiET_jS0_ii(i32 noundef 64, i32 noundef %1, i32 noundef 0, i32 noundef 64)
+__global__ void
+shufflekernel()
+{
+ int t;
+ int res, res1;
+ res = __shfl_sync(HYPRE_WARP_SIZE, t, 0);
+ res1 = __shfl_sync_1(HYPRE_WARP_SIZE, t, 0);
+}
\ No newline at end of file
Index: clang/test/CodeGenHIP/shuffle-attr-verify.hip
===================================================================
--- /dev/null
+++ clang/test/CodeGenHIP/shuffle-attr-verify.hip
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \
+// RUN: -o - | FileCheck %s
+
+#define __global__ __attribute__((global))
+#define __device__ __attribute__((device))
+#define __shuffle __attribute__((shuffle))
+#define HYPRE_WARP_SIZE 64
+
+static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
+
+__device__ static inline unsigned int __lane_id() {
+ return __builtin_amdgcn_mbcnt_hi(
+ -1, __builtin_amdgcn_mbcnt_lo(-1, 0));
+}
+
+// CHECK: define linkonce_odr i32 @_Z11__shfl_synciii(i32 %var, i32 %src_lane, i32 %width) #[[attr1:[0-9]+]]
+__device__
+inline
+int __shuffle __shfl_sync(int var, int src_lane, int width = warpSize) {
+ int self = __lane_id();
+ int index = src_lane + (self & ~(width-1));
+ return __builtin_amdgcn_ds_bpermute(index<<2, var);
+}
+
+__global__ void
+shufflekernel()
+{
+ int t;
+ int res, res1;
+ res = __shfl_sync(HYPRE_WARP_SIZE, t, 0);
+}
+
+// CHECK-DAG: attributes #[[attr1]] = { {{[^}]*}}shuffle{{[^}]*}} }
\ No newline at end of file
Index: clang/lib/Sema/SemaDeclAttr.cpp
===================================================================
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -8423,6 +8423,9 @@
case ParsedAttr::AT_AMDGPUNumVGPR:
handleAMDGPUNumVGPRAttr(S, D, AL);
break;
+ case ParsedAttr::AT_Shuffle:
+ handleSimpleAttribute<ShuffleAttr>(S, D, AL);
+ break;
case ParsedAttr::AT_AVRSignal:
handleAVRSignalAttr(S, D, AL);
break;
Index: clang/lib/Headers/__clang_cuda_intrinsics.h
===================================================================
--- clang/lib/Headers/__clang_cuda_intrinsics.h
+++ clang/lib/Headers/__clang_cuda_intrinsics.h
@@ -45,7 +45,7 @@
_Static_assert(sizeof(__val) == sizeof(__Bits)); \
_Static_assert(sizeof(__Bits) == 2 * sizeof(int)); \
__Bits __tmp; \
- memcpy(&__tmp, &__val, sizeof(__val)); \
+ memcpy(&__tmp, &__val, sizeof(__val)); \
__tmp.__a = ::__FnName(__tmp.__a, __offset, __width); \
__tmp.__b = ::__FnName(__tmp.__b, __offset, __width); \
long long __ret; \
@@ -100,27 +100,29 @@
#if CUDA_VERSION >= 9000
#if (!defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300)
+#define __shuffle __attribute__((shuffle))
// __shfl_sync_* variants available in CUDA-9
#pragma push_macro("__MAKE_SYNC_SHUFFLES")
#define __MAKE_SYNC_SHUFFLES(__FnName, __IntIntrinsic, __FloatIntrinsic, \
__Mask, __Type) \
- inline __device__ int __FnName(unsigned int __mask, int __val, \
+ inline __device__ __shuffle int __FnName(unsigned int __mask, int __val, \
__Type __offset, int __width = warpSize) { \
return __IntIntrinsic(__mask, __val, __offset, \
((warpSize - __width) << 8) | (__Mask)); \
} \
- inline __device__ float __FnName(unsigned int __mask, float __val, \
+ inline __device__ __shuffle float __FnName(unsigned int __mask, float __val, \
__Type __offset, int __width = warpSize) { \
return __FloatIntrinsic(__mask, __val, __offset, \
((warpSize - __width) << 8) | (__Mask)); \
} \
- inline __device__ unsigned int __FnName(unsigned int __mask, \
+ inline __device__ __shuffle unsigned int __FnName(unsigned int __mask, \
unsigned int __val, __Type __offset, \
int __width = warpSize) { \
return static_cast<unsigned int>( \
::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
} \
- inline __device__ long long __FnName(unsigned int __mask, long long __val, \
+ inline __device__ __shuffle long long __FnName(unsigned int __mask, \
+ long long __val, \
__Type __offset, \
int __width = warpSize) { \
struct __Bits { \
@@ -136,13 +138,13 @@
memcpy(&__ret, &__tmp, sizeof(__tmp)); \
return __ret; \
} \
- inline __device__ unsigned long long __FnName( \
+ inline __device__ __shuffle unsigned long long __FnName( \
unsigned int __mask, unsigned long long __val, __Type __offset, \
int __width = warpSize) { \
return static_cast<unsigned long long>(::__FnName( \
__mask, static_cast<unsigned long long>(__val), __offset, __width)); \
} \
- inline __device__ long __FnName(unsigned int __mask, long __val, \
+ inline __device__ __shuffle long __FnName(unsigned int __mask, long __val, \
__Type __offset, int __width = warpSize) { \
_Static_assert(sizeof(long) == sizeof(long long) || \
sizeof(long) == sizeof(int)); \
@@ -154,13 +156,14 @@
::__FnName(__mask, static_cast<int>(__val), __offset, __width)); \
} \
} \
- inline __device__ unsigned long __FnName( \
+ inline __device__ __shuffle unsigned long __FnName( \
unsigned int __mask, unsigned long __val, __Type __offset, \
int __width = warpSize) { \
return static_cast<unsigned long>( \
::__FnName(__mask, static_cast<long>(__val), __offset, __width)); \
} \
- inline __device__ double __FnName(unsigned int __mask, double __val, \
+ inline __device__ __shuffle double __FnName(unsigned int __mask, \
+ double __val, \
__Type __offset, int __width = warpSize) { \
long long __tmp; \
_Static_assert(sizeof(__tmp) == sizeof(__val)); \
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -2035,6 +2035,19 @@
return false;
}
+static bool DetermineNoUndefForShuffle(const Decl *TargetDecl) {
+ if (!TargetDecl)
+ return true;
+
+ // Function has shuffle attribute.
+ // Skip adding noundef in this case.
+ if (TargetDecl->hasAttr<ShuffleAttr>()) {
+ return false;
+ }
+
+ return true;
+}
+
/// Construct the IR attribute list of a function or call.
///
/// When adding an attribute, please consider where it should be handled:
@@ -2101,6 +2114,8 @@
FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate);
if (TargetDecl->hasAttr<ConvergentAttr>())
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+ if (TargetDecl->hasAttr<ShuffleAttr>())
+ FuncAttrs.addAttribute(llvm::Attribute::Shuffle);
if (const FunctionDecl *Fn = dyn_cast<FunctionDecl>(TargetDecl)) {
AddAttributesFromFunctionProtoType(
@@ -2298,8 +2313,10 @@
// Determine if the return type could be partially undef
if (CodeGenOpts.EnableNoundefAttrs && HasStrictReturn) {
if (!RetTy->isVoidType() && RetAI.getKind() != ABIArgInfo::Indirect &&
- DetermineNoUndef(RetTy, getTypes(), DL, RetAI))
- RetAttrs.addAttribute(llvm::Attribute::NoUndef);
+ DetermineNoUndef(RetTy, getTypes(), DL, RetAI) &&
+ DetermineNoUndefForShuffle(TargetDecl)) {
+ RetAttrs.addAttribute(llvm::Attribute::NoUndef);
+ }
}
switch (RetAI.getKind()) {
@@ -2431,8 +2448,9 @@
// Decide whether the argument we're handling could be partially undef
if (CodeGenOpts.EnableNoundefAttrs &&
- DetermineNoUndef(ParamType, getTypes(), DL, AI)) {
- Attrs.addAttribute(llvm::Attribute::NoUndef);
+ DetermineNoUndef(ParamType, getTypes(), DL, AI) &&
+ DetermineNoUndefForShuffle(TargetDecl)) {
+ Attrs.addAttribute(llvm::Attribute::NoUndef);
}
// 'restrict' -> 'noalias' is done in EmitFunctionProlog when we
Index: clang/include/clang/Basic/AttrDocs.td
===================================================================
--- clang/include/clang/Basic/AttrDocs.td
+++ clang/include/clang/Basic/AttrDocs.td
@@ -1316,6 +1316,30 @@
}];
}
+def ShuffleDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+The ``shuffle`` attribute can be placed on a function declaration. It indicates
+that the call instructions of a function with this attribute can take undef
+arguments and is still valid.
+
+In languages HIP or CUDA, there are APIs like
+T __shfl_sync(unsigned mask,T var, int srcLane, int width=warpSize);
+etc which permit exchanging of a variable between threads within a warp without
+use of shared memory. These APIs allow variable var to be uninitialised in the program.
+Noundef analysis on such APIs can lead to ambiguous kernel execution.
+So shuffle attribute on a function is used to skip adding noundef attribute to such APIs.
+
+Sample usage:
+.. code-block:: c
+
+ void shufflefunc(void) __attribute__((shuffle));
+ // Setting it as a C++11 attribute is also valid in a C++ program.
+ // void shufflefunc(void) [[clang::shuffle]];
+
+ }];
+}
+
def NoSplitStackDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -1774,6 +1774,13 @@
let SimpleHandler = 1;
}
+def Shuffle : InheritableAttr {
+ let Spellings = [Clang<"shuffle">];
+ let Subjects = SubjectList<[Function]>;
+ let Documentation = [ShuffleDocs];
+ let SimpleHandler = 1;
+}
+
def NoInline : DeclOrStmtAttr {
let Spellings = [GCC<"noinline">, CXX11<"clang", "noinline">,
C2x<"clang", "noinline">, Declspec<"noinline">];
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits