saiislam updated this revision to Diff 288072.
saiislam added a comment.
1. Reformarting
2. Comments
3. Reduced scope of specialized PrePostActionTy
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D86097/new/
https://reviews.llvm.org/D86097
Files:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CGOpenMPRuntime.h
clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
clang/test/OpenMP/amdgcn_target_codegen.cpp
Index: clang/test/OpenMP/amdgcn_target_codegen.cpp
===================================================================
--- clang/test/OpenMP/amdgcn_target_codegen.cpp
+++ clang/test/OpenMP/amdgcn_target_codegen.cpp
@@ -8,6 +8,29 @@
#define N 1000
+// CHECK: @"_openmp_kernel_static_glob_rd$ptr" = weak addrspace(3) externally_initialized global i8* undef
+
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_tid_threadsv_l[[LINE1:.+]]_kern_desc = weak constant %struct.__tgt_attribute_struct { i16 2, i16 9, i16 0, i8 1, i8 1, i8 0 }, align 2
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_tid_threadsv_l[[LINE1]]_exec_mode = weak constant i8 1
+
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_tid_threads_simdv_l[[LINE2:.+]]_kern_desc = weak constant %struct.__tgt_attribute_struct { i16 2, i16 9, i16 0, i8 0, i8 1, i8 0 }, align 2
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_tid_threads_simdv_l[[LINE2]]_exec_mode = weak constant i8 0
+
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_max_parallel_levelv_l[[LINE3:.+]]_kern_desc = weak constant %struct.__tgt_attribute_struct { i16 2, i16 9, i16 0, i8 0, i8 1, i8 3 }, align 2
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_max_parallel_levelv_l[[LINE3]]_exec_mode = weak constant i8 0
+
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_spmdv_l[[LINE4:.+]]_wg_size = weak addrspace(1) constant i16 10
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_spmdv_l[[LINE4]]_kern_desc = weak constant %struct.__tgt_attribute_struct { i16 2, i16 9, i16 10, i8 0, i8 1, i8 0 }, align 2
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_spmdv_l[[LINE4]]_exec_mode = weak constant i8 0
+
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_non_spmdv_l[[LINE5:.+]]_wg_size = weak addrspace(1) constant i16 74
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_non_spmdv_l[[LINE5]]_kern_desc = weak constant %struct.__tgt_attribute_struct { i16 2, i16 9, i16 74, i8 1, i8 1, i8 0 }, align 2
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_non_spmdv_l[[LINE5]]_exec_mode = weak constant i8 1
+
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_max_work_group_sizev_l[[LINE6:.+]]_wg_size = weak addrspace(1) constant i16 1024
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_max_work_group_sizev_l[[LINE6]]_kern_desc = weak constant %struct.__tgt_attribute_struct { i16 2, i16 9, i16 1024, i8 1, i8 1, i8 0 }, align 2
+// CHECK: @__omp_offloading_{{.*}}test_amdgcn_target_attributes_max_work_group_sizev_l[[LINE6]]_exec_mode = weak constant i8 1
+
int test_amdgcn_target_tid_threads() {
// CHECK-LABEL: define weak void @{{.*}}test_amdgcn_target_tid_threads
@@ -40,4 +63,65 @@
return arr[0];
}
+int test_amdgcn_target_max_parallel_level() {
+ // CHECK-LABEL: define weak void @{{.*}}test_amdgcn_target_max_parallel_level
+ int arr[N];
+
+#pragma omp target parallel for
+ for (int i = 0; i < N; i++)
+#pragma omp parallel for
+ for (int j = 0; j < N; j++)
+#pragma omp parallel for
+ for (int k = 0; k < N; k++)
+ for (int l = 0; l < N; l++)
+#pragma omp parallel for
+ for (int m = 0; m < N; m++)
+ arr[m] = 0;
+
+ return arr[0];
+}
+
+int test_amdgcn_target_attributes_spmd() {
+ int arr[N];
+
+// CHECK: {{.*}}"amdgpu-flat-work-group-size"="10,10"
+#pragma omp target parallel num_threads(10)
+ for (int i = 0; i < N; i++) {
+ arr[i] = 1;
+ }
+
+ return arr[0];
+}
+
+int test_amdgcn_target_attributes_non_spmd() {
+ int arr[N];
+
+// CHECK: {{.*}}"amdgpu-flat-work-group-size"="74,74"
+#pragma omp target teams thread_limit(10)
+ for (int i = 0; i < N; i++) {
+ arr[i] = 1;
+ }
+
+ return arr[0];
+}
+
+int test_amdgcn_target_attributes_max_work_group_size() {
+ int arr[N];
+
+// CHECK: {{.*}}"amdgpu-flat-work-group-size"="1024,1024"
+#pragma omp target teams thread_limit(1500)
+ for (int i = 0; i < N; i++) {
+ arr[i] = 1;
+ }
+
+ return arr[0];
+}
+
#endif
+
+// CHECK: !0 = !{i32 0, i32 [[ARG1:[0-9]+]], i32 [[ARG2:[0-9]+]], !"_Z37test_amdgcn_target_max_parallel_levelv", i32 [[LINE3]], i32 2}
+// CHECK: !1 = !{i32 0, i32 [[ARG1]], i32 [[ARG2]], !"_Z30test_amdgcn_target_tid_threadsv", i32 [[LINE1]], i32 0}
+// CHECK: !2 = !{i32 0, i32 [[ARG1]], i32 [[ARG2]], !"_Z35test_amdgcn_target_tid_threads_simdv", i32 [[LINE2]], i32 1}
+// CHECK: !3 = !{i32 0, i32 [[ARG1]], i32 [[ARG2]], !"_Z38test_amdgcn_target_attributes_non_spmdv", i32 [[LINE5]], i32 4}
+// CHECK: !4 = !{i32 0, i32 [[ARG1]], i32 [[ARG2]], !"_Z34test_amdgcn_target_attributes_spmdv", i32 [[LINE4]], i32 3}
+// CHECK: !5 = !{i32 0, i32 [[ARG1]], i32 [[ARG2]], !"_Z49test_amdgcn_target_attributes_max_work_group_sizev", i32 [[LINE6]], i32 5}
\ No newline at end of file
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -35,6 +35,54 @@
/// Get the maximum number of threads in a block of the GPU.
llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override;
+
+ /// Allocate global variable for TransferMedium
+ llvm::GlobalVariable *allocateTransferMediumGlobal(CodeGenModule &CGM,
+ llvm::ArrayType *Ty,
+ StringRef Name) override;
+
+ /// Allocate global variable for SharedStaticRD
+ llvm::GlobalVariable *
+ allocateSharedStaticRDGlobal(CodeGenModule &CGM,
+ llvm::Type *LLVMStaticTy) override;
+
+ /// Allocate global variable for KernelStaticGlobalized
+ llvm::GlobalVariable *
+ allocateKernelStaticGlobalized(CodeGenModule &CGM) override;
+
+ /// Get target specific PrePostAction
+ PrePostActionTy *getPrePostActionTy() override;
+
+ /// Target independent wrapper over target specific emitSPMDKernel()
+ void emitSPMDKernelWrapper(const OMPExecutableDirective &D,
+ StringRef ParentName, llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Target independent wrapper over target specific emitNonSPMDKernel()
+ void emitNonSPMDKernelWrapper(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// NVPTX specific class for PrePostActionTy
+ class NVPTXPrePostActionTy final : public PrePostActionTy {
+ bool &IsInParallelRegion;
+ bool PrevIsInParallelRegion;
+
+ public:
+ NVPTXPrePostActionTy(bool &IsInParallelRegion)
+ : IsInParallelRegion(IsInParallelRegion) {}
+ void Enter(CodeGenFunction &CGF) override {
+ PrevIsInParallelRegion = IsInParallelRegion;
+ IsInParallelRegion = true;
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ IsInParallelRegion = PrevIsInParallelRegion;
+ }
+ };
};
} // CodeGen namespace.
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -30,6 +30,11 @@
: CGOpenMPRuntimeGPU(CGM) {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP NVPTX can only handle device code.");
+
+ // FIXME: nvlink does not handle weak linkage correctly (object with the
+ // different size are reported as erroneous).
+ // Restore CommonLinkage as soon as nvlink is fixed.
+ StaticRDLinkage = llvm::GlobalValue::InternalLinkage;
}
llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) {
@@ -54,3 +59,56 @@
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ntid_x);
return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
}
+
+llvm::GlobalVariable *CGOpenMPRuntimeNVPTX::allocateTransferMediumGlobal(
+ CodeGenModule &CGM, llvm::ArrayType *Ty, StringRef TransferMediumName) {
+ return new llvm::GlobalVariable(
+ CGM.getModule(), Ty, /*isConstant=*/false,
+ llvm::GlobalVariable::CommonLinkage, llvm::Constant::getNullValue(Ty),
+ TransferMediumName,
+ /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+}
+
+llvm::GlobalVariable *
+CGOpenMPRuntimeNVPTX::allocateSharedStaticRDGlobal(CodeGenModule &CGM,
+ llvm::Type *LLVMStaticTy) {
+ return new llvm::GlobalVariable(
+ CGM.getModule(), LLVMStaticTy,
+ /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
+ llvm::Constant::getNullValue(LLVMStaticTy),
+ "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+}
+
+llvm::GlobalVariable *
+CGOpenMPRuntimeNVPTX::allocateKernelStaticGlobalized(CodeGenModule &CGM) {
+ return new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+}
+
+void CGOpenMPRuntimeNVPTX::emitSPMDKernelWrapper(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+}
+
+void CGOpenMPRuntimeNVPTX::emitNonSPMDKernelWrapper(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+}
+
+PrePostActionTy *CGOpenMPRuntimeNVPTX::getPrePostActionTy() {
+ return new NVPTXPrePostActionTy(IsInParallelRegion);
+}
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -33,6 +33,9 @@
/// Unknown execution mode (orphaned directive).
EM_Unknown,
};
+ /// Linkage type of StaticRD Global variable
+ llvm::GlobalValue::LinkageTypes StaticRDLinkage;
+
private:
/// Parallel outlined function work for workers to execute.
llvm::SmallVector<llvm::Function *, 16> Work;
@@ -99,36 +102,6 @@
uint64_t Size, int32_t Flags,
llvm::GlobalValue::LinkageTypes Linkage) override;
- /// Emit outlined function specialized for the Fork-Join
- /// programming model for applicable target directives on the NVPTX device.
- /// \param D Directive to emit.
- /// \param ParentName Name of the function that encloses the target region.
- /// \param OutlinedFn Outlined function value to be defined by this call.
- /// \param OutlinedFnID Outlined function ID value to be defined by this call.
- /// \param IsOffloadEntry True if the outlined function is an offload entry.
- /// An outlined function may not be an entry if, e.g. the if clause always
- /// evaluates to false.
- void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
- llvm::Function *&OutlinedFn,
- llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
- const RegionCodeGenTy &CodeGen);
-
- /// Emit outlined function specialized for the Single Program
- /// Multiple Data programming model for applicable target directives on the
- /// NVPTX device.
- /// \param D Directive to emit.
- /// \param ParentName Name of the function that encloses the target region.
- /// \param OutlinedFn Outlined function value to be defined by this call.
- /// \param OutlinedFnID Outlined function ID value to be defined by this call.
- /// \param IsOffloadEntry True if the outlined function is an offload entry.
- /// \param CodeGen Object containing the target statements.
- /// An outlined function may not be an entry if, e.g. the if clause always
- /// evaluates to false.
- void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
- llvm::Function *&OutlinedFn,
- llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
- const RegionCodeGenTy &CodeGen);
-
/// Emit outlined function for 'target' directive on the NVPTX
/// device.
/// \param D Directive to emit.
@@ -194,6 +167,36 @@
/// Full/Lightweight runtime mode. Used for better optimization.
unsigned getDefaultLocationReserved2Flags() const override;
+ /// Emit outlined function specialized for the Fork-Join
+ /// programming model for applicable target directives on the NVPTX device.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// An outlined function may not be an entry if, e.g. the if clause always
+ /// evaluates to false.
+ void emitNonSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
+
+ /// Emit outlined function specialized for the Single Program
+ /// Multiple Data programming model for applicable target directives on the
+ /// NVPTX device.
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// \param CodeGen Object containing the target statements.
+ /// An outlined function may not be an entry if, e.g. the if clause always
+ /// evaluates to false.
+ void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen);
+
public:
explicit CGOpenMPRuntimeGPU(CodeGenModule &CGM);
void clear() override;
@@ -211,6 +214,39 @@
/// Get the maximum number of threads in a block of the GPU.
virtual llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) = 0;
+ /// Allocate global variable for TransferMedium
+ virtual llvm::GlobalVariable *
+ allocateTransferMediumGlobal(CodeGenModule &CGM, llvm::ArrayType *Ty,
+ StringRef TransferMediumName) = 0;
+
+ /// Allocate global variable for SharedStaticRD
+ virtual llvm::GlobalVariable *
+ allocateSharedStaticRDGlobal(CodeGenModule &CGM,
+ llvm::Type *LLVMStaticTy) = 0;
+
+ /// Allocate global variable for KernelStaticGlobalized
+ virtual llvm::GlobalVariable *
+ allocateKernelStaticGlobalized(CodeGenModule &CGM) = 0;
+
+ /// Get target specific PrePostAction
+ virtual PrePostActionTy *getPrePostActionTy() = 0;
+
+ /// Target independent wrapper over target specific emitSPMDKernel()
+ virtual void emitSPMDKernelWrapper(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) = 0;
+
+ /// Target independent wrapper over target specific emitNonSPMDKernel()
+ virtual void emitNonSPMDKernelWrapper(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) = 0;
+
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
virtual void emitProcBindClause(CodeGenFunction &CGF,
@@ -376,6 +412,9 @@
Generic,
};
+ /// true if we're definitely in the parallel region.
+ bool IsInParallelRegion = false;
+
/// Cleans up references to the objects in finished function.
///
void functionFinished(CodeGenFunction &CGF) override;
@@ -424,8 +463,6 @@
/// true if currently emitting code for target/teams/distribute region, false
/// - otherwise.
bool IsInTTDRegion = false;
- /// true if we're definitely in the parallel region.
- bool IsInParallelRegion = false;
/// Map between an outlined function and its wrapper.
llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntimeGPU.h"
+#include "CGOpenMPRuntimeAMDGCN.h"
#include "CGOpenMPRuntimeNVPTX.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
@@ -1190,13 +1191,8 @@
// Reserve place for the globalized memory.
GlobalizedRecords.emplace_back();
if (!KernelStaticGlobalized) {
- KernelStaticGlobalized = new llvm::GlobalVariable(
- CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
- llvm::GlobalValue::InternalLinkage,
- llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
- "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
- llvm::GlobalValue::NotThreadLocal,
- CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());
+ KernelStaticGlobalized = RT.allocateKernelStaticGlobalized(CGM);
}
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
@@ -1321,13 +1317,8 @@
// Reserve place for the globalized memory.
GlobalizedRecords.emplace_back();
if (!KernelStaticGlobalized) {
- KernelStaticGlobalized = new llvm::GlobalVariable(
- CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
- llvm::GlobalValue::InternalLinkage,
- llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
- "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
- llvm::GlobalValue::NotThreadLocal,
- CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared));
+ auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());
+ KernelStaticGlobalized = RT.allocateKernelStaticGlobalized(CGM);
}
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
@@ -1888,12 +1879,11 @@
bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
if (Mode)
- emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
- CodeGen);
+ emitSPMDKernelWrapper(D, ParentName, OutlinedFn, OutlinedFnID,
+ IsOffloadEntry, CodeGen);
else
- emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
- CodeGen);
-
+ emitNonSPMDKernelWrapper(D, ParentName, OutlinedFn, OutlinedFnID,
+ IsOffloadEntry, CodeGen);
setPropertyExecutionMode(CGM, OutlinedFn->getName(), Mode);
}
@@ -1963,22 +1953,9 @@
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
// Emit target region as a standalone region.
- class NVPTXPrePostActionTy : public PrePostActionTy {
- bool &IsInParallelRegion;
- bool PrevIsInParallelRegion;
-
- public:
- NVPTXPrePostActionTy(bool &IsInParallelRegion)
- : IsInParallelRegion(IsInParallelRegion) {}
- void Enter(CodeGenFunction &CGF) override {
- PrevIsInParallelRegion = IsInParallelRegion;
- IsInParallelRegion = true;
- }
- void Exit(CodeGenFunction &CGF) override {
- IsInParallelRegion = PrevIsInParallelRegion;
- }
- } Action(IsInParallelRegion);
- CodeGen.setAction(Action);
+ auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());
+ PrePostActionTy *Action = RT.getPrePostActionTy();
+ CodeGen.setAction(*Action);
bool PrevIsInTTDRegion = IsInTTDRegion;
IsInTTDRegion = false;
bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
@@ -3231,6 +3208,7 @@
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
CGBuilderTy &Bld = CGF.Builder;
+ auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
// This array is used as a medium to transfer, one reduce element at a time,
// the data from the first lane of every warp to lanes in the first warp
@@ -3246,16 +3224,11 @@
unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
if (!TransferMedium) {
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
- unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
- TransferMedium = new llvm::GlobalVariable(
- M, Ty, /*isConstant=*/false, llvm::GlobalVariable::CommonLinkage,
- llvm::Constant::getNullValue(Ty), TransferMediumName,
- /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
- SharedAddressSpace);
+ TransferMedium =
+ RT.allocateTransferMediumGlobal(CGM, Ty, TransferMediumName);
CGM.addCompilerUsedGlobal(TransferMedium);
}
- auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
// Get the CUDA thread id of the current OpenMP thread on the GPU.
llvm::Value *ThreadID = RT.getGPUThreadID(CGF);
// nvptx_lane_id = nvptx_id % warpsize
@@ -5104,6 +5077,7 @@
}
void CGOpenMPRuntimeGPU::clear() {
+ auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGM.getOpenMPRuntime());
if (!GlobalizedRecords.empty() &&
!CGM.getLangOpts().OpenMPCUDATargetParallel) {
ASTContext &C = CGM.getContext();
@@ -5152,9 +5126,6 @@
llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0));
}
// Allocate SharedMemorySize buffer for the shared memory.
- // FIXME: nvlink does not handle weak linkage correctly (object with the
- // different size are reported as erroneous).
- // Restore this code as sson as nvlink is fixed.
if (!SharedStaticRD->field_empty()) {
llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize);
QualType SubTy = C.getConstantArrayType(
@@ -5171,13 +5142,7 @@
if (!SharedStaticRD->field_empty()) {
QualType StaticTy = C.getRecordType(SharedStaticRD);
llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy);
- auto *GV = new llvm::GlobalVariable(
- CGM.getModule(), LLVMStaticTy,
- /*isConstant=*/false, llvm::GlobalValue::CommonLinkage,
- llvm::Constant::getNullValue(LLVMStaticTy),
- "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr,
- llvm::GlobalValue::NotThreadLocal,
- C.getTargetAddressSpace(LangAS::cuda_shared));
+ auto *GV = RT.allocateSharedStaticRDGlobal(CGM, LLVMStaticTy);
auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
GV, CGM.VoidPtrTy);
for (const GlobalPtrSizeRecsTy *Rec : SharedRecs) {
@@ -5198,14 +5163,11 @@
C.getConstantArrayType(Arr1Ty, Size2, nullptr, ArrayType::Normal,
/*IndexTypeQuals=*/0);
llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty);
- // FIXME: nvlink does not handle weak linkage correctly (object with the
- // different size are reported as erroneous).
- // Restore CommonLinkage as soon as nvlink is fixed.
- auto *GV = new llvm::GlobalVariable(
- CGM.getModule(), LLVMArr2Ty,
- /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
- llvm::Constant::getNullValue(LLVMArr2Ty),
- "_openmp_static_glob_rd_$_");
+ auto *GV =
+ new llvm::GlobalVariable(CGM.getModule(), LLVMArr2Ty,
+ /*isConstant=*/false, RT.StaticRDLinkage,
+ llvm::Constant::getNullValue(LLVMArr2Ty),
+ "_openmp_static_glob_rd_$_");
auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(
GV, CGM.VoidPtrTy);
for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) {
@@ -5238,7 +5200,7 @@
// Restore CommonLinkage as soon as nvlink is fixed.
auto *GV = new llvm::GlobalVariable(
CGM.getModule(), LLVMReductionsBufferTy,
- /*isConstant=*/false, llvm::GlobalValue::InternalLinkage,
+ /*isConstant=*/false, RT.StaticRDLinkage,
llvm::Constant::getNullValue(LLVMReductionsBufferTy),
"_openmp_teams_reductions_buffer_$_");
KernelTeamsReductionPtr->setInitializer(
Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
@@ -27,6 +27,15 @@
public:
explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM);
+ /// Curret nesting level of parallel region
+ int ParallelLevel = 0;
+
+ /// Maximum nesting level of parallel region
+ int MaxParallelLevel = 0;
+
+ /// Struct to store kernel descriptors
+ QualType TgtAttributeStructQTy;
+
/// Get the GPU warp size.
llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override;
@@ -35,6 +44,73 @@
/// Get the maximum number of threads in a block of the GPU.
llvm::Value *getGPUNumThreads(CodeGenFunction &CGF) override;
+
+ /// Allocate global variable for TransferMedium
+ llvm::GlobalVariable *
+ allocateTransferMediumGlobal(CodeGenModule &CGM, llvm::ArrayType *Ty,
+ StringRef TransferMediumName) override;
+
+ /// Allocate global variable for SharedStaticRD
+ llvm::GlobalVariable *
+ allocateSharedStaticRDGlobal(CodeGenModule &CGM,
+ llvm::Type *LLVMStaticTy) override;
+
+ /// Get global variable KernelStaticGlobalized which is a shared pointer for
+ /// the global memory in the global memory buffer used for the given kernel
+ llvm::GlobalVariable *
+ allocateKernelStaticGlobalized(CodeGenModule &CGM) override;
+
+ /// Get target specific PrePostActionTy
+ PrePostActionTy *getPrePostActionTy() override;
+
+ /// Target independent wrapper over target specific emitSPMDKernel()
+ void emitSPMDKernelWrapper(const OMPExecutableDirective &D,
+ StringRef ParentName, llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID, bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Target independent wrapper over target specific emitNonSPMDKernel()
+ void emitNonSPMDKernelWrapper(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Create a unique global variable to indicate the flat-work-group-size
+ /// for this region. Values are [256..1024].
+ static void setPropertyWorkGroupSize(CodeGenModule &CGM, StringRef Name,
+ unsigned WGSize);
+
+ /// Generate global variables _wg_size, kern_desc, __tgt_attribute_struct.
+ /// Also generate appropriate value of attribute amdgpu-flat-work-group-size
+ void generateMetaData(CodeGenModule &CGM, const OMPExecutableDirective &D,
+ llvm::Function *&OutlinedFn, bool IsGeneric);
+
+ /// Returns __tgt_attribute_struct type.
+ QualType getTgtAttributeStructQTy();
+
+ /// Emit structure descriptor for a kernel
+ void emitStructureKernelDesc(CodeGenModule &CGM, StringRef Name,
+ int16_t WG_Size, int8_t Mode,
+ int8_t HostServices, int8_t MaxParallelLevel);
+
+ /// AMDGCN specific PrePostActionTy implementation
+ class AMDGCNPrePostActionTy final : public PrePostActionTy {
+ int &ParallelLevel;
+ int &MaxParallelLevel;
+
+ public:
+ AMDGCNPrePostActionTy(int &ParallelLevel, int &MaxParallelLevel)
+ : ParallelLevel(ParallelLevel), MaxParallelLevel(MaxParallelLevel) {}
+ void Enter(CodeGenFunction &CGF) override {
+ // Count the number of nested parallels.
+ if (ParallelLevel > MaxParallelLevel)
+ MaxParallelLevel = ParallelLevel;
+ ParallelLevel++;
+ }
+ void Exit(CodeGenFunction &CGF) override { ParallelLevel--; }
+ };
};
} // namespace CodeGen
Index: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntimeAMDGCN.h"
+#include "CGOpenMPRuntime.h"
#include "CGOpenMPRuntimeGPU.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
@@ -30,6 +31,7 @@
: CGOpenMPRuntimeGPU(CGM) {
if (!CGM.getLangOpts().OpenMPIsDevice)
llvm_unreachable("OpenMP AMDGCN can only handle device code.");
+ StaticRDLinkage = llvm::GlobalValue::PrivateLinkage;
}
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
@@ -59,3 +61,176 @@
return Bld.CreateTrunc(
Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty);
}
+
+llvm::GlobalVariable *CGOpenMPRuntimeAMDGCN::allocateTransferMediumGlobal(
+ CodeGenModule &CGM, llvm::ArrayType *Ty, StringRef TransferMediumName) {
+ return new llvm::GlobalVariable(
+ CGM.getModule(), Ty, /*isConstant=*/false,
+ llvm::GlobalVariable::WeakAnyLinkage, llvm::UndefValue::get(Ty),
+ TransferMediumName,
+ /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared),
+ /*isExternallyInitialized*/ true);
+}
+
+llvm::GlobalVariable *
+CGOpenMPRuntimeAMDGCN::allocateSharedStaticRDGlobal(CodeGenModule &CGM,
+ llvm::Type *LLVMStaticTy) {
+ return new llvm::GlobalVariable(
+ CGM.getModule(), LLVMStaticTy,
+ /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage,
+ llvm::UndefValue::get(LLVMStaticTy), "_openmp_shared_static_glob_rd_$_",
+ /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared),
+ /*isExternallyInitialized*/ true);
+}
+
+llvm::GlobalVariable *
+CGOpenMPRuntimeAMDGCN::allocateKernelStaticGlobalized(CodeGenModule &CGM) {
+ return new llvm::GlobalVariable(
+ CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false,
+ llvm::GlobalValue::WeakAnyLinkage, llvm::UndefValue::get(CGM.VoidPtrTy),
+ "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr,
+ llvm::GlobalValue::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared),
+ /*isExternallyInitialized*/ true);
+}
+
+void CGOpenMPRuntimeAMDGCN::setPropertyWorkGroupSize(CodeGenModule &CGM,
+ StringRef Name,
+ unsigned WGSize) {
+ auto *GVMode = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true,
+ llvm::GlobalValue::WeakAnyLinkage,
+ llvm::ConstantInt::get(CGM.Int16Ty, WGSize), Name + Twine("_wg_size"),
+ /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal,
+ CGM.getContext().getTargetAddressSpace(LangAS::cuda_device),
+ /*isExternallyInitialized*/ false);
+ CGM.addCompilerUsedGlobal(GVMode);
+}
+
+void CGOpenMPRuntimeAMDGCN::generateMetaData(CodeGenModule &CGM,
+ const OMPExecutableDirective &D,
+ llvm::Function *&OutlinedFn,
+ bool IsGeneric) {
+ int FlatAttr = 0;
+ bool FlatAttrEmitted = false;
+ unsigned DefaultWorkGroupSz =
+ CGM.getTarget().getGridValue(llvm::omp::GVIDX::GV_Default_WG_Size);
+
+ if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
+ isOpenMPParallelDirective(D.getDirectiveKind())) {
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
+ const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
+ unsigned MaxWorkGroupSz =
+ CGM.getTarget().getGridValue(llvm::omp::GVIDX::GV_Max_WG_Size);
+ unsigned CompileTimeThreadLimit = 0;
+ // Only one of thread_limit or num_threads is used, cant do it for both
+ if (ThreadLimitClause && !NumThreadsClause) {
+ Expr *ThreadLimitExpr = ThreadLimitClause->getThreadLimit();
+ clang::Expr::EvalResult Result;
+ if (ThreadLimitExpr->EvaluateAsInt(Result, CGM.getContext()))
+ CompileTimeThreadLimit = Result.Val.getInt().getExtValue();
+ } else if (!ThreadLimitClause && NumThreadsClause) {
+ Expr *NumThreadsExpr = NumThreadsClause->getNumThreads();
+ clang::Expr::EvalResult Result;
+ if (NumThreadsExpr->EvaluateAsInt(Result, CGM.getContext()))
+ CompileTimeThreadLimit = Result.Val.getInt().getExtValue();
+ }
+
+ // Add kernel metadata if ThreadLimit Clause is compile time constant > 0
+ if (CompileTimeThreadLimit > 0) {
+ // Add the WarpSize to generic, to reflect what runtime dispatch does.
+ if (IsGeneric)
+ CompileTimeThreadLimit +=
+ CGM.getTarget().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+ if (CompileTimeThreadLimit > MaxWorkGroupSz)
+ CompileTimeThreadLimit = MaxWorkGroupSz;
+ std::string AttrVal = llvm::utostr(CompileTimeThreadLimit);
+ FlatAttr = CompileTimeThreadLimit;
+ OutlinedFn->addFnAttr("amdgpu-flat-work-group-size",
+ AttrVal + "," + AttrVal);
+ setPropertyWorkGroupSize(CGM, OutlinedFn->getName(),
+ CompileTimeThreadLimit);
+ }
+ FlatAttrEmitted = true;
+ } // end of amdgcn teams or parallel directive
+
+ // emit amdgpu-flat-work-group-size if not emitted already.
+ if (!FlatAttrEmitted) {
+ std::string FlatAttrVal = llvm::utostr(DefaultWorkGroupSz);
+ OutlinedFn->addFnAttr("amdgpu-flat-work-group-size",
+ FlatAttrVal + "," + FlatAttrVal);
+ }
+ // Emit a kernel descriptor for runtime.
+ StringRef KernDescName = OutlinedFn->getName();
+ CGOpenMPRuntimeAMDGCN::emitStructureKernelDesc(CGM, KernDescName, FlatAttr,
+ IsGeneric,
+ 1, // Uses HostServices
+ MaxParallelLevel);
+ // Reset it to zero for any subsequent kernel
+ MaxParallelLevel = 0;
+}
+
+void CGOpenMPRuntimeAMDGCN::emitSPMDKernelWrapper(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+ generateMetaData(CGM, D, OutlinedFn, /*SPMD*/ false);
+}
+
+void CGOpenMPRuntimeAMDGCN::emitNonSPMDKernelWrapper(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
+ CodeGen);
+ generateMetaData(CGM, D, OutlinedFn, /*Generic*/ true);
+}
+
+PrePostActionTy *CGOpenMPRuntimeAMDGCN::getPrePostActionTy() {
+ return new AMDGCNPrePostActionTy(ParallelLevel, MaxParallelLevel);
+}
+
+/// Emit structure descriptor for a kernel
+void CGOpenMPRuntimeAMDGCN::emitStructureKernelDesc(
+ CodeGenModule &CGM, StringRef Name, int16_t WG_Size, int8_t Mode,
+ int8_t HostServices, int8_t MaxParallelLevel) {
+
+ // Create all device images
+ llvm::Constant *AttrData[] = {
+ llvm::ConstantInt::get(CGM.Int16Ty, 2), // Version
+ llvm::ConstantInt::get(CGM.Int16Ty, 9), // Size in bytes
+ llvm::ConstantInt::get(CGM.Int16Ty, WG_Size),
+ llvm::ConstantInt::get(CGM.Int8Ty, Mode), // 0 => SPMD, 1 => GENERIC
+ llvm::ConstantInt::get(CGM.Int8Ty, HostServices), // 1 => use HostServices
+ llvm::ConstantInt::get(CGM.Int8Ty, MaxParallelLevel)}; // number of nests
+
+ llvm::GlobalVariable *AttrImages = createGlobalStruct(
+ CGM, getTgtAttributeStructQTy(), isDefaultLocationConstant(), AttrData,
+ Name + Twine("_kern_desc"), llvm::GlobalValue::WeakAnyLinkage);
+ CGM.addCompilerUsedGlobal(AttrImages);
+}
+
+// Create Tgt Attribute Struct type.
+QualType CGOpenMPRuntimeAMDGCN::getTgtAttributeStructQTy() {
+ ASTContext &C = CGM.getContext();
+ QualType KmpInt8Ty = C.getIntTypeForBitwidth(/*Width=*/8, /*Signed=*/1);
+ QualType KmpInt16Ty = C.getIntTypeForBitwidth(/*Width=*/16, /*Signed=*/1);
+ if (TgtAttributeStructQTy.isNull()) {
+ RecordDecl *RD = C.buildImplicitRecord("__tgt_attribute_struct");
+ RD->startDefinition();
+ clang::CodeGen::addFieldToRecordDecl(C, RD, KmpInt16Ty); // Version
+ clang::CodeGen::addFieldToRecordDecl(C, RD,
+ KmpInt16Ty); // Struct Size in bytes.
+ clang::CodeGen::addFieldToRecordDecl(C, RD, KmpInt16Ty); // WG_size
+ clang::CodeGen::addFieldToRecordDecl(C, RD, KmpInt8Ty); // Mode
+ clang::CodeGen::addFieldToRecordDecl(C, RD, KmpInt8Ty); // HostServices
+ clang::CodeGen::addFieldToRecordDecl(C, RD, KmpInt8Ty); // MaxParallelLevel
+ RD->completeDefinition();
+ TgtAttributeStructQTy = C.getRecordType(RD);
+ }
+ return TgtAttributeStructQTy;
+}
Index: clang/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.h
+++ clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -495,6 +495,7 @@
QualType TgtOffloadEntryQTy;
/// Entity that registers the offloading constants that were emitted so
/// far.
+
class OffloadEntriesInfoManagerTy {
CodeGenModule &CGM;
@@ -681,7 +682,6 @@
OffloadDeviceGlobalVarEntryInfoActTy;
void actOnDeviceGlobalVarEntriesInfo(
const OffloadDeviceGlobalVarEntryInfoActTy &Action);
-
private:
// Storage for target region entries kind. The storage is to be indexed by
// file ID, device ID, parent function name and line number.
@@ -2476,6 +2476,18 @@
}
};
+/// Declaration of functions visible in clang::CodeGen namespace, to
+/// be used by target specific specializations of CGOpenMPRuntimeGPU.
+
+FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
+ QualType FieldTy);
+
+template <class... As>
+llvm::GlobalVariable *createGlobalStruct(CodeGenModule &CGM, QualType Ty,
+ bool IsConstant,
+ ArrayRef<llvm::Constant *> Data,
+ const Twine &Name, As &&... Args);
+
} // namespace CodeGen
} // namespace clang
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1048,17 +1048,6 @@
AlignmentSource::Decl);
}
-static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
- QualType FieldTy) {
- auto *Field = FieldDecl::Create(
- C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
- C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- DC->addDecl(Field);
- return Field;
-}
-
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
StringRef Separator)
: CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
@@ -1352,11 +1341,21 @@
}
}
+FieldDecl *clang::CodeGen::addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
+ QualType FieldTy) {
+ auto *Field = FieldDecl::Create(
+ C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
+ C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ DC->addDecl(Field);
+ return Field;
+}
+
template <class... As>
-static llvm::GlobalVariable *
-createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
- ArrayRef<llvm::Constant *> Data, const Twine &Name,
- As &&... Args) {
+llvm::GlobalVariable *clang::CodeGen::createGlobalStruct(
+ CodeGenModule &CGM, QualType Ty, bool IsConstant,
+ ArrayRef<llvm::Constant *> Data, const Twine &Name, As &&... Args) {
const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
ConstantInitBuilder CIBuilder(CGM);
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits