ABataev updated this revision to Diff 338107.
ABataev added a comment.
Rebase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D99432/new/
https://reviews.llvm.org/D99432
Files:
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
Index: clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -28,16 +28,19 @@
int main(int argc, char **argv) {
int b[10], c[10], d[10];
#pragma omp target teams map(tofrom:a)
+ {
+ double escaped = 0;
#pragma omp distribute parallel for firstprivate(b) lastprivate(c) if(a)
for (int i= 0; i < argc; ++i)
- a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]);
+ a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]) + escaped;
+ }
return 0;
}
// SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
// SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
// SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
-// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 40
+// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 48
// SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
// CHECK-DAG: @__omp_offloading_{{.*}}_main_[[LINE:l.+]]_exec_mode = weak constant i8 0
@@ -47,9 +50,10 @@
// SEQ: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
// SEQ: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
// SEQ: [[GEP:%.+]] = getelementptr inbounds i8, i8* [[PTR]], i{{64|32}} 0
-// PAR: [[GEP:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{32|64}} 40, i16 1)
+// PAR: [[GEP:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{32|64}} 48, i16 1)
// CHECK: [[STACK:%.+]] = bitcast i8* [[GEP]] to %struct._globalized_locals_ty*
-// CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0
+// CHECK-DAG: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 1
+// CHECK-DAG: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0
// CHECK-NOT: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]],
// CHECK: call void @__kmpc_for_static_init_4(
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -229,6 +229,7 @@
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
bool AllEscaped = false;
bool IsForCombinedParallelRegion = false;
+ bool IsInSPMDKernel = false;
void markAsEscaped(const ValueDecl *VD) {
// Do not globalize declare target variables.
@@ -242,6 +243,9 @@
// Variables captured by value must be globalized.
if (auto *CSI = CGF.CapturedStmtInfo) {
if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
+ // Do not globalize captured vars in SPMD mode.
+ if (IsInSPMDKernel)
+ return;
// Check if need to capture the variable that was already captured by
// value in the outer region.
if (!IsForCombinedParallelRegion) {
@@ -351,9 +355,10 @@
public:
CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
- ArrayRef<const ValueDecl *> TeamsReductions)
- : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
- }
+ ArrayRef<const ValueDecl *> TeamsReductions,
+ bool IsInSPMDKernel = false)
+ : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()),
+ IsInSPMDKernel(IsInSPMDKernel) {}
virtual ~CheckVarsEscapingDeclContext() = default;
void VisitDeclStmt(const DeclStmt *S) {
if (!S)
@@ -1631,65 +1636,30 @@
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
SourceLocation Loc = D.getBeginLoc();
- const RecordDecl *GlobalizedRD = nullptr;
- llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
- llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
- unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
- // Globalize team reductions variable unconditionally in all modes.
- if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
- getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
- if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) {
- getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
- if (!LastPrivatesReductions.empty()) {
- GlobalizedRD = ::buildRecordForGlobalizedVars(
- CGM.getContext(), llvm::None, LastPrivatesReductions,
- MappedDeclsFields, WarpSize);
- }
- } else if (!LastPrivatesReductions.empty()) {
- assert(!TeamAndReductions.first &&
- "Previous team declaration is not expected.");
- TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
- std::swap(TeamAndReductions.second, LastPrivatesReductions);
- }
+ assert(!TeamAndReductions.first &&
+ "Previous team declaration is not expected.");
+ if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)
+ getDistributeLastprivateVars(CGM.getContext(), D, TeamAndReductions.second);
+ else
+ getTeamsReductionVars(CGM.getContext(), D, TeamAndReductions.second);
+ TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
SourceLocation &Loc;
- const RecordDecl *GlobalizedRD;
- llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
- &MappedDeclsFields;
public:
- NVPTXPrePostActionTy(
- SourceLocation &Loc, const RecordDecl *GlobalizedRD,
- llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
- &MappedDeclsFields)
- : Loc(Loc), GlobalizedRD(GlobalizedRD),
- MappedDeclsFields(MappedDeclsFields) {}
+ NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
void Enter(CodeGenFunction &CGF) override {
auto &Rt =
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
- if (GlobalizedRD) {
- auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
- I->getSecond().GlobalRecord = GlobalizedRD;
- I->getSecond().MappedParams =
- std::make_unique<CodeGenFunction::OMPMapVars>();
- DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
- for (const auto &Pair : MappedDeclsFields) {
- assert(Pair.getFirst()->isCanonicalDecl() &&
- "Expected canonical declaration");
- Data.insert(std::make_pair(Pair.getFirst(),
- MappedVarData(Pair.getSecond(),
- /*IsOnePerTeam=*/true)));
- }
- }
Rt.emitGenericVarsProlog(CGF, Loc);
}
void Exit(CodeGenFunction &CGF) override {
static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
.emitGenericVarsEpilog(CGF);
}
- } Action(Loc, GlobalizedRD, MappedDeclsFields);
+ } Action(Loc);
CodeGen.setAction(Action);
llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
@@ -4311,7 +4281,8 @@
void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
const Decl *D) {
- if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+ if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
+ getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
return;
assert(D && "Expected function or captured|block decl.");
@@ -4328,13 +4299,16 @@
} else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
Body = CD->getBody();
NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
- if (NeedToDelayGlobalization &&
+ if (NeedToDelayGlobalization && !IsInTTDRegion &&
getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)
return;
}
if (!Body)
return;
- CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
+ bool IsInSPMDKernel = NeedToDelayGlobalization &&
+ getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD;
+ CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second,
+ IsInSPMDKernel);
VarChecker.Visit(Body);
const RecordDecl *GlobalizedVarsRecord =
VarChecker.getGlobalizedRecord(IsInTTDRegion);
@@ -4359,6 +4333,8 @@
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
}
+ if (IsInSPMDKernel)
+ return;
if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
VarChecker.Visit(Body);
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits