[clang] Reset PostAllocaInsertPt when the AllocaInsertPt is changed in clang::CodeGenFunction (PR #98668)

2024-07-12 Thread Eric Wright via cfe-commits

https://github.com/efwright created 
https://github.com/llvm/llvm-project/pull/98668

Clang's CodeGenFunction tracks two insert points for "allocas": one where the 
alloca instructions are placed, and another (immediately following the allocas) 
where any address space casts are placed. For code outlining, the two helper 
classes (OutlinedRegionBodyRAII and InlinedRegionBodyRAII) both change the 
first insert point, which then causes a desync between the two insert points.

This change nulls out the PostAllocaInsertPt whenever the AllocaInsertPt is 
changed, which will cause those two insert points to sync up again the next 
time the PostAllocaInsertPt is referenced.

>From db22886b15f6bcab77b41846742f07639544bbe7 Mon Sep 17 00:00:00 2001
From: Eric Francis Wright 
Date: Fri, 12 Jul 2024 10:29:11 -0700
Subject: [PATCH] Update PostAllocaInsertPt when AllocaInsertPt is changed

---
 clang/lib/CodeGen/CodeGenFunction.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index cdb5ae6663405..2955949cbacdd 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -428,7 +428,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// we prefer to insert allocas.
   llvm::AssertingVH AllocaInsertPt;
 
-private:
   /// PostAllocaInsertPt - This is a place in the prologue where code can be
   /// inserted that will be dominated by all the static allocas. This helps
   /// achieve two things:
@@ -439,7 +438,6 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// PostAllocaInsertPt will be lazily created when it is *really* required.
   llvm::AssertingVH PostAllocaInsertPt = nullptr;
 
-public:
   /// Return PostAllocaInsertPt. If it is not yet created, then insert it
   /// immediately after AllocaInsertPt.
   llvm::Instruction *getPostAllocaInsertPoint() {
@@ -2004,11 +2002,17 @@ class CodeGenFunction : public CodeGenTypeCache {
 
 OldReturnBlock = CGF.ReturnBlock;
 CGF.ReturnBlock = CGF.getJumpDestInCurrentScope(&RetBB);
+
+CGF.PostAllocaInsertPt = nullptr;
+
   }
 
   ~OutlinedRegionBodyRAII() {
 CGF.AllocaInsertPt = OldAllocaIP;
 CGF.ReturnBlock = OldReturnBlock;
+
+CGF.PostAllocaInsertPt = nullptr;
+
   }
 };
 
@@ -2031,8 +2035,10 @@ class CodeGenFunction : public CodeGenTypeCache {
"Insertion point should be in the entry block of containing "
"function!");
 OldAllocaIP = CGF.AllocaInsertPt;
-if (AllocaIP.isSet())
+if (AllocaIP.isSet()) {
   CGF.AllocaInsertPt = &*AllocaIP.getPoint();
+  CGF.PostAllocaInsertPt = nullptr;
+}
 
 // TODO: Remove the call, after making sure the counter is not used by
 //   the EHStack.
@@ -2042,7 +2048,11 @@ class CodeGenFunction : public CodeGenTypeCache {
 (void)CGF.getJumpDestInCurrentScope(&FiniBB);
   }
 
-  ~InlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; }
+  ~InlinedRegionBodyRAII()
+  {
+CGF.AllocaInsertPt = OldAllocaIP;
+CGF.PostAllocaInsertPt = nullptr;
+  }
 };
   };
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] OpenMP offload 'simd' directive (PR #91261)

2024-05-10 Thread Eric Wright via cfe-commits

efwright wrote:

@jhuber6 @shiltian 
@jdoerfert mentioned you may be interested in taking a look at this.

https://github.com/llvm/llvm-project/pull/91261
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] OpenMP offload 'simd' directive (PR #91261)

2024-10-18 Thread Eric Wright via cfe-commits

https://github.com/efwright updated 
https://github.com/llvm/llvm-project/pull/91261

>From 4b76d56f38baf86f6b65ef7e610ad266ba3d69b1 Mon Sep 17 00:00:00 2001
From: Eric Francis Wright 
Date: Mon, 6 May 2024 12:20:44 -0700
Subject: [PATCH 1/4] OpenMP offload 'simd' directive

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp |   2 +
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp  |   8 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 185 +++--
 clang/lib/CodeGen/CodeGenFunction.cpp |   2 +-
 .../target_teams_generic_loop_codegen.cpp |  18 +-
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h   |  27 +-
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |  12 +
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 655 +-
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   |  13 +-
 offload/DeviceRTL/include/Interface.h |  11 +
 offload/DeviceRTL/include/Mapping.h   |   7 +
 offload/DeviceRTL/src/Kernel.cpp  |   4 +-
 offload/DeviceRTL/src/Mapping.cpp |  34 +
 offload/DeviceRTL/src/Parallelism.cpp |  25 +-
 offload/DeviceRTL/src/Reduction.cpp   |  48 ++
 offload/DeviceRTL/src/State.cpp   |   7 +-
 offload/DeviceRTL/src/Synchronization.cpp |   4 +
 offload/DeviceRTL/src/Workshare.cpp   |  44 ++
 18 files changed, 1023 insertions(+), 83 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 3747b00d4893ad..836253ab1a7d8b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1035,6 +1035,7 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, 
DeclContext *DC,
 
 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
 : CGM(CGM), OMPBuilder(CGM.getModule()) {
+
   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
   llvm::OpenMPIRBuilderConfig Config(
   CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
@@ -1056,6 +1057,7 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
 }
 
 void CGOpenMPRuntime::clear() {
+
   InternalVars.clear();
   // Clean non-target variable declarations possibly used only in debug info.
   for (const auto &Data : EmittedNonTargetVariables) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 35ff75416cb776..16aff085579807 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -262,6 +262,7 @@ class CheckVarsEscapingDeclContext final
bool IsCombinedParallelRegion) {
 if (!S)
   return;
+
 for (const CapturedStmt::Capture &C : S->captures()) {
   if (C.capturesVariable() && !C.capturesVariableByCopy()) {
 const ValueDecl *VD = C.getCapturedVar();
@@ -336,13 +337,15 @@ class CheckVarsEscapingDeclContext final
   return;
 if (!D->hasAssociatedStmt())
   return;
+
 if (const auto *S =
 dyn_cast_or_null(D->getAssociatedStmt())) {
   // Do not analyze directives that do not actually require capturing,
   // like `omp for` or `omp simd` directives.
   llvm::SmallVector CaptureRegions;
   getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind());
-  if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) 
{
+  if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown 
&&
+  D->getDirectiveKind() != OMPD_simd) {
 VisitStmt(S->getCapturedStmt());
 return;
   }
@@ -1661,6 +1664,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
   bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind);
   bool DistributeReduction = 
isOpenMPDistributeDirective(Options.ReductionKind);
   bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind);
+  bool SimdReduction = isOpenMPSimdDirective(Options.ReductionKind);
 
   ASTContext &C = CGM.getContext();
 
@@ -1755,7 +1759,7 @@ void CGOpenMPRuntimeGPU::emitReduction(
 
   CGF.Builder.restoreIP(OMPBuilder.createReductionsGPU(
   OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,
-  DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
+  DistributeReduction, SimdReduction, 
llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
   CGF.getTarget().getGridValue(), 
C.getLangOpts().OpenMPCUDAReductionBufNum,
   RTLoc));
   return;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 71a27d0c6bc1fb..b4e699c1d003b8 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1454,6 +1454,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
 }
 
 const auto *VD = cast(cast(TaskRedRef)->getDecl());
+llvm::dbgs() << "Emitting " << VD->getName() << " " << VD << "\n";
 EmitVarDecl(*VD);
 EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
   /*Volatile=*/false, TaskRedRef->getType());
@@ -1494,7 +1495