[clang] Fix for OpenMP offloading compilation error with GNU++20 option when using complex header (PR #115306)

2024-11-11 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/115306
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Fix for codegen Crash in Clang when using locator omp_all_memory with depobj construct (PR #114221)

2024-11-11 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/114221
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Fix for OpenMP offloading compilation error with GNU++20 option when using complex header (PR #115306)

2024-11-10 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/115306

>From 792ccf7ef364f3119b920121dd68285eb4ca1e41 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Thu, 7 Nov 2024 05:54:48 -0600
Subject: [PATCH] Fix for OpenMP offloading compilation with GNU++20 option
 when using complex header

---
 clang/lib/Headers/openmp_wrappers/complex_cmath.h | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Headers/openmp_wrappers/complex_cmath.h 
b/clang/lib/Headers/openmp_wrappers/complex_cmath.h
index e3d9aebbbc2436..cee36bde3f522e 100644
--- a/clang/lib/Headers/openmp_wrappers/complex_cmath.h
+++ b/clang/lib/Headers/openmp_wrappers/complex_cmath.h
@@ -64,8 +64,13 @@ template  __DEVICE__ _Tp norm(const 
std::complex<_Tp> &__c) {
 }
 
 // conj
-
-template <class _Tp> std::complex<_Tp> conj(const std::complex<_Tp> &__c) {
+#ifdef _GLIBCXX20_CONSTEXPR
+#define CXX20_CONSTEXPR_DEVICE __DEVICE__
+#else
+#define CXX20_CONSTEXPR_DEVICE
+#endif
+template <class _Tp>
+CXX20_CONSTEXPR_DEVICE std::complex<_Tp> conj(const std::complex<_Tp> &__c) {
   return std::complex<_Tp>(__c.real(), -__c.imag());
 }
 

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Fix for codegen Crash in Clang when using locator omp_all_memory with depobj construct (PR #114221)

2024-11-10 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/114221

>From 4e6d8c1edb73fe08659519d8798cab162875ebc0 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Wed, 30 Oct 2024 07:18:06 -0500
Subject: [PATCH 1/4] Fix for codegen Crash in Clang when using locator
 omp_all_memory with depobj

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp   | 10 ++
 clang/test/OpenMP/depobj_codegen.cpp |  2 ++
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 1c32a675380c7f..5125044b2aa629 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5556,12 +5556,14 @@ void CodeGenFunction::EmitOMPDepobjDirective(const 
OMPDepobjDirective &S) {
   const auto *DO = S.getSingleClause<OMPDepobjClause>();
   LValue DOLVal = EmitLValue(DO->getDepobj());
   if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
-OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
-   DC->getModifier());
-Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
+// Build list and emit dependences
+OMPTaskDataTy Data;
+buildDependences(S, Data);
+for (auto &Dep : Data.Dependences) {
 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
-*this, Dependencies, DC->getBeginLoc());
+*this, Dep, DC->getBeginLoc());
 EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
+}
 return;
   }
   if (const auto *DC = S.getSingleClause()) {
diff --git a/clang/test/OpenMP/depobj_codegen.cpp 
b/clang/test/OpenMP/depobj_codegen.cpp
index 92751ac44b8c78..00cf43f54d6695 100644
--- a/clang/test/OpenMP/depobj_codegen.cpp
+++ b/clang/test/OpenMP/depobj_codegen.cpp
@@ -36,6 +36,8 @@ int main(int argc, char **argv) {
 #pragma omp depobj(b) update(mutexinoutset)
 #pragma omp depobj(a) depend(iterator(char *p = argv[argc]:argv[0]:-1), out: 
p[0])
   (void)tmain(a), tmain(b);
+   omp_depend_t obj;
+#pragma omp depobj(obj) depend(inout: omp_all_memory)
   return 0;
 }
 

>From 30eb82552f431604695ac55e584bb2bfcf6a927d Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 4 Nov 2024 02:13:38 -0600
Subject: [PATCH 2/4] clang-format fix

---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 5125044b2aa629..390516fea38498 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5560,9 +5560,9 @@ void CodeGenFunction::EmitOMPDepobjDirective(const 
OMPDepobjDirective &S) {
 OMPTaskDataTy Data;
 buildDependences(S, Data);
 for (auto &Dep : Data.Dependences) {
-Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
-*this, Dep, DC->getBeginLoc());
-EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
+  Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
+  *this, Dep, DC->getBeginLoc());
+  EmitStoreOfScalar(DepAddr.emitRawPointer(*this), DOLVal);
 }
 return;
   }

>From a35fe7e37fff62ca06a838a91f0c7b1149ccb165 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Thu, 7 Nov 2024 04:30:06 -0600
Subject: [PATCH 3/4] updated with codegen checks

---
 clang/test/OpenMP/depobj_codegen.cpp | 32 +---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/clang/test/OpenMP/depobj_codegen.cpp 
b/clang/test/OpenMP/depobj_codegen.cpp
index 00cf43f54d6695..8c7dce8d734060 100644
--- a/clang/test/OpenMP/depobj_codegen.cpp
+++ b/clang/test/OpenMP/depobj_codegen.cpp
@@ -17,6 +17,15 @@
 typedef void *omp_depend_t;
 
 void foo() {}
+void tmainc(){
+   omp_depend_t obj;
+#pragma omp depobj(obj) depend(inout: omp_all_memory)
+{
+   volatile omp_depend_t temp = obj;
+char* char_ptr = reinterpret_cast<char*>(temp);
+char_ptr[0] = 1;
+}
+}
 
 template 
 T tmain(T argc) {
@@ -36,11 +45,24 @@ int main(int argc, char **argv) {
 #pragma omp depobj(b) update(mutexinoutset)
 #pragma omp depobj(a) depend(iterator(char *p = argv[argc]:argv[0]:-1), out: 
p[0])
   (void)tmain(a), tmain(b);
-   omp_depend_t obj;
-#pragma omp depobj(obj) depend(inout: omp_all_memory)
+ tmainc();
   return 0;
 }
-
+// CHECK-LABEL: tmainc
+// CHECK: [[D_ADDR:%obj]] = alloca ptr,
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
+// CHECK: [[DEP_ADDR_ADDR2:%.+]] = call ptr @__kmpc_alloc(i32 [[GTID]], i64 
48, ptr null)
+// CHECK: [[SZ_DEOOBJ:%.+]] = getelementptr inbounds nuw 
%struct.kmp_depend_info, ptr [[DEP_ADDR_ADDR2]], i{{.+}} 0, i{{.+}} 0
+// CHECK: store i64 1, ptr [[SZ_DEOOBJ]], align 8
+// CHECK: [[DEPOBJ_BASE_ADDR:%.+]] = getelementptr %struct.kmp_depend_info, 
ptr [[DEP_ADDR_ADDR2]], i{{.+}} 1
+// CHECK: [[ADDR_ONE:%.+]] = getelementptr inbounds nuw 
%struct.kmp_depend_info, ptr [[DEPOBJ_BASE_ADDR]], i{{.+}} 0, i{{.+}} 0
+// CHECK: store i64 0, ptr [[ADDR_ONE]], align 8

[clang] [flang] [llvm] seq_cst is allowed in Flush since OpenMP 5.1. (PR #114072)

2024-11-25 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/114072

>From 4b49b221a67bd77db98ca765610f7c1ace0772a0 Mon Sep 17 00:00:00 2001
From: Shashwathi N 
Date: Tue, 29 Oct 2024 09:16:04 -0500
Subject: [PATCH 1/3] Added support for seq_cst clause for flush directive

---
 clang/include/clang/AST/OpenMPClause.h|  4 ++--
 .../clang/Basic/DiagnosticSemaKinds.td|  2 +-
 clang/lib/Sema/SemaOpenMP.cpp |  3 ++-
 clang/test/OpenMP/flush_ast_print.cpp | 18 +++--
 clang/test/OpenMP/flush_codegen.cpp   | 20 ++-
 clang/test/OpenMP/flush_messages.cpp  |  6 ++
 .../Semantics/OpenMP/clause-validity01.f90|  3 +--
 flang/test/Semantics/OpenMP/flush02.f90   |  4 +---
 llvm/include/llvm/Frontend/OpenMP/OMP.td  |  1 +
 9 files changed, 33 insertions(+), 28 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h 
b/clang/include/clang/AST/OpenMPClause.h
index 9cf46f73f6e46d..8a1f16f96ddc27 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -2645,8 +2645,8 @@ class OMPCompareClause final : public OMPClause {
   }
 };
 
-/// This represents 'seq_cst' clause in the '#pragma omp atomic'
-/// directive.
+/// This represents 'seq_cst' clause in the '#pragma omp atomic|flush'
+/// directives.
 ///
 /// \code
 /// #pragma omp atomic seq_cst
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 34ff49d7238a7f..6ce969988491c1 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11360,7 +11360,7 @@ def err_omp_atomic_weak_no_equality : Error<"expected 
'==' operator for 'weak' c
 def err_omp_atomic_several_clauses : Error<
   "directive '#pragma omp atomic' cannot contain more than one 'read', 
'write', 'update', 'capture', or 'compare' clause">;
 def err_omp_several_mem_order_clauses : Error<
-  "directive '#pragma omp %0' cannot contain more than one %select{'seq_cst', 
'relaxed', |}1'acq_rel', 'acquire' or 'release' clause">;
+  "directive '#pragma omp %0' cannot contain more than one 'seq_cst',%select{ 
'relaxed',|}1 'acq_rel', 'acquire' or 'release' clause">;
 def err_omp_atomic_incompatible_mem_order_clause : Error<
   "directive '#pragma omp atomic%select{ %0|}1' cannot be used with '%2' 
clause">;
 def note_omp_previous_mem_order_clause : Note<
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 79e1536288e602..d794d572d07ead 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -11105,7 +11105,8 @@ StmtResult 
SemaOpenMP::ActOnOpenMPFlushDirective(ArrayRef Clauses,
   for (const OMPClause *C : Clauses) {
 if (C->getClauseKind() == OMPC_acq_rel ||
 C->getClauseKind() == OMPC_acquire ||
-C->getClauseKind() == OMPC_release) {
+C->getClauseKind() == OMPC_release ||
+C->getClauseKind() == OMPC_seq_cst /*OpenMP 5.1*/) {
   if (MemOrderKind != OMPC_unknown) {
 Diag(C->getBeginLoc(), diag::err_omp_several_mem_order_clauses)
 << getOpenMPDirectiveName(OMPD_flush) << 1
diff --git a/clang/test/OpenMP/flush_ast_print.cpp 
b/clang/test/OpenMP/flush_ast_print.cpp
index 9578ada020227a..768282422032fd 100644
--- a/clang/test/OpenMP/flush_ast_print.cpp
+++ b/clang/test/OpenMP/flush_ast_print.cpp
@@ -1,10 +1,10 @@
-// RUN: %clang_cc1 -verify -fopenmp -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -std=c++11 -include-pch %t -verify %s -ast-print | 
FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -ast-print %s | 
FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -x c++ -std=c++11 -emit-pch -o 
%t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=51 -std=c++11 -include-pch %t  
-verify %s -ast-print | FileCheck %s
 
-// RUN: %clang_cc1 -verify -fopenmp-simd -ast-print %s | FileCheck %s
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -std=c++11 -include-pch %t -verify %s 
-ast-print | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -ast-print %s | 
FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c++ -std=c++11 
-emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -std=c++11 -include-pch 
%t -verify %s -ast-print | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
@@ -19,6 +19,7 @@ T tmain(T argc) {
 #pragma omp flush acq_rel
 #pragma omp flush acquire
 #pragma omp flush release
+#pragma omp flush seq_cst
 #pragma omp flush(a)
   return a + argc;
 }
@@ -27,18 +28,21 @@ T tmain(T argc) {
 // CHECK-NEXT: #pragma omp flush acq_rel{{$}}
 // CHECK-NEXT: #pragma omp flush acquire{{$}}
 // CHECK-NEXT: #pragma omp flush release{{$}}
+// CHECK-NEXT: #pragma omp flush seq_cst{{$}}
 // CHECK

[clang] [flang] [llvm] seq_cst is allowed in Flush since OpenMP 5.1. (PR #114072)

2024-11-25 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/114072
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-28 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/117196
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-27 Thread CHANDRA GHALE via cfe-commits


@@ -0,0 +1,256 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ 
-emit-llvm %s -o - | FileCheck %s

chandraghale wrote:

Updated with auto-generated checks.

https://github.com/llvm/llvm-project/pull/117196
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-27 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/117196

>From d19f41d39237b3d4fd2923f037743ddd495d5c9f Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Thu, 21 Nov 2024 11:15:11 -0600
Subject: [PATCH 1/5] Initial Codegen changes for strict modifier with
 grainsize/num_tasks of taskloop construct

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp |  28 ++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |   1 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp|   2 +
 .../taskloop_strictmodifier_codegen.cpp   | 256 ++
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |   3 +
 5 files changed, 290 insertions(+)
 create mode 100644 clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cc389974e04081..361550d2f102b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,6 +4666,33 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+  if( Data.HasModifier ){
+llvm::Value *TaskArgs[] = {
+  UpLoc,
+  ThreadID,
+  Result.NewTask,
+  IfVal,
+  LBLVal.getPointer(CGF),
+  UBLVal.getPointer(CGF),
+  CGF.EmitLoadOfScalar(StLVal, Loc),
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+  Data.Schedule.getPointer()
+  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+  /*isSigned=*/false)
+  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+  llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
+  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+   : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+  TaskArgs);
+   } else {
   llvm::Value *TaskArgs[] = {
   UpLoc,
   ThreadID,
@@ -4690,6 +4717,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
   CGM.getModule(), OMPRTL___kmpc_taskloop),
   TaskArgs);
+  }
 }
 
 /// Emit reduction operation for each element of array (required for
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h 
b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5e7715743afb58..56d502d92806eb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  bool HasModifier = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..88c862d2975174 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,10 +7831,12 @@ void 
CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
 // grainsize clause
 Data.Schedule.setInt(/*IntVal=*/false);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true 
: false;
   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
 // num_tasks clause
 Data.Schedule.setInt(/*IntVal=*/true);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true 
: false;
   }
 
   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp 
b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
new file mode 100644
index 00..d84ff181f66156
--- /dev/null
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -0,0 +1,256 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ 
-emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o 
%t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch 
%t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ 
-emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// 

[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-26 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/117196

>From d19f41d39237b3d4fd2923f037743ddd495d5c9f Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Thu, 21 Nov 2024 11:15:11 -0600
Subject: [PATCH 1/4] Initial Codegen changes for strict modifier with
 grainsize/num_tasks of taskloop construct

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp |  28 ++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |   1 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp|   2 +
 .../taskloop_strictmodifier_codegen.cpp   | 256 ++
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |   3 +
 5 files changed, 290 insertions(+)
 create mode 100644 clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cc389974e04081..361550d2f102b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,6 +4666,33 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+  if( Data.HasModifier ){
+llvm::Value *TaskArgs[] = {
+  UpLoc,
+  ThreadID,
+  Result.NewTask,
+  IfVal,
+  LBLVal.getPointer(CGF),
+  UBLVal.getPointer(CGF),
+  CGF.EmitLoadOfScalar(StLVal, Loc),
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+  Data.Schedule.getPointer()
+  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+  /*isSigned=*/false)
+  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+  llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
+  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+   : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+  TaskArgs);
+   } else {
   llvm::Value *TaskArgs[] = {
   UpLoc,
   ThreadID,
@@ -4690,6 +4717,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
   CGM.getModule(), OMPRTL___kmpc_taskloop),
   TaskArgs);
+  }
 }
 
 /// Emit reduction operation for each element of array (required for
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h 
b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5e7715743afb58..56d502d92806eb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  bool HasModifier = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..88c862d2975174 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,10 +7831,12 @@ void 
CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
 // grainsize clause
 Data.Schedule.setInt(/*IntVal=*/false);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true 
: false;
   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
 // num_tasks clause
 Data.Schedule.setInt(/*IntVal=*/true);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true 
: false;
   }
 
   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp 
b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
new file mode 100644
index 00..d84ff181f66156
--- /dev/null
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -0,0 +1,256 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ 
-emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o 
%t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch 
%t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ 
-emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// 

[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-26 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/117196

>From d19f41d39237b3d4fd2923f037743ddd495d5c9f Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Thu, 21 Nov 2024 11:15:11 -0600
Subject: [PATCH 1/4] Initial Codegen changes for strict modifier with
 grainsize/num_tasks of taskloop construct

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp |  28 ++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |   1 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp|   2 +
 .../taskloop_strictmodifier_codegen.cpp   | 256 ++
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |   3 +
 5 files changed, 290 insertions(+)
 create mode 100644 clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cc389974e04081..361550d2f102b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,6 +4666,33 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+  if( Data.HasModifier ){
+llvm::Value *TaskArgs[] = {
+  UpLoc,
+  ThreadID,
+  Result.NewTask,
+  IfVal,
+  LBLVal.getPointer(CGF),
+  UBLVal.getPointer(CGF),
+  CGF.EmitLoadOfScalar(StLVal, Loc),
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+  Data.Schedule.getPointer()
+  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+  /*isSigned=*/false)
+  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+  llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
+  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+   : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+  TaskArgs);
+   } else {
   llvm::Value *TaskArgs[] = {
   UpLoc,
   ThreadID,
@@ -4690,6 +4717,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
   CGM.getModule(), OMPRTL___kmpc_taskloop),
   TaskArgs);
+  }
 }
 
 /// Emit reduction operation for each element of array (required for
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h 
b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5e7715743afb58..56d502d92806eb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  bool HasModifier = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..88c862d2975174 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,10 +7831,12 @@ void 
CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
 // grainsize clause
 Data.Schedule.setInt(/*IntVal=*/false);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true 
: false;
   } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
 // num_tasks clause
 Data.Schedule.setInt(/*IntVal=*/true);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true 
: false;
   }
 
   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp 
b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
new file mode 100644
index 00..d84ff181f66156
--- /dev/null
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -0,0 +1,256 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ 
-emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o 
%t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch 
%t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ 
-emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// 

[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-26 Thread CHANDRA GHALE via cfe-commits


@@ -4683,13 +4683,22 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
   Data.Schedule.getPointer()
   ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
   /*isSigned=*/false)
-  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
-  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Result.TaskDupFn, CGF.VoidPtrTy)
-   : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-  CGM.getModule(), OMPRTL___kmpc_taskloop),
-  TaskArgs);
+  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
+  if (Data.HasModifier)
+TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
+
+  TaskArgs.push_back(Result.TaskDupFn
+ ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+   Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
+  if (Data.HasModifier)
+CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(

chandraghale wrote:

Fixed as suggested.

https://github.com/llvm/llvm-project/pull/117196
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Codegen support for masked combined construct (PR #120520)

2025-01-06 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/120520
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] Codegen support for masked combined construct (PR #120520)

2025-01-06 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

Closing this PR. Splitting it into separate patches, one for each directive.

https://github.com/llvm/llvm-project/pull/120520
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct masked taskloop simd (PR #121916)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

Note: OpenMPSupport.rst is updated in PR:
https://github.com/llvm/llvm-project/pull/121741

https://github.com/llvm/llvm-project/pull/121916
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct masked taskloop (PR #121914)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

Note: OpenMPSupport.rst is updated in PR:
https://github.com/llvm/llvm-project/pull/121741

https://github.com/llvm/llvm-project/pull/121914
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop simd. (PR #121746)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

> Please update OpenMPSupport.rst

Updated the doc in this PR: https://github.com/llvm/llvm-project/pull/121741

https://github.com/llvm/llvm-project/pull/121746
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop (PR #121741)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

> Update OpenMPSupport.rst

Updated OpenMPSupport.rst. To avoid merge conflicts, the doc updates for all of
the related split PRs
([PR-121746](https://github.com/llvm/llvm-project/pull/121746),
[121914](https://github.com/llvm/llvm-project/pull/121914),
[121916](https://github.com/llvm/llvm-project/pull/121916)) for the combined
masked constructs are made in this PR only.

https://github.com/llvm/llvm-project/pull/121741
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop (PR #121741)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/121741

>From 0c790fc2768d58634e0455adf9f797a2456a7335 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 6 Jan 2025 03:35:46 -0600
Subject: [PATCH 1/3] codegen support for masked combined construct parallel
 masked taskloop

---
 clang/lib/CodeGen/CGStmt.cpp  |  3 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 18 ++
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../OpenMP/parallel_masked_taskloop_codegen.c | 62 +++
 4 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/parallel_masked_taskloop_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..06c434992ccba9 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -345,7 +345,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
-llvm_unreachable("parallel masked taskloop directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
 EmitOMPParallelMasterTaskLoopSimdDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..e45a5ea60e13cd 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8024,6 +8024,24 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
+const OMPParallelMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen,
+ emitEmptyBoundParameters);
+}
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
 const OMPParallelMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..1e8beca5513f05 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3867,6 +3867,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(
   const OMPParallelMasterTaskLoopDirective &S);
+  void EmitOMPParallelMaskedTaskLoopDirective(
+  const OMPParallelMaskedTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
   const OMPParallelMasterTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
diff --git a/clang/test/OpenMP/parallel_masked_taskloop_codegen.c 
b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
new file mode 100644
index 00..ed89ef92bb768d
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void parallel_masked_taskloop(){
+   #pragma omp parallel masked taskloop
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ parallel_masked_taskloop();
+}
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB1:[0-9]+]], i32 0, ptr @parallel_masked_taskloop.omp_outlined)
+// CHECK-NEXT:ret void
+//
+//
+// CHECK-LABEL: define internal void @parallel_masked_taskloop.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// C

[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop (PR #121741)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

> Also update ReleaseNotes.rst here and in other patches

@alexey-bataev Release notes updated. The notes for the other patches are also 
updated in this PR.

https://github.com/llvm/llvm-project/pull/121741
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop (PR #121741)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/121741

>From 0c790fc2768d58634e0455adf9f797a2456a7335 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 6 Jan 2025 03:35:46 -0600
Subject: [PATCH 1/2] codegen support for masked combined construct parallel
 masked taskloop

---
 clang/lib/CodeGen/CGStmt.cpp  |  3 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 18 ++
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../OpenMP/parallel_masked_taskloop_codegen.c | 62 +++
 4 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/parallel_masked_taskloop_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..06c434992ccba9 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -345,7 +345,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
-llvm_unreachable("parallel masked taskloop directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
 EmitOMPParallelMasterTaskLoopSimdDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..e45a5ea60e13cd 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8024,6 +8024,24 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
+const OMPParallelMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen,
+ emitEmptyBoundParameters);
+}
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
 const OMPParallelMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..1e8beca5513f05 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3867,6 +3867,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(
   const OMPParallelMasterTaskLoopDirective &S);
+  void EmitOMPParallelMaskedTaskLoopDirective(
+  const OMPParallelMaskedTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
   const OMPParallelMasterTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
diff --git a/clang/test/OpenMP/parallel_masked_taskloop_codegen.c 
b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
new file mode 100644
index 00..ed89ef92bb768d
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void parallel_masked_taskloop(){
+   #pragma omp parallel masked taskloop
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ parallel_masked_taskloop();
+}
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB1:[0-9]+]], i32 0, ptr @parallel_masked_taskloop.omp_outlined)
+// CHECK-NEXT:ret void
+//
+//
+// CHECK-LABEL: define internal void @parallel_masked_taskloop.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// C

[clang] [OpenMP] codegen support for masked combined construct masked taskloop (PR #121914)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/121914

Added codegen support for the combined masked construct `masked taskloop`.
Added implementation for `EmitOMPMaskedTaskLoopDirective`.

>From a15cfb56691aae4fb9b34d33c462fafab7ee4123 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Tue, 7 Jan 2025 04:51:54 -0600
Subject: [PATCH 1/2] codegen support for masked combined construct
 masked_taskloop

---
 clang/lib/CodeGen/CGStmt.cpp|  2 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp  | 12 
 clang/lib/CodeGen/CodeGenFunction.h |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..a8ba20c1d2d404 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -331,7 +331,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 EmitOMPMasterTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopDirectiveClass:
-llvm_unreachable("masked taskloop directive not supported yet.");
+EmitOMPMaskedTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
 EmitOMPMasterTaskLoopSimdDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..3e879b6b8e5834 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7994,6 +7994,18 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
+const OMPMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
 const OMPMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..c1d2be355ee7d6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3863,6 +3863,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
   void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
   void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S);
+  void EmitOMPMaskedTaskLoopDirective(const OMPMaskedTaskLoopDirective &S);
   void
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(

>From 3d89998415b82dfaf586208cc8b1a23bd2ce37f3 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Tue, 7 Jan 2025 04:52:28 -0600
Subject: [PATCH 2/2] masked_taskloop lit testcase

---
 clang/test/OpenMP/masked_taskloop_codegen.c | 50 +
 1 file changed, 50 insertions(+)
 create mode 100644 clang/test/OpenMP/masked_taskloop_codegen.c

diff --git a/clang/test/OpenMP/masked_taskloop_codegen.c 
b/clang/test/OpenMP/masked_taskloop_codegen.c
new file mode 100644
index 00..26f54c1797bbe3
--- /dev/null
+++ b/clang/test/OpenMP/masked_taskloop_codegen.c
@@ -0,0 +1,50 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void masked_taskloop(){
+   #pragma omp masked taskloop
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ masked_taskloop();
+}
+// CHECK-LABEL: define dso_local void @masked_taskloop(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK-NEXT:[[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1:[0-9]+]])
+// CHECK-NEXT:[[TMP1:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 
[[TMP0]], i32 0)
+// CHECK-NEXT:[[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:br i1 [[TMP2]], label %[[OMP_IF_THEN:.*]], label 
%[[OMP_IF_END:.*]]
+// CHECK:   [[OMP_IF_THEN]]:
+// CHECK-NEXT:call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:[[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr 
@[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 0, ptr @.omp_task_entry.)
+// CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_

[clang] [OpenMP] codegen support for masked combined construct masked taskloop simd (PR #121916)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/121916

Added codegen support for the combined masked construct `masked taskloop simd`.
Added implementation for `EmitOMPMaskedTaskLoopSimdDirective`.

>From 7d03bd61553690f22c03b52ef2bda8a09938e7a1 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Tue, 7 Jan 2025 05:09:21 -0600
Subject: [PATCH] codegen support for masked combined construct masked taskloop
 simd

---
 clang/lib/CodeGen/CGStmt.cpp  |  3 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 12 +
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../OpenMP/masked_taskloop_simd_codegen.c | 49 +++
 4 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/masked_taskloop_simd_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..496a626f3be598 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -338,7 +338,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable("masked taskloop simd directive not supported yet.");
+EmitOMPMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
 EmitOMPParallelMasterTaskLoopDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..fa3a82dad003b9 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8006,6 +8006,18 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
+const OMPMaskedTaskLoopSimdDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
 const OMPParallelMasterTaskLoopDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..aa08985351f811 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3865,6 +3865,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S);
   void
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
+  void
+  EmitOMPMaskedTaskLoopSimdDirective(const OMPMaskedTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(
   const OMPParallelMasterTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
diff --git a/clang/test/OpenMP/masked_taskloop_simd_codegen.c 
b/clang/test/OpenMP/masked_taskloop_simd_codegen.c
new file mode 100644
index 00..f786bc582beb20
--- /dev/null
+++ b/clang/test/OpenMP/masked_taskloop_simd_codegen.c
@@ -0,0 +1,49 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void masked_taskloop_simd(){
+   #pragma omp masked taskloop simd
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ masked_taskloop_simd();
+}
+// CHECK-LABEL: define dso_local void @masked_taskloop_simd(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK-NEXT:[[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1:[0-9]+]])
+// CHECK-NEXT:[[TMP1:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 
[[TMP0]], i32 0)
+// CHECK-NEXT:[[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:br i1 [[TMP2]], label %[[OMP_IF_THEN:.*]], label 
%[[OMP_IF_END:.*]]
+// CHECK:   [[OMP_IF_THEN]]:
+// CHECK-NEXT:call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:[[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr 
@[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 0, ptr @.omp_task_entry.)
+// CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK-NEXT:[[TMP5:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 5
+// CHECK-NE

[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop (PR #121741)

2025-01-06 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/121741

Added codegen support for the combined masked construct `parallel masked taskloop`.
Added implementation for `EmitOMPParallelMaskedTaskLoopDirective`.

>From 0c790fc2768d58634e0455adf9f797a2456a7335 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 6 Jan 2025 03:35:46 -0600
Subject: [PATCH] codegen support for masked combined construct parallel masked
 taskloop

---
 clang/lib/CodeGen/CGStmt.cpp  |  3 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 18 ++
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../OpenMP/parallel_masked_taskloop_codegen.c | 62 +++
 4 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/parallel_masked_taskloop_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..06c434992ccba9 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -345,7 +345,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
-llvm_unreachable("parallel masked taskloop directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
 EmitOMPParallelMasterTaskLoopSimdDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..e45a5ea60e13cd 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8024,6 +8024,24 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
+const OMPParallelMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen,
+ emitEmptyBoundParameters);
+}
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
 const OMPParallelMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..1e8beca5513f05 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3867,6 +3867,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(
   const OMPParallelMasterTaskLoopDirective &S);
+  void EmitOMPParallelMaskedTaskLoopDirective(
+  const OMPParallelMaskedTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
   const OMPParallelMasterTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
diff --git a/clang/test/OpenMP/parallel_masked_taskloop_codegen.c 
b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
new file mode 100644
index 00..ed89ef92bb768d
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void parallel_masked_taskloop(){
+   #pragma omp parallel masked taskloop
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ parallel_masked_taskloop();
+}
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB1:[0-9]+]], i32 0, ptr @parallel_masked_taskloop.omp_outlined)
+// CHECK-NEXT:ret void
+//
+//
+// CHECK-LABEL: define internal void @parallel_masked_taskloop.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK

[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop simd. (PR #121746)

2025-01-06 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/121746

Added codegen support for the combined masked construct `parallel masked 
taskloop simd`.
Added implementation for `EmitOMPParallelMaskedTaskLoopSimdDirective`.

>From 81f9c8f7eb18c0469b3c42db5150384cb57baef8 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 6 Jan 2025 04:25:49 -0600
Subject: [PATCH] codegen support for masked combined construct parallel masked
 taskloop simd

---
 clang/lib/CodeGen/CGStmt.cpp  |  4 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 18 ++
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../parallel_masked_taskloop_simd_codegen.c   | 62 +++
 4 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..1e40c844d09b9d 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -352,8 +352,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable(
-"parallel masked taskloop simd directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPDistributeDirectiveClass:
 EmitOMPDistributeDirective(cast(*S));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..fd061edade2e6c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8042,6 +8042,24 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
+const OMPParallelMaskedTaskLoopSimdDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop_simd, CodeGen,
+ emitEmptyBoundParameters);
+}
+
 // Generate the instructions for '#pragma omp target update' directive.
 void CodeGenFunction::EmitOMPTargetUpdateDirective(
 const OMPTargetUpdateDirective &S) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..1a1b5885183054 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3869,6 +3869,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   const OMPParallelMasterTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
   const OMPParallelMasterTaskLoopSimdDirective &S);
+  void EmitOMPParallelMaskedTaskLoopSimdDirective(
+  const OMPParallelMaskedTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
   void EmitOMPDistributeParallelForDirective(
   const OMPDistributeParallelForDirective &S);
diff --git a/clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c 
b/clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c
new file mode 100644
index 00..0b51b302f9fcda
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void parallel_masked_taskloop_simd(){
+   #pragma omp parallel masked taskloop simd
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ parallel_masked_taskloop_simd();
+}
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop_simd(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB1:[0-9]+]], i32 0, ptr @parallel_masked_taskloop_simd.omp_outlined)
+// CHECK-NEXT:ret void
+//
+//
+// CHECK-LABEL: define internal void 
@parallel_masked_taskloop_simd.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[DOTGLOBAL_TID__ADDR:%.*]]

[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop (PR #121741)

2025-01-07 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/121741

>From 0c790fc2768d58634e0455adf9f797a2456a7335 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 6 Jan 2025 03:35:46 -0600
Subject: [PATCH 1/3] codegen support for masked combined construct parallel
 masked taskloop

---
 clang/lib/CodeGen/CGStmt.cpp  |  3 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 18 ++
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../OpenMP/parallel_masked_taskloop_codegen.c | 62 +++
 4 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/parallel_masked_taskloop_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..06c434992ccba9 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -345,7 +345,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
-llvm_unreachable("parallel masked taskloop directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
 EmitOMPParallelMasterTaskLoopSimdDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..e45a5ea60e13cd 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8024,6 +8024,24 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
+const OMPParallelMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop, CodeGen,
+ emitEmptyBoundParameters);
+}
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
 const OMPParallelMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..1e8beca5513f05 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3867,6 +3867,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(
   const OMPParallelMasterTaskLoopDirective &S);
+  void EmitOMPParallelMaskedTaskLoopDirective(
+  const OMPParallelMaskedTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
   const OMPParallelMasterTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
diff --git a/clang/test/OpenMP/parallel_masked_taskloop_codegen.c 
b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
new file mode 100644
index 00..ed89ef92bb768d
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked_taskloop_codegen.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void parallel_masked_taskloop(){
+   #pragma omp parallel masked taskloop
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ parallel_masked_taskloop();
+}
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB1:[0-9]+]], i32 0, ptr @parallel_masked_taskloop.omp_outlined)
+// CHECK-NEXT:ret void
+//
+//
+// CHECK-LABEL: define internal void @parallel_masked_taskloop.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// C

[clang] [OpenMP] Codegen support for masked combined construct (PR #120520)

2024-12-18 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/120520

>From ca5e6f208927fc9b82c6dce34ee46dbca2d83a58 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Wed, 18 Dec 2024 22:36:19 -0600
Subject: [PATCH 1/2] Codegen support for masked combined construct

---
 clang/lib/CodeGen/CGStmt.cpp|  12 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp  |  64 
 clang/lib/CodeGen/CodeGenFunction.h |   7 +
 clang/test/OpenMP/combined_masked.c | 486 
 4 files changed, 564 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/OpenMP/combined_masked.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6c7a594fb10c4c..3424f1f30c61ef 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -331,29 +331,31 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 EmitOMPMasterTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopDirectiveClass:
-llvm_unreachable("masked taskloop directive not supported yet.");
+EmitOMPMaskedTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
 EmitOMPMasterTaskLoopSimdDirective(
 cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable("masked taskloop simd directive not supported yet.");
+EmitOMPMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
 EmitOMPParallelMasterTaskLoopDirective(
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
-llvm_unreachable("parallel masked taskloop directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
 EmitOMPParallelMasterTaskLoopSimdDirective(
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable(
-"parallel masked taskloop simd directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPDistributeDirectiveClass:
 EmitOMPDistributeDirective(cast(*S));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..0f41e11953a948 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7994,6 +7994,19 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
+const OMPMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+
 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
 const OMPMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -8006,6 +8019,19 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
+const OMPMaskedTaskLoopSimdDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
 const OMPParallelMasterTaskLoopDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -8024,6 +8050,25 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
+const OMPParallelMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommo

[clang] [OpenMP] Codegen support for masked combined construct (PR #120520)

2024-12-19 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/120520

>From ca5e6f208927fc9b82c6dce34ee46dbca2d83a58 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Wed, 18 Dec 2024 22:36:19 -0600
Subject: [PATCH 1/3] Codegen support for masked combined construct

---
 clang/lib/CodeGen/CGStmt.cpp|  12 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp  |  64 
 clang/lib/CodeGen/CodeGenFunction.h |   7 +
 clang/test/OpenMP/combined_masked.c | 486 
 4 files changed, 564 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/OpenMP/combined_masked.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6c7a594fb10c4c..3424f1f30c61ef 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -331,29 +331,31 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 EmitOMPMasterTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopDirectiveClass:
-llvm_unreachable("masked taskloop directive not supported yet.");
+EmitOMPMaskedTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
 EmitOMPMasterTaskLoopSimdDirective(
 cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable("masked taskloop simd directive not supported yet.");
+EmitOMPMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
 EmitOMPParallelMasterTaskLoopDirective(
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
-llvm_unreachable("parallel masked taskloop directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
 EmitOMPParallelMasterTaskLoopSimdDirective(
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable(
-"parallel masked taskloop simd directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPDistributeDirectiveClass:
 EmitOMPDistributeDirective(cast(*S));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..0f41e11953a948 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7994,6 +7994,19 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
+const OMPMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+
 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
 const OMPMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -8006,6 +8019,19 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
+const OMPMaskedTaskLoopSimdDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
 const OMPParallelMasterTaskLoopDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -8024,6 +8050,25 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopDirective(
+const OMPParallelMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommo

[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-22 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/117196

>From d19f41d39237b3d4fd2923f037743ddd495d5c9f Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Thu, 21 Nov 2024 11:15:11 -0600
Subject: [PATCH 1/3] Initial Codegen changes for strict modifier with
 grainsize/num_tasks of taskloop construct

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp |  28 ++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |   1 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp|   2 +
 .../taskloop_strictmodifier_codegen.cpp   | 256 ++
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |   3 +
 5 files changed, 290 insertions(+)
 create mode 100644 clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cc389974e04081..361550d2f102b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,6 +4666,33 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+  if( Data.HasModifier ){
+llvm::Value *TaskArgs[] = {
+  UpLoc,
+  ThreadID,
+  Result.NewTask,
+  IfVal,
+  LBLVal.getPointer(CGF),
+  UBLVal.getPointer(CGF),
+  CGF.EmitLoadOfScalar(StLVal, Loc),
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+  Data.Schedule.getPointer()
+  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+  /*isSigned=*/false)
+  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+  llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
+  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+   : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+  TaskArgs);
+   } else {
   llvm::Value *TaskArgs[] = {
   UpLoc,
   ThreadID,
@@ -4690,6 +4717,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
   CGM.getModule(), OMPRTL___kmpc_taskloop),
   TaskArgs);
+  }
 }
 
 /// Emit reduction operation for each element of array (required for
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h 
b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5e7715743afb58..56d502d92806eb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  bool HasModifier = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..88c862d2975174 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,10 +7831,12 @@ void 
CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
 // grainsize clause
 Data.Schedule.setInt(/*IntVal=*/false);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+Data.HasModifier = (Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true 
: false;
   } else if (const auto *Clause = S.getSingleClause()) {
 // num_tasks clause
 Data.Schedule.setInt(/*IntVal=*/true);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+Data.HasModifier = (Clause->getModifier() == OMPC_NUMTASKS_strict) ? true 
: false;
   }
 
   auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
diff --git a/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp 
b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
new file mode 100644
index 00..d84ff181f66156
--- /dev/null
+++ b/clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
@@ -0,0 +1,256 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ 
-emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o 
%t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch 
%t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ 
-emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// 

[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-21 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/117196

Initial parsing/sema support for the 'strict' modifier on the 'num_tasks' and 
'grainsize' clauses was added in these commits: 
[grainsize_parsing](https://github.com/llvm/llvm-project/commit/ab9eac762c35068e77f57795e660d06f578c9614)
 and 
[num_tasks_parsing](https://github.com/llvm/llvm-project/commit/56c166017055595a9f26933e85bfd89e30c528d0#diff-4184486638e85284c3a2c961a81e7752231022daf97e411007c13a6732b50db9R6545).
 However, that implementation is incomplete because it lacks code generation 
support. A runtime patch was introduced in this runtime commit, 
[runtime_patch](https://github.com/llvm/llvm-project/commit/540007b42701b5ac9adba076824bfd648a265413#diff-5e95f9319910d6965d09c301359dbe6b23f3eef5ce4d262ef2c2d2137875b5c4R374),
 which adds a new API, `__kmpc_taskloop_5`, to accommodate the strict modifier. 
This patch adds the codegen support. When the strict modifier is present 
on the grainsize or num_tasks clause, the compiler emits a call to 
`__kmpc_taskloop_5`, which takes an additional parameter of type i32 with the 
value 1 to indicate the strict modifier. If the strict modifier is not present, 
codegen falls back to the existing `__kmpc_taskloop` API call.

>From d19f41d39237b3d4fd2923f037743ddd495d5c9f Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Thu, 21 Nov 2024 11:15:11 -0600
Subject: [PATCH] Initial Codegen changes for strict modifier with
 grainsize/num_tasks of taskloop construct

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp |  28 ++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |   1 +
 clang/lib/CodeGen/CGStmtOpenMP.cpp|   2 +
 .../taskloop_strictmodifier_codegen.cpp   | 256 ++
 .../include/llvm/Frontend/OpenMP/OMPKinds.def |   3 +
 5 files changed, 290 insertions(+)
 create mode 100644 clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cc389974e04081..361550d2f102b4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4666,6 +4666,33 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
+  if( Data.HasModifier ){
+llvm::Value *TaskArgs[] = {
+  UpLoc,
+  ThreadID,
+  Result.NewTask,
+  IfVal,
+  LBLVal.getPointer(CGF),
+  UBLVal.getPointer(CGF),
+  CGF.EmitLoadOfScalar(StLVal, Loc),
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
+  llvm::ConstantInt::getSigned(
+  CGF.IntTy, Data.Schedule.getPointer()
+ ? Data.Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+  Data.Schedule.getPointer()
+  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
+  /*isSigned=*/false)
+  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+  llvm::ConstantInt::get(CGF.Int32Ty, 1), //strict modifier enabled
+  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+   : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_taskloop_5),
+  TaskArgs);
+   } else {
   llvm::Value *TaskArgs[] = {
   UpLoc,
   ThreadID,
@@ -4690,6 +4717,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
   CGM.getModule(), OMPRTL___kmpc_taskloop),
   TaskArgs);
+  }
 }
 
 /// Emit reduction operation for each element of array (required for
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h 
b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5e7715743afb58..56d502d92806eb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
   bool IsReductionWithTaskMod = false;
   bool IsWorksharingReduction = false;
   bool HasNowaitClause = false;
+  bool HasModifier = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 390516fea38498..88c862d2975174 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7831,10 +7831,12 @@ void 
CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
 // grainsize clause
 Data.Schedule.setInt(/*IntVal=*/false);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+Data.HasModifier = 

[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-21 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale edited 
https://github.com/llvm/llvm-project/pull/117196
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-22 Thread CHANDRA GHALE via cfe-commits


@@ -4666,30 +4666,58 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction 
&CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
   }
   enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
-  llvm::Value *TaskArgs[] = {
-  UpLoc,
-  ThreadID,
-  Result.NewTask,
-  IfVal,
-  LBLVal.getPointer(CGF),
-  UBLVal.getPointer(CGF),
-  CGF.EmitLoadOfScalar(StLVal, Loc),
-  llvm::ConstantInt::getSigned(
-  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
-  llvm::ConstantInt::getSigned(
-  CGF.IntTy, Data.Schedule.getPointer()
- ? Data.Schedule.getInt() ? NumTasks : Grainsize
- : NoSchedule),
-  Data.Schedule.getPointer()
-  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
-  /*isSigned=*/false)
-  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
-  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Result.TaskDupFn, CGF.VoidPtrTy)
-   : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
-  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-  CGM.getModule(), OMPRTL___kmpc_taskloop),
-  TaskArgs);
+  if (Data.HasModifier) {
+llvm::Value *TaskArgs[] = {
+UpLoc,
+ThreadID,
+Result.NewTask,
+IfVal,
+LBLVal.getPointer(CGF),
+UBLVal.getPointer(CGF),
+CGF.EmitLoadOfScalar(StLVal, Loc),

chandraghale wrote:

Fixed as suggested !!!

https://github.com/llvm/llvm-project/pull/117196
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct (PR #117196)

2024-11-23 Thread CHANDRA GHALE via cfe-commits


@@ -7831,10 +7831,14 @@ void 
CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
 // grainsize clause
 Data.Schedule.setInt(/*IntVal=*/false);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+Data.HasModifier =
+(Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
   } else if (const auto *Clause = S.getSingleClause()) {
 // num_tasks clause
 Data.Schedule.setInt(/*IntVal=*/true);
 Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+Data.HasModifier =
+(Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;

chandraghale wrote:

@shiltian 
We need to check which clause is present (either GrainsizeClause or 
NumTasksClause) and handle the case where neither clause is present. Combining 
the conditions would require an extra variable to track the clause type. I 
suggest sticking with the current logic, as it is simple and clear. What do you think?

https://github.com/llvm/llvm-project/pull/117196
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct masked taskloop simd (PR #121916)

2025-01-12 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/121916

>From 7d03bd61553690f22c03b52ef2bda8a09938e7a1 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Tue, 7 Jan 2025 05:09:21 -0600
Subject: [PATCH] codegen support for masked combined construct masked taskloop
 simd

---
 clang/lib/CodeGen/CGStmt.cpp  |  3 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 12 +
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../OpenMP/masked_taskloop_simd_codegen.c | 49 +++
 4 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/masked_taskloop_simd_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..496a626f3be598 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -338,7 +338,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable("masked taskloop simd directive not supported yet.");
+EmitOMPMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
 EmitOMPParallelMasterTaskLoopDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..fa3a82dad003b9 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8006,6 +8006,18 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopSimdDirective(
+const OMPMaskedTaskLoopSimdDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
 void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
 const OMPParallelMasterTaskLoopDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..aa08985351f811 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3865,6 +3865,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S);
   void
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
+  void
+  EmitOMPMaskedTaskLoopSimdDirective(const OMPMaskedTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(
   const OMPParallelMasterTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
diff --git a/clang/test/OpenMP/masked_taskloop_simd_codegen.c 
b/clang/test/OpenMP/masked_taskloop_simd_codegen.c
new file mode 100644
index 00..f786bc582beb20
--- /dev/null
+++ b/clang/test/OpenMP/masked_taskloop_simd_codegen.c
@@ -0,0 +1,49 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void masked_taskloop_simd(){
+   #pragma omp masked taskloop simd
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ masked_taskloop_simd();
+}
+// CHECK-LABEL: define dso_local void @masked_taskloop_simd(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK-NEXT:[[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1:[0-9]+]])
+// CHECK-NEXT:[[TMP1:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 
[[TMP0]], i32 0)
+// CHECK-NEXT:[[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:br i1 [[TMP2]], label %[[OMP_IF_THEN:.*]], label 
%[[OMP_IF_END:.*]]
+// CHECK:   [[OMP_IF_THEN]]:
+// CHECK-NEXT:call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:[[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr 
@[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 0, ptr @.omp_task_entry.)
+// CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK-NEXT:[[TMP5:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 5
+// CHECK-NEXT:store i64 0, ptr [[TMP5]], align 8
+// CHECK-NEXT:[[TMP6:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KMP_TASK_T]], ptr [[TMP4]], 

[clang] [OpenMP] codegen support for masked combined construct masked taskloop simd (PR #121916)

2025-01-12 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/121916
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct masked taskloop (PR #121914)

2025-01-12 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/121914
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct masked taskloop (PR #121914)

2025-01-12 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/121914

>From a15cfb56691aae4fb9b34d33c462fafab7ee4123 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Tue, 7 Jan 2025 04:51:54 -0600
Subject: [PATCH 1/2] codegen support for masked combined construct
 masked_taskloop

---
 clang/lib/CodeGen/CGStmt.cpp|  2 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp  | 12 
 clang/lib/CodeGen/CodeGenFunction.h |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..a8ba20c1d2d404 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -331,7 +331,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 EmitOMPMasterTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMaskedTaskLoopDirectiveClass:
-llvm_unreachable("masked taskloop directive not supported yet.");
+EmitOMPMaskedTaskLoopDirective(cast(*S));
 break;
   case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
 EmitOMPMasterTaskLoopSimdDirective(
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..3e879b6b8e5834 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -7994,6 +7994,18 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
   CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
 }
 
+void CodeGenFunction::EmitOMPMaskedTaskLoopDirective(
+const OMPMaskedTaskLoopDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+Action.Enter(CGF);
+EmitOMPTaskLoopBasedDirective(S);
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
+  CGM.getOpenMPRuntime().emitMaskedRegion(*this, CodeGen, S.getBeginLoc());
+}
+
 void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
 const OMPMasterTaskLoopSimdDirective &S) {
   auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..c1d2be355ee7d6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3863,6 +3863,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
   void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
   void EmitOMPMasterTaskLoopDirective(const OMPMasterTaskLoopDirective &S);
+  void EmitOMPMaskedTaskLoopDirective(const OMPMaskedTaskLoopDirective &S);
   void
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(

>From 3d89998415b82dfaf586208cc8b1a23bd2ce37f3 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Tue, 7 Jan 2025 04:52:28 -0600
Subject: [PATCH 2/2] masked_taskloop lit testcase

---
 clang/test/OpenMP/masked_taskloop_codegen.c | 50 +
 1 file changed, 50 insertions(+)
 create mode 100644 clang/test/OpenMP/masked_taskloop_codegen.c

diff --git a/clang/test/OpenMP/masked_taskloop_codegen.c 
b/clang/test/OpenMP/masked_taskloop_codegen.c
new file mode 100644
index 00..26f54c1797bbe3
--- /dev/null
+++ b/clang/test/OpenMP/masked_taskloop_codegen.c
@@ -0,0 +1,50 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void masked_taskloop(){
+   #pragma omp masked taskloop
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ masked_taskloop();
+}
+// CHECK-LABEL: define dso_local void @masked_taskloop(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// CHECK-NEXT:[[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT:[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1:[0-9]+]])
+// CHECK-NEXT:[[TMP1:%.*]] = call i32 @__kmpc_masked(ptr @[[GLOB1]], i32 
[[TMP0]], i32 0)
+// CHECK-NEXT:[[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:br i1 [[TMP2]], label %[[OMP_IF_THEN:.*]], label 
%[[OMP_IF_END:.*]]
+// CHECK:   [[OMP_IF_THEN]]:
+// CHECK-NEXT:call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]])
+// CHECK-NEXT:[[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr 
@[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 0, ptr @.omp_task_entry.)
+// CHECK-NEXT:[[TMP4:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0
+// CHECK-NEXT:[[TMP5:%.*]] = getelementptr inbounds nuw 
[[STRUCT_KM

[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop simd. (PR #121746)

2025-01-13 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/121746

>From 81f9c8f7eb18c0469b3c42db5150384cb57baef8 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 6 Jan 2025 04:25:49 -0600
Subject: [PATCH] codegen support for masked combined construct parallel masked
 taskloop simd

---
 clang/lib/CodeGen/CGStmt.cpp  |  4 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp| 18 ++
 clang/lib/CodeGen/CodeGenFunction.h   |  2 +
 .../parallel_masked_taskloop_simd_codegen.c   | 62 +++
 4 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 3974739d2abb47..1e40c844d09b9d 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -352,8 +352,8 @@ void CodeGenFunction::EmitStmt(const Stmt *S, 
ArrayRef Attrs) {
 cast(*S));
 break;
   case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
-llvm_unreachable(
-"parallel masked taskloop simd directive not supported yet.");
+EmitOMPParallelMaskedTaskLoopSimdDirective(
+cast(*S));
 break;
   case Stmt::OMPDistributeDirectiveClass:
 EmitOMPDistributeDirective(cast(*S));
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp 
b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6cb37b20b7aeee..fd061edade2e6c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -8042,6 +8042,24 @@ void 
CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMaskedTaskLoopSimdDirective(
+const OMPParallelMaskedTaskLoopSimdDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+  PrePostActionTy &Action) {
+  Action.Enter(CGF);
+  CGF.EmitOMPTaskLoopBasedDirective(S);
+};
+OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+CGM.getOpenMPRuntime().emitMaskedRegion(CGF, TaskLoopCodeGen,
+S.getBeginLoc());
+  };
+  auto LPCRegion =
+  CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+  emitCommonOMPParallelDirective(*this, S, OMPD_masked_taskloop_simd, CodeGen,
+ emitEmptyBoundParameters);
+}
+
 // Generate the instructions for '#pragma omp target update' directive.
 void CodeGenFunction::EmitOMPTargetUpdateDirective(
 const OMPTargetUpdateDirective &S) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 1a5c42f8f974d0..1a1b5885183054 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3869,6 +3869,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   const OMPParallelMasterTaskLoopDirective &S);
   void EmitOMPParallelMasterTaskLoopSimdDirective(
   const OMPParallelMasterTaskLoopSimdDirective &S);
+  void EmitOMPParallelMaskedTaskLoopSimdDirective(
+  const OMPParallelMaskedTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
   void EmitOMPDistributeParallelForDirective(
   const OMPDistributeParallelForDirective &S);
diff --git a/clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c 
b/clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c
new file mode 100644
index 00..0b51b302f9fcda
--- /dev/null
+++ b/clang/test/OpenMP/parallel_masked_taskloop_simd_codegen.c
@@ -0,0 +1,62 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=52 
-x c -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 100
+void parallel_masked_taskloop_simd(){
+   #pragma omp parallel masked taskloop simd
+   for( int i = 0; i < N; i++)
+   ;
+
+}
+
+int main()
+{
+ parallel_masked_taskloop_simd();
+}
+// CHECK-LABEL: define dso_local void @parallel_masked_taskloop_simd(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr 
@[[GLOB1:[0-9]+]], i32 0, ptr @parallel_masked_taskloop_simd.omp_outlined)
+// CHECK-NEXT:ret void
+//
+//
+// CHECK-LABEL: define internal void 
@parallel_masked_taskloop_simd.omp_outlined(
+// CHECK-SAME: ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]]

[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop simd. (PR #121746)

2025-01-14 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/121746
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct masked taskloop simd (PR #121916)

2025-01-10 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

@alexey-bataev Can you approve this and the rest of the split PRs, 
[121914](https://github.com/llvm/llvm-project/pull/121914) and 
[121746](https://github.com/llvm/llvm-project/pull/121746)? They are part of 
the combined masked construct work; 
[PR-121741](https://github.com/llvm/llvm-project/pull/121741) is already merged. 
OpenMPSupport.rst and the release note are already updated. 

https://github.com/llvm/llvm-project/pull/121916
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP] codegen support for masked combined construct parallel masked taskloop (PR #121741)

2025-01-09 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/121741
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-18 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

1. Used tail-allocated storage for the extra members added in OMPReductionClause. 
2. Changed the assertion in the parsing logic to a diagnostic. 
3. Packed the reduction clause modifiers (reduction-modifier and 
original-sharing-modifier) into the existing structure. 
4. Added additional lit test cases. 

https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-20 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

Thank you for the review.

https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Minor unused variable error for sanitizer builds (PR #132372)

2025-03-21 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/132372

Fix for issue 
[https://github.com/llvm/llvm-project/issues/132371](https://github.com/llvm/llvm-project/issues/132371).
Minor error: sanitizer builds are failing because of an unused variable. 
sanitizer-aarch64-linux/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:11764:17:
 error: unused variable 'I' [-Werror,-Wunused-variable]
 11764 |   for (unsigned I : llvm::seq(NumFlags))

This loop was modified as part of 
[https://github.com/llvm/llvm-project/pull/129938](https://github.com/llvm/llvm-project/pull/129938),
 where the unused-variable warning was missed. 

>From 7769abef03a3a657942c24f794541ea01eee6743 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Fri, 21 Mar 2025 05:48:03 -0500
Subject: [PATCH] Minor unused variable error for sanitizer builds

---
 clang/lib/Serialization/ASTReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 930134bcd6501..c66ea5a2c0346 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11761,7 +11761,7 @@ void 
OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
   unsigned NumFlags = Record.readInt();
   SmallVector Flags;
   Flags.reserve(NumFlags);
-  for (unsigned I : llvm::seq(NumFlags))
+  for ([[maybe_unused]] unsigned I : llvm::seq(NumFlags))
 Flags.push_back(Record.readInt());
   C->setPrivateVariableReductionFlags(Flags);
 }

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Minor unused variable error for sanitizer builds (PR #132372)

2025-03-21 Thread CHANDRA GHALE via cfe-commits


@@ -11761,7 +11761,7 @@ void 
OMPClauseReader::VisitOMPReductionClause(OMPReductionClause *C) {
   unsigned NumFlags = Record.readInt();
   SmallVector Flags;
   Flags.reserve(NumFlags);
-  for (unsigned I : llvm::seq(NumFlags))
+  for ([[maybe_unused]] unsigned I : llvm::seq(NumFlags))

chandraghale wrote:

it does

https://github.com/llvm/llvm-project/pull/132372
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Minor unused variable error for sanitizer builds (PR #132372)

2025-03-21 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/132372
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-21 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-19 Thread CHANDRA GHALE via cfe-commits


@@ -18933,12 +18945,35 @@ static bool actOnOMPReductionKindClause(
 reportOriginalDsa(S, Stack, D, DVar);
 continue;
   }
+  // OpenMP 6.0 [ 7.6.10 ]
+  // Support Reduction over private variables with reduction clause.
+  // A list item in a reduction clause can now be private in the enclosing
+  // context. For orphaned constructs it is assumed to be shared unless the
+  // original(private) modifier appears in the clause.
+  DVar = Stack->getImplicitDSA(D, true);
+  bool IsOrphaned = false;
+  OpenMPDirectiveKind CurrDir = Stack->getCurrentDirective();
+  OpenMPDirectiveKind ParentDir = Stack->getParentDirective();
+  // Check if the construct is orphaned (has no enclosing OpenMP context)
+  IsOrphaned = (ParentDir == OMPD_unknown);
+  IsPrivate =
+  ((isOpenMPPrivate(DVar.CKind) && DVar.CKind != OMPC_reduction &&
+isOpenMPWorksharingDirective(CurrDir) &&
+!isOpenMPParallelDirective(CurrDir) &&
+!isOpenMPTeamsDirective(CurrDir) &&
+!isOpenMPSimdDirective(ParentDir)) ||
+   (IsOrphaned && DVar.CKind == OMPC_unknown) ||
+   RD.OrigSharingModifier != OMPC_ORIGINAL_SHARING_shared);

chandraghale wrote:

done


https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-19 Thread CHANDRA GHALE via cfe-commits


@@ -3757,6 +3768,31 @@ class OMPReductionClause final
   /// reduction copies.
   void setRHSExprs(ArrayRef RHSExprs);
 
+  /// Set the list private reduction flags
+  void setPrivateVariableReductionFlags(ArrayRef Flags) {
+assert(Flags.size() == varlist_size() &&
+   "Number of private flags does not match vars");
+std::copy(Flags.begin(), Flags.end(), getTrailingObjects());

chandraghale wrote:

done

https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-19 Thread CHANDRA GHALE via cfe-commits


@@ -18933,12 +18945,35 @@ static bool actOnOMPReductionKindClause(
 reportOriginalDsa(S, Stack, D, DVar);
 continue;
   }
+  // OpenMP 6.0 [ 7.6.10 ]
+  // Support Reduction over private variables with reduction clause.
+  // A list item in a reduction clause can now be private in the enclosing
+  // context. For orphaned constructs it is assumed to be shared unless the
+  // original(private) modifier appears in the clause.
+  DVar = Stack->getImplicitDSA(D, true);
+  bool IsOrphaned = false;
+  OpenMPDirectiveKind CurrDir = Stack->getCurrentDirective();
+  OpenMPDirectiveKind ParentDir = Stack->getParentDirective();
+  // Check if the construct is orphaned (has no enclosing OpenMP context)
+  IsOrphaned = (ParentDir == OMPD_unknown);

chandraghale wrote:

done

https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-08 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale edited 
https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-07 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale edited 
https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-10 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH 1/4] Codegen for Reduction over private variables with
 reduction clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+ret

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-10 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,151 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I :
+   llvm::seq(std::min(ReductionOps.size(), LHSExprs.size( {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}

chandraghale wrote:

Fixing by adding CXXOperatorCallExpr to handle user-defined reduction 
operators. 

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-08 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+return RValue::get(Result);
+  } else {
+OpaqueValueExpr OVE(BinOp->getLHS()->getExprLoc(),
+BinOp->getLHS()->getType(),
+ExprValueKind::VK_PRValue);
+CodeGenFunction::OpaqueValueMapping OldValMapping(CGF, &OVE, OldVal);
+return CGF.EmitAnyExpr(BinOp->getRHS());
+  }
+};
+
+(void)CGF.EmitOMPAtomicSimpleUpdateExpr(
+SharedLV, PrivateRV, BO, true,
+llvm::AtomicOrdering::SequentiallyConsistent, Loc, UpdateOp);
+  }
+
+  // Final barrier
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  // Broadcast final result
+  llvm::Value *FinalResult = CGF.Builder.CreateLoad(SharedResult);
+
+  // Update private variables with final result
+  for (unsigned I = 0; I < Pr

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-08 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {

chandraghale wrote:

Done !!

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-08 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}

chandraghale wrote:

Done !!

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-08 Thread CHANDRA GHALE via cfe-commits


@@ -5201,6 +5345,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, 
SourceLocation Loc,
 
   CGF.EmitBranch(DefaultBB);
   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
+  if (Options.IsPrivateVarReduction) {
+emitPrivateReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, ReductionOps);
+  }

chandraghale wrote:

Done !!

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-09 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,151 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I :
+   llvm::seq(std::min(ReductionOps.size(), LHSExprs.size( {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}

chandraghale wrote:

This is required. We need to look through the RHS to get the actual reduction 
operator. For instance: 
`-BinaryOperator 0xce76050  'int' lvalue '='   // Assignment operator
  |-DeclRefExpr 0xce75fa0  'int' lvalue Var 0xce75678 'sum'   // LHS
  `-BinaryOperator 0xce76030  'int' '+'   // RHS contains actual reduction op
    |-ImplicitCastExpr 0xce76000  'int' 
    | `-DeclRefExpr 0xce75fc0  'int' lvalue Var 0xce75678 'sum'
    `-ImplicitCastExpr 0xce76018  'int' 
      `-DeclRefExpr 0xce75fe0  'int' lvalue Var 0xce75e00 'i'


https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-09 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale edited 
https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-09 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH 1/3] Codegen for Reduction over private variables with
 reduction clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+ret

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-09 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,151 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I :
+   llvm::seq(std::min(ReductionOps.size(), LHSExprs.size( {
+if (I >= LHSExprs.size()) {
+  break;
+}

chandraghale wrote:

Done !!


https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-09 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,151 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));

chandraghale wrote:

This should be fine: the updates use atomic operations with SequentiallyConsistent 
ordering, and there are also synchronization barriers at the critical points.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-10 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale edited 
https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-10 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH 1/2] Codegen for Reduction over private variables with
 reduction clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+ret

[clang] Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-04-07 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/134709

Codegen support for reduction over private variables with the reduction clause. 
See Section 7.6.10 of the OpenMP 6.0 spec.
- An internal shared copy is initialized with an initializer value.
- The shared copy is updated by combining its value with the values from the 
private copies created by the clause.
- Once an encountering thread verifies that all updates are complete, its 
original list item is updated by merging its value with that of the shared copy 
and then broadcast to all threads.

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH] Codegen for Reduction over private variables with reduction
 clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-05 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH 1/8] Codegen for Reduction over private variables with
 reduction clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+ret

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-01 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH 1/6] Codegen for Reduction over private variables with
 reduction clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+ret

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-01 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH 1/7] Codegen for Reduction over private variables with
 reduction clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+ret

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-01 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

Refactored the code to look up the expression and handle UDR (user-defined reduction), emitting it as is.
Also updated the lit test.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-01 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709

>From a05af192052de8503fb4945bfb853b3f2c14e4c9 Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Mon, 7 Apr 2025 13:58:25 -0500
Subject: [PATCH 1/5] Codegen for Reduction over private variables with
 reduction clause

---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp | 147 +++
 clang/lib/CodeGen/CGOpenMPRuntime.h   |  14 ++
 clang/lib/CodeGen/CGStmtOpenMP.cpp|  12 +-
 .../OpenMP/for_private_reduction_codegen.cpp  | 236 ++
 4 files changed, 406 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/OpenMP/for_private_reduction_codegen.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 06a652c146fb9..3424227e5da79 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I = 0; I < ReductionOps.size(); ++I) {
+if (I >= LHSExprs.size()) {
+  break;
+}
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+if (const auto *BORHS =
+dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+  BO = BORHS->getOpcode();
+}
+
+LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
+LValue LHSLV = CGF.EmitLValue(LHSExprs[I]);
+RValue PrivateRV = CGF.EmitLoadOfLValue(LHSLV, Loc);
+auto &&UpdateOp = [&CGF, PrivateRV, BinOp, BO](RValue OldVal) {
+  if (BO == BO_Mul) {
+llvm::Value *OldScalar = OldVal.getScalarVal();
+llvm::Value *PrivateScalar = PrivateRV.getScalarVal();
+llvm::Value *Result = CGF.Builder.CreateMul(OldScalar, PrivateScalar);
+ret

[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-05 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-05 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

@alexey-bataev, I have reworked the UDR handling. It would be helpful if you could 
review it further.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-06 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,150 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  BinaryOperatorKind MainBO = BO_Comma;
+  if (const auto *BinOp = dyn_cast(ReductionOps[0])) {
+if (const auto *RHSExpr = BinOp->getRHS()) {
+  if (const auto *BORHS =
+  dyn_cast(RHSExpr->IgnoreParenImpCasts())) {
+MainBO = BORHS->getOpcode();
+  }
+}
+  }
+
+  llvm::Constant *InitVal = llvm::Constant::getNullValue(LLVMType);
+  const Expr *Private = Privates[0];
+
+  if (const auto *DRE = dyn_cast(Private)) {
+if (const auto *VD = dyn_cast(DRE->getDecl())) {
+  if (const Expr *Init = VD->getInit()) {
+if (Init->isConstantInitializer(CGF.getContext(), false)) {
+  Expr::EvalResult Result;
+  if (Init->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt()) {
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+}
+  }
+}
+  }
+}
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+  CGF.Builder.CreateStore(InitVal, SharedResult);
+  CGF.Builder.CreateBr(InitEndBB);
+
+  CGF.EmitBlock(InitEndBB);
+
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+
+  for (unsigned I :
+   llvm::seq(std::min(ReductionOps.size(), LHSExprs.size( {
+
+const auto *BinOp = dyn_cast(ReductionOps[I]);
+if (!BinOp || BinOp->getOpcode() != BO_Assign)
+  continue;
+
+const Expr *RHSExpr = BinOp->getRHS();
+if (!RHSExpr)
+  continue;
+
+BinaryOperatorKind BO = BO_Comma;
+const Expr *StripRHS = RHSExpr->IgnoreParenImpCasts();
+if (const auto *BORHS = dyn_cast(StripRHS)) {
+  BO = BORHS->getOpcode();
+} else if (const auto *OpCall = dyn_cast(StripRHS)) {
+  BO = BinaryOperator::getOverloadedOpcode(OpCall->getOperator());

chandraghale wrote:

Reworked the UDR handling.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] default clause replaced by otherwise clause for metadirective in OpenMP 5.2 (PR #128640)

2025-05-01 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

Committing to main, as the PR is already approved.

https://github.com/llvm/llvm-project/pull/128640
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] default clause replaced by otherwise clause for metadirective in OpenMP 5.2 (PR #128640)

2025-05-01 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/128640
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-07 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,234 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  //  Create a shared global variable (__shared_reduction_var) to accumulate 
the
+  //  final result.
+  //
+  //  Call __kmpc_barrier to synchronize threads before initialization.
+  //
+  //  The master thread (thread_id == 0) initializes __shared_reduction_var
+  //with the identity value or initializer.
+  //
+  //  Call __kmpc_barrier to synchronize before combining.
+  //  For each i:
+  //- Thread enters critical section.
+  //- Reads its private value from LHSExprs[i].
+  //- Updates __shared_reduction_var[i] = 
RedOp_i(__shared_reduction_var[i],
+  //LHSExprs[i]).
+  //- Exits critical section.
+  //
+  //  Call __kmpc_barrier after combining.
+  //
+  //  Each thread copies __shared_reduction_var[i] back to LHSExprs[i].
+  //
+  //  Final __kmpc_barrier to synchronize after broadcasting
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  llvm::Constant *InitVal = nullptr;
+  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps[0]);
+  // Determine the initial value for the shared reduction variable
+  if (!UDR) {
+InitVal = llvm::Constant::getNullValue(LLVMType);
+if (const auto *DRE = dyn_cast(Privates[0])) {
+  if (const auto *VD = dyn_cast(DRE->getDecl())) {
+const Expr *InitExpr = VD->getInit();
+if (InitExpr && !PrivateType->isAggregateType() &&
+!PrivateType->isAnyComplexType()) {
+  Expr::EvalResult Result;
+  if (InitExpr->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt())
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+  }
+}
+  }
+}
+  } else {
+InitVal = llvm::Constant::getNullValue(LLVMType);

chandraghale wrote:

Thanks! I have now handled this via EmitSharedInit.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Inital support for privavate variable reduction (PR #127740)

2025-02-18 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale created 
https://github.com/llvm/llvm-project/pull/127740

None

>From cf392f05f9499fd0621ffec91a3b852d4b91820b Mon Sep 17 00:00:00 2001
From: Chandra Ghale 
Date: Tue, 18 Feb 2025 21:24:22 -0600
Subject: [PATCH] Inital support for privavate variable reduction

---
 clang/include/clang/Basic/OpenMPKinds.def |  7 +++
 clang/include/clang/Basic/OpenMPKinds.h   |  7 +++
 clang/include/clang/Sema/SemaOpenMP.h |  5 +-
 clang/lib/Parse/ParseOpenMP.cpp   | 27 ++
 clang/lib/Sema/SemaOpenMP.cpp | 65 +++
 5 files changed, 101 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/Basic/OpenMPKinds.def 
b/clang/include/clang/Basic/OpenMPKinds.def
index 3f25e7aafe23b..4f8396dc34ea8 100644
--- a/clang/include/clang/Basic/OpenMPKinds.def
+++ b/clang/include/clang/Basic/OpenMPKinds.def
@@ -71,6 +71,9 @@
 #ifndef OPENMP_REDUCTION_MODIFIER
 #define OPENMP_REDUCTION_MODIFIER(Name)
 #endif
+#ifndef OPENMP_ORIGINAL_SHARING_MODIFIER
+#define OPENMP_ORIGINAL_SHARING_MODIFIER(Name)
+#endif
 #ifndef OPENMP_ADJUST_ARGS_KIND
 #define OPENMP_ADJUST_ARGS_KIND(Name)
 #endif
@@ -202,6 +205,9 @@ OPENMP_REDUCTION_MODIFIER(default)
 OPENMP_REDUCTION_MODIFIER(inscan)
 OPENMP_REDUCTION_MODIFIER(task)
 
+OPENMP_ORIGINAL_SHARING_MODIFIER(shared)
+OPENMP_ORIGINAL_SHARING_MODIFIER(private)
+
 // Adjust-op kinds for the 'adjust_args' clause.
 OPENMP_ADJUST_ARGS_KIND(nothing)
 OPENMP_ADJUST_ARGS_KIND(need_device_ptr)
@@ -231,6 +237,7 @@ OPENMP_DOACROSS_MODIFIER(source_omp_cur_iteration)
 #undef OPENMP_BIND_KIND
 #undef OPENMP_ADJUST_ARGS_KIND
 #undef OPENMP_REDUCTION_MODIFIER
+#undef OPENMP_ORIGINAL_SHARING_MODIFIER
 #undef OPENMP_DEVICE_MODIFIER
 #undef OPENMP_ORDER_KIND
 #undef OPENMP_ORDER_MODIFIER
diff --git a/clang/include/clang/Basic/OpenMPKinds.h 
b/clang/include/clang/Basic/OpenMPKinds.h
index 900ad6ca6d66f..bbadda9fb03b4 100644
--- a/clang/include/clang/Basic/OpenMPKinds.h
+++ b/clang/include/clang/Basic/OpenMPKinds.h
@@ -190,6 +190,13 @@ enum OpenMPReductionClauseModifier {
   OMPC_REDUCTION_unknown,
 };
 
+/// OpenMP original sharing modifiers
+enum OpenMPOriginalSharingModifier {
+#define OPENMP_ORIGINAL_SHARING_MODIFIER(Name) OMPC_ORIGINAL_SHARING_##Name,
+#include "clang/Basic/OpenMPKinds.def"
+  OMPC_ORIGINAL_SHARING_default,
+};
+
 /// OpenMP adjust-op kinds for 'adjust_args' clause.
 enum OpenMPAdjustArgsOpKind {
 #define OPENMP_ADJUST_ARGS_KIND(Name) OMPC_ADJUST_ARGS_##Name,
diff --git a/clang/include/clang/Sema/SemaOpenMP.h 
b/clang/include/clang/Sema/SemaOpenMP.h
index 3d1cc4fab1c10..40e300f099826 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -1136,6 +1136,7 @@ class SemaOpenMP : public SemaBase {
 DeclarationNameInfo ReductionOrMapperId;
 int ExtraModifier = -1; ///< Additional modifier for linear, map, depend or
 ///< lastprivate clause.
+int OriginalSharingModifier = 0;  // Default is shared
 SmallVector
 MapTypeModifiers;
 SmallVector
@@ -1145,6 +1146,7 @@ class SemaOpenMP : public SemaBase {
 SmallVector MotionModifiersLoc;
 bool IsMapTypeImplicit = false;
 SourceLocation ExtraModifierLoc;
+SourceLocation OriginalSharingModifierLoc;
 SourceLocation OmpAllMemoryLoc;
 SourceLocation
 StepModifierLoc; /// 'step' modifier location for linear clause
@@ -1197,7 +1199,8 @@ class SemaOpenMP : public SemaBase {
   SourceLocation ModifierLoc, SourceLocation ColonLoc,
   SourceLocation EndLoc, CXXScopeSpec &ReductionIdScopeSpec,
   const DeclarationNameInfo &ReductionId,
-  ArrayRef UnresolvedReductions = {});
+  ArrayRef UnresolvedReductions = {}, 
+  OpenMPOriginalSharingModifier OriginalShareModifier = 
OMPC_ORIGINAL_SHARING_default);
   /// Called on well-formed 'task_reduction' clause.
   OMPClause *ActOnOpenMPTaskReductionClause(
   ArrayRef VarList, SourceLocation StartLoc,
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index b4e973bc84a7b..f59ecd93de6d3 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -4594,6 +4594,33 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind 
DKind,
   assert(Tok.is(tok::comma) && "Expected comma.");
   (void)ConsumeToken();
 }
+// Handle original(private / shared) Modifier
+if (Kind == OMPC_reduction && getLangOpts().OpenMP >= 60  &&
+Tok.is(tok::identifier) && PP.getSpelling(Tok) == "original" &&
+NextToken().is(tok::l_paren)) {
+  // Parse original(private) modifier.
+  ConsumeToken(); 
+  BalancedDelimiterTracker ParenT(*this, tok::l_paren, tok::r_paren);
+  ParenT.consumeOpen();
+  if (Tok.is(tok::kw_private)) {
+Data.OriginalSharingModifier = OMPC_ORIGINAL_SHARING_private;
+Data.OriginalSharingModifierLoc = Tok.getLocation();
+ConsumeToken(); 
+  }
+  else if (Tok.is(tok::

[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-11 Thread CHANDRA GHALE via cfe-commits


@@ -4668,6 +4668,34 @@ bool Parser::ParseOpenMPVarList(OpenMPDirectiveKind 
DKind,
   assert(Tok.is(tok::comma) && "Expected comma.");
   (void)ConsumeToken();
 }
+// Handle original(private / shared) Modifier
+if (Kind == OMPC_reduction && getLangOpts().OpenMP >= 60 &&
+Tok.is(tok::identifier) && PP.getSpelling(Tok) == "original" &&
+NextToken().is(tok::l_paren)) {
+  // Parse original(private) modifier.
+  ConsumeToken();
+  BalancedDelimiterTracker ParenT(*this, tok::l_paren, tok::r_paren);
+  ParenT.consumeOpen();
+  if (Tok.is(tok::kw_private)) {
+Data.OriginalSharingModifier = OMPC_ORIGINAL_SHARING_private;
+Data.OriginalSharingModifierLoc = Tok.getLocation();
+ConsumeToken();
+  } else if (Tok.is(tok::identifier) &&
+ (PP.getSpelling(Tok) == "shared" ||
+  PP.getSpelling(Tok) == "default")) {
+Data.OriginalSharingModifier = OMPC_ORIGINAL_SHARING_shared;
+Data.OriginalSharingModifierLoc = Tok.getLocation();
+ConsumeToken();
+  } else {
+Diag(Tok.getLocation(), diag::err_expected)
+<< "'private or shared or default'";
+SkipUntil(tok::r_paren);
+return false;
+  }
+  ParenT.consumeClose();
+  assert(Tok.is(tok::comma) && "Expected comma.");
+  (void)ConsumeToken();
+}

chandraghale wrote:

@alexey-bataev I'm sorry, I didn't understand your comment. The changes are 
made to consume the newly added original-sharing-modifier [OpenMP 6.0 7.6.10].
For example:
```
#pragma omp parallel reduction(original(private), +: x)
#pragma omp parallel reduction(original(shared), *: y)
```
Isn't it necessary for the parser to recognize the original(...) syntax and 
record the modifier? I suppose DSAStack does not natively handle this 
modifier.

https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Inital support for privavate variable reduction (PR #127740)

2025-03-05 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/127740
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] default clause replaced by otherwise clause for metadirective in OpenMP 5.2 (PR #125648)

2025-02-21 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

Merging, as this is already approved.

https://github.com/llvm/llvm-project/pull/125648
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] default clause replaced by otherwise clause for metadirective in OpenMP 5.2 (PR #125648)

2025-02-21 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale closed 
https://github.com/llvm/llvm-project/pull/125648
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0] Parse/Sema support for reduction over private variable with reduction clause. (PR #129938)

2025-03-27 Thread CHANDRA GHALE via cfe-commits


@@ -18933,12 +18945,35 @@ static bool actOnOMPReductionKindClause(
 reportOriginalDsa(S, Stack, D, DVar);
 continue;
   }
+  // OpenMP 6.0 [ 7.6.10 ]
+  // Support Reduction over private variables with reduction clause.
+  // A list item in a reduction clause can now be private in the enclosing
+  // context. For orphaned constructs it is assumed to be shared unless the
+  // original(private) modifier appears in the clause.
+  DVar = Stack->getImplicitDSA(D, true);
+  bool IsOrphaned = false;
+  OpenMPDirectiveKind CurrDir = Stack->getCurrentDirective();
+  OpenMPDirectiveKind ParentDir = Stack->getParentDirective();
+  // Check if the construct is orphaned (has no enclosing OpenMP context)
+  IsOrphaned = (ParentDir == OMPD_unknown);
+  IsPrivate =
+  ((isOpenMPPrivate(DVar.CKind) && DVar.CKind != OMPC_reduction &&
+isOpenMPWorksharingDirective(CurrDir) &&
+!isOpenMPParallelDirective(CurrDir) &&
+!isOpenMPTeamsDirective(CurrDir) &&
+!isOpenMPSimdDirective(ParentDir)) ||
+   (IsOrphaned && DVar.CKind == OMPC_unknown) ||
+   RD.OrigSharingModifier != OMPC_ORIGINAL_SHARING_shared);
+  // Disable private handling for OpenMP versions <= 5.2
+  if (S.getLangOpts().OpenMP <= 52)
+IsPrivate = false;

chandraghale wrote:

Modified: the IsPrivate check is now applied only for OpenMP versions later 
than 5.2. For versions ≤ 5.2, the DSA private check falls back to the existing 
code.

https://github.com/llvm/llvm-project/pull/129938
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-12 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-07 Thread CHANDRA GHALE via cfe-commits


@@ -0,0 +1,538 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals all --include-generated-funcs 
--prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 
-x c++ -std=c++17  -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#define N 10
+class Sum {
+  int val;
+public:
+  Sum(int v = 0) : val(v) {}
+  Sum operator+(const Sum& rhs) const {
+return Sum(val + rhs.val);
+  }
+  Sum& operator+=(const Sum& rhs) {
+val += rhs.val;
+return *this;
+  }
+};
+#pragma omp declare reduction(sum_reduction : Sum : omp_out += omp_in) 
initializer(omp_priv = Sum(0))
+
+void func_red(){
+  Sum result(0);
+  Sum array[N];
+
+  for (int i = 0; i < N; i++) {
+array[i] = Sum(i);
+  }
+
+  #pragma omp parallel private(result)  num_threads(4)
+  {
+  #pragma omp  for reduction(sum_reduction:result)
+  for (int i = 0; i < N; i++) {
+result = result + array[i];
+  }
+  }
+}
+
+void do_red(int n, int *v, int &sum_v)
+ {
+ sum_v = 0;
+#pragma omp for reduction(original(private),+: sum_v)

chandraghale wrote:

Updated !!


https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-07 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,234 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  //  Create a shared global variable (__shared_reduction_var) to accumulate 
the
+  //  final result.
+  //
+  //  Call __kmpc_barrier to synchronize threads before initialization.
+  //
+  //  The master thread (thread_id == 0) initializes __shared_reduction_var
+  //with the identity value or initializer.
+  //
+  //  Call __kmpc_barrier to synchronize before combining.
+  //  For each i:
+  //- Thread enters critical section.
+  //- Reads its private value from LHSExprs[i].
+  //- Updates __shared_reduction_var[i] = 
RedOp_i(__shared_reduction_var[i],
+  //LHSExprs[i]).
+  //- Exits critical section.
+  //
+  //  Call __kmpc_barrier after combining.
+  //
+  //  Each thread copies __shared_reduction_var[i] back to LHSExprs[i].
+  //
+  //  Final __kmpc_barrier to synchronize after broadcasting
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  llvm::Constant *InitVal = nullptr;
+  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps[0]);
+  // Determine the initial value for the shared reduction variable
+  if (!UDR) {
+InitVal = llvm::Constant::getNullValue(LLVMType);
+if (const auto *DRE = dyn_cast(Privates[0])) {
+  if (const auto *VD = dyn_cast(DRE->getDecl())) {
+const Expr *InitExpr = VD->getInit();
+if (InitExpr && !PrivateType->isAggregateType() &&
+!PrivateType->isAnyComplexType()) {
+  Expr::EvalResult Result;
+  if (InitExpr->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt())
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+  }
+}
+  }
+}
+  } else {
+InitVal = llvm::Constant::getNullValue(LLVMType);
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  // First barrier to ensure all threads are ready.
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+  // Initialize the shared variable by the master thread.
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+
+  auto EmitSharedInit = [&]() {
+if (const auto *DRE = dyn_cast(Privates[0])) {

chandraghale wrote:

I think so, yes: the other private values contribute their values to the shared 
SharedResult during the reduction accumulation. As I understand it, they do not 
explicitly determine the initial value.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-07 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,234 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  //  Create a shared global variable (__shared_reduction_var) to accumulate 
the
+  //  final result.
+  //
+  //  Call __kmpc_barrier to synchronize threads before initialization.
+  //
+  //  The master thread (thread_id == 0) initializes __shared_reduction_var
+  //with the identity value or initializer.
+  //
+  //  Call __kmpc_barrier to synchronize before combining.
+  //  For each i:
+  //- Thread enters critical section.
+  //- Reads its private value from LHSExprs[i].
+  //- Updates __shared_reduction_var[i] = 
RedOp_i(__shared_reduction_var[i],
+  //LHSExprs[i]).
+  //- Exits critical section.
+  //
+  //  Call __kmpc_barrier after combining.

chandraghale wrote:

Agreed. Handling simple cases with atomics for performance is a good idea. I 
would like to address this in a follow-up patch. Currently, the synchronization 
generated here is independent of the `nowait` clause.


https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-07 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,234 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  //  Create a shared global variable (__shared_reduction_var) to accumulate 
the
+  //  final result.
+  //
+  //  Call __kmpc_barrier to synchronize threads before initialization.

chandraghale wrote:

Agreed , Done !!

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-07 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,238 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(

chandraghale wrote:

- Updated the code with comments and clarified the logic
- Updated OpenMPSupport.rst and the release notes.
- Unified the logic as much as possible.
- In regular reduction codegen, threads update a shared variable directly. With 
private reduction, each thread first works with its own private copy, and then 
all these partial results are combined into a shared variable. The final result 
is copied back to each thread’s original variable.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-08 Thread CHANDRA GHALE via cfe-commits


@@ -4899,6 +4899,234 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, ArrayRef Privates,
+ArrayRef LHSExprs, ArrayRef RHSExprs,
+ArrayRef ReductionOps) {
+  if (LHSExprs.empty() || Privates.empty() || ReductionOps.empty())
+return;
+
+  if (LHSExprs.size() != Privates.size() ||
+  LHSExprs.size() != ReductionOps.size())
+return;
+
+  //  Create a shared global variable (__shared_reduction_var) to accumulate 
the
+  //  final result.
+  //
+  //  Call __kmpc_barrier to synchronize threads before initialization.
+  //
+  //  The master thread (thread_id == 0) initializes __shared_reduction_var
+  //with the identity value or initializer.
+  //
+  //  Call __kmpc_barrier to synchronize before combining.
+  //  For each i:
+  //- Thread enters critical section.
+  //- Reads its private value from LHSExprs[i].
+  //- Updates __shared_reduction_var[i] = 
RedOp_i(__shared_reduction_var[i],
+  //LHSExprs[i]).
+  //- Exits critical section.
+  //
+  //  Call __kmpc_barrier after combining.
+  //
+  //  Each thread copies __shared_reduction_var[i] back to LHSExprs[i].
+  //
+  //  Final __kmpc_barrier to synchronize after broadcasting
+  QualType PrivateType = Privates[0]->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  llvm::Constant *InitVal = nullptr;
+  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps[0]);
+  // Determine the initial value for the shared reduction variable
+  if (!UDR) {
+InitVal = llvm::Constant::getNullValue(LLVMType);
+if (const auto *DRE = dyn_cast(Privates[0])) {
+  if (const auto *VD = dyn_cast(DRE->getDecl())) {
+const Expr *InitExpr = VD->getInit();
+if (InitExpr && !PrivateType->isAggregateType() &&
+!PrivateType->isAnyComplexType()) {
+  Expr::EvalResult Result;
+  if (InitExpr->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt())
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+  }
+}
+  }
+}
+  } else {
+InitVal = llvm::Constant::getNullValue(LLVMType);
+  }
+
+  // Create an internal shared variable
+  std::string SharedName = getName({"internal_private_var"});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::CommonLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  // First barrier to ensure all threads are ready.
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+  CGM.getModule(), OMPRTL___kmpc_barrier),
+  BarrierArgs);
+  // Initialize the shared variable by the master thread.
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+
+  auto EmitSharedInit = [&]() {
+if (const auto *DRE = dyn_cast(Privates[0])) {

chandraghale wrote:

Yes, I guess I hadn't handled this correctly. I updated the code to iterate over 
all privates to support multiple reduced variables. I tested it with a few 
cases, where it worked as expected.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits


@@ -0,0 +1,93 @@
+//RUN: %libomp-cxx-compile -fopenmp-version=60  && %libomp-run
+#include 
+#include 
+#include "omp_testsuite.h"
+
+#define N 10
+class Sum {
+  int val;
+
+public:
+  Sum(int v = 0) : val(v) {}
+  Sum operator+(const Sum &rhs) const { return Sum(val + rhs.val); }
+  Sum &operator+=(const Sum &rhs) {
+val += rhs.val;
+return *this;
+  }
+  int getValue() const { return val; }
+};
+
+// Declare OpenMP reduction
+#pragma omp declare reduction(sum_reduction:Sum : omp_out += omp_in)   
\
+initializer(omp_priv = Sum(0))
+
+int checkUserDefinedReduction() {
+  Sum final_result_udr(0);
+  Sum array_sum[N];
+  int error_flag = 0;
+  int expected_value = 0;
+  for (int i = 0; i < N; ++i) {
+array_sum[i] = Sum(i);
+expected_value += i; // Calculate expected sum: 0 + 1 + ... + (N-1)
+  }
+#pragma omp parallel num_threads(4)
+  {
+#pragma omp for reduction(sum_reduction : final_result_udr)

chandraghale wrote:

Added one reduction var. 

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits


@@ -530,6 +530,12 @@ OpenMP Support
 - Added support 'no_openmp_constructs' assumption clause.
 - Added support for 'self_maps' in map and requirement clause.
 - Added support for 'omp stripe' directive.
+- Fixed a crashing bug with ``omp unroll partial`` if the argument to
+  ``partial`` was an invalid expression. (#GH139267)
+- Fixed a crashing bug with ``omp tile sizes`` if the argument to ``sizes`` was
+  an invalid expression. (#GH139073)
+- Fixed a crashing bug with ``omp distribute dist_schedule`` if the argument to
+  ``dist_schedule`` was not strictly positive. (#GH139266)

chandraghale wrote:

Yeah, I was trying to resolve a merge conflict in this file. I will resolve 
this.

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits


@@ -0,0 +1,93 @@
+//RUN: %libomp-cxx-compile -fopenmp-version=60  && %libomp-run
+#include 
+#include 
+#include "omp_testsuite.h"
+
+#define N 10
+class Sum {
+  int val;
+
+public:
+  Sum(int v = 0) : val(v) {}
+  Sum operator+(const Sum &rhs) const { return Sum(val + rhs.val); }
+  Sum &operator+=(const Sum &rhs) {
+val += rhs.val;
+return *this;
+  }
+  int getValue() const { return val; }
+};
+
+// Declare OpenMP reduction
+#pragma omp declare reduction(sum_reduction:Sum : omp_out += omp_in)   
\
+initializer(omp_priv = Sum(0))

chandraghale wrote:

Done !! 

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits


@@ -0,0 +1,93 @@
+//RUN: %libomp-cxx-compile -fopenmp-version=60  && %libomp-run
+#include 
+#include 
+#include "omp_testsuite.h"
+
+#define N 10
+class Sum {
+  int val;
+
+public:
+  Sum(int v = 0) : val(v) {}
+  Sum operator+(const Sum &rhs) const { return Sum(val + rhs.val); }
+  Sum &operator+=(const Sum &rhs) {
+val += rhs.val;
+return *this;
+  }
+  int getValue() const { return val; }
+};
+
+// Declare OpenMP reduction
+#pragma omp declare reduction(sum_reduction:Sum : omp_out += omp_in)   
\
+initializer(omp_priv = Sum(0))
+
+int checkUserDefinedReduction() {
+  Sum final_result_udr(0);
+  Sum array_sum[N];
+  int error_flag = 0;
+  int expected_value = 0;
+  for (int i = 0; i < N; ++i) {
+array_sum[i] = Sum(i);
+expected_value += i; // Calculate expected sum: 0 + 1 + ... + (N-1)
+  }
+#pragma omp parallel num_threads(4)
+  {
+#pragma omp for reduction(sum_reduction : final_result_udr)
+for (int i = 0; i < N; ++i) {
+  final_result_udr += array_sum[i];
+}
+
+if (final_result_udr.getValue() != expected_value)
+  error_flag += 1;
+  }
+  return error_flag;
+}
+
+void performReductions(int n_elements, const int *input_values,
+   int &sum_val_out, int &prod_val_out,
+   float &float_sum_val_out) {
+  // private variables for this thread's reduction.
+  sum_val_out = 0;
+  prod_val_out = 1;
+  float_sum_val_out = 0.0f;
+
+  const float kPiValue = 3.14f;
+#pragma omp for reduction(original(private), + : sum_val_out)  
\

chandraghale wrote:

Isn't this already reducing over two more variables, `*: prod_val_out` and 
`+: float_sum_val_out`?

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits

https://github.com/chandraghale updated 
https://github.com/llvm/llvm-project/pull/134709



  



Rate limit · GitHub


  body {
background-color: #f6f8fa;
color: #24292e;
font-family: -apple-system,BlinkMacSystemFont,Segoe 
UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;
font-size: 14px;
line-height: 1.5;
margin: 0;
  }

  .container { margin: 50px auto; max-width: 600px; text-align: center; 
padding: 0 24px; }

  a { color: #0366d6; text-decoration: none; }
  a:hover { text-decoration: underline; }

  h1 { line-height: 60px; font-size: 48px; font-weight: 300; margin: 0px; 
text-shadow: 0 1px 0 #fff; }
  p { color: rgba(0, 0, 0, 0.5); margin: 20px 0 40px; }

  ul { list-style: none; margin: 25px 0; padding: 0; }
  li { display: table-cell; font-weight: bold; width: 1%; }

  .logo { display: inline-block; margin-top: 35px; }
  .logo-img-2x { display: none; }
  @media
  only screen and (-webkit-min-device-pixel-ratio: 2),
  only screen and (   min--moz-device-pixel-ratio: 2),
  only screen and ( -o-min-device-pixel-ratio: 2/1),
  only screen and (min-device-pixel-ratio: 2),
  only screen and (min-resolution: 192dpi),
  only screen and (min-resolution: 2dppx) {
.logo-img-1x { display: none; }
.logo-img-2x { display: inline-block; }
  }

  #suggestions {
margin-top: 35px;
color: #ccc;
  }
  #suggestions a {
color: #66;
font-weight: 200;
font-size: 14px;
margin: 0 10px;
  }


  
  



  Whoa there!
  You have exceeded a secondary rate limit.
Please wait a few minutes before you try again;
in some cases this may take up to an hour.
  
  
Contact Support (https://support.github.com/contact) —
GitHub Status (https://githubstatus.com) —
@githubstatus (https://twitter.com/githubstatus)
  

  

  

  

  

  


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-09 Thread CHANDRA GHALE via cfe-commits

chandraghale wrote:

> Can you add a few runtime tests, some with UDR with classes, several 
> reduction items with the different types, to check that functional part works 
> correctly? You can add it to the libomp right in this patch. Just want to be 
> sure we do not miss anything here

Sure! Added a few runtime tests. 
@alexey-bataev 

https://github.com/llvm/llvm-project/pull/134709
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [openmp] [OpenMP 6.0 ]Codegen for Reduction over private variables with reduction clause (PR #134709)

2025-05-16 Thread CHANDRA GHALE via cfe-commits


@@ -4898,6 +4898,273 @@ void 
CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
   }
 }
 
+void CGOpenMPRuntime::emitPrivateReduction(
+CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
+const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
+
+  //  Create a shared global variable (__shared_reduction_var) to accumulate 
the
+  //  final result.
+  //
+  //  Call __kmpc_barrier to synchronize threads before initialization.
+  //
+  //  The master thread (thread_id == 0) initializes __shared_reduction_var
+  //with the identity value or initializer.
+  //
+  //  Call __kmpc_barrier to synchronize before combining.
+  //  For each i:
+  //- Thread enters critical section.
+  //- Reads its private value from LHSExprs[i].
+  //- Updates __shared_reduction_var[i] = 
RedOp_i(__shared_reduction_var[i],
+  //LHSExprs[i]).
+  //- Exits critical section.
+  //
+  //  Call __kmpc_barrier after combining.
+  //
+  //  Each thread copies __shared_reduction_var[i] back to LHSExprs[i].
+  //
+  //  Final __kmpc_barrier to synchronize after broadcasting
+  QualType PrivateType = Privates->getType();
+  llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+  llvm::Constant *InitVal = nullptr;
+  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
+  // Determine the initial value for the shared reduction variable
+  if (!UDR) {
+InitVal = llvm::Constant::getNullValue(LLVMType);
+if (const auto *DRE = dyn_cast(Privates)) {
+  if (const auto *VD = dyn_cast(DRE->getDecl())) {
+const Expr *InitExpr = VD->getInit();
+if (InitExpr) {
+  Expr::EvalResult Result;
+  if (InitExpr->EvaluateAsRValue(Result, CGF.getContext())) {
+APValue &InitValue = Result.Val;
+if (InitValue.isInt())
+  InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+else if (InitValue.isFloat())
+  InitVal = llvm::ConstantFP::get(LLVMType, InitValue.getFloat());
+else if (InitValue.isComplexInt()) {
+  // For complex int: create struct { real, imag }
+  llvm::Constant *Real = llvm::ConstantInt::get(
+  cast(LLVMType)->getElementType(0),
+  InitValue.getComplexIntReal());
+  llvm::Constant *Imag = llvm::ConstantInt::get(
+  cast(LLVMType)->getElementType(1),
+  InitValue.getComplexIntImag());
+  InitVal = llvm::ConstantStruct::get(
+  cast(LLVMType), {Real, Imag});
+} else if (InitValue.isComplexFloat()) {
+  llvm::Constant *Real = llvm::ConstantFP::get(
+  cast(LLVMType)->getElementType(0),
+  InitValue.getComplexFloatReal());
+  llvm::Constant *Imag = llvm::ConstantFP::get(
+  cast(LLVMType)->getElementType(1),
+  InitValue.getComplexFloatImag());
+  InitVal = llvm::ConstantStruct::get(
+  cast(LLVMType), {Real, Imag});
+}
+  }
+}
+  }
+}
+  } else {
+InitVal = llvm::Constant::getNullValue(LLVMType);
+  }
+  std::string ReductionVarNameStr;
+  if (const auto *DRE = dyn_cast(Privates->IgnoreParenCasts()))
+ReductionVarNameStr = DRE->getDecl()->getNameAsString();
+  else
+ReductionVarNameStr = "unnamed_priv_var";
+
+  // Create an internal shared variable
+  std::string SharedName =
+  CGM.getOpenMPRuntime().getName({"internal_pivate_", 
ReductionVarNameStr});
+  llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+  CGM.getModule(), LLVMType, false, llvm::GlobalValue::InternalLinkage,
+  InitVal, ".omp.reduction." + SharedName, nullptr,
+  llvm::GlobalVariable::NotThreadLocal);
+
+  SharedVar->setAlignment(
+  llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+  Address SharedResult(SharedVar, SharedVar->getValueType(),
+   CGF.getContext().getTypeAlignInChars(PrivateType));
+
+  llvm::Value *ThreadId = getThreadID(CGF, Loc);
+  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+  llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+  ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+  CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+  CGF.EmitBlock(InitBB);
+
+  auto EmitSharedInit = [&]() {
+if (UDR) { // Check if it's a User-Defined Reduction
+  if (const Expr *UDRInitExpr = UDR->getInitializer()) {
+std::pair FnPair =
+getUserDefinedReduction(UDR);
+llvm::Function *InitializerFn = FnPair.second;
+if (InitializerFn) {
+  if (const auto *CE =
+  dyn_cas

  1   2   >