This revision was automatically updated to reflect the committed changes.
Closed by commit rG738bab743b5c: [OPENMP]Add support for allocate vars in 
untied tasks. (authored by ABataev).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D86558/new/

https://reviews.llvm.org/D86558

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/test/OpenMP/allocate_codegen.cpp
  clang/test/OpenMP/for_lastprivate_codegen.cpp
  clang/test/OpenMP/for_linear_codegen.cpp
  clang/test/OpenMP/for_reduction_codegen_UDR.cpp
  clang/test/OpenMP/parallel_firstprivate_codegen.cpp
  clang/test/OpenMP/parallel_private_codegen.cpp
  clang/test/OpenMP/task_codegen.cpp

Index: clang/test/OpenMP/task_codegen.cpp
===================================================================
--- clang/test/OpenMP/task_codegen.cpp
+++ clang/test/OpenMP/task_codegen.cpp
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -DUNTIEDRT | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s -DUNTIEDRT
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT
 //
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -fopenmp-enable-irbuilder -x c++ -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
@@ -14,6 +14,19 @@
 #ifndef HEADER
 #define HEADER
 
+enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+};
+
 // CHECK-DAG: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
 // CHECK-DAG: [[STRUCT_SHAREDS:%.+]] = type { i8*, [2 x [[STRUCT_S:%.+]]]* }
 // CHECK-DAG: [[STRUCT_SHAREDS1:%.+]] = type { [2 x [[STRUCT_S:%.+]]]* }
@@ -258,21 +271,26 @@
     a = 4;
     c = 5;
   }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i32 0, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
 // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 {{%.*}}, i8* [[ORIG_TASK_PTR]])
-#pragma omp task untied
+#pragma omp task untied firstprivate(c) allocate(omp_pteam_mem_alloc:c)
   {
-    S s1;
+    S s1, s2;
+#ifdef UNTIEDRT
+#pragma omp allocate(s2) allocator(omp_pteam_mem_alloc)
+#endif
+    s2.a = 0;
 #pragma omp task
-    a = 4;
+    a = c = 4;
 #pragma omp taskyield
     s1 = S();
+    s2.a = 10;
 #pragma omp taskwait
   }
   return a;
 }
 // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1)
-// CHECK: store i32 15, i32* [[A_PTR:@.+]]
+// CHECK: store i32 15, i32* [[A_PTR:@.+]],
 // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]]
 // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8
 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}}
@@ -294,10 +312,13 @@
 // CHECK: define internal i32
 // CHECK: store i32 4, i32* [[A_PTR]]
 
-// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1)
+// CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %{{.+}})
 // UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*,
-// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]])
-// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]],
+// UNTIEDRT: [[S2_ADDR_PTR_REF:%.+]] = alloca %struct.S**,
+// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]], %struct.S*** [[S2_ADDR_PTR_REF]])
+// UNTIEDRT-DAG: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]],
+// UNTIEDRT-DAG: [[S2_ADDR_PTR:%.+]] = load %struct.S**, %struct.S*** [[S2_ADDR_PTR_REF]],
+// UNTIEDRT-DAG: [[S2_ADDR:%.+]] = load %struct.S*, %struct.S** [[S2_ADDR_PTR]],
 // CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [
 
 // CHECK: [[DONE]]:
@@ -309,16 +330,25 @@
 // UNTIEDRT: br label %[[EXIT:[^,]+]]
 
 // UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]])
+// UNTIEDRT: [[S2_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 %{{.+}}, i64 4, i8* inttoptr (i64 7 to i8*))
+// UNTIEDRT: [[S2_PTR:%.+]] = bitcast i8* [[S2_VOID_PTR]] to %struct.S*
+// UNTIEDRT: store %struct.S* [[S2_PTR]], %struct.S** [[S2_ADDR_PTR]],
+// UNTIEDRT: load i32*, i32** %
+// UNTIEDRT: store i32 2, i32* %
+// UNTIEDRT: call i32 @__kmpc_omp_task(%
+// UNTIEDRT: br label %[[EXIT]]
+
+// UNTIEDRT: call void [[CONSTR]](%struct.S* [[S2_ADDR]])
 // CHECK: call i8* @__kmpc_omp_task_alloc(
 // CHECK: call i32 @__kmpc_omp_task(%
 // CHECK: load i32*, i32** %
-// CHECK: store i32 2, i32* %
+// CHECK: store i32 {{2|3}}, i32* %
 // CHECK: call i32 @__kmpc_omp_task(%
 // UNTIEDRT: br label %[[EXIT]]
 
 // CHECK: call i32 @__kmpc_omp_taskyield(%
 // CHECK: load i32*, i32** %
-// CHECK: store i32 3, i32* %
+// CHECK: store i32 {{3|4}}, i32* %
 // CHECK: call i32 @__kmpc_omp_task(%
 // UNTIEDRT: br label %[[EXIT]]
 
@@ -331,10 +361,13 @@
 
 // CHECK: call i32 @__kmpc_omp_taskwait(%
 // CHECK: load i32*, i32** %
-// CHECK: store i32 4, i32* %
+// CHECK: store i32 {{4|5}}, i32* %
 // CHECK: call i32 @__kmpc_omp_task(%
 // UNTIEDRT: br label %[[EXIT]]
 
+// UNTIEDRT: call void [[DESTR]](%struct.S* [[S2_ADDR]])
+// UNTIEDRT: [[S2_VOID_PTR:%.+]] = bitcast %struct.S* [[S2_ADDR]] to i8*
+// UNTIEDRT: call void @__kmpc_free(i32 %{{.+}}, i8* [[S2_VOID_PTR]], i8* inttoptr (i64 7 to i8*))
 // UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]])
 // CHECK: br label %[[CLEANUP]]
 
Index: clang/test/OpenMP/parallel_private_codegen.cpp
===================================================================
--- clang/test/OpenMP/parallel_private_codegen.cpp
+++ clang/test/OpenMP/parallel_private_codegen.cpp
@@ -361,12 +361,13 @@
 // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_PTR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
 // CHECK: [[A_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 2 to i8*))
-// CHECK: [[A_PRIV:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32*
-// CHECK: store i{{[0-9]+}}* [[A_PRIV]], i{{[0-9]+}}** [[REF:%.+]],
+// CHECK: [[A_PRIV_ADDR:%.+]] = bitcast i8* [[A_VOID_PTR]] to i32*
+// CHECK: store i{{[0-9]+}}* [[A_PRIV_ADDR]], i{{[0-9]+}}** [[REF:%.+]],
 // CHECK-NEXT: [[A_PRIV:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[REF]],
 // CHECK-NEXT: [[A_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A_PRIV]],
 // CHECK-NEXT: [[INC:%.+]] = add nsw i{{[0-9]+}} [[A_VAL]], 1
 // CHECK-NEXT: store i{{[0-9]+}} [[INC]], i{{[0-9]+}}* [[A_PRIV]],
+// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_PRIV_ADDR]] to i8*
 // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 2 to i8*))
 // CHECK-NEXT: ret void
 
Index: clang/test/OpenMP/parallel_firstprivate_codegen.cpp
===================================================================
--- clang/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ clang/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -423,6 +423,7 @@
 // CHECK-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]],
 // CHECK:    store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]],
 // CHECK:    store i32 0, i32* [[T_VAR_PRIV]],
+// CHECK:    [[T_VAR_VOID_PTR:%.+]] = bitcast i32* [[T_VAR_PRIV]] to i8*
 // CHECK:    call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*))
 // CHECK:    ret void
 
@@ -584,6 +585,7 @@
 // ARRAY: [[SIZE:%.+]] = mul nuw i64 %{{.+}}, 8
 // ARRAY: [[BC:%.+]] = bitcast double* [[VLA2_PTR]] to i8*
 // ARRAY: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[BC]], i8* align 128 %{{.+}}, i64 [[SIZE]], i1 false)
+// ARRAY: [[VLA2_VOID_PTR:%.+]] = bitcast double* [[VLA2_PTR]] to i8*
 // ARRAY: call void @__kmpc_free(i32 [[GTID]], i8* [[VLA2_VOID_PTR]], i8* inttoptr (i64 8 to i8*))
 // ARRAY-NEXT: ret void
 #endif
Index: clang/test/OpenMP/for_reduction_codegen_UDR.cpp
===================================================================
--- clang/test/OpenMP/for_reduction_codegen_UDR.cpp
+++ clang/test/OpenMP/for_reduction_codegen_UDR.cpp
@@ -876,6 +876,7 @@
 // CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 4
 
 // CHECK: store [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [4 x [[S_FLOAT_TY]]]** %
+// CHECK: [[VAR3_VOID_PTR:%.+]] = bitcast [4 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]] to i8*
 // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[VAR3_VOID_PTR]], i8* inttoptr (i64 6 to i8*))
 // CHECK: ret void
 
Index: clang/test/OpenMP/for_linear_codegen.cpp
===================================================================
--- clang/test/OpenMP/for_linear_codegen.cpp
+++ clang/test/OpenMP/for_linear_codegen.cpp
@@ -414,6 +414,7 @@
 // CHECK: [[ADD:%.+]] = add nsw i64 [[LVAR_VAL]], 3
 // CHECK: store i64 [[ADD]], i64* [[LVAR_PRIV]],
 // CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+// CHECK: [[LVAR_VOID_PTR:%.+]] = bitcast i64* [[LVAR_PRIV]] to i8*
 // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[LVAR_VOID_PTR]], i8* inttoptr (i64 5 to i8*))
 // CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 // CHECK: ret void
Index: clang/test/OpenMP/for_lastprivate_codegen.cpp
===================================================================
--- clang/test/OpenMP/for_lastprivate_codegen.cpp
+++ clang/test/OpenMP/for_lastprivate_codegen.cpp
@@ -654,7 +654,8 @@
 // CHECK-NEXT: br label %[[LAST_DONE]]
 // CHECK: [[LAST_DONE]]
 
-// CHECK:      call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*))
+// CHECK: [[F_VOID_PTR:%.+]] = bitcast float* [[F_PRIV]] to i8*
+// CHECK-NEXT:      call void @__kmpc_free(i32 [[GTID]], i8* [[F_VOID_PTR]], i8* inttoptr (i64 3 to i8*))
 // CHECK-NEXT: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 // CHECK-NEXT: ret void
 
Index: clang/test/OpenMP/allocate_codegen.cpp
===================================================================
--- clang/test/OpenMP/allocate_codegen.cpp
+++ clang/test/OpenMP/allocate_codegen.cpp
@@ -85,6 +85,7 @@
 // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
 // CHECK:      store i32 %{{.+}}, i32* [[V_ADDR]],
 // CHECK-NEXT: [[V_VAL:%.+]] = load i32, i32* [[V_ADDR]],
+// CHECK-NEXT: [[V_VOID_ADDR:%.+]] = bitcast i32* [[V_ADDR]] to i8*
 // CHECK-NEXT: call void @__kmpc_free(i32 [[GTID]], i8* [[V_VOID_ADDR]], i8* inttoptr (i64 6 to i8*))
 // CHECK-NOT:  {{__kmpc_alloc|__kmpc_free}}
 // CHECK:      ret i32 [[V_VAL]]
@@ -101,7 +102,9 @@
 // CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float**
 // CHECK: store float* %{{.+}}, float** [[Z_ADDR]],
 #pragma omp allocate(a,z) allocator(omp_default_mem_alloc)
+// CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
 // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
+// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
 // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
 // CHECK: ret void
 }
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1563,6 +1563,17 @@
                                               CapturedVars, IfCond);
 }
 
+static bool isAllocatableDecl(const VarDecl *VD) {
+  const VarDecl *CVD = VD->getCanonicalDecl();
+  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+    return false;
+  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+  // Use the default allocation.
+  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
+            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
+           !AA->getAllocator());
+}
+
 static void emitEmptyBoundParameters(CodeGenFunction &,
                                      const OMPExecutableDirective &,
                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
@@ -1575,12 +1586,7 @@
   if (!VD)
     return Address::invalid();
   const VarDecl *CVD = VD->getCanonicalDecl();
-  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
-    return Address::invalid();
-  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
-  // Use the default allocation.
-  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
-      !AA->getAllocator())
+  if (!isAllocatableDecl(CVD))
     return Address::invalid();
   llvm::Value *Size;
   CharUnits Align = CGM.getContext().getDeclAlign(CVD);
@@ -1596,6 +1602,7 @@
     Size = CGM.getSize(Sz.alignTo(Align));
   }
 
+  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
   assert(AA->getAllocator() &&
          "Expected allocator expression for non-default allocator.");
   llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
@@ -3931,7 +3938,8 @@
   auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                     CapturedRegion](CodeGenFunction &CGF,
                                     PrePostActionTy &Action) {
-    llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> UntiedLocalVars;
+    llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>>
+        UntiedLocalVars;
     // Set proper addresses for generated private copies.
     OMPPrivateScope Scope(CGF);
     llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
@@ -3976,9 +3984,11 @@
         QualType Ty = VD->getType().getNonReferenceType();
         if (VD->getType()->isLValueReferenceType())
           Ty = CGF.getContext().getPointerType(Ty);
+        if (isAllocatableDecl(VD))
+          Ty = CGF.getContext().getPointerType(Ty);
         Address PrivatePtr = CGF.CreateMemTemp(
             CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
-        UntiedLocalVars.try_emplace(VD, PrivatePtr);
+        UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid());
         CallArgs.push_back(PrivatePtr.getPointer());
       }
       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
@@ -4002,9 +4012,18 @@
       // Adjust mapping for internal locals by mapping actual memory instead of
       // a pointer to this memory.
       for (auto &Pair : UntiedLocalVars) {
-        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
-                            CGF.getContext().getDeclAlign(Pair.first));
-        Pair.getSecond() = Replacement;
+        if (isAllocatableDecl(Pair.first)) {
+          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
+          Address Replacement(Ptr, CGF.getPointerAlign());
+          Pair.getSecond().first = Replacement;
+          Ptr = CGF.Builder.CreateLoad(Replacement);
+          Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
+          Pair.getSecond().second = Replacement;
+        } else {
+          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
+          Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
+          Pair.getSecond().first = Replacement;
+        }
       }
     }
     if (Data.Reductions) {
@@ -4100,7 +4119,7 @@
     }
     (void)InRedScope.Privatize();
 
-    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM,
+    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                              UntiedLocalVars);
     Action.Enter(CGF);
     BodyGen(CGF);
Index: clang/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.h
+++ clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -253,9 +253,9 @@
 
   public:
     UntiedTaskLocalDeclsRAII(
-        CodeGenModule &CGM,
-        const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>
-            &LocalVars);
+        CodeGenFunction &CGF,
+        const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
+                             std::pair<Address, Address>> &LocalVars);
     ~UntiedTaskLocalDeclsRAII();
   };
 
@@ -432,6 +432,8 @@
                                 std::tuple<QualType, const FieldDecl *,
                                            const FieldDecl *, LValue>>>
       LastprivateConditionalToTypes;
+  /// Maps function to the position of the untied task locals stack.
+  llvm::DenseMap<llvm::Function *, unsigned> FunctionToUntiedTaskStackMap;
   /// Type kmp_critical_name, originally defined as typedef kmp_int32
   /// kmp_critical_name[8];
   llvm::ArrayType *KmpCriticalNameTy;
@@ -720,7 +722,8 @@
   llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
 
   using UntiedLocalVarsAddressesMap =
-      llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>;
+      llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
+                     std::pair<Address, Address>>;
   llvm::SmallVector<UntiedLocalVarsAddressesMap, 4> UntiedLocalVarsStack;
 
   /// Stack for list of addresses of declarations in current context marked as
@@ -1882,6 +1885,9 @@
 
   /// Destroys user defined allocators specified in the uses_allocators clause.
   void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator);
+
+  /// Returns true if the variable is a local variable in an untied task.
+  bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const;
 };
 
 /// Class supports emissionof SIMD-only code.
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1526,6 +1526,7 @@
     FunctionUDMMap.erase(I);
   }
   LastprivateConditionalToTypes.erase(CGF.CurFn);
+  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
 }
 
 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
@@ -3382,6 +3383,17 @@
 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
 } // anonymous namespace
 
+static bool isAllocatableDecl(const VarDecl *VD) {
+  const VarDecl *CVD = VD->getCanonicalDecl();
+  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+    return false;
+  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+  // Use the default allocation.
+  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
+            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
+           !AA->getAllocator());
+}
+
 static RecordDecl *
 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
   if (!Privates.empty()) {
@@ -3396,9 +3408,12 @@
       QualType Type = VD->getType().getNonReferenceType();
       // If the private variable is a local variable with lvalue ref type,
       // allocate the pointer instead of the pointee type.
-      if (Pair.second.isLocalPrivate() &&
-          VD->getType()->isLValueReferenceType())
-        Type = C.getPointerType(Type);
+      if (Pair.second.isLocalPrivate()) {
+        if (VD->getType()->isLValueReferenceType())
+          Type = C.getPointerType(Type);
+        if (isAllocatableDecl(VD))
+          Type = C.getPointerType(Type);
+      }
       FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
       if (VD->hasAttrs()) {
         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
@@ -3700,6 +3715,8 @@
     QualType Ty = VD->getType().getNonReferenceType();
     if (VD->getType()->isLValueReferenceType())
       Ty = C.getPointerType(Ty);
+    if (isAllocatableDecl(VD))
+      Ty = C.getPointerType(Ty);
     Args.push_back(ImplicitParamDecl::Create(
         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
@@ -3780,8 +3797,10 @@
   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
   for (const PrivateDataTy &Pair : Privates) {
     // Do not initialize private locals.
-    if (Pair.second.isLocalPrivate())
+    if (Pair.second.isLocalPrivate()) {
+      ++FI;
       continue;
+    }
     const VarDecl *VD = Pair.second.PrivateCopy;
     const Expr *Init = VD->getAnyInitializer();
     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
@@ -4146,8 +4165,12 @@
                          /*PrivateElemInit=*/nullptr));
     ++I;
   }
-  for (const VarDecl *VD : Data.PrivateLocals)
-    Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
+  for (const VarDecl *VD : Data.PrivateLocals) {
+    if (isAllocatableDecl(VD))
+      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
+    else
+      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
+  }
   llvm::stable_sort(Privates,
                     [](const PrivateDataTy &L, const PrivateDataTy &R) {
                       return L.first > R.first;
@@ -11225,44 +11248,27 @@
   return CGF.GetAddrOfLocalVar(NativeParam);
 }
 
-namespace {
-/// Cleanup action for allocate support.
-class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
-public:
-  static const int CleanupArgs = 3;
-
-private:
-  llvm::FunctionCallee RTLFn;
-  llvm::Value *Args[CleanupArgs];
-
-public:
-  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
-                       ArrayRef<llvm::Value *> CallArgs)
-      : RTLFn(RTLFn) {
-    assert(CallArgs.size() == CleanupArgs &&
-           "Size of arguments does not match.");
-    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
-  }
-  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
-    if (!CGF.HaveInsertPoint())
-      return;
-    CGF.EmitRuntimeCall(RTLFn, Args);
-  }
-};
-} // namespace
-
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
   if (!VD)
     return Address::invalid();
+  Address UntiedAddr = Address::invalid();
+  Address UntiedRealAddr = Address::invalid();
+  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
+  if (It != FunctionToUntiedTaskStackMap.end()) {
+    const UntiedLocalVarsAddressesMap &UntiedData =
+        UntiedLocalVarsStack[It->second];
+    auto I = UntiedData.find(VD);
+    if (I != UntiedData.end()) {
+      UntiedAddr = I->second.first;
+      UntiedRealAddr = I->second.second;
+    }
+  }
   const VarDecl *CVD = VD->getCanonicalDecl();
   if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
-    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
     // Use the default allocation.
-    if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
-         AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
-        !AA->getAllocator())
-      return Address::invalid();
+    if (!isAllocatableDecl(VD))
+      return UntiedAddr;
     llvm::Value *Size;
     CharUnits Align = CGM.getContext().getDeclAlign(CVD);
     if (CVD->getType()->isVariablyModifiedType()) {
@@ -11277,43 +11283,80 @@
       Size = CGM.getSize(Sz.alignTo(Align));
     }
     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
     assert(AA->getAllocator() &&
            "Expected allocator expression for non-default allocator.");
     llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
     // According to the standard, the original allocator type is a enum
     // (integer). Convert to pointer type, if required.
-    if (Allocator->getType()->isIntegerTy())
-      Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
-    else if (Allocator->getType()->isPointerTy())
-      Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-          Allocator, CGM.VoidPtrTy);
+    Allocator = CGF.EmitScalarConversion(
+        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
+        AA->getAllocator()->getExprLoc());
     llvm::Value *Args[] = {ThreadID, Size, Allocator};
 
     llvm::Value *Addr =
         CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_alloc),
                             Args, getName({CVD->getName(), ".void.addr"}));
-    llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
-                                                                Allocator};
     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
         CGM.getModule(), OMPRTL___kmpc_free);
-
-    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
-                                                  llvm::makeArrayRef(FiniArgs));
+    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
     Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-        Addr,
-        CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
-        getName({CVD->getName(), ".addr"}));
-    return Address(Addr, Align);
+        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
+    if (UntiedAddr.isValid())
+      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
+
+    // Cleanup action for allocate support.
+    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
+      llvm::FunctionCallee RTLFn;
+      unsigned LocEncoding;
+      Address Addr;
+      const Expr *Allocator;
+
+    public:
+      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
+                           Address Addr, const Expr *Allocator)
+          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
+            Allocator(Allocator) {}
+      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+        if (!CGF.HaveInsertPoint())
+          return;
+        llvm::Value *Args[3];
+        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
+            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
+        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            Addr.getPointer(), CGF.VoidPtrTy);
+        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
+        // According to the standard, the original allocator type is an enum
+        // (integer). Convert to pointer type, if required.
+        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
+                                            CGF.getContext().VoidPtrTy,
+                                            Allocator->getExprLoc());
+        Args[2] = AllocVal;
+
+        CGF.EmitRuntimeCall(RTLFn, Args);
+      }
+    };
+    Address VDAddr =
+        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
+    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
+        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
+        VDAddr, AA->getAllocator());
+    if (UntiedRealAddr.isValid())
+      if (auto *Region =
+              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+        Region->emitUntiedSwitch(CGF);
+    return VDAddr;
   }
-  if (UntiedLocalVarsStack.empty())
-    return Address::invalid();
-  const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back();
-  auto It = UntiedData.find(VD);
-  if (It == UntiedData.end())
-    return Address::invalid();
+  return UntiedAddr;
+}
 
-  return It->second;
+bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
+                                             const VarDecl *VD) const {
+  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
+  if (It == FunctionToUntiedTaskStackMap.end())
+    return false;
+  return UntiedLocalVarsStack[It->second].count(VD) > 0;
 }
 
 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
@@ -11349,11 +11392,14 @@
 }
 
 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
-    CodeGenModule &CGM,
-    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> &LocalVars)
-    : CGM(CGM), NeedToPush(!LocalVars.empty()) {
+    CodeGenFunction &CGF,
+    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
+                         std::pair<Address, Address>> &LocalVars)
+    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
   if (!NeedToPush)
     return;
+  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
+      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
   CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to