fghanim created this revision.
fghanim added a reviewer: jdoerfert.
Herald added subscribers: cfe-commits, sstefan1, guansong, yaxunl.
Herald added a project: clang.
fghanim added a child revision: D79677: [clang][OpenMP][OMPIRBuilder] Adding 
some Privatization clauses to OpenMP `parallel` Directive.
fghanim added a parent revision: D79676: [Clang][OpenMP][OMPBuilder] Moving OMP 
allocation and cache creation code to OMPBuilderCBHelpers.

Add support for the `firstprivate` clause to OMPBuilder's
implementation of `omp parallel`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81484

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/OpenMP/parallel_firstprivate_codegen.cpp

Index: clang/test/OpenMP/parallel_firstprivate_codegen.cpp
===================================================================
--- clang/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ clang/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -1,8 +1,11 @@
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-32,CHECK,CHECK-32
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-32,CHECK,CHECK-32
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-32 %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-32 %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-32,IRBUILDER,IRBUILDER-32
+// RUN: %clang_cc1 -fopenmp  -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp  -fopenmp-enable-irbuilder -x c++ -triple i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-32,IRBUILDER,IRBUILDER-32
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple i386-pc-linux-gnu -emit-pch -o %t %s
@@ -11,11 +14,14 @@
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
 
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-64,CHECK,CHECK-64
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-64,CHECK,CHECK-64
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA -check-prefix=LAMBDA-64 %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS -check-prefix=BLOCKS-64 %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,ALL-64,IRBUILDER,IRBUILDER-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,ALL-64,IRBUILDER,IRBUILDER-64
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-pc-linux-gnu -emit-pch -o %t %s
@@ -62,6 +68,7 @@
   int e[4];
   SS(int &d) : a(0), b(0), c(d) {
 #pragma omp parallel firstprivate(a, b, c, e)
+
 #ifdef LAMBDA
     [&]() {
       ++this->a, --b, (this)->c /= 1;
@@ -119,12 +126,12 @@
   ~S() {}
 };
 
-// CHECK: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
+// ALL: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
 // LAMBDA: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
 // BLOCKS: [[SS_TY:%.+]] = type { i{{[0-9]+}}, i8
-// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
-// CHECK-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
+// ALL-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// ALL-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// ALL-DAG: [[ST_TY:%.+]] = type { i{{[0-9]+}}, i{{[0-9]+}} }
 
 template <typename T>
 T tmain() {
@@ -342,43 +349,72 @@
 #endif
 }
 
-// CHECK: define {{.*}}i{{[0-9]+}} @main()
-// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: [[T_VAR:%.+]] = alloca i32,
-// CHECK: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]],
-// CHECK: [[SIVARCAST:%.+]] = alloca [[iz]],
-// CHECK: [[A:%.+]] = alloca i32,
-// CHECK: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
-// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]],
-// CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST]] to i32*
-// CHECK-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]],
-// CHECK-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST]],
-// CHECK: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST]],
-// CHECK: [[SIVARVAL:%.+]] = load i32, i32* @{{.+}},
-// CHECK-64: [[SIVARCONV:%.+]] = bitcast i64* [[SIVARCAST]] to i32*
-// CHECK-64: store i32 [[SIVARVAL]], i32* [[SIVARCONV]],
-// CHECK-32: store i32 [[SIVARVAL]], i32* [[SIVARCAST]],
-// CHECK: [[SIVARPVT:%.+]] = load [[iz]], [[iz]]* [[SIVARCAST]],
+// ALL: define {{.*}}i{{[0-9]+}} @main()
+// ALL: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// ALL: [[T_VAR:%.+]] = alloca i32,
+// ALL: [[T_VARCAST:%.+]] = alloca [[iz:i64|i32]],
+// ALL: [[SIVARCAST:%.+]] = alloca [[iz]],
+// ALL: [[A:%.+]] = alloca i32,
+// ALL: [[T_VARCAST1:%.+]] = alloca [[iz:i64|i32]],
+// ALL: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// ALL: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]],
+// ALL-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST]] to i32*
+// ALL-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]],
+// ALL-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST]],
+// ALL: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST]],
+// ALL: [[SIVARVAL:%.+]] = load i32, i32* @{{.+}},
+// ALL-64: [[SIVARCONV:%.+]] = bitcast i64* [[SIVARCAST]] to i32*
+// ALL-64: store i32 [[SIVARVAL]], i32* [[SIVARCONV]],
+// ALL-32: store i32 [[SIVARVAL]], i32* [[SIVARCAST]],
+// ALL: [[SIVARPVT:%.+]] = load [[iz]], [[iz]]* [[SIVARCAST]],
 // CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, [[iz]], [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[MAIN_MICROTASK:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]],{{.*}}[[iz]] [[SIVARPVT]]
-// CHECK: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]],
-// CHECK-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST1]] to i32*
-// CHECK-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]],
-// CHECK-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST1]],
-// CHECK: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST1]],
-// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]])* [[MAIN_MICROTASK1:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]])
-// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
-// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
-// CHECK: ret
+// IRBUILDER: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]], [2 x i32]*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]*, i{{[0-9]+}})* [[MAIN_MICROTASK:@.+]] to void {{.*}}{{.*}}[[iz]] [[T_VARPVT]],{{.*}}[[iz]] [[SIVARPVT]]
+// ALL: [[T_VARVAL:%.+]] = load i32, i32* [[T_VAR]],
+// ALL-64: [[T_VARCONV:%.+]] = bitcast i64* [[T_VARCAST1]] to i32*
+// ALL-64: store i32 [[T_VARVAL]], i32* [[T_VARCONV]],
+// ALL-32: store i32 [[T_VARVAL]], i32* [[T_VARCAST1]],
+// ALL: [[T_VARPVT:%.+]] = load [[iz]], [[iz]]* [[T_VARCAST1]],
+// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[iz]])* [[MAIN_MICROTASK1:@.+]] to void {{.*}}[[iz]] [[T_VARPVT]])
+// ALL: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
+// ALL: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
+// ALL: ret
+//
+// IRBUILDER:    define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] [[T_VAR:%.+]])
+// IRBUILDER: 	 [[GTID_LOCAL:%tid.addr.+]] = alloca i32
+// IRBUILDER: 	 [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]]
+// IRBUILDER: 	 store i32 [[GTID_LD]], i32* [[GTID_LOCAL]]
+// IRBUILDER: 	 [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]]
+// IRBUILDER:    store [[iz]] [[T_VAR]], [[iz]]* [[T_VAR_ADDR:%.+]],
+// IRBUILDER-64: [[BC:%.+]] = bitcast [[iz]]* [[T_VAR_ADDR]] to i32*
+// IRBUILDER:		 [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}})
+// IRBUILDER:    [[T_VAR_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], [[iz]] 4, i8* inttoptr ([[iz]] 1 to i8*))
+// IRBUILDER:		 [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}})
+// IRBUILDER:    [[T_VAR_PRIV:%.+]] = bitcast i8* [[T_VAR_VOID_PTR]] to i32*
+// IRBUILDER-32: [[T_VAR_VAL:%.+]] = load i32, i32* [[T_VAR_ADDR]],
+// IRBUILDER-64: [[T_VAR_VAL:%.+]] = load i32, i32* [[BC]],
+// IRBUILDER:    store i32 [[T_VAR_VAL]], i32* [[T_VAR_PRIV]],
+// IRBUILDER:    ret void
+// IRBUILDER:    store i32 0, i32* [[T_VAR_PRIV]],
+// IRBUILDER:    call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*))
 //
 // CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [2 x i32]* nonnull align 4 dereferenceable(8) %{{.+}}, [[iz]] {{.*}}%{{.+}}, [2 x [[S_FLOAT_TY]]]* nonnull align 4 dereferenceable(8) %{{.+}}, [[S_FLOAT_TY]]* nonnull align 4 dereferenceable(4) %{{.+}}, [[iz]] {{.*}}[[SIVAR:%.+]])
-// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// IRBUILDER: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] {{.*}}%{{.+}}, [2 x i32]* [[VEC_REF:%.+]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF:%.+]], [[S_FLOAT_TY]]* %{{.+}}, [[iz]] {{.*}}[[SIVAR:%.+]])
+// ALL: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
 // CHECK: [[SIVAR7_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
-// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
-// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
+// ALL: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// ALL: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// IRBUILDER: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// ALL: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// IRBUILDER: [[SIVAR7_PRIV:%.+]] = alloca i{{[0-9]+}},
+// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32
+// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]]
+// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]]
+// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]]
+// IRBUILDER-64: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_PRIV]] to i32*
+// IRBUILDER: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// IRBUILDER: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
 
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[VEC_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** %
 // CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** %
 // CHECK-64: [[T_VAR_CONV:%.+]] = bitcast i64* [[T_VAR_PRIV]] to i32*
@@ -386,31 +422,35 @@
 // CHECK: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
 // CHECK-NOT: load i{{[0-9]+}}*, i{{[0-9]+}}** %
 // CHECK-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32*
+
 // CHECK: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
 // CHECK: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
-// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]*
-// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
-// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
-// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
-// CHECK: [[S_ARR_BODY]]
-// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
-// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]])
-// CHECK: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]])
-// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
-// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
-// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
+// ALL: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST]], i8* align {{[0-9]+}} [[VEC_SRC]],
+// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// ALL: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_REF]] to [[S_FLOAT_TY]]*
+// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
+// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
+// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
+// IRBUILDER: ret void
+// ALL: [[S_ARR_BODY]]
+// ALL: call {{.*}} [[ST_TY_DEFAULT_CONSTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// ALL: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR:@.+]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]])
+// ALL: call {{.*}} [[ST_TY_DESTR:@.+]]([[ST_TY]]* [[ST_TY_TEMP]])
+// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
+// IRBUILDER: [[VAR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
+// ALL: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// ALL: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
+// IRBUILDER-64: [[SIVAR7_CONV:%.+]] = bitcast i64* [[SIVAR7_PRIV]] to i32*
 // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
 
-// CHECK-64: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_CONV]],
-// CHECK-32: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_PRIV]],
+// ALL-64: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_CONV]],
+// ALL-32: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[SIVAR7_PRIV]],
+// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
 
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
-// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
+// ALL-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
 // CHECK: ret void
 
-
 // CHECK:    define internal void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[iz]] [[T_VAR:%.+]])
 // CHECK:    [[GTID_ADDR:%.+]] = alloca i32*,
 // CHECK:    store [[iz]] [[T_VAR]], [[iz]]* [[T_VAR_ADDR:%.+]],
@@ -426,13 +466,13 @@
 // CHECK:    call void @__kmpc_free(i32 [[GTID]], i8* [[T_VAR_VOID_PTR]], i8* inttoptr ([[iz]] 1 to i8*))
 // CHECK:    ret void
 
-
-// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
-// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
-// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// ALL: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
+// ALL: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// ALL: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
 // CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void
-// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
-// CHECK: ret
+// IRBUILDER: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void
+// ALL: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
+// ALL: ret i{{[0-9]+}}
 //
 // CHECK: define {{.+}} @{{.+}}([[SS_TY]]*
 // CHECK: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
@@ -446,14 +486,70 @@
 // CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 5, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[SS_TY]]*, [[iz]], [[iz]], [[iz]], [4 x i32]*)* [[SS_MICROTASK:@.+]] to void
 // CHECK: ret
 
+// IRBUILDER: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32* %{{.+}}, [2 x i32]* %{{.+}}, [2 x [[S_INT_TY]]]* %{{.+}}, [[S_INT_TY]]* %{{.+}})
+// IRBUILDER: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}}*, align 128
+// IRBUILDER: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}], align 128
+// IRBUILDER: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]], align 128
+// IRBUILDER: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]], align 128
+// IRBUILDER: [[GTID_LOCAL:%tid.addr.+]] = alloca i32
+// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]]
+// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]]
+// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]]
+// IRBUILDER: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %
+// IRBUILDER: [[VEC_DEST:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// IRBUILDER: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_REF]] to i8*
+// IRBUILDER: call void @llvm.memcpy.{{.+}}(i8* align 128 [[VEC_DEST]], i8* align 128 [[VEC_SRC]], i{{[0-9]+}} {{[0-9]+}}, i1
+// IRBUILDER: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// IRBUILDER: [[S_ARR_BEGIN:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_REF]] to [[S_INT_TY]]*
+// IRBUILDER: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
+// IRBUILDER: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
+// IRBUILDER: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
+
+// IRBUILDER: ret void
+
+// IRBUILDER: [[S_ARR_BODY]]
+// IRBUILDER: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// IRBUILDER: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}}, [[ST_TY]]* [[ST_TY_TEMP]])
+// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+// IRBUILDER: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
+// IRBUILDER: [[VAR_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** %
+// IRBUILDER: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
+// IRBUILDER: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
+// IRBUILDER: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_REF]],
+// IRBUILDER: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}*
+// IRBUILDER: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+// IRBUILDER-NOT: call {{.*}}void @__kmpc_barrier(
+
+// IRBUILDER-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*
+
+// IRBUILDER: define {{.+}} @{{.+}}([[SS_TY]]*
+// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
+// IRBUILDER: store i{{[0-9]+}} 0, i{{[0-9]+}}* %
+// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
+// IRBUILDER: store i8
+// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
+// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 0
+// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 1
+// IRBUILDER: getelementptr inbounds [[SS_TY]], [[SS_TY]]* %{{.+}}, i32 0, i32 2
+// IRBUILDER: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 6, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32**, [[iz]], [[iz]], i32**, [[iz]], [4 x i32]**)* [[SS_MICROTASK:@.+]] to void
+// IRBUILDER: ret
+
 // CHECK: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, [[SS_TY]]* %{{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, [4 x i{{[0-9]+}}]* {{.+}})
-// CHECK: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
-// CHECK: [[E_PRIV:%.+]] = alloca [4 x i{{[0-9]+}}],
-// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_PRIV]]
-// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_PRIV]]
-// CHECK: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[C_PRIV]]
+// IRBUILDER: define internal void [[SS_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** %{{.+}}, [[iz]] {{.+}}, [[iz]] {{.+}}, i32** %{{.+}}, [[iz]] {{.+}}, [4 x i{{[0-9]+}}]** {{.+}})
+// ALL: [[A_PRIV:%.+]] = alloca i{{[0-9]+}},
+// ALL: [[B_PRIV:%.+]] = alloca i{{[0-9]+}},
+// ALL: [[C_PRIV:%.+]] = alloca i{{[0-9]+}},
+// ALL: [[E_PRIV:%.+]] = alloca [4 x i{{[0-9]+}}],
+// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[A_PRIV]]
+// IRBUILDER-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV:%.+]] to i32*
+// IRBUILDER-64: store i32* [[A_CONV]], i32** [[REFA:%.+]],
+// IRBUILDER-32: store i32* [[A_PRIV]], i32** [[REFA:%.+]],
+// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[B_PRIV]]
+// IRBUILDER-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV:%.+]] to i32*
+// ALL: store i{{[0-9]+}} {{.+}}, i{{[0-9]+}}* [[C_PRIV]]
+// IRBUILDER-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV:%.+]] to i32*
+// IRBUILDER-64: store i32* [[C_CONV]], i32** [[REFC:%.+]],
+// IRBUILDER-32: store i32* [[C_PRIV]], i32** [[REFC:%.+]],
 // CHECK-64: [[A_CONV:%.+]] = bitcast i64* [[A_PRIV:%.+]] to i32*
 // CHECK-64: [[B_CONV:%.+]] = bitcast i64* [[B_PRIV:%.+]] to i32*
 // CHECK-64: [[C_CONV:%.+]] = bitcast i64* [[C_PRIV:%.+]] to i32*
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -1567,6 +1567,14 @@
 
     using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
 
+    struct CapturedVarInfo {
+
+      enum CapturedVarKind { ByValue, ByRef };
+
+      llvm::Value *PassedValue;
+      CapturedVarKind CapturedKind;
+    };
+
     /// Cleanup action for allocate support.
     class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
 
@@ -1598,9 +1606,35 @@
     /// Gets the OpenMP-specific address of the local variable /p VD.
     static Address getAddressOfLocalVariable(CodeGenFunction &CGF,
                                              const VarDecl *VD);
+
+    /// Emit the firstprivate clause.
+    ///
+    /// \param CGF CodeGenFunction for the function containing the directive.
+    /// \param D The directive the firstprivate clause is associated with.
+    /// \param PrivateScope Scope for all captured variables in the directive.
+    /// \param CapturedVarsInfoMap Map of captured variables and the values
+    /// generated for them.
+    ///
+    /// \return True if any variables were generated.
+    static bool EmitOMPFirstprivateClause(
+        CodeGenFunction &CGF, const OMPExecutableDirective &D,
+        OMPPrivateScope &PrivateScope,
+        llvm::SmallDenseMap<const VarDecl *, CapturedVarInfo>
+            &CapturedVarsInfoMap);
+
     static bool EmitOMPCopyinClause(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     InsertPointTy AllocaIP);
+
+    /// Create specialized alloca to handle lastprivate conditionals.
+    static Address emitLastprivateConditionalInit(CodeGenFunction &CGF,
+                                                  const VarDecl *VD);
+
+    static void GenerateOpenMPCapturedVars(
+        CodeGenFunction &CFG, const CapturedStmt &S,
+        llvm::SmallDenseMap<const VarDecl *, CapturedVarInfo>
+            &CapturedVarsInfoMap);
+
     /// Get the platform-specific name separator.
     /// \param Parts different parts of the final name that needs separation
     /// \param FirstSeparator First separator used between the initial two
@@ -1610,6 +1644,7 @@
     static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
                                              StringRef FirstSeparator = ".",
                                              StringRef Separator = ".");
+
     /// Emit the Finalization for an OMP region
     /// \param CGF	The Codegen function this belongs to
     /// \param IP	Insertion point for generating the finalization code.
@@ -1713,7 +1748,16 @@
       ~InlinedRegionBodyRAII() { CGF.AllocaInsertPt = OldAllocaIP; }
     };
   };
+
 private:
+  /// Maps local variables marked as lastprivate conditional to their internal
+  /// types.
+  llvm::DenseMap<llvm::Function *,
+                 llvm::DenseMap<CanonicalDeclPtr<const Decl>,
+                                std::tuple<QualType, const FieldDecl *,
+                                           const FieldDecl *, LValue>>>
+      LastprivateConditionalToTypes;
+
   /// CXXThisDecl - When generating code for a C++ member function,
   /// this will hold the implicit 'this' declaration.
   ImplicitParamDecl *CXXABIThisDecl = nullptr;
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1552,6 +1552,52 @@
                                      const OMPExecutableDirective &,
                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
 
+static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
+                                       QualType FieldTy) {
+  auto *Field = FieldDecl::Create(
+      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
+      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
+      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
+  Field->setAccess(AS_public);
+  DC->addDecl(Field);
+  return Field;
+}
+
+Address CodeGenFunction::OMPBuilderCBHelpers::emitLastprivateConditionalInit(
+    CodeGenFunction &CGF, const VarDecl *VD) {
+  ASTContext &C = CGF.CGM.getContext();
+  auto I = CGF.LastprivateConditionalToTypes.find(CGF.CurFn);
+  if (I == CGF.LastprivateConditionalToTypes.end())
+    I = CGF.LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
+  QualType NewType;
+  const FieldDecl *VDField;
+  const FieldDecl *FiredField;
+  LValue BaseLVal;
+  auto VI = I->getSecond().find(VD);
+  if (VI == I->getSecond().end()) {
+    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
+    RD->startDefinition();
+    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
+    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
+    RD->completeDefinition();
+    NewType = C.getRecordType(RD);
+    Address Addr =
+        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
+    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
+    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
+  } else {
+    NewType = std::get<0>(VI->getSecond());
+    VDField = std::get<1>(VI->getSecond());
+    FiredField = std::get<2>(VI->getSecond());
+    BaseLVal = std::get<3>(VI->getSecond());
+  }
+  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
+  CGF.EmitStoreOfScalar(
+      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
+      FiredLVal);
+  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
+}
+
 bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPCopyinClause(
     CodeGenFunction &CGF, const OMPExecutableDirective &D,
     InsertPointTy AllocaIP) {
@@ -1728,6 +1774,293 @@
   }
   return OS.str().str();
 }
+
+// Emits firstprivate copies into PrivateScope; true if one is also lastprivate.
+bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPFirstprivateClause(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D,
+    OMPPrivateScope &PrivateScope,
+    llvm::SmallDenseMap<const VarDecl *, CapturedVarInfo>
+        &CapturedVarsInfoMap) {
+  if (!CGF.HaveInsertPoint())
+    return false;
+
+  CodeGenModule &CGM = CGF.CGM;
+  bool DeviceConstTarget =
+      CGF.getLangOpts().OpenMPIsDevice &&
+      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
+  bool FirstprivateIsLastprivate = false;
+  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
+  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
+    for (const auto *E : C->varlists())
+      Lastprivates.try_emplace(
+          cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())->getCanonicalDecl(),
+          C->getKind());
+  }
+  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
+  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
+  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
+  // Force emission of the firstprivate copy if the directive does not emit
+  // an outlined function, like omp for, omp simd, omp distribute etc.
+  bool MustEmitFirstprivateCopy =
+      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
+  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
+    const auto *IRef = C->varlist_begin();
+    const auto *InitsRef = C->inits().begin();
+    for (const Expr *IInit : C->private_copies()) {
+      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      bool ThisFirstprivateIsLastprivate =
+          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
+      const FieldDecl *FD = CGF.CapturedStmtInfo->lookup(OrigVD);
+      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
+      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
+          !FD->getType()->isReferenceType() &&
+          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
+        if (CapturedVarsInfoMap[OrigVD].CapturedKind !=
+                CapturedVarInfo::ByValue ||
+            OrigVD->isConstexpr()) {
+          EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
+          ++IRef;
+          ++InitsRef;
+          continue;
+        }
+      }
+      // Do not emit copy for firstprivate constant variables in target regions,
+      // captured by reference.
+      if (DeviceConstTarget && OrigVD->getType().isConstant(CGF.getContext()) &&
+          FD && FD->getType()->isReferenceType() &&
+          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
+        // TODO: Move and modify this function based on target regions after
+        // they land
+        (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF,
+                                                                    OrigVD);
+        ++IRef;
+        ++InitsRef;
+        continue;
+      }
+      FirstprivateIsLastprivate |= ThisFirstprivateIsLastprivate;
+      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
+        const auto *VDInit =
+            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
+        bool IsRegistered;
+        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
+                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
+                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
+        LValue OriginalLVal;
+        if (!FD) {
+          // Check if the firstprivate variable is just a constant value.
+          ConstantEmission CE = CGF.tryEmitAsConstant(&DRE);
+          if (CE && !CE.isReference()) {
+            // Constant value, no need to create a copy.
+            ++IRef;
+            ++InitsRef;
+            continue;
+          }
+          if (CE && CE.isReference()) {
+            OriginalLVal = CE.getReferenceLValue(CGF, &DRE);
+          } else {
+            assert(!CE && "Expected non-constant firstprivate.");
+            OriginalLVal = CGF.EmitLValue(&DRE);
+          }
+        } else {
+          OriginalLVal = CGF.EmitLValue(&DRE);
+        }
+        QualType Type = VD->getType();
+        if (Type->isArrayType()) {
+          // Emit VarDecl with copy init for arrays.
+          // Get the address of the original variable captured in current
+          // captured region.
+          IsRegistered = PrivateScope.addPrivate(
+              OrigVD, [&CGF, VD, Type, OriginalLVal, VDInit]() {
+                AutoVarEmission Emission = CGF.EmitAutoVarAlloca(*VD);
+                const Expr *Init = VD->getInit();
+                if (!isa<CXXConstructExpr>(Init) ||
+                    CGF.isTrivialInitializer(Init)) {
+                  // Perform simple memcpy.
+                  LValue Dest =
+                      CGF.MakeAddrLValue(Emission.getAllocatedAddress(), Type);
+                  CGF.EmitAggregateAssign(Dest, OriginalLVal, Type);
+                } else {
+                  CGF.EmitOMPAggregateAssign(
+                      Emission.getAllocatedAddress(),
+                      OriginalLVal.getAddress(CGF), Type,
+                      [&CGF, VDInit, Init](Address DestElement,
+                                           Address SrcElement) {
+                        // Clean up any temporaries needed by the
+                        // initialization.
+                        RunCleanupsScope InitScope(CGF);
+                        // Emit initialization for single element.
+                        CGF.setAddrOfLocalVar(VDInit, SrcElement);
+                        CGF.EmitAnyExprToMem(Init, DestElement,
+                                             Init->getType().getQualifiers(),
+                                             /*IsInitializer*/ false);
+                        CGF.LocalDeclMap.erase(VDInit);
+                      });
+                }
+                CGF.EmitAutoVarCleanups(Emission);
+                return Emission.getAllocatedAddress();
+              });
+        } else {
+          Address OriginalAddr = OriginalLVal.getAddress(CGF);
+          IsRegistered = PrivateScope.addPrivate(
+              OrigVD, [&CGF, VDInit, OriginalAddr, VD,
+                       ThisFirstprivateIsLastprivate, OrigVD, &Lastprivates,
+                       IRef, &IInit, &CapturedVarsInfoMap]() {
+                Address VarAddr = OriginalAddr;
+                ASTContext &Ctx = CGF.getContext();
+                QualType UIntPtrTy = Ctx.getUIntPtrType();
+                llvm::Value *V = CapturedVarsInfoMap[OrigVD].PassedValue;
+                if (CapturedVarsInfoMap[OrigVD].CapturedKind ==
+                    CapturedVarInfo::ByValue) {
+                  // By-value: spill the passed value into a uintptr temp.
+                  CGF.setAddrOfLocalVar(VDInit, OriginalAddr);
+                  CharUnits AddrAlign = Ctx.getDeclAlign(&*VD);
+                  Address DeclPtr = CGF.CreateMemTemp(UIntPtrTy, AddrAlign,
+                                                      VD->getName() + ".addr");
+                  LValue DstLV = CGF.MakeAddrLValue(DeclPtr, UIntPtrTy,
+                                                    AlignmentSource::Decl);
+                  CGF.EmitStoreOfScalar(V, DstLV);
+                  if (!OrigVD->getType()->isPointerType()) {
+                    VarAddr = DeclPtr;
+                    if (VD->getType() != UIntPtrTy)
+                      VarAddr = castValueFromUintptr(
+                          CGF, (*IRef)->getExprLoc(), VD->getType(),
+                          VD->getName(),
+                          CGF.MakeAddrLValue(DeclPtr, UIntPtrTy));
+                    Address OMPAddress = getAddressOfLocalVariable(CGF, VD);
+                    if (OMPAddress.isValid()) {
+                      LValue VarAddrLV = CGF.MakeAddrLValue(
+                          VarAddr, VD->getType(), AlignmentSource::Decl);
+                      llvm::Value *CV =
+                          CGF.EmitLoadOfScalar(VarAddrLV, IInit->getBeginLoc());
+                      CGF.EmitStoreOfScalar(
+                          CV, CGF.MakeAddrLValue(OMPAddress, VD->getType(),
+                                                 AlignmentSource::Decl));
+                      VarAddr = OMPAddress;
+                    }
+                    CGF.setAddrOfLocalVar(VD, VarAddr);
+                  } else {
+                    llvm_unreachable("Unhandled Captured by Value VarDecl!");
+                  }
+                } else {
+                  // By-reference: the passed value is used as an address.
+                  QualType VDPtrTy = Ctx.getPointerType(VD->getType());
+                  CharUnits AddrAlign = Ctx.getDeclAlign(&*VD);
+                  Address DeclPtr = CGF.CreateMemTemp(VDPtrTy, AddrAlign,
+                                                      VD->getName() + ".addr");
+                  LValue DstLV = CGF.MakeAddrLValue(DeclPtr, VDPtrTy,
+                                                    AlignmentSource::Decl);
+                  CGF.EmitStoreOfScalar(V, DstLV);
+                  llvm::Value *PtrLd =
+                      CGF.EmitLoadOfScalar(DstLV, IInit->getBeginLoc());
+                  VarAddr = Address(PtrLd, AddrAlign);
+                  if (VD->getType() != VDPtrTy)
+                    VarAddr = castValueFromUintptr(
+                        CGF, (*IRef)->getExprLoc(), VD->getType(),
+                        VD->getName(), CGF.MakeAddrLValue(VarAddr, VDPtrTy));
+                  CGF.setAddrOfLocalVar(VD, VarAddr);
+                  CGF.setAddrOfLocalVar(VDInit, VarAddr);
+                  const auto *cleanups =
+                      dyn_cast<ExprWithCleanups>(VD->getInit());
+                  const Expr *Init =
+                      (cleanups) ? cleanups->getSubExpr() : VD->getInit();
+                  if (isa<CXXConstructExpr>(Init) &&
+                      !CGF.isTrivialInitializer(Init)) {
+                    if (cleanups)
+                      CGF.enterFullExpression(cleanups);
+                    // Keep the scope alive while the initializer is emitted.
+                    RunCleanupsScope InitScope(CGF);
+                    Address DstPtr = CGF.CreateMemTemp(VD->getType(), AddrAlign,
+                                                       VD->getName());
+                    CGF.EmitAnyExprToMem(Init, DstPtr,
+                                         Init->getType().getQualifiers(),
+                                         /*IsInitializer*/ false);
+                    VarAddr = DstPtr;
+                    // NOTE(review): VarAddr is never read after this store.
+                    // TODO emit cleanup info for variable
+                  }
+                }
+
+                CGF.LocalDeclMap.erase(VDInit);
+                if (ThisFirstprivateIsLastprivate &&
+                    Lastprivates[OrigVD->getCanonicalDecl()] ==
+                        OMPC_LASTPRIVATE_conditional) {
+                  // Create/init special variable for lastprivate conditionals.
+                  Address VDAddr = emitLastprivateConditionalInit(CGF, OrigVD);
+                  llvm::Value *V = CGF.EmitLoadOfScalar(
+                      CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD),
+                                         (*IRef)->getType(),
+                                         AlignmentSource::Decl),
+                      (*IRef)->getExprLoc());
+                  CGF.EmitStoreOfScalar(
+                      V, CGF.MakeAddrLValue(VDAddr, (*IRef)->getType(),
+                                            AlignmentSource::Decl));
+                  CGF.LocalDeclMap.erase(VD);
+                  CGF.setAddrOfLocalVar(VD, VDAddr);
+                  return VDAddr;
+                }
+                return CGF.GetAddrOfLocalVar(VD);
+              });
+        }
+        assert(IsRegistered &&
+               "firstprivate var already registered as private");
+        // Silence the warning about unused variable.
+        (void)IsRegistered;
+      }
+      ++IRef;
+      ++InitsRef;
+    }
+  }
+  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
+}
+
+void CodeGenFunction::OMPBuilderCBHelpers::GenerateOpenMPCapturedVars(
+    CodeGenFunction &CGF, const CapturedStmt &S,
+    llvm::SmallDenseMap<const VarDecl *, CapturedVarInfo>
+        &CapturedVarsValueMap) { // Out: captured var -> {value, kind}.
+  const RecordDecl *RD = S.getCapturedRecordDecl();
+  auto CurField = RD->field_begin();
+  auto CurCap = S.captures().begin();
+  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
+                                                 E = S.capture_init_end();
+       I != E; ++I, ++CurField, ++CurCap) {
+    if (CurField->hasCapturedVLAType() || CurCap->capturesThis()) {
+      // Captured VLA types and 'this' get no entry in the map.
+    } else if (CurCap->capturesVariableByCopy()) {
+      llvm::Value *CV =
+          CGF.EmitLoadOfScalar(CGF.EmitLValue(*I), CurCap->getLocation());
+
+      // If the field is not a pointer, store the value into a uintptr-sized
+      // temporary and reload it as a uintptr.
+      if (!CurField->getType()->isAnyPointerType()) {
+        ASTContext &Ctx = CGF.getContext();
+        Address DstAddr = CGF.CreateMemTemp(
+            Ctx.getUIntPtrType(),
+            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
+        LValue DstLV = CGF.MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());
+        // View the temporary through a pointer to the field's type.
+        llvm::Value *SrcAddrVal = CGF.EmitScalarConversion(
+            DstAddr.getPointer(), Ctx.getPointerType(Ctx.getUIntPtrType()),
+            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
+        LValue SrcLV =
+            CGF.MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());
+
+        // Store the value through the field-typed pointer.
+        CGF.EmitStoreThroughLValue(RValue::get(CV), SrcLV);
+
+        // Load it back through the uintptr-typed lvalue.
+        CV = CGF.EmitLoadOfScalar(DstLV, CurCap->getLocation());
+      }
+      CapturedVarsValueMap[CurCap->getCapturedVar()] = {
+          CV, CapturedVarInfo::ByValue};
+    } else {
+      assert(CurCap->capturesVariable() && "Expected capture by reference.");
+      CapturedVarsValueMap[CurCap->getCapturedVar()] = {
+          CGF.EmitLValue(*I).getAddress(CGF).getPointer(),
+          CapturedVarInfo::ByRef};
+    }
+  }
+}
+
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
   if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
     // Check if we have any if clause associated with the directive.
@@ -1832,6 +2165,7 @@
 
     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+
     Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
                                                  FiniCB, IfCond, NumThreads,
                                                  ProcBind, S.hasCancel()));
@@ -3526,9 +3860,6 @@
     if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
       Hint = HintClause->getHint();
 
-    // TODO: This is slightly different from what's currently being done in
-    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
-    // about typing is final.
     llvm::Value *HintInst = nullptr;
     if (Hint)
       HintInst =
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D81484: [Clang][OpenMP... Fady Ghanim via Phabricator via cfe-commits

Reply via email to