fghanim created this revision.
fghanim added a reviewer: jdoerfert.
Herald added subscribers: cfe-commits, aaron.ballman, sstefan1, guansong, 
yaxunl.
Herald added a project: clang.
fghanim added a child revision: D79677: [clang][OpenMP][OMPIRBuilder] Adding 
some Privatization clauses to OpenMP `parallel` Directive.
fghanim added a parent revision: D79676: [Clang][OpenMP][OMPBuilder] Moving OMP 
allocation and cache creation code to OMPBuilderCBHelpers.
fghanim marked 3 inline comments as done.
fghanim added inline comments.


================
Comment at: clang/lib/CodeGen/CGStmtOpenMP.cpp:1554
                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
 
+bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPCopyinClause(
----------------
Note to reviewers:
This keeps most of the old implementation of the `copyin` clause, except that
it now uses parts of the `OMPBuilder`. Changes are noted below.


================
Comment at: clang/lib/CodeGen/CGStmtOpenMP.cpp:1582
+          assert(CGF.CapturedStmtInfo->lookup(VD) &&
+                 "Copyin threadprivates should have been captured!");
+          const auto *VDCanon = VD->getCanonicalDecl();
----------------
Added the following few lines.


================
Comment at: clang/lib/CodeGen/CGStmtOpenMP.cpp:1600
+        }
+
+        // Get the address of the threadprivate variable.
----------------
Changed the following section to use `OMPBuilder` wherever needed.


Adding support for the `copyin` clause to OMPBuilder's
implementation of `omp parallel`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D81483

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/OpenMP/parallel_copyin_codegen.cpp

Index: clang/test/OpenMP/parallel_copyin_codegen.cpp
===================================================================
--- clang/test/OpenMP/parallel_copyin_codegen.cpp
+++ clang/test/OpenMP/parallel_copyin_codegen.cpp
@@ -1,9 +1,12 @@
-// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,CHECK
 // RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,CHECK
 // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DLAMBDA -triple x86_64-linux -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
 // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -fblocks -DBLOCKS -triple x86_64-linux -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
 // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck %s -check-prefixes=ALL,IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fnoopenmp-use-tls -x c++ -triple x86_64-linux -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s -check-prefixes=ALL,IRBUILDER
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -fnoopenmp-use-tls -x c++ -triple x86_64-linux -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -fopenmp-simd -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-linux -emit-pch -o %t %s
@@ -46,21 +49,22 @@
   ~S() {}
 };
 
-// CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
-// CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+// ALL-DAG: [[S_FLOAT_TY:%.+]] = type { float }
+// ALL-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
 // CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
+// IRBUILDER-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 34, i32 0, i32 0, i8*
 // TLS-CHECK-DAG: [[S_FLOAT_TY:%.+]] = type { float }
 // TLS-CHECK-DAG: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
 // TLS-CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
 
-// CHECK-DAG: [[T_VAR:@.+]] = internal global i{{[0-9]+}} 1122,
-// CHECK-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
-// CHECK-DAG: [[S_ARR:@.+]] = internal global [2 x [[S_FLOAT_TY]]] zeroinitializer,
-// CHECK-DAG: [[VAR:@.+]] = internal global [[S_FLOAT_TY]] zeroinitializer,
-// CHECK-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global i{{[0-9]+}} 333,
-// CHECK-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3],
-// CHECK-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x [[S_INT_TY]]] zeroinitializer,
-// CHECK-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global [[S_INT_TY]] zeroinitializer,
+// ALL-DAG: [[T_VAR:@.+]] = internal global i{{[0-9]+}} 1122,
+// ALL-DAG: [[VEC:@.+]] = internal global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
+// ALL-DAG: [[S_ARR:@.+]] = internal global [2 x [[S_FLOAT_TY]]] zeroinitializer,
+// ALL-DAG: [[VAR:@.+]] = internal global [[S_FLOAT_TY]] zeroinitializer,
+// ALL-DAG: [[TMAIN_T_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global i{{[0-9]+}} 333,
+// ALL-DAG: [[TMAIN_VEC:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 3, i{{[0-9]+}} 3],
+// ALL-DAG: [[TMAIN_S_ARR:@.+]] = linkonce_odr {{(dso_local )?}}global [2 x [[S_INT_TY]]] zeroinitializer,
+// ALL-DAG: [[TMAIN_VAR:@.+]] = linkonce_odr {{(dso_local )?}}global [[S_INT_TY]] zeroinitializer,
 // TLS-CHECK-DAG: [[T_VAR:@.+]] = internal thread_local global i{{[0-9]+}} 1122,
 // TLS-CHECK-DAG: [[VEC:@.+]] = internal thread_local global [2 x i{{[0-9]+}}] [i{{[0-9]+}} 1, i{{[0-9]+}} 2],
 // TLS-CHECK-DAG: [[S_ARR:@.+]] = internal thread_local global [2 x [[S_FLOAT_TY]]] zeroinitializer,
@@ -230,14 +234,14 @@
 #endif
 }
 
-// CHECK-LABEL: @main
-// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN:@.+]]([[S_FLOAT_TY]]* [[TEST]], [[S_FLOAT_TY]]*
-// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*))
-// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*))
-// CHECK: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
-// CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
-// CHECK: ret
+// ALL-LABEL: @main
+// ALL: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN:@.+]]([[S_FLOAT_TY]]* [[TEST]], [[S_FLOAT_TY]]*
+// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*))
+// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[MAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*))
+// ALL: = call {{.*}}i{{.+}} [[TMAIN_INT:@.+]]()
+// ALL: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
+// ALL: ret
 
 // TLS-CHECK-LABEL: @main
 // TLS-CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
@@ -248,22 +252,56 @@
 // TLS-CHECK: call {{.*}} [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]*
 // TLS-CHECK: ret
 
-// CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// IRBUILDER: define internal {{.*}}void [[MAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32
+// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]]
+// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]]
+// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]]
+
+// threadprivate_t_var = t_var;
+// IRBUILDER: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]]
+// IRBUILDER: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
+// IRBUILDER: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}}
+// IRBUILDER: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
+
+// IRBUILDER: ret void
+
+// IRBUILDER: [[DONE]]
+// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}})
+// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]])
+// IRBUILDER: add nsw i32 %{{.+}}, 1
+
+// IRBUILDER: [[NOT_MASTER]]
+// IRBUILDER: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]],
+// IRBUILDER: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
+
+// ALL: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
 
+// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32
+// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]]
+// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]]
+// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]]
+
 // TLS-CHECK: define internal {{.*}}void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}},
 // TLS-CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
 // threadprivate_t_var = t_var;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]]
-// CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
-// CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}}
-// CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
-// CHECK: [[NOT_MASTER]]
-// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]],
-// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[T_VAR]]
+// ALL: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
+// ALL: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[T_VAR]] to i{{[0-9]+}}), %{{.+}}
+// ALL: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
+
+// IRBUILDER: ret void
+// IRBUILDER: [[DONE]]
+// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}})
+// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]])
+
+// ALL: [[NOT_MASTER]]
+// ALL: load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]],
+// ALL: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
 
 // TLS-CHECK: [[MASTER_REF:%.+]] = load i32*, i32** %
 // TLS-CHECK: [[MASTER_REF2:%.+]] = load [2 x i32]*, [2 x i32]** %
@@ -278,21 +316,21 @@
 // TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[T_VAR]]
 
 // threadprivate_vec = vec;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]]
-// CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}}  %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VEC]]
+// ALL: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}}  %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*),
 
 // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF2]] to i8*
 // TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]]
 
 // threadprivate_s_arr = s_arr;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]]
-// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
-// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
-// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
-// CHECK: [[S_ARR_BODY]]
-// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}})
-// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[S_ARR]]
+// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
+// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
+// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
+// ALL: [[S_ARR_BODY]]
+// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}})
+// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
 
 // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[MASTER_REF3]] to [[S_FLOAT_TY]]*
 // TLS-CHECK-DAG: [[S_ARR_SRC_BEGIN:%.+]] = phi [[S_FLOAT_TY]]* {{.*}}[[MASTER_CAST]]
@@ -305,8 +343,8 @@
 // TLS-CHECK: [[ARR_DONE]]
 
 // threadprivate_var = var;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]]
-// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{%.+}}, [[S_FLOAT_TY]]* {{.*}}[[VAR]])
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[VAR]]
+// ALL: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{%.+}}, [[S_FLOAT_TY]]* {{.*}}[[VAR]])
 // CHECK: [[DONE]]
 
 // TLS-CHECK: call {{.*}} [[S_FLOAT_TY_COPY_ASSIGN]]([[S_FLOAT_TY]]* {{.*}}[[VAR]], [[S_FLOAT_TY]]* {{.*}}[[MASTER_REF4]])
@@ -356,36 +394,67 @@
 // TLS-CHECK: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID]])
 // TLS-CHECK: ret void
 
-// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
-// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
-// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]*
-// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*))
-// CHECK: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*))
-// CHECK: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
-// CHECK: ret
+// ALL: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
+// ALL: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]*
+// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*))
+// ALL: call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*))
+// ALL: call {{.*}} [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]*
+// ALL: ret
 
 // TLS-CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]()
 // TLS-CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
 // TLS-CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN:@.+]]([[S_INT_TY]]* [[TEST]], [[S_INT_TY]]*
 // TLS-CHECK:     call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*, [2 x i32]*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void (i32*, i32*, ...)*),
 // TLS-CHECK:     call {{.*}}void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32*)* [[TMAIN_MICROTASK1:@.+]] to void (i32*, i32*, ...)*),
+
+// IRBUILDER: define internal {{.*}}void [[TMAIN_MICROTASK1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32
+// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]]
+// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]]
+// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]]
+// threadprivate_t_var = t_var;
+// IRBUILDER: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]]
+// IRBUILDER: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
+// IRBUILDER: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}}
+// IRBUILDER: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
+// IRBUILDER: ret void
+// IRBUILDER: [[DONE]]
+// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}})
+// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]])
+// IRBUILDER: [[NOT_MASTER]]
+// IRBUILDER: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]],
+// IRBUILDER: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}},
+
 //
-// CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// ALL: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
 // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[GTID_ADDR:%.+]] = load i32*, i32** [[GTID_ADDR_ADDR]],
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_ADDR]],
+
+// IRBUILDER: [[GTID_LOCAL:%.+]] = alloca i32
+// IRBUILDER: [[GTID_LD:%.+]] = load i32, i32* [[GTID_ADDR]]
+// IRBUILDER: store i32 [[GTID_LD]], i32* [[GTID_LOCAL]]
+// IRBUILDER: [[GTID:%.+]] = load i32, i32* [[GTID_LOCAL]]
 //
 // TLS-CHECK: define internal {{.*}}void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
 // TLS-CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 
 // threadprivate_t_var = t_var;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]]
-// CHECK: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
-// CHECK: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}}
-// CHECK: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
-// CHECK: [[NOT_MASTER]]
-// CHECK: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], align 128
-// CHECK: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_T_VAR]]
+// ALL: ptrtoint i{{[0-9]+}}* %{{.+}} to i{{[0-9]+}}
+// ALL: icmp ne i{{[0-9]+}} ptrtoint (i{{[0-9]+}}* [[TMAIN_T_VAR]] to i{{[0-9]+}}), %{{.+}}
+// ALL: br i1 %{{.+}}, label %[[NOT_MASTER:.+]], label %[[DONE:.+]]
+
+// IRBUILDER: ret void
+
+// IRBUILDER: [[DONE]]
+// IRBUILDER: [[GTID_CALL:%.+]] = call i32 @__kmpc_global_thread_num(%{{.+}}* @{{.+}})
+// IRBUILDER: call {{.*}}void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i32 [[GTID_CALL]])
+
+// ALL: [[NOT_MASTER]]
+// ALL: load i{{[0-9]+}}, i{{[0-9]+}}* [[TMAIN_T_VAR]], align 128
+// ALL: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* %{{.+}}, align 128
 
 // TLS-CHECK: [[MASTER_REF:%.+]] = load i32*, i32** %
 // TLS-CHECK: [[MASTER_REF1:%.+]] = load [2 x i32]*, [2 x i32]** %
@@ -400,21 +469,21 @@
 // TLS-CHECK: store i32 [[MASTER_VAL]], i32* [[TMAIN_T_VAR]], align 128
 
 // threadprivate_vec = vec;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]]
-// CHECK: call {{.*}}void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*),
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VEC]]
+// ALL: call {{.*}}void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} %{{.+}}, i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*),
 
 // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x i32]* [[MASTER_REF1]] to i8*
 // TLS-CHECK: call void @llvm.memcpy{{.*}}(i8* align {{[0-9]+}} bitcast ([2 x i{{[0-9]+}}]* [[TMAIN_VEC]] to i8*), i8* align {{[0-9]+}} [[MASTER_CAST]]
 
 // threadprivate_s_arr = s_arr;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]]
-// CHECK: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
-// CHECK: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
-// CHECK: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
-// CHECK: [[S_ARR_BODY]]
-// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}})
-// CHECK: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_S_ARR]]
+// ALL: [[S_ARR_PRIV_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* {{%.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// ALL: [[S_ARR_PRIV_END:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], i{{[0-9]+}} 2
+// ALL: [[IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_PRIV_BEGIN]], [[S_ARR_PRIV_END]]
+// ALL: br i1 [[IS_EMPTY]], label %[[S_ARR_BODY_DONE:.+]], label %[[S_ARR_BODY:.+]]
+// ALL: [[S_ARR_BODY]]
+// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// ALL: br i1 {{.+}}, label %{{.+}}, label %[[S_ARR_BODY]]
 
 // TLS-CHECK: [[MASTER_CAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[MASTER_REF2]] to [[S_INT_TY]]*
 // TLS-CHECK-DAG: [[S_ARR_SRC_BEGIN:%.+]] = phi [[S_INT_TY]]* {{.*}}[[MASTER_CAST]]
@@ -427,8 +496,8 @@
 // TLS-CHECK: [[ARR_DONE]]
 
 // threadprivate_var = var;
-// CHECK: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]]
-// CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{%.+}}, [[S_INT_TY]]* {{.*}}[[TMAIN_VAR]])
+// ALL: call {{.*}}i8* @__kmpc_threadprivate_cached({{.+}} [[TMAIN_VAR]]
+// ALL: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{%.+}}, [[S_INT_TY]]* {{.*}}[[TMAIN_VAR]])
 // CHECK: [[DONE]]
 
 // TLS-CHECK: call {{.*}} [[S_INT_TY_COPY_ASSIGN]]([[S_INT_TY]]* {{.*}}[[TMAIN_VAR]], [[S_INT_TY]]* {{.*}}[[MASTER_REF3]])
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -1598,6 +1598,9 @@
     /// Gets the OpenMP-specific address of the local variable /p VD.
     static Address getAddressOfLocalVariable(CodeGenFunction &CGF,
                                              const VarDecl *VD);
+    static bool EmitOMPCopyinClause(CodeGenFunction &CGF,
+                                    const OMPExecutableDirective &D,
+                                    InsertPointTy AllocaIP);
     /// Get the platform-specific name separator.
     /// \param Parts different parts of the final name that needs separation
     /// \param FirstSeparator First separator used between the initial two
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1552,6 +1552,92 @@
                                      const OMPExecutableDirective &,
                                      llvm::SmallVectorImpl<llvm::Value *> &) {}
 
+bool CodeGenFunction::OMPBuilderCBHelpers::EmitOMPCopyinClause(
+    CodeGenFunction &CGF, const OMPExecutableDirective &D,
+    InsertPointTy AllocaIP) {
+  if (!CGF.HaveInsertPoint())
+    return false;
+  // threadprivate_var1 = master_threadprivate_var1;
+  // operator=(threadprivate_var2, master_threadprivate_var2);
+  // ...
+  // __kmpc_barrier(&loc, global_tid);
+  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
+  llvm::DenseSet<const VarDecl *> CopiedVars;
+  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
+  for (const auto *C : D.template getClausesOfKind<OMPCopyinClause>()) {
+    auto IRef = C->varlist_begin();
+    auto ISrcRef = C->source_exprs().begin();
+    auto IDestRef = C->destination_exprs().begin();
+    for (const Expr *AssignOp : C->assignment_ops()) {
+      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
+      QualType Type = VD->getType();
+      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
+        // Get the address of the master variable. If we are emitting code with
+        // TLS support, the address is passed from the master as field in the
+        // captured declaration.
+        Address MasterAddr = Address::invalid();
+        if (CGF.getLangOpts().OpenMPUseTLS &&
+            CGF.getContext().getTargetInfo().isTLSSupported()) {
+          assert(CGF.CapturedStmtInfo->lookup(VD) &&
+                 "Copyin threadprivates should have been captured!");
+          const auto *VDCanon = VD->getCanonicalDecl();
+          auto I = CGF.LocalDeclMap.find(VDCanon);
+          if (I == CGF.LocalDeclMap.end()) {
+            Address Addr(CGF.CGM.GetAddrOfGlobal(VDCanon),
+                         CGF.getContext().getDeclAlign(VDCanon));
+            CGF.LocalDeclMap.try_emplace(VDCanon, Addr);
+          }
+          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), true,
+                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
+          MasterAddr = CGF.EmitLValue(&DRE).getAddress(CGF);
+          CGF.LocalDeclMap.erase(VDCanon);
+        } else {
+          MasterAddr = Address(VD->isStaticLocal()
+                                   ? CGF.CGM.getStaticLocalDeclAddress(VD)
+                                   : CGF.CGM.GetAddrOfGlobal(VD),
+                               CGF.getContext().getDeclAlign(VD));
+        }
+
+        // Get the address of the threadprivate variable.
+        Address PrivateAddr = CGF.EmitLValue(*IRef).getAddress(CGF);
+        if (CopiedVars.size() == 1) {
+          // At first check if current thread is a master thread. If it is, no
+          // need to copy data.
+          InsertPointTy CopyBeginIP = OMPBuilder->CreateCopyinClauseBlocks(
+              AllocaIP, MasterAddr.getPointer(), PrivateAddr.getPointer(),
+              CGF.IntPtrTy, /*BranchtoEnd*/ false);
+          CGF.Builder.restoreIP(CopyBeginIP);
+          CopyBegin = CopyBeginIP.getBlock();
+          assert(CopyBegin && "CopyIn Basic Block was not generated!");
+          llvm::BranchInst *EntryCBI = llvm::dyn_cast_or_null<llvm::BranchInst>(
+              AllocaIP.getBlock()->getTerminator());
+          CopyEnd = EntryCBI ? EntryCBI->getSuccessor(1) : nullptr;
+          assert(CopyEnd && "No unique successor for CopyIn Basic Block!");
+        }
+        const auto *SrcVD =
+            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
+        const auto *DestVD =
+            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
+        CGF.EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
+      }
+      ++IRef;
+      ++ISrcRef;
+      ++IDestRef;
+    }
+  }
+  if (CopyEnd) {
+    // Exit out of copying procedure for non-master thread.
+    CGF.EmitBranch(CopyEnd);
+    if (llvm::Instruction *CopyEndTI =
+            CopyEnd ? CopyEnd->getTerminator() : nullptr)
+      CGF.Builder.SetInsertPoint(CopyEndTI);
+    else
+      CGF.Builder.SetInsertPoint(CopyEnd);
+    return true;
+  }
+  return false;
+}
+
 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
     CodeGenFunction &CGF, const VarDecl *VD) {
   CodeGenModule &CGM = CGF.CGM;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D81483: [Clang][OpenMP... Fady Ghanim via Phabricator via cfe-commits

Reply via email to