fghanim created this revision.
Herald added subscribers: cfe-commits, aaron.ballman, guansong, yaxunl.
Herald added a project: clang.
fghanim requested review of this revision.
Herald added a reviewer: jdoerfert.
Herald added a subscriber: sstefan1.

Add support for code-generating `omp single` through the OMPBuilder. This
patch also uses the OMPBuilder to generate the `__kmpc_copyprivate` calls.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D85619

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/test/OpenMP/single_codegen.cpp

Index: clang/test/OpenMP/single_codegen.cpp
===================================================================
--- clang/test/OpenMP/single_codegen.cpp
+++ clang/test/OpenMP/single_codegen.cpp
@@ -1,11 +1,12 @@
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=OMP50,CHECK
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=OMP45,CHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=OMP50,ALL,CHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=OMP45,ALL,CHECK
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -DOMPBUILDER -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=ALL,OMPBUILDER
 
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP50,CHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP50,ALL,CHECK
 
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP45,CHECK
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -fnoopenmp-use-tls -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=OMP45,ALL,CHECK
 
 // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -std=c++11 -fopenmp -fnoopenmp-use-tls -fexceptions -fcxx-exceptions -debug-info-kind=line-tables-only -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix=TERM_DEBUG
 // RUN: %clang_cc1 -verify -fopenmp -fnoopenmp-use-tls -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
@@ -30,13 +31,13 @@
   ~TestClass(){};
 };
 
-// CHECK-DAG:   [[TEST_CLASS_TY:%.+]] = type { i{{[0-9]+}} }
+// ALL-DAG:   [[TEST_CLASS_TY:%.+]] = type { i{{[0-9]+}} }
 // CHECK-DAG:   [[SST_TY:%.+]] = type { double }
 // CHECK-DAG:   [[SS_TY:%.+]] = type { i32, i8, i32* }
-// CHECK-DAG:   [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK:       [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8*
+// ALL-DAG:   [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
+// ALL:       [[IMPLICIT_BARRIER_SINGLE_LOC:@.+]] = private unnamed_addr global %{{.+}} { i32 0, i32 322, i32 0, i32 0, i8*
 
-// CHECK:       define void [[FOO:@.+]]()
+// ALL:       define void [[FOO:@.+]]()
 
 TestClass tc;
 TestClass tc2[2];
@@ -44,6 +45,7 @@
 
 void foo() { extern void mayThrow(); mayThrow(); }
 
+#ifndef OMPBUILDER
 struct SS {
   int a;
   int b : 4;
@@ -76,129 +78,132 @@
     }();
   }
 };
+#endif
 
-// CHECK-LABEL: @main
+// ALL-LABEL: @main
 // TERM_DEBUG-LABEL: @main
 int main() {
-  // CHECK:     alloca i32
-  // CHECK-DAG: [[A_ADDR:%.+]] = alloca i8
-  // CHECK-DAG: [[A2_ADDR:%.+]] = alloca [2 x i8]
-  // CHECK-DAG: [[C_ADDR:%.+]] = alloca [[TEST_CLASS_TY]]
-  // CHECK-DAG: [[DID_IT:%.+]] = alloca i32,
-  // CHECK-DAG: [[COPY_LIST:%.+]] = alloca [5 x i8*],
+  // ALL:     alloca i32
+  // ALL-DAG: [[A_ADDR:%.+]] = alloca i8
+  // ALL-DAG: [[A2_ADDR:%.+]] = alloca [2 x i8]
+  // ALL-DAG: [[C_ADDR:%.+]] = alloca [[TEST_CLASS_TY]]
+  // ALL-DAG: [[DID_IT:%.+]] = alloca i32,
+  // ALL-DAG: [[COPY_LIST:%.+]] = alloca [5 x i8*],
   char a;
   char a2[2];
   TestClass &c = tc;
+
+#ifndef OMPBUILDER
   SST<double> sst;
   SS ss(c.a);
+#endif
 
-// CHECK:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// CHECK:       [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK-NEXT:  br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
-// CHECK:       [[THEN]]
-// CHECK-NEXT:  store i8 2, i8* [[A_ADDR]]
-// CHECK-NEXT:  call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  br label {{%?}}[[EXIT]]
-// CHECK:       [[EXIT]]
-// CHECK-NOT:   call {{.+}} @__kmpc_cancel_barrier
+// ALL:       [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:       [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT:  br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL:       [[THEN]]
+// ALL-NEXT:  store i8 2, i8* [[A_ADDR]]
+// ALL-NEXT:  call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  br label {{%?}}[[EXIT]]
+// ALL:       [[EXIT]]
+// ALL-NOT:   call {{.+}} @__kmpc_cancel_barrier
 #pragma omp single nowait
   a = 2;
-// CHECK:       [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK-NEXT:  br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
-// CHECK:       [[THEN]]
-// CHECK-NEXT:  store i8 2, i8* [[A_ADDR]]
-// CHECK-NEXT:  call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  br label {{%?}}[[EXIT]]
-// CHECK:       [[EXIT]]
-// CHECK:       call{{.*}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]])
+// ALL:       [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT:  br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL:       [[THEN]]
+// ALL-NEXT:  store i8 2, i8* [[A_ADDR]]
+// ALL-NEXT:  call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  br label {{%?}}[[EXIT]]
+// ALL:       [[EXIT]]
+// ALL:       call{{.*}} @__kmpc_barrier([[IDENT_T_TY]]* [[IMPLICIT_BARRIER_SINGLE_LOC]], i32 [[GTID]])
 #pragma omp single
   a = 2;
-// CHECK:       store i32 0, i32* [[DID_IT]]
-// CHECK:       [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK-NEXT:  [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0
-// CHECK-NEXT:  br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
-// CHECK:       [[THEN]]
-// CHECK-NEXT:  invoke void [[FOO]]()
-// CHECK:       to label {{%?}}[[CONT:.+]] unwind
-// CHECK:       [[CONT]]
-// CHECK:       call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK:       store i32 1, i32* [[DID_IT]]
-// CHECK-NEXT:  br label {{%?}}[[EXIT]]
-// CHECK:       [[EXIT]]
-// CHECK:       [[A_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK:       store i8* [[A_ADDR]], i8** [[A_PTR_REF]],
-// CHECK:       [[C_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK:       store i8* {{.+}}, i8** [[C_PTR_REF]],
-// CHECK:       [[TC_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
-// CHECK:       [[TC_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached
-// CHECK:       [[TC_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC_THREADPRIVATE_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
-// CHECK:       [[TC_PTR_REF_VOID_PTR:%.+]] = bitcast [[TEST_CLASS_TY]]* [[TC_THREADPRIVATE_ADDR]] to i8*
-// CHECK:       store i8* [[TC_PTR_REF_VOID_PTR]], i8** [[TC_PTR_REF]],
-// CHECK:       [[A2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
-// CHECK:       [[BITCAST:%.+]] = bitcast [2 x i8]* [[A2_ADDR]] to i8*
-// CHECK:       store i8* [[BITCAST]], i8** [[A2_PTR_REF]],
-// CHECK:       [[TC2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
-// CHECK:       [[TC2_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached
-// CHECK:       [[TC2_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC2_THREADPRIVATE_ADDR_VOID_PTR]] to [2 x [[TEST_CLASS_TY]]]*
-// CHECK:       [[TC2_PTR_REF_VOID_PTR:%.+]] = bitcast [2 x [[TEST_CLASS_TY]]]* [[TC2_THREADPRIVATE_ADDR]] to i8*
-// CHECK:       store i8* [[TC2_PTR_REF_VOID_PTR]], i8** [[TC2_PTR_REF]],
-// CHECK:       [[COPY_LIST_VOID_PTR:%.+]] = bitcast [5 x i8*]* [[COPY_LIST]] to i8*
-// CHECK:       [[DID_IT_VAL:%.+]] = load i32, i32* [[DID_IT]],
-// CHECK:       call void @__kmpc_copyprivate([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i64 40, i8* [[COPY_LIST_VOID_PTR]], void (i8*, i8*)* [[COPY_FUNC:@.+]], i32 [[DID_IT_VAL]])
-// CHECK-NOT:   call {{.+}} @__kmpc_cancel_barrier
+// ALL:       store i32 0, i32* [[DID_IT]]
+// ALL:       [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL-NEXT:  [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0
+// ALL-NEXT:  br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// ALL:       [[THEN]]
+// ALL-NEXT:  invoke void [[FOO]]()
+// ALL:       to label {{%?}}[[CONT:.+]] unwind
+// ALL:       [[CONT]]
+// ALL:       call void @__kmpc_end_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
+// ALL:       store i32 1, i32* [[DID_IT]]
+// ALL-NEXT:  br label {{%?}}[[EXIT]]
+// ALL:       [[EXIT]]
+// ALL:       [[A_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// ALL:       store i8* [[A_ADDR]], i8** [[A_PTR_REF]],
+// ALL:       [[C_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// ALL:       store i8* {{.+}}, i8** [[C_PTR_REF]],
+// ALL:       [[TC_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// ALL:       [[TC_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached
+// ALL:       [[TC_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC_THREADPRIVATE_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
+// ALL:       [[TC_PTR_REF_VOID_PTR:%.+]] = bitcast [[TEST_CLASS_TY]]* [[TC_THREADPRIVATE_ADDR]] to i8*
+// ALL:       store i8* [[TC_PTR_REF_VOID_PTR]], i8** [[TC_PTR_REF]],
+// ALL:       [[A2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// ALL:       [[BITCAST:%.+]] = bitcast [2 x i8]* [[A2_ADDR]] to i8*
+// ALL:       store i8* [[BITCAST]], i8** [[A2_PTR_REF]],
+// ALL:       [[TC2_PTR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[COPY_LIST]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// ALL:       [[TC2_THREADPRIVATE_ADDR_VOID_PTR:%.+]] = call{{.*}} i8* @__kmpc_threadprivate_cached
+// ALL:       [[TC2_THREADPRIVATE_ADDR:%.+]] = bitcast i8* [[TC2_THREADPRIVATE_ADDR_VOID_PTR]] to [2 x [[TEST_CLASS_TY]]]*
+// ALL:       [[TC2_PTR_REF_VOID_PTR:%.+]] = bitcast [2 x [[TEST_CLASS_TY]]]* [[TC2_THREADPRIVATE_ADDR]] to i8*
+// ALL:       store i8* [[TC2_PTR_REF_VOID_PTR]], i8** [[TC2_PTR_REF]],
+// ALL:       [[COPY_LIST_VOID_PTR:%.+]] = bitcast [5 x i8*]* [[COPY_LIST]] to i8*
+// ALL:       [[DID_IT_VAL:%.+]] = load i32, i32* [[DID_IT]],
+// ALL:       call void @__kmpc_copyprivate([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i64 40, i8* [[COPY_LIST_VOID_PTR]], void (i8*, i8*)* [[COPY_FUNC:@.+]], i32 [[DID_IT_VAL]])
+// ALL-NOT:   call {{.+}} @__kmpc_cancel_barrier
 #pragma omp single copyprivate(a, c, tc, a2, tc2)
   foo();
-// CHECK-NOT:   call i32 @__kmpc_single
-// CHECK-NOT:   call void @__kmpc_end_single
+  // ALL-NOT:   call i32 @__kmpc_single
+  // ALL-NOT:   call void @__kmpc_end_single
   return a;
 }
 
 // OMP50-LABEL: declare i8* @__kmpc_threadprivate_cached(
-// CHECK: void [[COPY_FUNC]](i8* %0, i8* %1)
-// CHECK: store i8* %0, i8** [[DST_ADDR_REF:%.+]],
-// CHECK: store i8* %1, i8** [[SRC_ADDR_REF:%.+]],
-// CHECK: [[DST_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_ADDR_REF]],
-// CHECK: [[DST_ADDR:%.+]] = bitcast i8* [[DST_ADDR_VOID_PTR]] to [5 x i8*]*
-// CHECK: [[SRC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_ADDR_REF]],
-// CHECK: [[SRC_ADDR:%.+]] = bitcast i8* [[SRC_ADDR_VOID_PTR]] to [5 x i8*]*
-// CHECK: [[DST_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[DST_A_ADDR:%.+]] = load i8*, i8** [[DST_A_ADDR_REF]],
-// CHECK: [[SRC_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[SRC_A_ADDR:%.+]] = load i8*, i8** [[SRC_A_ADDR_REF]],
-// CHECK: [[SRC_A_VAL:%.+]] = load i8, i8* [[SRC_A_ADDR]],
-// CHECK: store i8 [[SRC_A_VAL]], i8* [[DST_A_ADDR]],
-// CHECK: [[DST_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: [[DST_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_C_ADDR_REF]],
-// CHECK: [[DST_C_ADDR:%.+]] = bitcast i8* [[DST_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
-// CHECK: [[SRC_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: [[SRC_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_C_ADDR_REF]],
-// CHECK: [[SRC_C_ADDR:%.+]] = bitcast i8* [[SRC_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
-// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN:@.+]]([[TEST_CLASS_TY]]* [[DST_C_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_C_ADDR]])
-// CHECK: [[DST_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
-// CHECK: [[DST_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC_ADDR_REF]],
-// CHECK: [[DST_TC_ADDR:%.+]] = bitcast i8* [[DST_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
-// CHECK: [[SRC_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
-// CHECK: [[SRC_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC_ADDR_REF]],
-// CHECK: [[SRC_TC_ADDR:%.+]] = bitcast i8* [[SRC_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
-// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* [[DST_TC_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_TC_ADDR]])
-// CHECK: [[DST_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
-// CHECK: [[DST_A2_ADDR:%.+]] = load i8*, i8** [[DST_A2_ADDR_REF]],
-// CHECK: [[SRC_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
-// CHECK: [[SRC_A2_ADDR:%.+]] = load i8*, i8** [[SRC_A2_ADDR_REF]],
-// CHECK: call void @llvm.memcpy.{{.+}}(i8* align 1 [[DST_A2_ADDR]], i8* align 1 [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i1 false)
-// CHECK: [[DST_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
-// CHECK: [[DST_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC2_ADDR_REF]],
-// CHECK: [[DST_TC2_ADDR:%.+]] = bitcast i8* [[DST_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
-// CHECK: [[SRC_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
-// CHECK: [[SRC_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC2_ADDR_REF]],
-// CHECK: [[SRC_TC2_ADDR:%.+]] = bitcast i8* [[SRC_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
-// CHECK: br i1
-// CHECK: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* %{{.+}}, [[TEST_CLASS_TY]]* {{.*}})
-// CHECK: br i1
-// CHECK: ret void
-
+// ALL: void [[COPY_FUNC]](i8* %0, i8* %1)
+// ALL: store i8* %0, i8** [[DST_ADDR_REF:%.+]],
+// ALL: store i8* %1, i8** [[SRC_ADDR_REF:%.+]],
+// ALL: [[DST_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_ADDR_REF]],
+// ALL: [[DST_ADDR:%.+]] = bitcast i8* [[DST_ADDR_VOID_PTR]] to [5 x i8*]*
+// ALL: [[SRC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_ADDR_REF]],
+// ALL: [[SRC_ADDR:%.+]] = bitcast i8* [[SRC_ADDR_VOID_PTR]] to [5 x i8*]*
+// ALL: [[DST_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// ALL: [[DST_A_ADDR:%.+]] = load i8*, i8** [[DST_A_ADDR_REF]],
+// ALL: [[SRC_A_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// ALL: [[SRC_A_ADDR:%.+]] = load i8*, i8** [[SRC_A_ADDR_REF]],
+// ALL: [[SRC_A_VAL:%.+]] = load i8, i8* [[SRC_A_ADDR]],
+// ALL: store i8 [[SRC_A_VAL]], i8* [[DST_A_ADDR]],
+// ALL: [[DST_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// ALL: [[DST_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_C_ADDR_REF]],
+// ALL: [[DST_C_ADDR:%.+]] = bitcast i8* [[DST_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
+// ALL: [[SRC_C_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// ALL: [[SRC_C_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_C_ADDR_REF]],
+// ALL: [[SRC_C_ADDR:%.+]] = bitcast i8* [[SRC_C_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
+// ALL: call{{.*}} [[TEST_CLASS_TY_ASSIGN:@.+]]([[TEST_CLASS_TY]]* [[DST_C_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_C_ADDR]])
+// ALL: [[DST_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// ALL: [[DST_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC_ADDR_REF]],
+// ALL: [[DST_TC_ADDR:%.+]] = bitcast i8* [[DST_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
+// ALL: [[SRC_TC_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// ALL: [[SRC_TC_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC_ADDR_REF]],
+// ALL: [[SRC_TC_ADDR:%.+]] = bitcast i8* [[SRC_TC_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
+// ALL: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* [[DST_TC_ADDR]], [[TEST_CLASS_TY]]* {{.*}}[[SRC_TC_ADDR]])
+// ALL: [[DST_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// ALL: [[DST_A2_ADDR:%.+]] = load i8*, i8** [[DST_A2_ADDR_REF]],
+// ALL: [[SRC_A2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// ALL: [[SRC_A2_ADDR:%.+]] = load i8*, i8** [[SRC_A2_ADDR_REF]],
+// ALL: call void @llvm.memcpy.{{.+}}(i8* align 1 [[DST_A2_ADDR]], i8* align 1 [[SRC_A2_ADDR]], i{{[0-9]+}} 2, i1 false)
+// ALL: [[DST_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DST_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// ALL: [[DST_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[DST_TC2_ADDR_REF]],
+// ALL: [[DST_TC2_ADDR:%.+]] = bitcast i8* [[DST_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
+// ALL: [[SRC_TC2_ADDR_REF:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[SRC_ADDR]], i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// ALL: [[SRC_TC2_ADDR_VOID_PTR:%.+]] = load i8*, i8** [[SRC_TC2_ADDR_REF]],
+// ALL: [[SRC_TC2_ADDR:%.+]] = bitcast i8* [[SRC_TC2_ADDR_VOID_PTR]] to [[TEST_CLASS_TY]]*
+// ALL: br i1
+// ALL: call{{.*}} [[TEST_CLASS_TY_ASSIGN]]([[TEST_CLASS_TY]]* %{{.+}}, [[TEST_CLASS_TY]]* {{.*}})
+// ALL: br i1
+// ALL: ret void
 
 // OMP50-LABEL: void @_ZN3SSTIdEC2Ev(
 // OMP50: getelementptr inbounds [[SST_TY]], [[SST_TY]]* %{{.+}}, i32 0, i32 0
Index: clang/lib/CodeGen/CodeGenFunction.h
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.h
+++ clang/lib/CodeGen/CodeGenFunction.h
@@ -1640,6 +1640,28 @@
                                           const VarDecl *VD, Address VDAddr,
                                           SourceLocation Loc);
 
+    /// Generate the copy function used as callback in __kmpc_copyprivate, and
+    /// return a pointer to it.
+    ///
+    /// \param CGM CG module where the function is created
+    /// \param ArgsType pointer type that both function arguments are cast to
+    /// \param CopyprivateVars list of copyprivate clause variables
+    /// \param DstExprs list of destination expressions
+    /// \param SrcExprs list of source expressions
+    /// \param AssignmentOps list of assignment expressions
+    /// \param Loc source location used for the generated function
+    ///
+    static llvm::Value *emitCopyprivateCopyFunction(
+        CodeGenModule &CGM, llvm::Type *ArgsType,
+        ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DstExprs,
+        ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
+        SourceLocation Loc);
+
+    /// Load the pointer stored at \p Index in \p Array, an array of pointers to
+    /// variables, and return it as an address typed and aligned for \p Var.
+    static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
+                                          unsigned Index, const VarDecl *Var);
+
     /// Gets the OpenMP-specific address of the local variable /p VD.
     static Address getAddressOfLocalVariable(CodeGenFunction &CGF,
                                              const VarDecl *VD);
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3560,6 +3560,74 @@
                                               S.hasCancel());
 }
 
+Address CodeGenFunction::OMPBuilderCBHelpers::emitAddrOfVarFromArray(
+    CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var) {
+  // Load the pointer held in slot Index of Array.
+  llvm::Value *VarPtr =
+      CGF.Builder.CreateLoad(CGF.Builder.CreateConstArrayGEP(Array, Index));
+
+  // Give it Var's declared alignment, then re-type it as Var's memory type.
+  Address VarAddr(VarPtr, CGF.getContext().getDeclAlign(Var));
+  llvm::Type *VarMemTy = CGF.ConvertTypeForMem(Var->getType());
+  return CGF.Builder.CreateElementBitCast(VarAddr, VarMemTy);
+}
+
+llvm::Value *CodeGenFunction::OMPBuilderCBHelpers::emitCopyprivateCopyFunction(
+    CodeGenModule &CGM, llvm::Type *ArgsType,
+    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
+    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
+    SourceLocation Loc) {
+  ASTContext &C = CGM.getContext();
+  // Build: void copy_func(void *LHSArg, void *RHSArg), copying RHS into LHS.
+  FunctionArgList Args;
+  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                           ImplicitParamDecl::Other);
+  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+                           ImplicitParamDecl::Other);
+  Args.push_back(&LHSArg);
+  Args.push_back(&RHSArg);
+  const auto &CGFI =
+      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
+  std::string Name =
+      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
+  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
+                                    llvm::GlobalValue::InternalLinkage, Name,
+                                    &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
+  Fn->setDoesNotRecurse();
+  CodeGenFunction CGF(CGM);
+  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
+  // Dest = (void*[n])(LHSArg);   (LHSArg cast to ArgsType)
+  // Src = (void*[n])(RHSArg);    (RHSArg cast to ArgsType)
+  Address LHS(
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), ArgsType),
+      CGF.getPointerAlign());
+  Address RHS(
+      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), ArgsType),
+      CGF.getPointerAlign());
+  // *(Type0*)Dst[0] = *(Type0*)Src[0];
+  // *(Type1*)Dst[1] = *(Type1*)Src[1];
+  // ...
+  // *(Type<n-1>*)Dst[n-1] = *(Type<n-1>*)Src[n-1];
+  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
+    const auto *DestVar =
+        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
+    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
+
+    const auto *SrcVar =
+        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
+    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
+
+    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
+    QualType Type = VD->getType();
+    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
+  }
+  CGF.FinishFunction();
+  return Fn;
+}
+
 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
   llvm::SmallVector<const Expr *, 8> CopyprivateVars;
   llvm::SmallVector<const Expr *, 8> DestExprs;
@@ -3577,6 +3645,93 @@
     AssignmentOps.append(C->assignment_ops().begin(),
                          C->assignment_ops().end());
   }
+
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    // Emit 'single' and its 'copyprivate' handling via the OpenMPIRBuilder.
+    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    ASTContext &Ctx = CGM.getContext();
+    const CapturedStmt *CS = S.getInnermostCapturedStmt();
+    const Stmt *SingleRegionBodyStmt = CS->getCapturedStmt();
+
+    // The did_it flag exists only when a copyprivate clause is present; the
+    // region body stores 1 into it (see BodyGenCB).
+    Address DidIt = Address::invalid();
+    llvm::Value *DidItPtr = nullptr;
+    if (!CopyprivateVars.empty()) {
+      QualType KmpInt32Ty = CGM.getContext().getIntTypeForBitwidth(
+          /*DestWidth=*/32, /*Signed=*/1);
+      DidIt = CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
+      DidItPtr = DidIt.getPointer();
+    }
+
+    auto FiniCB = [this](InsertPointTy IP) {
+      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+    };
+
+    auto BodyGenCB = [SingleRegionBodyStmt, &DidIt,
+                      this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                            llvm::BasicBlock &FiniBB) {
+      // TODO: Add support for first/lastprivate privatization
+
+      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SingleRegionBodyStmt,
+                                             CodeGenIP, FiniBB);
+      if (DidIt.isValid()) {
+        // did_it = 1; the store is moved in front of the block terminator.
+        auto *Term = this->Builder.GetInsertBlock()->getTerminator();
+        auto *DidItMasterStore =
+            this->Builder.CreateStore(this->Builder.getInt32(1), DidIt);
+        DidItMasterStore->moveBefore(Term);
+      }
+    };
+
+    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
+    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+    Builder.restoreIP(
+        OMPBuilder.CreateSingle(Builder, BodyGenCB, FiniCB, DidItPtr));
+
+    if (DidIt.isValid()) {
+      // Create a list of all private variables for copyprivate.
+      llvm::APInt ArraySize(/*unsigned int numBits=*/32,
+                            CopyprivateVars.size());
+      QualType CopyprivateArrayTy = Ctx.getConstantArrayType(
+          Ctx.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
+          /*IndexTypeQuals=*/0);
+      Address CopyprivateList =
+          CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
+      for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
+        Address Elem = Builder.CreateConstArrayGEP(CopyprivateList, I);
+        Builder.CreateStore(
+            Builder.CreatePointerBitCastOrAddrSpaceCast(
+                EmitLValue(CopyprivateVars[I]).getPointer(*this), VoidPtrTy),
+            Elem);
+      }
+      // Build function that copies private values from single region to all
+      // other threads in the corresponding parallel region. The helper takes
+      // the destination expressions before the source expressions.
+      llvm::Value *CpyFn = OMPBuilderCBHelpers::emitCopyprivateCopyFunction(
+          CGM, ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
+          CopyprivateVars, DestExprs, SrcExprs, AssignmentOps, S.getBeginLoc());
+
+      llvm::Value *ListSize = getTypeSize(CopyprivateArrayTy);
+      Address CL = Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
+                                                               VoidPtrTy);
+
+      OMPBuilder.CreateCopyPrivate(Builder, ListSize, CL.getPointer(), CpyFn,
+                                   DidItPtr);
+    }
+
+    // Emit an implicit barrier at the end if neither 'nowait' nor 'copyprivate'
+    // was specified; 'nowait' is absent here, so the kind is OMPD_single.
+    if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
+      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
+                                             OMPD_single);
+    }
+    return;
+  }
+
   // Emit code for 'single' region along with 'copyprivate' clauses
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     Action.Enter(CGF);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to