sfantao updated this revision to Diff 32322.
sfantao added a comment.
Address reviewers' concerns.
Also fix an issue with target regions that have no arguments, and an issue
in the VLA size computation that I found in the meantime.
http://reviews.llvm.org/D11361
Files:
include/clang/AST/Decl.h
include/clang/AST/Stmt.h
include/clang/Basic/CapturedStmt.h
include/clang/Sema/ScopeInfo.h
lib/CodeGen/CGExpr.cpp
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGStmt.cpp
lib/CodeGen/CGStmtOpenMP.cpp
lib/CodeGen/CodeGenFunction.cpp
lib/CodeGen/CodeGenFunction.h
lib/Sema/SemaOpenMP.cpp
test/OpenMP/target_codegen.cpp
Index: test/OpenMP/target_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/target_codegen.cpp
@@ -0,0 +1,583 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+// REQUIRES: powerpc-registered-target
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
+// CHECK-DAG: [[S1:%.+]] = type { double }
+
+// We have 8 target regions, but only 7 of them will actually generate
+// offloading code, and only 6 will have mapped arguments.
+
+// CHECK-DAG: [[MAPT2:@.+]] = private constant [1 x i32] [i32 3]
+// CHECK-DAG: [[MAPT3:@.+]] = private constant [2 x i32] [i32 3, i32 3]
+// CHECK-DAG: [[MAPT4:@.+]] = private constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3]
+// CHECK-DAG: [[MAPT5:@.+]] = private constant [3 x i32] [i32 3, i32 3, i32 3]
+// CHECK-DAG: [[MAPT6:@.+]] = private constant [4 x i32] [i32 3, i32 3, i32 3, i32 3]
+// CHECK-DAG: [[MAPT7:@.+]] = private constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3]
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+template<typename tx, typename ty>
+struct TT{
+ tx X;
+ ty Y;
+};
+
+// CHECK: define {{.*}}[[FOO:@.+]](
+int foo(int n) {
+ int a = 0;
+ short aa = 0;
+ float b[10];
+ float bn[n];
+ double c[5][10];
+ double cn[5][n];
+ TT<long, char> d;
+
+ // CHECK: br label %[[TRY:[^,]+]]
+ // CHECK: [[TRY]]
+ // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i32* null)
+ // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+ // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+ // CHECK: [[FAIL]]
+ // CHECK: call void [[HVT0:@.+]]()
+ // CHECK-NEXT: br label %[[END]]
+ // CHECK: [[END]]
+ #pragma omp target
+ {
+ }
+
+ // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}})
+ #pragma omp target if(0)
+ {
+ a += 1;
+ }
+
+ // CHECK: br label %[[TRY:[^,]+]]
+ // CHECK: [[TRY]]
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0))
+
+ // CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]]
+ // CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+ // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+ // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+ // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+ // CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8*
+ // CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8*
+
+ // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+ // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
+ // CHECK: [[FAIL]]
+ // CHECK: call void [[HVT2:@.+]](i32* {{[^,]+}})
+ // CHECK-NEXT: br label %[[END]]
+ // CHECK: [[END]]
+ #pragma omp target if(1)
+ {
+ a += 1;
+ }
+
+ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
+ // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+ // CHECK: [[TRY]]
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0))
+
+ // CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]]
+ // CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+ // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+ // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+ // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+ // CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8*
+ // CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8*
+
+ // CHECK-DAG: store i64 2, i64* [[SADDR1:%.+]]
+ // CHECK-DAG: [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+ // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+ // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+ // CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
+ // CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
+ // CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8*
+ // CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8*
+
+ // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+ // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+ // CHECK: [[FAIL]]
+ // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}})
+ // CHECK-NEXT: br label %[[END]]
+ // CHECK: [[END]]
+ #pragma omp target if(n>10)
+ {
+ a += 1;
+ aa += 1;
+ }
+
+ // We capture 3 VLA sizes in this target region
+ // CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA0:%[^,]+]]
+ // CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA1:%[^,]+]]
+ // CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA2:%[^,]+]]
+ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
+ // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+ // CHECK: [[TRY]]
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0))
+
+ // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+ // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+ // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+ // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+ // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+ // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+ // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+ // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+ // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+ // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]]
+ // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]]
+ // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]]
+ // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX4:[0-9]+]]
+ // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX4]]
+ // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX4]]
+ // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX5:[0-9]+]]
+ // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX5]]
+ // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX5]]
+ // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX6:[0-9]+]]
+ // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX6]]
+ // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX6]]
+ // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX7:[0-9]+]]
+ // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX7]]
+ // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX7]]
+ // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX8:[0-9]+]]
+ // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX8]]
+ // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX8]]
+
+ // The names below are not necessarily consistent with the names used for the
+ // addresses above as some are repeated.
+ // CHECK-DAG: [[BP0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+ // CHECK-DAG: [[P0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+ // CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 8, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+ // CHECK-DAG: [[P1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+ // CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 8, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP2:%[^,]+]] = bitcast i64* [[VLA2]] to i8*
+ // CHECK-DAG: [[P2:%[^,]+]] = bitcast i64* [[VLA2]] to i8*
+ // CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 8, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP3:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+ // CHECK-DAG: [[P3:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+ // CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 4, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
+ // CHECK-DAG: [[P4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8*
+ // CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 40, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP5:%[^,]+]] = bitcast float* %{{.+}} to i8*
+ // CHECK-DAG: [[P5:%[^,]+]] = bitcast float* %{{.+}} to i8*
+ // CHECK-DAG: store i8* [[BP5]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P5]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 4, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
+ // CHECK-DAG: [[P6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8*
+ // CHECK-DAG: store i8* [[BP6]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P6]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 400, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP7:%[^,]+]] = bitcast double* %{{.+}} to i8*
+ // CHECK-DAG: [[P7:%[^,]+]] = bitcast double* %{{.+}} to i8*
+ // CHECK-DAG: store i8* [[BP7]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P7]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 8, i64* {{%[^,]+}}
+
+ // CHECK-DAG: [[BP8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
+ // CHECK-DAG: [[P8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8*
+ // CHECK-DAG: store i8* [[BP8]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i8* [[P8]], i8** {{%[^,]+}}
+ // CHECK-DAG: store i64 16, i64* {{%[^,]+}}
+
+ // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+ // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+ // CHECK: [[FAIL]]
+ // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+ // CHECK-NEXT: br label %[[END]]
+ // CHECK: [[END]]
+ #pragma omp target if(n>20)
+ {
+ a += 1;
+ b[2] += 1.0;
+ bn[3] += 1.0;
+ c[1][2] += 1.0;
+ cn[1][3] += 1.0;
+ d.X += 1;
+ d.Y += 1;
+ }
+
+ return a;
+}
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions in foo().
+
+// CHECK: define internal void [[HVT0]]
+
+// CHECK: define internal void [[HVT1]]
+// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG: load i32, i32* [[USE_A]]
+
+// CHECK: define internal void [[HVT2]]
+// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG: load i32, i32* [[USE_A]]
+
+// CHECK: define internal void [[HVT3]]
+// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG: [[LOCALX_AA:%.+]] = alloca i16*
+// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]]
+// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG: [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA:%.+]]
+// CHECK-DAG: load i32, i32* [[USE_A]]
+// CHECK-DAG: load i16, i16* [[USE_AA]]
+
+// CHECK: define internal void [[HVT4]]
+// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG: [[LOCALX_B:%.+]] = alloca [10 x float]*
+// CHECK-DAG: [[LOCALX_BN:%.+]] = alloca float*
+// CHECK-DAG: [[LOCALX_C:%.+]] = alloca [5 x [10 x double]]*
+// CHECK-DAG: [[LOCALX_CN:%.+]] = alloca double*
+// CHECK-DAG: [[LOCALX_D:%.+]] = alloca [[TT]]*
+// CHECK-DAG: [[LOCALX_VLA1:%.+]] = alloca i64*
+// CHECK-DAG: [[LOCALX_VLA2:%.+]] = alloca i64*
+// CHECK-DAG: [[LOCALX_VLA3:%.+]] = alloca i64*
+// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCALX_B]]
+// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCALX_BN]]
+// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCALX_C]]
+// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCALX_CN]]
+// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCALX_D]]
+// CHECK-DAG: store i64* [[ARG_VLA1:%.+]], i64** [[LOCALX_VLA1]]
+// CHECK-DAG: store i64* [[ARG_VLA2:%.+]], i64** [[LOCALX_VLA2]]
+// CHECK-DAG: store i64* [[ARG_VLA3:%.+]], i64** [[LOCALX_VLA3]]
+// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]]
+// CHECK-DAG: [[USE_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCALX_B:%.+]]
+// CHECK-DAG: [[USE_BN:%.+]] = load float*, float** [[LOCALX_BN:%.+]]
+// CHECK-DAG: [[USE_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCALX_C:%.+]]
+// CHECK-DAG: [[USE_CN:%.+]] = load double*, double** [[LOCALX_CN:%.+]]
+// CHECK-DAG: [[USE_D:%.+]] = load [[TT]]*, [[TT]]** [[LOCALX_D:%.+]]
+// CHECK-DAG: [[USE_VLA1:%.+]] = load i64*, i64** [[LOCALX_VLA1:%.+]]
+// CHECK-DAG: [[USE_VLA2:%.+]] = load i64*, i64** [[LOCALX_VLA2:%.+]]
+// CHECK-DAG: [[USE_VLA3:%.+]] = load i64*, i64** [[LOCALX_VLA3:%.+]]
+// CHECK-DAG: load i32, i32* [[USE_A]]
+// CHECK-DAG: getelementptr inbounds [10 x float], [10 x float]* [[USE_B]], i{{.*}} 0, i{{.*}} 2
+// CHECK-DAG: getelementptr inbounds float, float* [[USE_BN]], i{{.*}} 3
+// CHECK-DAG: getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[USE_C]], i{{.*}} 0, i{{.*}} 1
+// CHECK-DAG: [[VLAMUL:%.+]] = mul {{.*}}i64 1, %{{.*}}
+// CHECK-DAG: getelementptr inbounds double, double* [[USE_CN]], i{{.*}} [[VLAMUL]]
+// CHECK-DAG: load i64, i64* [[USE_VLA1]]
+// CHECK-DAG: load i64, i64* [[USE_VLA2]]
+// CHECK-DAG: load i64, i64* [[USE_VLA3]]
+
+template<typename tx>
+tx ftemplate(int n) {
+ tx a = 0;
+ short aa = 0;
+ tx b[10];
+
+ #pragma omp target if(n>40)
+ {
+ a += 1;
+ aa += 1;
+ b[2] += 1;
+ }
+
+ return a;
+}
+
+static
+int fstatic(int n) {
+ int a = 0;
+ short aa = 0;
+ char aaa = 0;
+ int b[10];
+
+ #pragma omp target if(n>50)
+ {
+ a += 1;
+ aa += 1;
+ aaa += 1;
+ b[2] += 1;
+ }
+
+ return a;
+}
+
+struct S1 {
+ double a;
+
+ int r1(int n){
+ int b = n+1;
+ short int c[2][n];
+
+ #pragma omp target if(n>60)
+ {
+ this->a = (double)b + 1.5;
+ c[1][1] = ++a;
+ }
+
+ return c[1][1] + (int)b;
+ }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+ int a = 0;
+
+ // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}})
+ a += foo(n);
+
+ S1 S;
+ // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+ a += S.r1(n);
+
+ // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+ a += fstatic(n);
+
+ // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+ a += ftemplate<int>(n);
+
+ return a;
+}
+
+//
+// CHECK: define {{.*}}[[FS1]]
+//
+// We capture 2 VLA sizes in this target region
+// CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA0:%[^,]+]]
+// CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA1:%[^,]+]]
+// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
+// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK: [[TRY]]
+// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0))
+
+// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]]
+// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]]
+
+// The names below are not necessarily consistent with the names used for the
+// addresses above as some are repeated.
+// CHECK-DAG: [[BP0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+// CHECK-DAG: [[P0:%[^,]+]] = bitcast i64* [[VLA0]] to i8*
+// CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}}
+// CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}}
+// CHECK-DAG: store i64 8, i64* {{%[^,]+}}
+
+// CHECK-DAG: [[BP1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+// CHECK-DAG: [[P1:%[^,]+]] = bitcast i64* [[VLA1]] to i8*
+// CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}}
+// CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}}
+// CHECK-DAG: store i64 8, i64* {{%[^,]+}}
+
+// CHECK-DAG: [[BP2:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG: [[P2:%[^,]+]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}}
+// CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}}
+// CHECK-DAG: store i64 4, i64* {{%[^,]+}}
+
+// CHECK-DAG: [[BP3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
+// CHECK-DAG: [[P3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8*
+// CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}}
+// CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}}
+// CHECK-DAG: store i64 8, i64* {{%[^,]+}}
+
+// CHECK-DAG: [[BP4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG: [[P4:%[^,]+]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}}
+// CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}}
+// CHECK-DAG: store i64 2, i64* {{%[^,]+}}
+
+// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+// CHECK: [[FAIL]]
+// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT: br label %[[END]]
+// CHECK: [[END]]
+
+//
+// CHECK: define {{.*}}[[FSTATIC]]
+//
+// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
+// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK: [[TRY]]
+// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0))
+
+// CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]]
+// CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+// CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8*
+
+// CHECK-DAG: store i64 2, i64* [[SADDR1:%.+]]
+// CHECK-DAG: [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
+// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
+// CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8*
+
+// CHECK-DAG: store i64 1, i64* [[SADDR2:%.+]]
+// CHECK-DAG: [[SADDR2]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
+// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
+
+// CHECK-DAG: store i64 40, i64* [[SADDR3:%.+]]
+// CHECK-DAG: [[SADDR3]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]]
+// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]]
+// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]]
+// CHECK-DAG: store i8* [[BP3:%[^,]+]], i8** [[BPADDR3]]
+// CHECK-DAG: store i8* [[P3:%[^,]+]], i8** [[PADDR3]]
+// CHECK-DAG: [[BP3]] = bitcast [10 x i32]* %{{.+}} to i8*
+// CHECK-DAG: [[P3]] = bitcast [10 x i32]* %{{.+}} to i8*
+
+// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+// CHECK: [[FAIL]]
+// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT: br label %[[END]]
+// CHECK: [[END]]
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
+// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
+// CHECK: [[TRY]]
+// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0))
+
+// CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]]
+// CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]]
+// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]]
+// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]]
+// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]]
+// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]]
+// CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8*
+// CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8*
+
+// CHECK-DAG: store i64 2, i64* [[SADDR1:%.+]]
+// CHECK-DAG: [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]]
+// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]]
+// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]]
+// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]]
+// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]]
+// CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8*
+// CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8*
+
+// CHECK-DAG: store i64 40, i64* [[SADDR2:%.+]]
+// CHECK-DAG: [[SADDR2]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]]
+// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]]
+// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]]
+// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]]
+// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]]
+// CHECK-DAG: [[BP2]] = bitcast [10 x i32]* %{{.+}} to i8*
+// CHECK-DAG: [[P2]] = bitcast [10 x i32]* %{{.+}} to i8*
+
+// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
+// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]]
+// CHECK: [[FAIL]]
+// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}})
+// CHECK-NEXT: br label %[[END]]
+// CHECK: [[END]]
+
+// Check that the offloading functions are emitted and that the arguments are
+// correct and loaded correctly for the target regions of the callees of bar().
+
+// CHECK: define internal void [[HVT7]]
+// CHECK-DAG: [[LOCALX_THIS:%.+]] = alloca [[S1]]*
+// CHECK-DAG: [[LOCALX_B:%.+]] = alloca i32*
+// CHECK-DAG: [[LOCALX_C:%.+]] = alloca i16*
+// CHECK-DAG: [[LOCALX_VLA1:%.+]] = alloca i64*
+// CHECK-DAG: [[LOCALX_VLA2:%.+]] = alloca i64*
+// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCALX_THIS]]
+// CHECK-DAG: store i32* [[ARG_B:%.+]], i32** [[LOCALX_B]]
+// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCALX_C]]
+// CHECK-DAG: store i64* [[ARG_VLA1:%.+]], i64** [[LOCALX_VLA1]]
+// CHECK-DAG: store i64* [[ARG_VLA2:%.+]], i64** [[LOCALX_VLA2]]
+// CHECK-DAG: [[USE_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCALX_THIS]]
+// CHECK-DAG: [[USE_B:%.+]] = load i32*, i32** [[LOCALX_B]]
+// CHECK-DAG: [[USE_C:%.+]] = load i16*, i16** [[LOCALX_C]]
+// CHECK-DAG: [[USE_VLA1:%.+]] = load i64*, i64** [[LOCALX_VLA1]]
+// CHECK-DAG: [[USE_VLA2:%.+]] = load i64*, i64** [[LOCALX_VLA2]]
+// CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[USE_THIS]], i{{.*}} 0, i{{.*}} 0
+// CHECK-DAG: load i32, i32* [[USE_B]]
+// CHECK-DAG: [[VLAMUL:%.+]] = mul {{.*}}i64 1, %{{.*}}
+// CHECK-DAG: getelementptr inbounds i16, i16* [[USE_C]], i{{.*}} [[VLAMUL]]
+// CHECK-DAG: load i64, i64* [[USE_VLA1]]
+// CHECK-DAG: load i64, i64* [[USE_VLA2]]
+
+// CHECK: define internal void [[HVT6]]
+// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG: [[LOCALX_AA:%.+]] = alloca i16*
+// CHECK-DAG: [[LOCALX_AAA:%.+]] = alloca i8*
+// CHECK-DAG: [[LOCALX_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]]
+// CHECK-DAG: store i8* [[ARG_AAA:%.+]], i8** [[LOCALX_AAA]]
+// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A]]
+// CHECK-DAG: [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA]]
+// CHECK-DAG: [[USE_AAA:%.+]] = load i8*, i8** [[LOCALX_AAA]]
+// CHECK-DAG: [[USE_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG: load i32, i32* [[USE_A]]
+// CHECK-DAG: load i16, i16* [[USE_AA]]
+// CHECK-DAG: load i8, i8* [[USE_AAA]]
+// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[USE_B]], i{{.*}} 0, i{{.*}} 2
+
+// CHECK: define internal void [[HVT5]]
+// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32*
+// CHECK-DAG: [[LOCALX_AA:%.+]] = alloca i16*
+// CHECK-DAG: [[LOCALX_B:%.+]] = alloca [10 x i32]*
+// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]]
+// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]]
+// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A]]
+// CHECK-DAG: [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA]]
+// CHECK-DAG: [[USE_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCALX_B]]
+// CHECK-DAG: load i32, i32* [[USE_A]]
+// CHECK-DAG: load i16, i16* [[USE_AA]]
+// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[USE_B]], i{{.*}} 0, i{{.*}} 2
+#endif
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -1308,15 +1308,22 @@
Params);
break;
}
- case OMPD_target_data:
- case OMPD_target: {
+ case OMPD_target_data: {
Sema::CapturedParamNameType Params[] = {
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
Params);
break;
}
+ case OMPD_target: {
+ Sema::CapturedParamNameType Params[] = {
+ std::make_pair(StringRef(), QualType()) // __context with shared vars
+ };
+ ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope,
+ CR_OpenMP_Target, Params);
+ break;
+ }
case OMPD_teams: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty);
@@ -1382,7 +1389,85 @@
}
}
}
- return ActOnCapturedRegionEnd(S.get());
+
+ CapturedStmt *Res = cast<CapturedStmt>(ActOnCapturedRegionEnd(S.get()).get());
+
+ // If this is an offloading captured region, we need to change the captured
+ // declaration so that it takes each of the captured declarations as a
+ // parameter.
+ if (Res->getCapturedRegionKind() == CR_OpenMP_Target) {
+ CapturedDecl *OldCD = Res->getCapturedDecl();
+ DeclContext *DC = OldCD->getDeclContext();
+
+ unsigned NumParams = Res->capture_size();
+ const RecordDecl *RD = Res->getCapturedRecordDecl();
+ CapturedDecl *NewCD = CapturedDecl::Create(Context, DC, NumParams);
+ DeclContext *NewDC = CapturedDecl::castToDeclContext(NewCD);
+ SourceLocation Loc = Res->getLocStart();
+
+ SmallVector<const FieldDecl *, 3> VLASizeFields;
+ unsigned i = 0;
+ auto ri = RD->field_begin();
+ for (CapturedStmt::const_capture_iterator ci = Res->capture_begin(),
+ ce = Res->capture_end();
+ ci != ce; ++ci, ++ri, ++i) {
+ StringRef Name;
+ QualType Ty;
+ if (ci->capturesVariableArrayType()) {
+ VLASizeFields.push_back(*ri);
+ Ty = Context.getPointerType(ri->getType());
+ Name = "__vla_size";
+ } else if (ci->capturesThis()) {
+ Ty = ri->getType();
+ Name = "__this";
+ } else {
+ const VarDecl *VD = ci->getCapturedVar();
+
+ Ty = VD->getType();
+
+ // If this declaration has a variable-length array type, we need to
+ // adapt the size expression to use the implicit parameter that
+ // captures it. We also need to make sure we adapt all the sizes if
+ // the type has several dimensions.
+ if (!VLASizeFields.empty()) {
+ assert(Ty.getTypePtr() ==
+ VLASizeFields.front()->getCapturedVLAType() &&
+ "Invalid VLA information!");
+
+ Ty = VLASizeFields.back()->getCapturedVLAType()->getElementType();
+ for (unsigned t = 1; !VLASizeFields.empty();
+ VLASizeFields.pop_back(), ++t) {
+ auto *OriginalVAT = VLASizeFields.back()->getCapturedVLAType();
+ auto SizeType = VLASizeFields.back()->getType();
+ auto *SizeDecl = NewCD->getParam(i - t);
+ auto *NewSizeRefExpr = buildDeclRefExpr(
+ *this, SizeDecl, SizeDecl->getType(), Loc, true);
+ auto *NewSizeValExpr =
+ new (Context) UnaryOperator(NewSizeRefExpr, UO_Deref, SizeType,
+ VK_RValue, OK_Ordinary, Loc);
+ Ty = Context.getVariableArrayType(
+ Ty, NewSizeValExpr, OriginalVAT->getSizeModifier(),
+ OriginalVAT->getIndexTypeCVRQualifiers(),
+ OriginalVAT->getBracketsRange());
+ }
+ }
+ Ty = Context.getPointerType(Ty);
+ Name = VD->getName();
+ }
+
+ IdentifierInfo *ParamName = &Context.Idents.get(Name);
+ ImplicitParamDecl *Param =
+ ImplicitParamDecl::Create(Context, NewDC, Loc, ParamName, Ty);
+ NewCD->setParam(i, Param);
+ }
+
+ NewCD->setBody(Res->getCapturedStmt());
+ Res->setCapturedDecl(NewCD);
+ DC->addDecl(NewCD);
+ DC->removeDecl(OldCD);
+ }
+
+ return Res;
}
static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack,
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -197,12 +197,12 @@
virtual llvm::Value *getContextValue() const { return ThisValue; }
/// \brief Lookup the captured field decl for a variable.
- virtual const FieldDecl *lookup(const VarDecl *VD) const {
+ virtual const Decl *lookup(const VarDecl *VD) const {
return CaptureFields.lookup(VD);
}
- bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; }
- virtual FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
+ bool isCXXThisExprCaptured() const { return getThisDecl() != nullptr; }
+ virtual Decl *getThisDecl() const { return CXXThisFieldDecl; }
static bool classof(const CGCapturedStmtInfo *) {
return true;
@@ -1689,6 +1689,11 @@
std::pair<llvm::Value*,QualType> getVLASize(const VariableArrayType *vla);
std::pair<llvm::Value*,QualType> getVLASize(QualType vla);
+ /// getVLASizeMap - Returns an LLVM value that holds the size of the given
+ /// VLA type \a vla. Assumes that the type has already been
+ /// emitted with EmitVariablyModifiedType.
+ llvm::Value *getVLASizeMap(const VariableArrayType *vla);
+
/// LoadCXXThis - Load the value of 'this'. This function is only valid while
/// generating code for an C++ member function.
llvm::Value *LoadCXXThis() {
@@ -2211,6 +2216,8 @@
void EmitOMPFlushDirective(const OMPFlushDirective &S);
void EmitOMPOrderedDirective(const OMPOrderedDirective &S);
void EmitOMPAtomicDirective(const OMPAtomicDirective &S);
+ typedef SmallVector<llvm::Value *, 4> OMPTargetDirectiveVLASizes;
+ void PrepareOMPTargetDirectiveBodyEmission(const OMPTargetDirective &S);
void EmitOMPTargetDirective(const OMPTargetDirective &S);
void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S);
void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -1506,6 +1506,12 @@
return std::pair<llvm::Value*,QualType>(numElements, elementType);
}
+llvm::Value *CodeGenFunction::getVLASizeMap(const VariableArrayType *type) {
+ llvm::Value *vlaSize = VLASizeMap[type->getSizeExpr()];
+ assert(vlaSize && "No vla size matching the requested expression!");
+ return vlaSize;
+}
+
void CodeGenFunction::EmitVariablyModifiedType(QualType type) {
assert(type->isVariablyModifiedType() &&
"Must pass variably modified type to EmitVLASizes!");
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2121,8 +2121,79 @@
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
}
-void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
- llvm_unreachable("CodeGen for 'omp target' is not supported yet.");
+void CodeGenFunction::PrepareOMPTargetDirectiveBodyEmission(
+ const OMPTargetDirective &S) {
+ const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
+ const RecordDecl *RD = CS.getCapturedRecordDecl();
+ const CapturedDecl *CD = CS.getCapturedDecl();
+
+ SmallVector<const FieldDecl *, 3> VLASizeFields;
+ auto ri = RD->field_begin();
+ auto pi = CD->param_begin();
+ for (CapturedStmt::const_capture_iterator ci = CS.capture_begin(),
+ ce = CS.capture_end();
+ ci != ce; ++ci, ++ri, ++pi) {
+
+ if (ci->capturesVariableArrayType()) {
+ VLASizeFields.push_back(*ri);
+ continue;
+ }
+
+ if (ci->capturesThis()) {
+ auto *ThisRef = LocalDeclMap[*pi];
+ auto Addr = MakeNaturalAlignAddrLValue(ThisRef, ri->getType());
+ CXXThisValue = EmitLoadOfLValue(Addr, CS.getLocStart()).getScalarVal();
+ continue;
+ }
+
+ // Find the expressions that give the VLA sizes and update the VLASizeMap.
+ if (!VLASizeFields.empty()) {
+ assert(ci->capturesVariable() &&
+ "Expecting declaration that has VLA type!");
+
+ const PointerType *PT = cast<PointerType>((*pi)->getType().getTypePtr());
+ QualType CurTy = PT->getPointeeType();
+
+ for (auto *FD : VLASizeFields) {
+ const VariableArrayType *VAT =
+ cast<VariableArrayType>(CurTy.getTypePtr());
+ auto *V = VLASizeMap[VAT->getSizeExpr()];
+ assert(V && "VLA Size value must exist!");
+ VLASizeMap[FD->getCapturedVLAType()->getSizeExpr()] = V;
+ CurTy = VAT->getElementType();
+ }
+
+ VLASizeFields.clear();
+ }
+ }
+}
+
+void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
+ LexicalScope Scope(*this, S.getSourceRange());
+
+ // Emit target region as a standalone region.
+ auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+ CGF.PrepareOMPTargetDirectiveBodyEmission(S);
+ CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ };
+
+ // Obtain the target region outlined function.
+ llvm::Value *Fn =
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(*this, S, CodeGen);
+
+ // Check if we have any if clause associated with the directive.
+ const Expr *IfCond = nullptr;
+ if (auto C = S.getSingleClause(OMPC_if)) {
+ IfCond = cast<OMPIfClause>(C)->getCondition();
+ }
+
+ // Check if we have any device clause associated with the directive.
+ const Expr *Device = nullptr;
+ if (auto C = S.getSingleClause(OMPC_device)) {
+ Device = cast<OMPDeviceClause>(C)->getDevice();
+ }
+
+ CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device);
}
void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -2125,28 +2125,33 @@
StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args,
CD->getLocation(),
CD->getBody()->getLocStart());
- // Set the context parameter in CapturedStmtInfo.
- llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()];
- assert(DeclPtr && "missing context parameter for CapturedStmt");
- CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr));
-
- // Initialize variable-length arrays.
- LValue Base = MakeNaturalAlignAddrLValue(CapturedStmtInfo->getContextValue(),
- Ctx.getTagDeclType(RD));
- for (auto *FD : RD->fields()) {
- if (FD->hasCapturedVLAType()) {
- auto *ExprArg = EmitLoadOfLValue(EmitLValueForField(Base, FD),
- S.getLocStart()).getScalarVal();
- auto VAT = FD->getCapturedVLAType();
- VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+
+ // Initialize variable length arrays and 'this' using the context argument,
+ // if any. Otherwise the function's implicit parameters will be used.
+ if (CD->hasContextParam()) {
+ llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()];
+ assert(DeclPtr && "missing context parameter for CapturedStmt");
+ CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr));
+
+ // Initialize variable-length arrays.
+ LValue Base = MakeNaturalAlignAddrLValue(
+ CapturedStmtInfo->getContextValue(), Ctx.getTagDeclType(RD));
+ for (auto *FD : RD->fields()) {
+ if (FD->hasCapturedVLAType()) {
+ auto *ExprArg =
+ EmitLoadOfLValue(EmitLValueForField(Base, FD), S.getLocStart())
+ .getScalarVal();
+ auto VAT = FD->getCapturedVLAType();
+ VLASizeMap[VAT->getSizeExpr()] = ExprArg;
+ }
}
- }
- // If 'this' is captured, load it into CXXThisValue.
- if (CapturedStmtInfo->isCXXThisExprCaptured()) {
- FieldDecl *FD = CapturedStmtInfo->getThisFieldDecl();
- LValue ThisLValue = EmitLValueForField(Base, FD);
- CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal();
+ // If 'this' is captured, load it into CXXThisValue.
+ if (CapturedStmtInfo->isCXXThisExprCaptured()) {
+ FieldDecl *FD = cast<FieldDecl>(CapturedStmtInfo->getThisDecl());
+ LValue ThisLValue = EmitLValueForField(Base, FD);
+ CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal();
+ }
}
PGO.assignRegionCounters(CD, F);
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -154,6 +154,14 @@
// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 cncl_kind);
OMPRTL__kmpc_cancel,
+
+ //
+ // Offloading related calls
+ //
+ // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+ // *arg_types);
+ OMPRTL__tgt_target,
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -177,6 +185,22 @@
/// \brief Implicit barrier in 'single' directive.
OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
};
+
+ /// \brief Values for bit flags used to specify the mapping type for
+ /// offloading.
+ enum OpenMPOffloadMappingFlags {
+ /// \brief Allocate memory on the device and move data from host to device.
+ OMP_MAP_TO = 0x01,
+ /// \brief Allocate memory on the device and move data from device to host.
+ OMP_MAP_FROM = 0x02,
+ };
+
+ enum OpenMPOffloadingReservedDeviceIDs {
+ /// \brief Device ID if the device was not defined, runtime should get it
+ /// from environment variables in the spec.
+ OMP_DEVICEID_UNDEF = -1,
+ };
+
CodeGenModule &CGM;
/// \brief Default const ident_t object used for initialization of all other
/// ident_t objects.
@@ -707,6 +731,28 @@
///
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
OpenMPDirectiveKind CancelRegion);
+
+ /// \brief Emit outlined function for 'target' directive.
+ /// \param D Directive to emit.
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ virtual llvm::Value *
+ emitTargetOutlinedFunction(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ const RegionCodeGenTy &CodeGen);
+
+ /// \brief Emit the target offloading code associated with \a D. The emitted
+ /// code attempts offloading the execution to the device, and in the event of
+ /// a failure it executes the host version outlined in \a OutlinedFn.
+ /// \param D Directive to emit.
+ /// \param OutlinedFn Host version of the code to be offloaded.
+ /// \param IfCond Expression evaluated in if clause associated with the target
+ /// directive, or null if no if clause is used.
+ /// \param Device Expression evaluated in device clause associated with the
+ /// target directive, or null if no device clause is used.
+ virtual void emitTargetCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ llvm::Value *OutlinedFn, const Expr *IfCond,
+ const Expr *Device);
};
} // namespace CodeGen
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -41,6 +41,8 @@
/// \brief Region for constructs that do not require function outlining,
/// like 'for', 'sections', 'atomic' etc. directives.
InlinedRegion,
+ /// \brief Region with outlined function for standalone 'target' directive.
+ TargetRegion,
};
CGOpenMPRegionInfo(const CapturedStmt &CS,
@@ -164,16 +166,16 @@
llvm_unreachable("No context value for inlined OpenMP region");
}
/// \brief Lookup the captured field decl for a variable.
- const FieldDecl *lookup(const VarDecl *VD) const override {
+ const Decl *lookup(const VarDecl *VD) const override {
if (OuterRegionInfo)
return OuterRegionInfo->lookup(VD);
// If there is no outer outlined region,no need to lookup in a list of
// captured variables, we can use the original one.
return nullptr;
}
- FieldDecl *getThisFieldDecl() const override {
+ Decl *getThisDecl() const override {
if (OuterRegionInfo)
- return OuterRegionInfo->getThisFieldDecl();
+ return OuterRegionInfo->getThisDecl();
return nullptr;
}
/// \brief Get a variable or parameter for storing global thread id
@@ -204,6 +206,57 @@
CGOpenMPRegionInfo *OuterRegionInfo;
};
+/// \brief API for captured statement code generation in OpenMP target
+/// constructs. For these captures, implicit parameters are used instead of the
+/// captured fields.
+class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
+public:
+ CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
+ const RegionCodeGenTy &CodeGen)
+ : CGOpenMPRegionInfo(TargetRegion, CodeGen, OMPD_target),
+ CXXThisImplicitParamDecl(nullptr) {
+
+ CapturedDecl::param_iterator Param = CS.getCapturedDecl()->param_begin();
+ for (CapturedStmt::const_capture_iterator I = CS.capture_begin(),
+ E = CS.capture_end();
+ I != E; ++I, ++Param) {
+ if (I->capturesThis())
+ CXXThisImplicitParamDecl = *Param;
+ else if (I->capturesVariable())
+ CaptureImplicitParams[I->getCapturedVar()] = *Param;
+ }
+ }
+
+ /// \brief This is unused for target regions because each starts executing
+ /// with a single thread.
+ const VarDecl *getThreadIDVariable() const override { return nullptr; }
+
+ /// \brief Get the name of the capture helper.
+ StringRef getHelperName() const override { return ".omp_offloading."; }
+
+ static bool classof(const CGCapturedStmtInfo *Info) {
+ return CGOpenMPRegionInfo::classof(Info) &&
+ cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
+ }
+
+ /// \brief Lookup the captured implicit parameter declaration for a variable.
+ virtual const Decl *lookup(const VarDecl *VD) const override {
+ return CaptureImplicitParams.lookup(VD);
+ }
+
+ virtual Decl *getThisDecl() const override {
+ return CXXThisImplicitParamDecl;
+ }
+
+private:
+ /// \brief Keep the map between VarDecl and ImplicitParamDecl.
+ llvm::SmallDenseMap<const VarDecl *, ImplicitParamDecl *>
+ CaptureImplicitParams;
+
+ /// \brief Captured 'this' type.
+ ImplicitParamDecl *CXXThisImplicitParamDecl;
+};
+
/// \brief RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
CodeGenFunction &CGF;
@@ -838,6 +891,22 @@
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
break;
}
+ case OMPRTL__tgt_target: {
+ // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
+ // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
+ // *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int32Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
+ break;
+ }
}
return RTLFn;
}
@@ -2236,7 +2305,7 @@
if (Init) {
if (auto *Elem = Pair.second.PrivateElemInit) {
auto *OriginalVD = Pair.second.Original;
- auto *SharedField = CapturesInfo.lookup(OriginalVD);
+ auto *SharedField = cast<FieldDecl>(CapturesInfo.lookup(OriginalVD));
auto SharedRefLValue =
CGF.EmitLValueForField(SharedsBase, SharedField);
QualType Type = OriginalVD->getType();
@@ -2836,3 +2905,268 @@
}
}
+llvm::Value *
+CGOpenMPRuntime::emitTargetOutlinedFunction(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ const RegionCodeGenTy &CodeGen) {
+
+ const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+
+ // Generate the outlined target offloading function.
+ CodeGenFunction TargetCGF(CGM, true);
+ CGOpenMPTargetRegionInfo CGInfo(*CS, CodeGen);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(TargetCGF, &CGInfo);
+ return TargetCGF.GenerateCapturedStmtFunction(*CS);
+}
+
+void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ llvm::Value *OutlinedFn,
+ const Expr *IfCond, const Expr *Device) {
+
+ // Fill up the arrays with the all the captured variables.
+ SmallVector<llvm::Value *, 16> BasePointers;
+ SmallVector<llvm::Value *, 16> Pointers;
+ SmallVector<llvm::Value *, 16> Sizes;
+ SmallVector<unsigned, 16> MapTypes;
+
+ bool hasVLACaptures = false;
+ const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+ auto ri = CS.getCapturedRecordDecl()->field_begin();
+ auto ii = CS.capture_init_begin();
+ for (CapturedStmt::const_capture_iterator ci = CS.capture_begin(),
+ ce = CS.capture_end();
+ ci != ce; ++ci, ++ri, ++ii) {
+ StringRef Name;
+ QualType Ty;
+ llvm::Value *BasePointer;
+ llvm::Value *Pointer;
+ llvm::Value *Size;
+ unsigned MapType;
+
+ if (ci->capturesVariableArrayType()) {
+ llvm::Value *V = CGF.getVLASizeMap(ri->getCapturedVLAType());
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(
+ CGF.CreateMemTemp(ri->getType(), "__vla_size"), ri->getType());
+ CGF.EmitStoreThroughLValue(RValue::get(V), LV);
+ BasePointer = Pointer = LV.getAddress();
+ uint64_t SizeVal =
+ CGM.getContext().getTypeSizeInChars(ri->getType()).getQuantity();
+ Size = llvm::ConstantInt::get(CGM.SizeTy, SizeVal);
+
+ hasVLACaptures = true;
+ // VLA sizes don't need to be copied back from the device.
+ MapType = CGOpenMPRuntime::OMP_MAP_TO;
+ } else if (ci->capturesThis()) {
+ BasePointer = Pointer = CGF.LoadCXXThis();
+ const PointerType *PtrTy = cast<PointerType>(ri->getType().getTypePtr());
+ uint64_t SizeVal = CGM.getContext()
+ .getTypeSizeInChars(PtrTy->getPointeeType())
+ .getQuantity();
+ Size = llvm::ConstantInt::get(CGM.SizeTy, SizeVal);
+
+ // Default map type.
+ MapType = CGOpenMPRuntime::OMP_MAP_TO | CGOpenMPRuntime::OMP_MAP_FROM;
+ } else {
+ BasePointer = Pointer =
+ CGF.EmitLValue(cast<DeclRefExpr>(*ii)).getAddress();
+
+ const ReferenceType *PtrTy =
+ cast<ReferenceType>(ri->getType().getTypePtr());
+ QualType ElementType = PtrTy->getPointeeType();
+
+ if (auto *VAT = dyn_cast<VariableArrayType>(ElementType.getTypePtr())) {
+ auto VATInfo = CGF.getVLASize(VAT);
+ Size = llvm::ConstantInt::get(
+ CGM.SizeTy,
+ CGM.getContext().getTypeSizeInChars(VATInfo.second).getQuantity());
+ Size = CGF.Builder.CreateNUWMul(Size, VATInfo.first);
+ } else {
+ uint64_t ElementTypeSize =
+ CGM.getContext().getTypeSizeInChars(ElementType).getQuantity();
+ Size = llvm::ConstantInt::get(CGM.SizeTy, ElementTypeSize);
+ }
+
+ // Default map type.
+ MapType = CGOpenMPRuntime::OMP_MAP_TO | CGOpenMPRuntime::OMP_MAP_FROM;
+ }
+
+ BasePointers.push_back(BasePointer);
+ Pointers.push_back(Pointer);
+ Sizes.push_back(Size);
+ MapTypes.push_back(MapType);
+ }
+
+ if (IfCond) {
+ // Check if the if clause conditional always evaluates to true or false.
+ // If it evaluates to false, we only need to emit the host version of the
+ // target region. If it evaluates to true, we can proceed with the codegen
+ // as if no if clause was provided.
+ bool CondConstant;
+ if (CGF.ConstantFoldsToSimpleInteger(IfCond, CondConstant)) {
+ if (CondConstant) {
+ IfCond = nullptr;
+ } else {
+ CGF.Builder.CreateCall(OutlinedFn, BasePointers);
+ return;
+ }
+ }
+ }
+
+ // Generate the code to launch the target region. The pattern is the
+ // following:
+ //
+ // ...
+ // br IfCond (if any), omp_offload, omp_offload_fail
+ //
+ // omp_offload.try:
+ // ; create arrays for offloading
+ // error = __tgt_target(...)
+ // br error, omp_offload_fail, omp_offload_end
+ //
+ // omp_offload.fail:
+ // host_version(...)
+ //
+ // omp_offload.end:
+ // ...
+ //
+
+ auto OffloadTryBlock = CGF.createBasicBlock("omp_offload.try");
+ auto OffloadFailBlock = CGF.createBasicBlock("omp_offload.fail");
+ auto ContBlock = CGF.createBasicBlock("omp_offload.end");
+
+ if (IfCond)
+ CGF.EmitBranchOnBoolExpr(IfCond, OffloadTryBlock, OffloadFailBlock,
+ /*TrueCount=*/0);
+
+ CGF.EmitBlock(OffloadTryBlock);
+
+ unsigned PointerNumVal = BasePointers.size();
+ llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
+ llvm::Value *BasePointersArray;
+ llvm::Value *PointersArray;
+ llvm::Value *SizesArray;
+ llvm::Value *MapTypesArray;
+
+ if (PointerNumVal) {
+ llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true);
+ QualType PointerArrayType = CGF.getContext().getConstantArrayType(
+ CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+
+ BasePointersArray =
+ CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs");
+ PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs");
+
+ // If we don't have any VLA types, we can use a constant array for the map
+ // sizes, otherwise we need to fill up the arrays as we do for the pointers.
+ if (hasVLACaptures) {
+ QualType SizeArrayType = CGF.getContext().getConstantArrayType(
+ CGF.getContext().getIntTypeForBitwidth(64, /*Signed=*/true),
+ PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0);
+ SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
+ } else {
+ // We expect all the sizes to be constant, so we collect them to create
+ // a constant array.
+ SmallVector<uint64_t, 16> ConstSizes;
+ for (auto *V : Sizes)
+ ConstSizes.push_back(cast<llvm::ConstantInt>(V)->getZExtValue());
+
+ llvm::Constant *SizesArrayInit =
+ llvm::ConstantDataArray::get(CGF.Builder.getContext(), ConstSizes);
+ auto *SizesArrayGbl = new llvm::GlobalVariable(
+ CGM.getModule(), SizesArrayInit->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+ SizesArrayInit, ".offload_sizes");
+ SizesArrayGbl->setUnnamedAddr(true);
+ SizesArray = SizesArrayGbl;
+ }
+
+ // The map types are always constant so we don't need to generate code to
+ // fill arrays. Instead, we create an array constant.
+ llvm::Constant *MapTypesArrayInit =
+ llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
+ auto *MapTypesArrayGbl = new llvm::GlobalVariable(
+ CGM.getModule(), MapTypesArrayInit->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+ MapTypesArrayInit, ".offload_maptypes");
+ MapTypesArrayGbl->setUnnamedAddr(true);
+ MapTypesArray = MapTypesArrayGbl;
+
+ for (unsigned i = 0; i < PointerNumVal; ++i) {
+ llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
+ 0, i);
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BP);
+
+ llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
+ i);
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), P);
+
+ if (hasVLACaptures) {
+ llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 0, i);
+ CGF.Builder.CreateStore(
+ CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
+ S);
+ }
+ }
+
+ BasePointersArray = CGF.Builder.CreateConstGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray,
+ 0, 0);
+ PointersArray = CGF.Builder.CreateConstGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0,
+ 0);
+ SizesArray = CGF.Builder.CreateConstGEP2_32(
+ llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, 0, 0);
+ MapTypesArray = CGF.Builder.CreateConstGEP2_32(
+ llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, 0, 0);
+
+ } else {
+ BasePointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy);
+ PointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy);
+ SizesArray = llvm::Constant::getNullValue(CGM.SizeTy->getPointerTo());
+ MapTypesArray = llvm::Constant::getNullValue(CGM.Int32Ty->getPointerTo());
+ }
+
+ // On top of the arrays that were filled up, the target offloading call takes
+ // as arguments the device id as well as the host pointer. The host pointer
+ // is used by the runtime library to identify the current target region, so
+ // it only has to be unique and not necessarily point to anything. It could be
+ // the pointer to the outlined function that implements the target region, but
+ // we aren't using that so that the compiler doesn't need to keep that, and
+ // could therefore inline the host function if proven worthwhile during
+ // optimization.
+
+ llvm::Value *HostPtr = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+ llvm::GlobalValue::PrivateLinkage,
+ llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr");
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGM.Int32Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
+
+ llvm::Value *OffloadingArgs[] = {DeviceID, HostPtr, PointerNum,
+ BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray};
+ auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+ OffloadingArgs);
+ auto Error = CGF.Builder.CreateICmpNE(Return, CGF.Builder.getInt32(0));
+ CGF.Builder.CreateCondBr(Error, OffloadFailBlock, ContBlock);
+
+ CGF.EmitBlock(OffloadFailBlock);
+ CGF.Builder.CreateCall(OutlinedFn, BasePointers);
+ CGF.EmitBranch(ContBlock);
+
+ CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
+ return;
+}
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1884,6 +1884,29 @@
return CGF.EmitLValueForField(LV, FD);
}
+static LValue EmitCapturedImplicitParamLValue(CodeGenFunction &CGF,
+ const ImplicitParamDecl *PD) {
+ // If the captured declaration is an implicit parameter, it should already
+ // exist in the local declaration map.
+ LValue LV = LValue::MakeAddr(CGF.GetAddrOfLocalVar(PD), PD->getType(),
+ CharUnits(), CGF.getContext());
+ RValue RV = CGF.EmitLoadOfLValue(LV, PD->getLocStart());
+ return LValue::MakeAddr(RV.getScalarVal(), PD->getType(), CharUnits(),
+ CGF.getContext());
+}
+
+static LValue EmitCapturedValue(CodeGenFunction &CGF, const Decl *D,
+ llvm::Value *ThisValue) {
+ switch (D->getKind()) {
+ default:
+ llvm_unreachable("Unexpected declaration kind for capture!");
+ case Decl::ImplicitParam:
+ return EmitCapturedImplicitParamLValue(CGF, cast<ImplicitParamDecl>(D));
+ case Decl::Field:
+ return EmitCapturedFieldLValue(CGF, cast<FieldDecl>(D), ThisValue);
+ }
+}
+
/// Named Registers are named metadata pointing to the register name
/// which will be read from/written to as an argument to the intrinsic
/// @llvm.read/write_register.
@@ -1943,8 +1966,8 @@
if (auto *V = LocalDeclMap.lookup(VD))
return MakeAddrLValue(V, T, Alignment);
else
- return EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD),
- CapturedStmtInfo->getContextValue());
+ return EmitCapturedValue(*this, CapturedStmtInfo->lookup(VD),
+ CapturedStmtInfo->getContextValue());
}
assert(isa<BlockDecl>(CurCodeDecl));
return MakeAddrLValue(GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()),
Index: include/clang/Sema/ScopeInfo.h
===================================================================
--- include/clang/Sema/ScopeInfo.h
+++ include/clang/Sema/ScopeInfo.h
@@ -608,6 +608,8 @@
return "default captured statement";
case CR_OpenMP:
return "OpenMP region";
+ case CR_OpenMP_Target:
+ return "OpenMP target region";
}
llvm_unreachable("Invalid captured region kind!");
}
Index: include/clang/Basic/CapturedStmt.h
===================================================================
--- include/clang/Basic/CapturedStmt.h
+++ include/clang/Basic/CapturedStmt.h
@@ -16,7 +16,8 @@
/// \brief The different kinds of captured statement.
enum CapturedRegionKind {
CR_Default,
- CR_OpenMP
+ CR_OpenMP,
+ CR_OpenMP_Target
};
} // end namespace clang
Index: include/clang/AST/Stmt.h
===================================================================
--- include/clang/AST/Stmt.h
+++ include/clang/AST/Stmt.h
@@ -2059,7 +2059,7 @@
/// \brief The pointer part is the implicit the outlined function and the
/// int part is the captured region kind, 'CR_Default' etc.
- llvm::PointerIntPair<CapturedDecl *, 1, CapturedRegionKind> CapDeclAndKind;
+ llvm::PointerIntPair<CapturedDecl *, 2, CapturedRegionKind> CapDeclAndKind;
/// \brief The record for captured variables, a RecordDecl or CXXRecordDecl.
RecordDecl *TheRecordDecl;
Index: include/clang/AST/Decl.h
===================================================================
--- include/clang/AST/Decl.h
+++ include/clang/AST/Decl.h
@@ -3645,8 +3645,9 @@
llvm::PointerIntPair<Stmt *, 1, bool> BodyAndNothrow;
explicit CapturedDecl(DeclContext *DC, unsigned NumParams)
- : Decl(Captured, DC, SourceLocation()), DeclContext(Captured),
- NumParams(NumParams), ContextParam(0), BodyAndNothrow(nullptr, false) { }
+ : Decl(Captured, DC, SourceLocation()), DeclContext(Captured),
+ NumParams(NumParams), ContextParam(-1u),
+ BodyAndNothrow(nullptr, false) {}
ImplicitParamDecl *const *getParams() const {
return getTrailingObjects<ImplicitParamDecl *>();
@@ -3679,9 +3680,11 @@
getParams()[i] = P;
}
+ bool hasContextParam() const { return ContextParam < NumParams; }
+
/// \brief Retrieve the parameter containing captured variables.
ImplicitParamDecl *getContextParam() const {
- assert(ContextParam < NumParams);
+ assert(hasContextParam());
return getParam(ContextParam);
}
void setContextParam(unsigned i, ImplicitParamDecl *P) {
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits