sfantao updated this revision to Diff 32211.
sfantao added a comment.

This patch tries to avoid changing the common infrastructure as much as possible, by adapting the CapturedDecl creation in Sema and by adding support for a second kind of capture, ImplicitParamDecl, on top of the existing FieldDecl.

It also adds codegen for the device clause, since parsing and Sema support for it landed in the meantime. The regression test has not been updated for that yet; I wanted to make sure the direction this is taking is approved before diving into it. Thanks!

Samuel

http://reviews.llvm.org/D11361

Files:
  include/clang/AST/Decl.h
  include/clang/AST/Stmt.h
  include/clang/Basic/CapturedStmt.h
  include/clang/Sema/ScopeInfo.h
  lib/CodeGen/CGExpr.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmt.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.cpp
  lib/CodeGen/CodeGenFunction.h
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/target_codegen.cpp
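For context before the diff, here is a minimal sketch of the kind of construct this codegen targets. This is hypothetical user code, not taken from the patch or its test: the point is only that each variable referenced inside the region, as well as each VLA size, is captured and passed to the outlined host function as its own pointer parameter (ImplicitParamDecl) rather than through a single __context record, and is mapped to and from the device by the __tgt_target call.

```cpp
// Hypothetical illustration only; names and values are made up.
// 'a', 'b', and the VLA size of 'b' each become a separate parameter of the
// outlined host function and a separate entry in the offloading arrays.
int offload_sum(int n) {
  int a = 0;
  double b[n];                      // VLA: its size is captured as well
  #pragma omp target if(n > 10) device(0)
  {
    a += 1;                         // scalar capture, mapped to/from the device
    b[2] += 1.0;                    // VLA capture plus its captured size
  }
  return a;
}
```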
Index: test/OpenMP/target_codegen.cpp =================================================================== --- /dev/null +++ test/OpenMP/target_codegen.cpp @@ -0,0 +1,583 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +// REQUIRES: powerpc-registered-target +#ifndef HEADER +#define HEADER + +// CHECK-DAG: [[TT:%.+]] = type { i64, i8 } +// CHECK-DAG: [[S1:%.+]] = type { double } + +// We have 8 target regions, but only 7 that actually will generate offloading +// code, and only 6 will have mapped arguments. + +// CHECK-DAG: [[MAPT2:@.+]] = private constant [1 x i32] [i32 3] +// CHECK-DAG: [[MAPT3:@.+]] = private constant [2 x i32] [i32 3, i32 3] +// CHECK-DAG: [[MAPT4:@.+]] = private constant [9 x i32] [i32 3, i32 3, i32 1, i32 3, i32 3, i32 1, i32 1, i32 3, i32 3] +// CHECK-DAG: [[MAPT5:@.+]] = private constant [3 x i32] [i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT6:@.+]] = private constant [4 x i32] [i32 3, i32 3, i32 3, i32 3] +// CHECK-DAG: [[MAPT7:@.+]] = private constant [5 x i32] [i32 3, i32 3, i32 1, i32 1, i32 3] +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +template<typename tx, typename ty> +struct TT{ + tx X; + ty Y; +}; + +// CHECK: define {{.*}}[[FOO:@.+]]( +int foo(int n) { + int a = 0; + short aa = 0; + float b[10]; + float bn[n]; + double c[5][10]; + double cn[5][n]; + TT<long, char> d; + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i64* null, i32* null) + // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT0:@.+]]() + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target + { + } + + // CHECK: call void [[HVT1:@.+]](i32* {{[^,]+}}) + #pragma omp target if(0) + { + a += 1; + } + + // CHECK: br label %[[TRY:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + + // CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]] + // CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT2:@.+]](i32* {{[^,]+}}) + // CHECK-NEXT: br label 
%[[END]] + // CHECK: [[END]] + #pragma omp target if(1) + { + a += 1; + } + + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0)) + + // CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]] + // CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] + // CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] + // CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] + // CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* + + // CHECK-DAG: store i64 2, i64* [[SADDR1:%.+]] + // CHECK-DAG: [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]] + // CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]] + // CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]] + // CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8* + // CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8* + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT3:@.+]]({{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target if(n>10) + { + a += 1; + aa += 1; + } + + // We capture 3 VLA sizes in this target region + // CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA0:%[^,]+]] + // CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA1:%[^,]+]] + // CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA2:%[^,]+]] + // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 + // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] + // CHECK: [[TRY]] + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0)) + + // CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]] + // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] + // CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] + // CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]] + // CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]] + // CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]] + // CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]] + // CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]] + // CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]] + // CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]] + // CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]] + // CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]] + // CHECK-DAG: [[SADDR4:%.+]] = getelementptr inbounds i64, i64* 
[[S]], i32 [[IDX4:[0-9]+]] + // CHECK-DAG: [[BPADDR4:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX4]] + // CHECK-DAG: [[PADDR4:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX4]] + // CHECK-DAG: [[SADDR5:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX5:[0-9]+]] + // CHECK-DAG: [[BPADDR5:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX5]] + // CHECK-DAG: [[PADDR5:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX5]] + // CHECK-DAG: [[SADDR6:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX6:[0-9]+]] + // CHECK-DAG: [[BPADDR6:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX6]] + // CHECK-DAG: [[PADDR6:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX6]] + // CHECK-DAG: [[SADDR7:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX7:[0-9]+]] + // CHECK-DAG: [[BPADDR7:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX7]] + // CHECK-DAG: [[PADDR7:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX7]] + // CHECK-DAG: [[SADDR8:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX8:[0-9]+]] + // CHECK-DAG: [[BPADDR8:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX8]] + // CHECK-DAG: [[PADDR8:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX8]] + + // The names below are not necessarily consistent with the names used for the + // addresses above as some are repeated. + // CHECK-DAG: [[BP0:%[^,]+]] = bitcast i64* [[VLA0]] to i8* + // CHECK-DAG: [[P0:%[^,]+]] = bitcast i64* [[VLA0]] to i8* + // CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 8, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP1:%[^,]+]] = bitcast i64* [[VLA1]] to i8* + // CHECK-DAG: [[P1:%[^,]+]] = bitcast i64* [[VLA1]] to i8* + // CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 8, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP2:%[^,]+]] = bitcast i64* [[VLA2]] to i8* + // CHECK-DAG: [[P2:%[^,]+]] = bitcast i64* [[VLA2]] to i8* + // CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 8, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP3:%[^,]+]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: [[P3:%[^,]+]] = bitcast i32* %{{.+}} to i8* + // CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 4, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8* + // CHECK-DAG: [[P4:%[^,]+]] = bitcast [10 x float]* %{{.+}} to i8* + // CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 40, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP5:%[^,]+]] = bitcast float* %{{.+}} to i8* + // CHECK-DAG: [[P5:%[^,]+]] = bitcast float* %{{.+}} to i8* + // CHECK-DAG: store i8* [[BP5]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P5]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 4, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8* + // CHECK-DAG: [[P6:%[^,]+]] = bitcast [5 x [10 x double]]* %{{.+}} to i8* + // CHECK-DAG: store i8* [[BP6]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P6]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 400, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP7:%[^,]+]] = bitcast double* %{{.+}} to i8* + // CHECK-DAG: [[P7:%[^,]+]] = bitcast double* %{{.+}} to i8* + // CHECK-DAG: store i8* [[BP7]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P7]], 
i8** {{%[^,]+}} + // CHECK-DAG: store i64 8, i64* {{%[^,]+}} + + // CHECK-DAG: [[BP8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8* + // CHECK-DAG: [[P8:%[^,]+]] = bitcast [[TT]]* %{{.+}} to i8* + // CHECK-DAG: store i8* [[BP8]], i8** {{%[^,]+}} + // CHECK-DAG: store i8* [[P8]], i8** {{%[^,]+}} + // CHECK-DAG: store i64 16, i64* {{%[^,]+}} + + // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 + // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]] + // CHECK: [[FAIL]] + // CHECK: call void [[HVT4:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) + // CHECK-NEXT: br label %[[END]] + // CHECK: [[END]] + #pragma omp target if(n>20) + { + a += 1; + b[2] += 1.0; + bn[3] += 1.0; + c[1][2] += 1.0; + cn[1][3] += 1.0; + d.X += 1; + d.Y += 1; + } + + return a; +} + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions in foo(). + +// CHECK: define internal void [[HVT0]] + +// CHECK: define internal void [[HVT1]] +// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32* +// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]] +// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]] +// CHECK-DAG: load i32, i32* [[USE_A]] + +// CHECK: define internal void [[HVT2]] +// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32* +// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]] +// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]] +// CHECK-DAG: load i32, i32* [[USE_A]] + +// CHECK: define internal void [[HVT3]] +// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32* +// CHECK-DAG: [[LOCALX_AA:%.+]] = alloca i16* +// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]] +// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]] +// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]] +// CHECK-DAG: [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA:%.+]] +// CHECK-DAG: load i32, i32* [[USE_A]] +// CHECK-DAG: load i16, i16* [[USE_AA]] + +// CHECK: define internal void [[HVT4]] +// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32* +// CHECK-DAG: [[LOCALX_B:%.+]] = alloca [10 x float]* +// CHECK-DAG: [[LOCALX_BN:%.+]] = alloca float* +// CHECK-DAG: [[LOCALX_C:%.+]] = alloca [5 x [10 x double]]* +// CHECK-DAG: [[LOCALX_CN:%.+]] = alloca double* +// CHECK-DAG: [[LOCALX_D:%.+]] = alloca [[TT]]* +// CHECK-DAG: [[LOCALX_VLA1:%.+]] = alloca i64* +// CHECK-DAG: [[LOCALX_VLA2:%.+]] = alloca i64* +// CHECK-DAG: [[LOCALX_VLA3:%.+]] = alloca i64* +// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]] +// CHECK-DAG: store [10 x float]* [[ARG_B:%.+]], [10 x float]** [[LOCALX_B]] +// CHECK-DAG: store float* [[ARG_BN:%.+]], float** [[LOCALX_BN]] +// CHECK-DAG: store [5 x [10 x double]]* [[ARG_C:%.+]], [5 x [10 x double]]** [[LOCALX_C]] +// CHECK-DAG: store double* [[ARG_CN:%.+]], double** [[LOCALX_CN]] +// CHECK-DAG: store [[TT]]* [[ARG_D:%.+]], [[TT]]** [[LOCALX_D]] +// CHECK-DAG: store i64* [[ARG_VLA1:%.+]], i64** [[LOCALX_VLA1]] +// CHECK-DAG: store i64* [[ARG_VLA2:%.+]], i64** [[LOCALX_VLA2]] +// CHECK-DAG: store i64* [[ARG_VLA3:%.+]], i64** [[LOCALX_VLA3]] +// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A:%.+]] +// CHECK-DAG: [[USE_B:%.+]] = load [10 x float]*, [10 x float]** [[LOCALX_B:%.+]] +// CHECK-DAG: [[USE_BN:%.+]] = load float*, float** [[LOCALX_BN:%.+]] +// CHECK-DAG: [[USE_C:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[LOCALX_C:%.+]] +// CHECK-DAG: [[USE_CN:%.+]] = load double*, double** [[LOCALX_CN:%.+]] +// CHECK-DAG: [[USE_D:%.+]] = load 
[[TT]]*, [[TT]]** [[LOCALX_D:%.+]] +// CHECK-DAG: [[USE_VLA1:%.+]] = load i64*, i64** [[LOCALX_VLA1:%.+]] +// CHECK-DAG: [[USE_VLA2:%.+]] = load i64*, i64** [[LOCALX_VLA2:%.+]] +// CHECK-DAG: [[USE_VLA3:%.+]] = load i64*, i64** [[LOCALX_VLA3:%.+]] +// CHECK-DAG: load i32, i32* [[USE_A]] +// CHECK-DAG: getelementptr inbounds [10 x float], [10 x float]* [[USE_B]], i{{.*}} 0, i{{.*}} 2 +// CHECK-DAG: getelementptr inbounds float, float* [[USE_BN]], i{{.*}} 3 +// CHECK-DAG: getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[USE_C]], i{{.*}} 0, i{{.*}} 1 +// CHECK-DAG: [[VLAMUL:%.+]] = mul {{.*}}i64 1, %{{.*}} +// CHECK-DAG: getelementptr inbounds double, double* [[USE_CN]], i{{.*}} [[VLAMUL]] +// CHECK-DAG: load i64, i64* [[USE_VLA1]] +// CHECK-DAG: load i64, i64* [[USE_VLA2]] +// CHECK-DAG: load i64, i64* [[USE_VLA3]] + +template<typename tx> +tx ftemplate(int n) { + tx a = 0; + short aa = 0; + tx b[10]; + + #pragma omp target if(n>40) + { + a += 1; + aa += 1; + b[2] += 1; + } + + return a; +} + +static +int fstatic(int n) { + int a = 0; + short aa = 0; + char aaa = 0; + int b[10]; + + #pragma omp target if(n>50) + { + a += 1; + aa += 1; + aaa += 1; + b[2] += 1; + } + + return a; +} + +struct S1 { + double a; + + int r1(int n){ + int b = n+1; + short int c[2][n]; + + #pragma omp target if(n>60) + { + this->a = (double)b + 1.5; + c[1][1] = ++a; + } + + return c[1][1] + (int)b; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + // CHECK: call {{.*}}i32 [[FOO]](i32 {{.*}}) + a += foo(n); + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate<int>(n); + + return a; +} + +// +// CHECK: define {{.*}}[[FS1]] +// +// We capture 2 VLA sizes in this target region +// CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA0:%[^,]+]] +// CHECK-DAG: store i64 %{{[^,]+}}, i64* [[VLA1:%[^,]+]] +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0)) + +// CHECK-DAG: [[SADDR0:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] +// CHECK-DAG: [[SADDR1:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]] +// CHECK-DAG: [[SADDR2:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]] +// CHECK-DAG: [[SADDR3:%.+]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]] +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]] +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]] + +// The names below are not necessarily consistent with the names used for the +// addresses above as 
some are repeated. +// CHECK-DAG: [[BP0:%[^,]+]] = bitcast i64* [[VLA0]] to i8* +// CHECK-DAG: [[P0:%[^,]+]] = bitcast i64* [[VLA0]] to i8* +// CHECK-DAG: store i8* [[BP0]], i8** {{%[^,]+}} +// CHECK-DAG: store i8* [[P0]], i8** {{%[^,]+}} +// CHECK-DAG: store i64 8, i64* {{%[^,]+}} + +// CHECK-DAG: [[BP1:%[^,]+]] = bitcast i64* [[VLA1]] to i8* +// CHECK-DAG: [[P1:%[^,]+]] = bitcast i64* [[VLA1]] to i8* +// CHECK-DAG: store i8* [[BP1]], i8** {{%[^,]+}} +// CHECK-DAG: store i8* [[P1]], i8** {{%[^,]+}} +// CHECK-DAG: store i64 8, i64* {{%[^,]+}} + +// CHECK-DAG: [[BP2:%[^,]+]] = bitcast i32* %{{.+}} to i8* +// CHECK-DAG: [[P2:%[^,]+]] = bitcast i32* %{{.+}} to i8* +// CHECK-DAG: store i8* [[BP2]], i8** {{%[^,]+}} +// CHECK-DAG: store i8* [[P2]], i8** {{%[^,]+}} +// CHECK-DAG: store i64 4, i64* {{%[^,]+}} + +// CHECK-DAG: [[BP3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8* +// CHECK-DAG: [[P3:%[^,]+]] = bitcast [[S1]]* %{{.+}} to i8* +// CHECK-DAG: store i8* [[BP3]], i8** {{%[^,]+}} +// CHECK-DAG: store i8* [[P3]], i8** {{%[^,]+}} +// CHECK-DAG: store i64 8, i64* {{%[^,]+}} + +// CHECK-DAG: [[BP4:%[^,]+]] = bitcast i16* %{{.+}} to i8* +// CHECK-DAG: [[P4:%[^,]+]] = bitcast i16* %{{.+}} to i8* +// CHECK-DAG: store i8* [[BP4]], i8** {{%[^,]+}} +// CHECK-DAG: store i8* [[P4]], i8** {{%[^,]+}} +// CHECK-DAG: store i64 2, i64* {{%[^,]+}} + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT7:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// +// CHECK: define {{.*}}[[FSTATIC]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0)) + +// CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]] +// CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] +// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] +// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] +// CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* +// CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* + +// CHECK-DAG: store i64 2, i64* [[SADDR1:%.+]] +// CHECK-DAG: [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]] +// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]] +// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]] +// CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8* +// CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8* + +// CHECK-DAG: store i64 1, i64* [[SADDR2:%.+]] +// CHECK-DAG: [[SADDR2]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]] +// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]] +// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]] + +// CHECK-DAG: store i64 40, i64* 
[[SADDR3:%.+]] +// CHECK-DAG: [[SADDR3]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX3:[0-9]+]] +// CHECK-DAG: [[BPADDR3:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX3]] +// CHECK-DAG: [[PADDR3:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX3]] +// CHECK-DAG: store i8* [[BP3:%[^,]+]], i8** [[BPADDR3]] +// CHECK-DAG: store i8* [[P3:%[^,]+]], i8** [[PADDR3]] +// CHECK-DAG: [[BP3]] = bitcast [10 x i32]* %{{.+}} to i8* +// CHECK-DAG: [[P3]] = bitcast [10 x i32]* %{{.+}} to i8* + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 +// CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] +// CHECK: [[TRY]] +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i64* [[S:%[^,]+]], i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0)) + +// CHECK-DAG: store i64 4, i64* [[SADDR0:%.+]] +// CHECK-DAG: [[SADDR0]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX0:[0-9]+]] +// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX0]] +// CHECK-DAG: [[PADDR0:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX0]] +// CHECK-DAG: store i8* [[BP0:%[^,]+]], i8** [[BPADDR0]] +// CHECK-DAG: store i8* [[P0:%[^,]+]], i8** [[PADDR0]] +// CHECK-DAG: [[BP0]] = bitcast i32* %{{.+}} to i8* +// CHECK-DAG: [[P0]] = bitcast i32* %{{.+}} to i8* + +// CHECK-DAG: store i64 2, i64* [[SADDR1:%.+]] +// CHECK-DAG: [[SADDR1]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX1:[0-9]+]] +// CHECK-DAG: [[BPADDR1:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX1]] +// CHECK-DAG: [[PADDR1:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX1]] +// CHECK-DAG: store i8* [[BP1:%[^,]+]], i8** [[BPADDR1]] +// CHECK-DAG: store i8* [[P1:%[^,]+]], i8** [[PADDR1]] +// CHECK-DAG: [[BP1]] = bitcast i16* %{{.+}} to i8* +// CHECK-DAG: [[P1]] = bitcast i16* %{{.+}} to i8* + +// CHECK-DAG: store i64 40, i64* [[SADDR2:%.+]] +// CHECK-DAG: [[SADDR2]] = getelementptr inbounds i64, i64* [[S]], i32 [[IDX2:[0-9]+]] +// CHECK-DAG: [[BPADDR2:%.+]] = getelementptr inbounds i8*, i8** [[BP]], i32 [[IDX2]] +// CHECK-DAG: [[PADDR2:%.+]] = getelementptr inbounds i8*, i8** [[P]], i32 [[IDX2]] +// CHECK-DAG: store i8* [[BP2:%[^,]+]], i8** [[BPADDR2]] +// CHECK-DAG: store i8* [[P2:%[^,]+]], i8** [[PADDR2]] +// CHECK-DAG: [[BP2]] = bitcast [10 x i32]* %{{.+}} to i8* +// CHECK-DAG: [[P2]] = bitcast [10 x i32]* %{{.+}} to i8* + +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL]], label %[[END:[^,]+]] +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}) +// CHECK-NEXT: br label %[[END]] +// CHECK: [[END]] + +// Check that the offloading functions are emitted and that the arguments are +// correct and loaded correctly for the target regions of the callees of bar(). 
+ +// CHECK: define internal void [[HVT7]] +// CHECK-DAG: [[LOCALX_THIS:%.+]] = alloca [[S1]]* +// CHECK-DAG: [[LOCALX_B:%.+]] = alloca i32* +// CHECK-DAG: [[LOCALX_C:%.+]] = alloca i16* +// CHECK-DAG: [[LOCALX_VLA1:%.+]] = alloca i64* +// CHECK-DAG: [[LOCALX_VLA2:%.+]] = alloca i64* +// CHECK-DAG: store [[S1]]* [[ARG_THIS:%.+]], [[S1]]** [[LOCALX_THIS]] +// CHECK-DAG: store i32* [[ARG_B:%.+]], i32** [[LOCALX_B]] +// CHECK-DAG: store i16* [[ARG_C:%.+]], i16** [[LOCALX_C]] +// CHECK-DAG: store i64* [[ARG_VLA1:%.+]], i64** [[LOCALX_VLA1]] +// CHECK-DAG: store i64* [[ARG_VLA2:%.+]], i64** [[LOCALX_VLA2]] +// CHECK-DAG: [[USE_THIS:%.+]] = load [[S1]]*, [[S1]]** [[LOCALX_THIS]] +// CHECK-DAG: [[USE_B:%.+]] = load i32*, i32** [[LOCALX_B]] +// CHECK-DAG: [[USE_C:%.+]] = load i16*, i16** [[LOCALX_C]] +// CHECK-DAG: [[USE_VLA1:%.+]] = load i64*, i64** [[LOCALX_VLA1]] +// CHECK-DAG: [[USE_VLA2:%.+]] = load i64*, i64** [[LOCALX_VLA2]] +// CHECK-DAG: getelementptr inbounds [[S1]], [[S1]]* [[USE_THIS]], i{{.*}} 0, i{{.*}} 0 +// CHECK-DAG: load i32, i32* [[USE_B]] +// CHECK-DAG: [[VLAMUL:%.+]] = mul {{.*}}i64 1, %{{.*}} +// CHECK-DAG: getelementptr inbounds i16, i16* [[USE_C]], i{{.*}} [[VLAMUL]] +// CHECK-DAG: load i64, i64* [[USE_VLA1]] +// CHECK-DAG: load i64, i64* [[USE_VLA2]] + +// CHECK: define internal void [[HVT6]] +// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32* +// CHECK-DAG: [[LOCALX_AA:%.+]] = alloca i16* +// CHECK-DAG: [[LOCALX_AAA:%.+]] = alloca i8* +// CHECK-DAG: [[LOCALX_B:%.+]] = alloca [10 x i32]* +// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]] +// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]] +// CHECK-DAG: store i8* [[ARG_AAA:%.+]], i8** [[LOCALX_AAA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCALX_B]] +// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A]] +// CHECK-DAG: [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA]] +// CHECK-DAG: [[USE_AAA:%.+]] = load i8*, i8** [[LOCALX_AAA]] +// CHECK-DAG: [[USE_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCALX_B]] +// CHECK-DAG: load i32, i32* [[USE_A]] +// CHECK-DAG: load i16, i16* [[USE_AA]] +// CHECK-DAG: load i8, i8* [[USE_AAA]] +// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[USE_B]], i{{.*}} 0, i{{.*}} 2 + +// CHECK: define internal void [[HVT5]] +// CHECK-DAG: [[LOCALX_A:%.+]] = alloca i32* +// CHECK-DAG: [[LOCALX_AA:%.+]] = alloca i16* +// CHECK-DAG: [[LOCALX_B:%.+]] = alloca [10 x i32]* +// CHECK-DAG: store i32* [[ARG_A:%.+]], i32** [[LOCALX_A]] +// CHECK-DAG: store i16* [[ARG_AA:%.+]], i16** [[LOCALX_AA]] +// CHECK-DAG: store [10 x i32]* [[ARG_B:%.+]], [10 x i32]** [[LOCALX_B]] +// CHECK-DAG: [[USE_A:%.+]] = load i32*, i32** [[LOCALX_A]] +// CHECK-DAG: [[USE_AA:%.+]] = load i16*, i16** [[LOCALX_AA]] +// CHECK-DAG: [[USE_B:%.+]] = load [10 x i32]*, [10 x i32]** [[LOCALX_B]] +// CHECK-DAG: load i32, i32* [[USE_A]] +// CHECK-DAG: load i16, i16* [[USE_AA]] +// CHECK-DAG: getelementptr inbounds [10 x i32], [10 x i32]* [[USE_B]], i{{.*}} 0, i{{.*}} 2 +#endif Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -1308,15 +1308,22 @@ Params); break; } - case OMPD_target_data: - case OMPD_target: { + case OMPD_target_data: { Sema::CapturedParamNameType Params[] = { std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Params); break; } + case OMPD_target: { + 
Sema::CapturedParamNameType Params[] = { + std::make_pair(StringRef(), QualType()) // __context with shared vars + }; + ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, + CR_OpenMP_Target, Params); + break; + } case OMPD_teams: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty); @@ -1382,7 +1389,85 @@ } } } - return ActOnCapturedRegionEnd(S.get()); + + CapturedStmt *Res = cast<CapturedStmt>(ActOnCapturedRegionEnd(S.get()).get()); + + // If this is an offloading captured region, we need change the captured + // declaration so that it uses as parameters each of the captured + // declarations. + if (Res->getCapturedRegionKind() == CR_OpenMP_Target) { + CapturedDecl *OldCD = Res->getCapturedDecl(); + DeclContext *DC = OldCD->getDeclContext(); + + unsigned NumParams = Res->capture_size(); + const RecordDecl *RD = Res->getCapturedRecordDecl(); + CapturedDecl *NewCD = CapturedDecl::Create(Context, DC, NumParams); + DeclContext *NewDC = CapturedDecl::castToDeclContext(NewCD); + SourceLocation Loc = Res->getLocStart(); + + SmallVector<const FieldDecl *, 3> VLASizeFields; + unsigned i = 0; + auto ri = RD->field_begin(); + for (CapturedStmt::const_capture_iterator ci = Res->capture_begin(), + ce = Res->capture_end(); + ci != ce; ++ci, ++ri, ++i) { + StringRef Name; + QualType Ty; + if (ci->capturesVariableArrayType()) { + VLASizeFields.push_back(*ri); + Ty = Context.getPointerType(ri->getType()); + Name = "__vla_size"; + } else if (ci->capturesThis()) { + Ty = ri->getType(); + Name = "__this"; + } else { + const VarDecl *VD = ci->getCapturedVar(); + + Ty = VD->getType(); + + // If this declaration has an variable-length array type, we need to + // adapt the size expression to use the implicit parameter that + // captures it. We also need to make sure we adapt all the sizes if + // the type has several dimensions. + if (!VLASizeFields.empty()) { + assert(Ty.getTypePtr() == + VLASizeFields.front()->getCapturedVLAType() && + "Invalid VLA information!"); + + Ty = VLASizeFields.back()->getCapturedVLAType()->getElementType(); + for (unsigned t = 1; !VLASizeFields.empty(); + VLASizeFields.pop_back(), ++t) { + auto *OriginalVAT = VLASizeFields.back()->getCapturedVLAType(); + auto SizeType = VLASizeFields.back()->getType(); + auto *SizeDecl = NewCD->getParam(i - t); + auto *NewSizeRefExpr = buildDeclRefExpr( + *this, SizeDecl, SizeDecl->getType(), Loc, true); + auto *NewSizeValExpr = + new (Context) UnaryOperator(NewSizeRefExpr, UO_Deref, SizeType, + VK_RValue, OK_Ordinary, Loc); + Ty = Context.getVariableArrayType( + Ty, NewSizeValExpr, OriginalVAT->getSizeModifier(), + OriginalVAT->getIndexTypeCVRQualifiers(), + OriginalVAT->getBracketsRange()); + } + } + Ty = Context.getPointerType(Ty); + Name = VD->getName(); + } + + IdentifierInfo *ParamName = &Context.Idents.get(Name); + ImplicitParamDecl *Param = + ImplicitParamDecl::Create(Context, NewDC, Loc, ParamName, Ty); + NewCD->setParam(i, Param); + } + + NewCD->setBody(Res->getCapturedStmt()); + Res->setCapturedDecl(NewCD); + DC->addDecl(NewCD); + DC->removeDecl(OldCD); + } + + return Res; } static bool CheckNestingOfRegions(Sema &SemaRef, DSAStackTy *Stack, Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -197,12 +197,12 @@ virtual llvm::Value *getContextValue() const { return ThisValue; } /// \brief Lookup the captured field decl for a variable. 
- virtual const FieldDecl *lookup(const VarDecl *VD) const { + virtual const Decl *lookup(const VarDecl *VD) const { return CaptureFields.lookup(VD); } - bool isCXXThisExprCaptured() const { return getThisFieldDecl() != nullptr; } - virtual FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; } + bool isCXXThisExprCaptured() const { return getThisDecl() != nullptr; } + virtual Decl *getThisDecl() const { return CXXThisFieldDecl; } static bool classof(const CGCapturedStmtInfo *) { return true; @@ -1689,6 +1689,11 @@ std::pair<llvm::Value*,QualType> getVLASize(const VariableArrayType *vla); std::pair<llvm::Value*,QualType> getVLASize(QualType vla); + /// getVLASizeMap - Returns an LLVM value that corresponds to the size of the + /// VLA size of the type \a type. Assumes that the type has already been + /// emitted with EmitVariablyModifiedType. + llvm::Value *getVLASizeMap(const VariableArrayType *vla); + /// LoadCXXThis - Load the value of 'this'. This function is only valid while /// generating code for an C++ member function. llvm::Value *LoadCXXThis() { @@ -2211,6 +2216,8 @@ void EmitOMPFlushDirective(const OMPFlushDirective &S); void EmitOMPOrderedDirective(const OMPOrderedDirective &S); void EmitOMPAtomicDirective(const OMPAtomicDirective &S); + typedef SmallVector<llvm::Value *, 4> OMPTargetDirectiveVLASizes; + void PrepareOMPTargetDirectiveBodyEmission(const OMPTargetDirective &S); void EmitOMPTargetDirective(const OMPTargetDirective &S); void EmitOMPTargetDataDirective(const OMPTargetDataDirective &S); void EmitOMPTeamsDirective(const OMPTeamsDirective &S); Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -1506,6 +1506,12 @@ return std::pair<llvm::Value*,QualType>(numElements, elementType); } +llvm::Value *CodeGenFunction::getVLASizeMap(const VariableArrayType *type) { + llvm::Value *vlaSize = VLASizeMap[type->getSizeExpr()]; + assert(vlaSize && "No vla size matching the requested expression!"); + return vlaSize; +} + void CodeGenFunction::EmitVariablyModifiedType(QualType type) { assert(type->isVariablyModifiedType() && "Must pass variably modified type to EmitVLASizes!"); Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -2121,8 +2121,81 @@ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } -void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) { - llvm_unreachable("CodeGen for 'omp target' is not supported yet."); +void CodeGenFunction::PrepareOMPTargetDirectiveBodyEmission( + const OMPTargetDirective &S) { + const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); + const RecordDecl *RD = CS.getCapturedRecordDecl(); + const CapturedDecl *CD = CS.getCapturedDecl(); + + SmallVector<const FieldDecl *, 3> VLASizeFields; + auto ri = RD->field_begin(); + auto pi = CD->param_begin(); + for (CapturedStmt::const_capture_iterator ci = CS.capture_begin(), + ce = CS.capture_end(); + ci != ce; ++ci, ++ri, ++pi) { + + if (ci->capturesVariableArrayType()) { + VLASizeFields.push_back(*ri); + continue; + } + + if (ci->capturesThis()) { + auto *ThisRef = LocalDeclMap[*pi]; + LValue Addr = LValue::MakeAddr(ThisRef, ri->getType(), CharUnits(), + CGM.getContext()); + CXXThisValue = EmitLoadOfLValue(Addr, CS.getLocStart()).getScalarVal(); + ; + continue; + } + + // Find the 
expressions that give the VLA sizes and update the VLASizeMap. + if (!VLASizeFields.empty()) { + assert(ci->capturesVariable() && + "Expecting declaration that has VLA type!"); + + const PointerType *PT = cast<PointerType>((*pi)->getType().getTypePtr()); + QualType CurTy = PT->getPointeeType(); + + for (auto *FD : VLASizeFields) { + const VariableArrayType *VAT = + cast<VariableArrayType>(CurTy.getTypePtr()); + auto *V = VLASizeMap[VAT->getSizeExpr()]; + assert(V && "VLA Size value must exist!"); + VLASizeMap[FD->getCapturedVLAType()->getSizeExpr()] = V; + CurTy = VAT->getElementType(); + } + + VLASizeFields.clear(); + } + } +} + +void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { + LexicalScope Scope(*this, S.getSourceRange()); + + // Emit target region as a standalone region. + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.PrepareOMPTargetDirectiveBodyEmission(S); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + + // Obtain the target region outlined function. + llvm::Value *Fn = + CGM.getOpenMPRuntime().emitTargetOutlinedFunction(*this, S, CodeGen); + + // Check if we have any if clause associated with the directive. + const Expr *IfCond = nullptr; + if (auto C = S.getSingleClause(OMPC_if)) { + IfCond = cast<OMPIfClause>(C)->getCondition(); + } + + // Check if we have any device clause associated with the directive. + const Expr *Device = nullptr; + if (auto C = S.getSingleClause(OMPC_device)) { + Device = cast<OMPDeviceClause>(C)->getDevice(); + } + + CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, IfCond, Device); } void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { Index: lib/CodeGen/CGStmt.cpp =================================================================== --- lib/CodeGen/CGStmt.cpp +++ lib/CodeGen/CGStmt.cpp @@ -2125,28 +2125,33 @@ StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), CD->getBody()->getLocStart()); - // Set the context parameter in CapturedStmtInfo. - llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()]; - assert(DeclPtr && "missing context parameter for CapturedStmt"); - CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr)); - - // Initialize variable-length arrays. - LValue Base = MakeNaturalAlignAddrLValue(CapturedStmtInfo->getContextValue(), - Ctx.getTagDeclType(RD)); - for (auto *FD : RD->fields()) { - if (FD->hasCapturedVLAType()) { - auto *ExprArg = EmitLoadOfLValue(EmitLValueForField(Base, FD), - S.getLocStart()).getScalarVal(); - auto VAT = FD->getCapturedVLAType(); - VLASizeMap[VAT->getSizeExpr()] = ExprArg; + + // Initialize variable length arrays and 'this' using the context argument, + // if any. Otherwise the function implicit parameters will be used. + if (CD->hasContextParam()) { + llvm::Value *DeclPtr = LocalDeclMap[CD->getContextParam()]; + assert(DeclPtr && "missing context parameter for CapturedStmt"); + CapturedStmtInfo->setContextValue(Builder.CreateLoad(DeclPtr)); + + // Initialize variable-length arrays. + LValue Base = MakeNaturalAlignAddrLValue( + CapturedStmtInfo->getContextValue(), Ctx.getTagDeclType(RD)); + for (auto *FD : RD->fields()) { + if (FD->hasCapturedVLAType()) { + auto *ExprArg = + EmitLoadOfLValue(EmitLValueForField(Base, FD), S.getLocStart()) + .getScalarVal(); + auto VAT = FD->getCapturedVLAType(); + VLASizeMap[VAT->getSizeExpr()] = ExprArg; + } } - } - // If 'this' is captured, load it into CXXThisValue. 
- if (CapturedStmtInfo->isCXXThisExprCaptured()) { - FieldDecl *FD = CapturedStmtInfo->getThisFieldDecl(); - LValue ThisLValue = EmitLValueForField(Base, FD); - CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal(); + // If 'this' is captured, load it into CXXThisValue. + if (CapturedStmtInfo->isCXXThisExprCaptured()) { + FieldDecl *FD = cast<FieldDecl>(CapturedStmtInfo->getThisDecl()); + LValue ThisLValue = EmitLValueForField(Base, FD); + CXXThisValue = EmitLoadOfLValue(ThisLValue, Loc).getScalarVal(); + } } PGO.assignRegionCounters(CD, F); Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -154,6 +154,14 @@ // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, // kmp_int32 cncl_kind); OMPRTL__kmpc_cancel, + + // + // Offloading related calls + // + // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int32_t + // *arg_types); + OMPRTL__tgt_target, }; /// \brief Values for bit flags used in the ident_t to describe the fields. @@ -177,6 +185,22 @@ /// \brief Implicit barrier in 'single' directive. OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 }; + + /// \brief Values for bit flags used to specify the mapping type for + /// offloading. + enum OpenMPOffloadMappingFlags { + /// \brief Allocate memory on the device and move data from host to device. + OMP_MAP_TO = 0x01, + /// \brief Allocate memory on the device and move data from device to host. + OMP_MAP_FROM = 0x02, + }; + + enum OpenMPOffloadingReservedDeviceIDs { + /// \brief Device ID if the device was not defined, runtime should get it + /// from environment variables in the spec. + OMP_DEVICEID_UNDEF = -1, + }; + CodeGenModule &CGM; /// \brief Default const ident_t object used for initialization of all other /// ident_t objects. @@ -707,6 +731,28 @@ /// virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion); + + /// \brief Emit outilined function for 'target' directive. + /// \param D Directive to emit. + /// \param CodeGen Code generation sequence for the \a D directive. + virtual llvm::Value * + emitTargetOutlinedFunction(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const RegionCodeGenTy &CodeGen); + + /// \brief Emit the target offloading code associated with \a D. The emitted + /// code attempts offloading the execution to the device, an the event of + /// a failure it executes the host version outlined in \a OutlinedFn. + /// \param D Directive to emit. + /// \param OutlinedFn Host version of the code to be offloaded. + /// \param IfCond Expression evaluated in if clause associated with the target + /// directive, or null if no if clause is used. + /// \param Device Expression evaluated in device clause associated with the + /// target directive, or null if no device clause is used. + virtual void emitTargetCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, const Expr *IfCond, + const Expr *Device); }; } // namespace CodeGen Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -41,6 +41,8 @@ /// \brief Region for constructs that do not require function outlining, /// like 'for', 'sections', 'atomic' etc. directives. 
InlinedRegion, + /// \brief Region with outlined function for standalone 'target' directive. + TargetRegion, }; CGOpenMPRegionInfo(const CapturedStmt &CS, @@ -164,16 +166,16 @@ llvm_unreachable("No context value for inlined OpenMP region"); } /// \brief Lookup the captured field decl for a variable. - const FieldDecl *lookup(const VarDecl *VD) const override { + const Decl *lookup(const VarDecl *VD) const override { if (OuterRegionInfo) return OuterRegionInfo->lookup(VD); // If there is no outer outlined region,no need to lookup in a list of // captured variables, we can use the original one. return nullptr; } - FieldDecl *getThisFieldDecl() const override { + Decl *getThisDecl() const override { if (OuterRegionInfo) - return OuterRegionInfo->getThisFieldDecl(); + return OuterRegionInfo->getThisDecl(); return nullptr; } /// \brief Get a variable or parameter for storing global thread id @@ -204,6 +206,57 @@ CGOpenMPRegionInfo *OuterRegionInfo; }; +/// \brief API for captured statement code generation in OpenMP target +/// constructs. For this captures, implicit parameters are used instead of the +/// captured fields. +class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { +public: + CGOpenMPTargetRegionInfo(const CapturedStmt &CS, + const RegionCodeGenTy &CodeGen) + : CGOpenMPRegionInfo(TargetRegion, CodeGen, OMPD_target), + CXXThisImplicitParamDecl(nullptr) { + + CapturedDecl::param_iterator Param = CS.getCapturedDecl()->param_begin(); + for (CapturedStmt::const_capture_iterator I = CS.capture_begin(), + E = CS.capture_end(); + I != E; ++I, ++Param) { + if (I->capturesThis()) + CXXThisImplicitParamDecl = *Param; + else if (I->capturesVariable()) + CaptureImplicitParams[I->getCapturedVar()] = *Param; + } + } + + /// \brief This is unused for target regions because each starts executing + /// with a single thread. + const VarDecl *getThreadIDVariable() const override { return nullptr; } + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { return ".omp_offloading."; } + + static bool classof(const CGCapturedStmtInfo *Info) { + return CGOpenMPRegionInfo::classof(Info) && + cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; + } + + /// \brief Lookup the captured implicit parameter declaration for a variable. + virtual const Decl *lookup(const VarDecl *VD) const override { + return CaptureImplicitParams.lookup(VD); + } + + virtual Decl *getThisDecl() const override { + return CXXThisImplicitParamDecl; + } + +private: + /// \brief Keep the map between VarDecl and FieldDecl. + llvm::SmallDenseMap<const VarDecl *, ImplicitParamDecl *> + CaptureImplicitParams; + + /// \brief Captured 'this' type. + ImplicitParamDecl *CXXThisImplicitParamDecl; +}; + /// \brief RAII for emitting code of OpenMP constructs. 
class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; @@ -838,6 +891,22 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); break; } + case OMPRTL__tgt_target: { + // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t + // arg_num, void** args_base, void **args, int64_t *arg_sizes, int32_t + // *arg_types); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.Int64Ty->getPointerTo(), + CGM.Int32Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); + break; + } } return RTLFn; } @@ -2236,7 +2305,7 @@ if (Init) { if (auto *Elem = Pair.second.PrivateElemInit) { auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); + auto *SharedField = cast<FieldDecl>(CapturesInfo.lookup(OriginalVD)); auto SharedRefLValue = CGF.EmitLValueForField(SharedsBase, SharedField); QualType Type = OriginalVD->getType(); @@ -2836,3 +2905,260 @@ } } +llvm::Value * +CGOpenMPRuntime::emitTargetOutlinedFunction(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + const RegionCodeGenTy &CodeGen) { + + const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + + // Generate the outlined target offloading function. + CodeGenFunction TargetCGF(CGM, true); + CGOpenMPTargetRegionInfo CGInfo(*CS, CodeGen); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(TargetCGF, &CGInfo); + return TargetCGF.GenerateCapturedStmtFunction(*CS); +} + +void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + llvm::Value *OutlinedFn, + const Expr *IfCond, const Expr *Device) { + + // Fill up the arrays with the all the captured variables. + SmallVector<llvm::Value *, 16> BasePointers; + SmallVector<llvm::Value *, 16> Pointers; + SmallVector<llvm::Value *, 16> Sizes; + SmallVector<unsigned, 16> MapTypes; + + bool hasVLACaptures = false; + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + auto ri = CS.getCapturedRecordDecl()->field_begin(); + auto ii = CS.capture_init_begin(); + for (CapturedStmt::const_capture_iterator ci = CS.capture_begin(), + ce = CS.capture_end(); + ci != ce; ++ci, ++ri, ++ii) { + StringRef Name; + QualType Ty; + llvm::Value *BasePointer; + llvm::Value *Pointer; + llvm::Value *Size; + unsigned MapType; + + if (ci->capturesVariableArrayType()) { + llvm::Value *V = CGF.getVLASizeMap(ri->getCapturedVLAType()); + LValue LV = CGF.MakeNaturalAlignAddrLValue( + CGF.CreateMemTemp(ri->getType(), "__vla_size"), ri->getType()); + CGF.EmitStoreThroughLValue(RValue::get(V), LV); + BasePointer = Pointer = LV.getAddress(); + uint64_t SizeVal = CGM.getContext().getTypeSize(ri->getType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); + + hasVLACaptures = true; + // VLA sizes don't need to be copied back from the device. + MapType = CGOpenMPRuntime::OMP_MAP_TO; + } else if (ci->capturesThis()) { + BasePointer = Pointer = CGF.LoadCXXThis(); + const PointerType *PtrTy = cast<PointerType>(ri->getType().getTypePtr()); + uint64_t SizeVal = + CGM.getContext().getTypeSize(PtrTy->getPointeeType()) / 8; + Size = CGF.Builder.getInt64(SizeVal); + + // Default map type. 
+ MapType = CGOpenMPRuntime::OMP_MAP_TO | CGOpenMPRuntime::OMP_MAP_FROM; + } else { + BasePointer = Pointer = + CGF.EmitLValue(cast<DeclRefExpr>(*ii)).getAddress(); + + const ReferenceType *PtrTy = + cast<ReferenceType>(ri->getType().getTypePtr()); + QualType ElementType = PtrTy->getPointeeType(); + + if (auto *VAT = dyn_cast<VariableArrayType>(ElementType.getTypePtr())) + Size = CGF.getVLASize(VAT).first; + else { + uint64_t ElementTypeSize = + CGM.getContext().getTypeSize(ElementType) / 8; + Size = CGF.Builder.getInt64(ElementTypeSize); + } + + // Default map type. + MapType = CGOpenMPRuntime::OMP_MAP_TO | CGOpenMPRuntime::OMP_MAP_FROM; + } + + BasePointers.push_back(BasePointer); + Pointers.push_back(Pointer); + Sizes.push_back(Size); + MapTypes.push_back(MapType); + } + + if (IfCond) { + // Check if the if clause conditional always evaluates to true or false. + // If it evaluates to false, we only need to emit the host version of the + // target region. If it evaluates to true, we can proceed with the codegen + // as if no if clause was provided. + bool CondConstant; + if (CGF.ConstantFoldsToSimpleInteger(IfCond, CondConstant)) { + if (CondConstant) { + IfCond = nullptr; + } else { + CGF.Builder.CreateCall(OutlinedFn, BasePointers); + return; + } + } + } + + // Generate the code to launch the target region. The pattern is the + // following: + // + // ... + // br IfCond (if any), omp_offload, omp_offload_fail + // + // omp_offload.try: + // ; create arrays for offloading + // error = __tgt_target(...) + // br error, omp_offload_fail, omp_offload_end + // + // omp_offload.fail: + // host_version(...) + // + // omp_offload.end: + // ... + // + + auto OffloadTryBlock = CGF.createBasicBlock("omp_offload.try"); + auto OffloadFailBlock = CGF.createBasicBlock("omp_offload.fail"); + auto ContBlock = CGF.createBasicBlock("omp_offload.end"); + + if (IfCond) + CGF.EmitBranchOnBoolExpr(IfCond, OffloadTryBlock, OffloadFailBlock, + /*TrueCount=*/0); + + CGF.EmitBlock(OffloadTryBlock); + + unsigned PointerNumVal = BasePointers.size(); + llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); + llvm::Value *BasePointersArray; + llvm::Value *PointersArray; + llvm::Value *SizesArray; + llvm::Value *MapTypesArray; + + if (PointerNumVal) { + llvm::APInt PointerNumAP(32, PointerNumVal, /*isSigned=*/true); + QualType PointerArrayType = CGF.getContext().getConstantArrayType( + CGF.getContext().VoidPtrTy, PointerNumAP, ArrayType::Normal, + /*IndexTypeQuals=*/0); + + BasePointersArray = + CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs"); + PointersArray = CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs"); + + // If we don't have any VLA types, we can use a constant array for the map + // sizes, otherwise we need to fill up the arrays as we do for the pointers. + if (hasVLACaptures) { + QualType SizeArrayType = CGF.getContext().getConstantArrayType( + CGF.getContext().getIntTypeForBitwidth(64, /*Signed=*/true), + PointerNumAP, ArrayType::Normal, /*IndexTypeQuals=*/0); + SizesArray = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes"); + } else { + // We expect all the sizes to be constant, so we collect them to create + // a constant array. 
+ SmallVector<uint64_t, 16> ConstSizes; + for (auto *V : Sizes) + ConstSizes.push_back(cast<llvm::ConstantInt>(V)->getZExtValue()); + + llvm::Constant *SizesArrayInit = + llvm::ConstantDataArray::get(CGF.Builder.getContext(), ConstSizes); + auto *SizesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), SizesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + SizesArrayInit, ".offload_sizes"); + SizesArrayGbl->setUnnamedAddr(true); + SizesArray = SizesArrayGbl; + } + + // The map types are always constant so we don't need to generate code to + // fill arrays. Instead, we create an array constant. + llvm::Constant *MapTypesArrayInit = + llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); + auto *MapTypesArrayGbl = new llvm::GlobalVariable( + CGM.getModule(), MapTypesArrayInit->getType(), + /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, + MapTypesArrayInit, ".offload_maptypes"); + MapTypesArrayGbl->setUnnamedAddr(true); + MapTypesArray = MapTypesArrayGbl; + + for (unsigned i = 0; i < PointerNumVal; ++i) { + llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, + 0, i); + CGF.Builder.CreateStore( + CGF.Builder.CreateBitCast(BasePointers[i], CGM.VoidPtrTy), BP); + + llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, + i); + CGF.Builder.CreateStore( + CGF.Builder.CreateBitCast(Pointers[i], CGM.VoidPtrTy), P); + + if (hasVLACaptures) { + llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( + llvm::ArrayType::get(CGM.Int64Ty, PointerNumVal), SizesArray, 0, i); + CGF.Builder.CreateStore( + CGF.Builder.CreateIntCast(Sizes[i], CGM.Int64Ty, /*isSigned=*/true), + S); + } + } + } else { + BasePointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy); + PointersArray = llvm::Constant::getNullValue(CGM.VoidPtrPtrTy); + SizesArray = llvm::Constant::getNullValue(CGM.Int64Ty->getPointerTo()); + MapTypesArray = llvm::Constant::getNullValue(CGM.Int32Ty->getPointerTo()); + } + + // On top of the arrays that were filled up, the target offloading call takes + // as arguments the device id as well as the host pointer. The host pointer + // is used by the runtime library to identify the current target region, so + // it only has to be unique and not necessarily point to anything. It could be + // the pointer to the outlined function that implements the target region, but + // we aren't using that so that the compiler doesn't need to keep that, and + // could therefore inline the host function if proven worthwhile during + // optimization. + + llvm::Value *HostPtr = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), ".offload_hstptr"); + + // Emit device ID if any. 
+ llvm::Value *DeviceID; + if (Device) + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGM.Int32Ty, /*isSigned=*/true); + else + DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); + + llvm::Value *OffloadingArgs[] = { + DeviceID, HostPtr, PointerNum, + CGF.Builder.CreateConstGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, + 0, 0), + CGF.Builder.CreateConstGEP2_32( + llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, 0, + 0), + CGF.Builder.CreateConstGEP2_32( + llvm::ArrayType::get(CGM.Int64Ty, PointerNumVal), SizesArray, 0, 0), + CGF.Builder.CreateConstGEP2_32( + llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, 0, + 0)}; + auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + auto Error = CGF.Builder.CreateICmpNE(Return, CGF.Builder.getInt32(0)); + CGF.Builder.CreateCondBr(Error, OffloadFailBlock, ContBlock); + + CGF.EmitBlock(OffloadFailBlock); + CGF.Builder.CreateCall(OutlinedFn, BasePointers); + CGF.EmitBranch(ContBlock); + + CGF.EmitBlock(ContBlock, /*IsFinished=*/true); + return; +} Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -1884,6 +1884,29 @@ return CGF.EmitLValueForField(LV, FD); } +static LValue EmitCapturedImplicitParamLValue(CodeGenFunction &CGF, + const ImplicitParamDecl *PD) { + // If the captured declaration is an implicit parameter, it should already + // exist in the local declaration map. + LValue LV = LValue::MakeAddr(CGF.GetAddrOfLocalVar(PD), PD->getType(), + CharUnits(), CGF.getContext()); + RValue RV = CGF.EmitLoadOfLValue(LV, PD->getLocStart()); + return LValue::MakeAddr(RV.getScalarVal(), PD->getType(), CharUnits(), + CGF.getContext()); +} + +static LValue EmitCapturedValue(CodeGenFunction &CGF, const Decl *D, + llvm::Value *ThisValue) { + switch (D->getKind()) { + default: + llvm_unreachable("Unexpected declaration kind for capture!"); + case Decl::ImplicitParam: + return EmitCapturedImplicitParamLValue(CGF, cast<ImplicitParamDecl>(D)); + case Decl::Field: + return EmitCapturedFieldLValue(CGF, cast<FieldDecl>(D), ThisValue); + } +} + /// Named Registers are named metadata pointing to the register name /// which will be read from/written to as an argument to the intrinsic /// @llvm.read/write_register. @@ -1943,8 +1966,8 @@ if (auto *V = LocalDeclMap.lookup(VD)) return MakeAddrLValue(V, T, Alignment); else - return EmitCapturedFieldLValue(*this, CapturedStmtInfo->lookup(VD), - CapturedStmtInfo->getContextValue()); + return EmitCapturedValue(*this, CapturedStmtInfo->lookup(VD), + CapturedStmtInfo->getContextValue()); } assert(isa<BlockDecl>(CurCodeDecl)); return MakeAddrLValue(GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()), Index: include/clang/Sema/ScopeInfo.h =================================================================== --- include/clang/Sema/ScopeInfo.h +++ include/clang/Sema/ScopeInfo.h @@ -608,6 +608,8 @@ return "default captured statement"; case CR_OpenMP: return "OpenMP region"; + case CR_OpenMP_Target: + return "OpenMP target region"; } llvm_unreachable("Invalid captured region kind!"); } Index: include/clang/Basic/CapturedStmt.h =================================================================== --- include/clang/Basic/CapturedStmt.h +++ include/clang/Basic/CapturedStmt.h @@ -16,7 +16,8 @@ /// \brief The different kinds of captured statement. 
enum CapturedRegionKind { CR_Default, - CR_OpenMP + CR_OpenMP, + CR_OpenMP_Target }; } // end namespace clang Index: include/clang/AST/Stmt.h =================================================================== --- include/clang/AST/Stmt.h +++ include/clang/AST/Stmt.h @@ -2059,7 +2059,7 @@ /// \brief The pointer part is the implicit the outlined function and the /// int part is the captured region kind, 'CR_Default' etc. - llvm::PointerIntPair<CapturedDecl *, 1, CapturedRegionKind> CapDeclAndKind; + llvm::PointerIntPair<CapturedDecl *, 2, CapturedRegionKind> CapDeclAndKind; /// \brief The record for captured variables, a RecordDecl or CXXRecordDecl. RecordDecl *TheRecordDecl; Index: include/clang/AST/Decl.h =================================================================== --- include/clang/AST/Decl.h +++ include/clang/AST/Decl.h @@ -3645,8 +3645,9 @@ llvm::PointerIntPair<Stmt *, 1, bool> BodyAndNothrow; explicit CapturedDecl(DeclContext *DC, unsigned NumParams) - : Decl(Captured, DC, SourceLocation()), DeclContext(Captured), - NumParams(NumParams), ContextParam(0), BodyAndNothrow(nullptr, false) { } + : Decl(Captured, DC, SourceLocation()), DeclContext(Captured), + NumParams(NumParams), ContextParam(-1u), + BodyAndNothrow(nullptr, false) {} ImplicitParamDecl *const *getParams() const { return getTrailingObjects<ImplicitParamDecl *>(); @@ -3679,9 +3680,11 @@ getParams()[i] = P; } + bool hasContextParam() const { return ContextParam < NumParams; } + /// \brief Retrieve the parameter containing captured variables. ImplicitParamDecl *getContextParam() const { - assert(ContextParam < NumParams); + assert(hasContextParam()); return getParam(ContextParam); } void setContextParam(unsigned i, ImplicitParamDecl *P) {