huntergr updated this revision to Diff 93841.
huntergr added a reviewer: kkwli0.
huntergr added a comment.
Changed the patch to transform combined constructs into 'simd' in ParseOpenMP.cpp
instead of creating a new pragma handler. This also made it easier to add
support for 'declare simd': it only needed a check for the new option when
generating code for functions, so I've added a RUN line to test it in the
'declare simd' codegen tests.
https://reviews.llvm.org/D31417
Files:
docs/ClangCommandLineReference.rst
docs/UsersManual.rst
include/clang/Basic/LangOptions.def
include/clang/Driver/Options.td
lib/CodeGen/CodeGenFunction.cpp
lib/CodeGen/CodeGenModule.cpp
lib/Driver/ToolChains/Clang.cpp
lib/Frontend/CompilerInvocation.cpp
lib/Parse/ParseOpenMP.cpp
lib/Parse/ParsePragma.cpp
lib/Sema/SemaExpr.cpp
lib/Sema/SemaOpenMP.cpp
test/OpenMP/declare_simd_codegen.cpp
test/OpenMP/linking.c
test/OpenMP/simd_only.c
Index: test/OpenMP/simd_only.c
===================================================================
--- /dev/null
+++ test/OpenMP/simd_only.c
@@ -0,0 +1,157 @@
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple aarch64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+// CHECK-LABEL: @simd_plain
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: load float, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: store float %{{.*}}, float* %arrayidx{{.*}} !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_plain(float *a, float *b, float *c, int N) {
+ #pragma omp simd
+ for (int i = 0; i < N; i += 2)
+ a[i] = b[i] * c[i];
+}
+
+// CHECK-LABEL: @simd_safelen_clause
+// CHECK-NOT: !llvm.mem.parallel_loop_access
+// CHECK-LABEL: omp.inner.for.inc:
+// CHECK: br label %omp.inner.for.cond, !llvm.loop
+// CHECK: ret void
+void simd_safelen_clause(float *a, float *b, float *c, int N) {
+ #pragma omp simd safelen(4)
+ for (int i = 0; i < N; i += 2)
+ a[i] = b[i] * c[i];
+}
+
+extern long long initial_val();
+
+// CHECK-LABEL: @simd_simdlen_and_linear_clause
+// CHECK: omp.inner.for.body:
+// CHECK: !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_simdlen_and_linear_clause(float *a, float *b, float *c, int N) {
+ long long lv = initial_val();
+ #pragma omp simd simdlen(2) linear(lv: 4)
+ for (int i = 0; i < N; ++i) {
+ a[lv] = b[lv] * c[lv];
+ lv += 4;
+ }
+}
+
+extern float gfloat;
+
+// CHECK-LABEL: @simd_aligned_and_private_clause
+// CHECK-LABEL: entry:
+// CHECK: %gfloat = alloca float, align 4
+// CHECK: store float 1.000000e+00, float* @gfloat, align 4
+// CHECK-LABEL: omp.inner.for.body:
+// CHECK-NOT: @gfloat
+// CHECK: load{{.*}}!llvm.mem.parallel_loop_access
+// CHECK: store float {{.*}}, float* %gfloat, align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %{{[0-9]+}}, 2.000000e+00
+// CHECK: store float %[[FADD]], float* {{.*}}, align 4, !llvm.mem.parallel_loop_access
+// CHECK: ret void
+void simd_aligned_and_private_clause(float *a, float *b, float *c, int N) {
+ gfloat = 1.0f;
+ #pragma omp simd aligned(a:4) private(gfloat)
+ for (int i = 0; i < N; i += 2) {
+ gfloat = b[i] * c[i];
+ a[i] = gfloat + 2.0f;
+ }
+}
+
+// CHECK-LABEL: @simd_lastprivate_and_reduction_clause
+// CHECK-LABEL: entry:
+// CHECK: %[[SUMVAR:sum[0-9]+]] = alloca float, align 4
+// CHECK: store float 0.000000e+00, float* %[[SUMVAR]], align 4
+// CHECK-LABEL: omp.inner.for.body
+// CHECK: %[[LOAD:[0-9]+]] = load float, float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: %[[FADD:add[0-9]+]] = fadd float %[[LOAD]], %mul{{.*}}
+// CHECK: store float %[[FADD]], float* %[[SUMVAR]], align 4, !llvm.mem.parallel_loop_access
+// CHECK: store i32{{.*}}, i32* %[[IDXVAR:idx[0-9]+]]
+// CHECK-LABEL: omp.inner.for.end:
+// CHECK-DAG: %[[TMP1:[0-9]+]] = load i32, i32* %[[IDXVAR]], align 4
+// CHECK-DAG: store i32 %[[TMP1]], i32* %idx, align 4
+// CHECK-DAG: %[[INITVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK-DAG: %[[TMP2:[0-9]+]] = load float, float* %[[SUMVAR]], align 4
+// CHECK-DAG: %[[SUMMED:add[0-9]+]] = fadd float %[[INITVAL]], %[[TMP2]]
+// CHECK-DAG: store float %[[SUMMED]], float* %sum, align 4
+// CHECK-LABEL: simd.if.end:
+// CHECK: %[[OUTVAL:[0-9]+]] = load float, float* %sum, align 4
+// CHECK: %[[OUTADDR:[0-9]+]] = load float*, float** %a.addr, align 8
+// CHECK: store float %[[OUTVAL]], float* %[[OUTADDR]], align 4
+// CHECK: %[[RETIDX:[0-9]+]] = load i32, i32* %idx, align 4
+// CHECK: ret i32 %[[RETIDX]]
+int simd_lastprivate_and_reduction_clause(float *a, float *b, float *c, int N) {
+ float sum = 0.0f;
+ int idx;
+ #pragma omp simd lastprivate(idx) reduction(+:sum)
+ for (int i = 0; i < N; ++i) {
+ sum += b[i] * c[i];
+ idx = i * 2;
+ }
+
+ *a = sum;
+ return idx;
+}
+
+// CHECK-LABEL: @simd_collapse_clause
+// CHECK: omp.inner.for.body:
+// CHECK-NOT: for.body:
+// CHECK: ret void
+void simd_collapse_clause(float **a, float **b, float **c, int N, int M) {
+ #pragma omp simd collapse(2)
+ for (int i = 0; i < N; ++i)
+ for (int j = 0; j < N; ++j)
+ a[i][j] = b[i][j] * c[i][j];
+}
+
+// Negative tests; no simd directive, so should be normal code.
+
+// CHECK-LABEL: @parallel_for
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK-NOT: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for(float *a, float *b, float *c, int N) {
+ #pragma omp parallel for
+ for (int i = 0; i < N; ++i)
+ a[i] = b[i] * c[i];
+}
+
+extern void long_running_func(int);
+
+// CHECK-LABEL: @taskloop
+// CHECK-NOT: call i8* @__kmpc_omp_task_alloc
+// CHECK-NOT: call void @__kmpc_taskloop
+// CHECK: ret void
+void taskloop(int N) {
+ #pragma omp taskloop
+ for (int i = 0; i < N; ++i)
+ long_running_func(i);
+}
+
+// Combined constructs; simd part should work, rest should be ignored.
+
+// CHECK-LABEL: @parallel_for_simd
+// CHECK-NOT: call void {{.*}} @__kmpc_fork_call
+// CHECK-NOT: @.omp_outlined.
+// CHECK: omp.inner.for.body:
+// CHECK: ret void
+void parallel_for_simd(float *a, float *b, float *c, int N) {
+#pragma omp parallel for simd num_threads(2) simdlen(4)
+ for (int i = 0; i < N; ++i)
+ a[i] = b[i] * c[i];
+}
+
+// Make sure there are no declarations for libomp runtime functions.
+// CHECK-NOT: declare void @__kmpc
+
+// CHECK-LABEL: !llvm.ident = !{!0}
+
+// simd_safelen_clause vectorize width metadata
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 4}
+// simd_simdlen_and_linear_clause vectorize width metadata
+// CHECK-DAG: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 2}
Index: test/OpenMP/linking.c
===================================================================
--- test/OpenMP/linking.c
+++ test/OpenMP/linking.c
@@ -89,3 +89,14 @@
// CHECK-MSVC-ILINK-64-SAME: -libpath:{{.+}}/../lib
// CHECK-MSVC-ILINK-64-SAME: -defaultlib:libiomp5md.lib
//
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: -fopenmp-simd -target aarch64-linux-gnu \
+// RUN: | FileCheck --check-prefix=CHECK-SIMD-ONLY-AA64 %s
+// CHECK-SIMD-ONLY-AA64-NOT: "-l[[DEFAULT_OPENMP_LIB]]"
+// CHECK-SIMD-ONLY-AA64-NOT: "-lpthread"
+//
+// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
+// RUN: -fopenmp-simd -target x86_64-unknown-linux \
+// RUN: | FileCheck --check-prefix=CHECK-SIMD-ONLY-X64 %s
+// CHECK-SIMD-ONLY-X64-NOT: "-l[[DEFAULT_OPENMP_LIB]]"
+// CHECK-SIMD-ONLY-X64-NOT: "-lpthread"
Index: test/OpenMP/declare_simd_codegen.cpp
===================================================================
--- test/OpenMP/declare_simd_codegen.cpp
+++ test/OpenMP/declare_simd_codegen.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
// expected-no-diagnostics
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -983,7 +983,7 @@
}
VarDecl *Sema::IsOpenMPCapturedDecl(ValueDecl *D) {
- assert(LangOpts.OpenMP && "OpenMP is not allowed");
+ assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
D = getCanonicalDecl(D);
// If we are attempting to capture a global variable in a directive with
@@ -1029,7 +1029,7 @@
}
bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) {
- assert(LangOpts.OpenMP && "OpenMP is not allowed");
+ assert((LangOpts.OpenMP || LangOpts.OpenMPSimd) && "OpenMP is not allowed");
return DSAStack->hasExplicitDSA(
D, [](OpenMPClauseKind K) -> bool { return K == OMPC_private; }, Level);
}
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -13961,7 +13961,8 @@
// Capture global variables if it is required to use private copy of this
// variable.
bool IsGlobal = !Var->hasLocalStorage();
- if (IsGlobal && !(LangOpts.OpenMP && IsOpenMPCapturedDecl(Var)))
+ if (IsGlobal && !((LangOpts.OpenMP || LangOpts.OpenMPSimd) &&
+ IsOpenMPCapturedDecl(Var)))
return true;
// Walk up the stack to determine whether we can capture the variable,
Index: lib/Parse/ParsePragma.cpp
===================================================================
--- lib/Parse/ParsePragma.cpp
+++ lib/Parse/ParsePragma.cpp
@@ -213,7 +213,7 @@
PP.AddPragmaHandler("OPENCL", FPContractHandler.get());
}
- if (getLangOpts().OpenMP)
+ if (getLangOpts().OpenMP || getLangOpts().OpenMPSimd)
OpenMPHandler.reset(new PragmaOpenMPHandler());
else
OpenMPHandler.reset(new PragmaNoOpenMPHandler());
Index: lib/Parse/ParseOpenMP.cpp
===================================================================
--- lib/Parse/ParseOpenMP.cpp
+++ lib/Parse/ParseOpenMP.cpp
@@ -149,6 +149,33 @@
DKind = F[i][2];
}
}
+
+ // If we're only interested in the simd pragmas, convert any combined
+ // construct with a simd directive to just 'simd' or 'declare simd',
+ // and any other to 'unknown'.
+ if (P.getLangOpts().OpenMPSimd) {
+ switch (DKind) {
+ default:
+ DKind = OMPD_unknown;
+ break;
+ case OMPD_declare_simd:
+ break;
+ case OMPD_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_for_simd:
+ case OMPD_taskloop_simd:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_simd:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams_distribute_simd:
+ DKind = OMPD_simd;
+ break;
+ }
+ }
+
return DKind < OMPD_unknown ? static_cast<OpenMPDirectiveKind>(DKind)
: OMPD_unknown;
}
@@ -1015,7 +1042,10 @@
SkipUntil(tok::annot_pragma_openmp_end);
break;
case OMPD_unknown:
- Diag(Tok, diag::err_omp_unknown_directive);
+ // Don't report unknown directives if we're only looking at simd,
+ // as the filter function will have switched the kind.
+ if (!getLangOpts().OpenMPSimd)
+ Diag(Tok, diag::err_omp_unknown_directive);
SkipUntil(tok::annot_pragma_openmp_end);
break;
}
@@ -1105,6 +1135,18 @@
OpenMPClauseKind CKind, bool FirstClause) {
OMPClause *Clause = nullptr;
bool ErrorFound = false;
+
+ // If we're only interpreting 'simd' directives, filter out clauses that
+ // don't apply without an error.
+ if (DKind == OMPD_simd && getLangOpts().OpenMPSimd &&
+ !isAllowedClauseForDirective(DKind, CKind)) {
+
+ if (PP.LookAhead(/*N=*/0).is(tok::l_paren))
+ SkipUntil(tok::r_paren);
+
+ return nullptr;
+ }
+
// Check if clause is allowed for the given directive.
if (CKind != OMPC_unknown && !isAllowedClauseForDirective(DKind, CKind)) {
Diag(Tok, diag::err_omp_unexpected_clause) << getOpenMPClauseName(CKind)
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -2199,6 +2199,7 @@
Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls);
Opts.OpenMPIsDevice =
Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device);
+ Opts.OpenMPSimd = !Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_simd);
if (Opts.OpenMP) {
int Version =
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -3203,7 +3203,9 @@
// semantic analysis, etc.
break;
}
- }
+ } else if (Args.hasFlag(options::OPT_fopenmp_simd,
+ options::OPT_fno_openmp_simd, /*Default=*/false))
+ CmdArgs.push_back("-fopenmp-simd");
const SanitizerArgs &Sanitize = getToolChain().getSanitizerArgs();
Sanitize.addArgs(getToolChain(), Args, CmdArgs, InputType);
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -119,7 +119,7 @@
createObjCRuntime();
if (LangOpts.OpenCL)
createOpenCLRuntime();
- if (LangOpts.OpenMP)
+ if (LangOpts.OpenMP || LangOpts.OpenMPSimd)
createOpenMPRuntime();
if (LangOpts.CUDA)
createCUDARuntime();
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -794,7 +794,8 @@
}
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
- if (CGM.getLangOpts().OpenMP && FD->hasAttr<OMPDeclareSimdDeclAttr>())
+ if ((CGM.getLangOpts().OpenMP || CGM.getLangOpts().OpenMPSimd)
+ && FD->hasAttr<OMPDeclareSimdDeclAttr>())
CGM.getOpenMPRuntime().emitDeclareSimdFunction(FD, Fn);
// Add no-jump-tables value.
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1261,6 +1261,8 @@
def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group<f_Group>;
def fopenmp : Flag<["-"], "fopenmp">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
def fno_openmp : Flag<["-"], "fno-openmp">, Group<f_Group>, Flags<[NoArgumentUnused]>;
+def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
+def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group<f_Group>, Flags<[NoArgumentUnused]>;
def fopenmp_version_EQ : Joined<["-"], "fopenmp-version=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>;
def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group<f_Group>;
def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group<f_Group>, Flags<[NoArgumentUnused]>;
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -187,6 +187,7 @@
LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns")
LANGOPT(CUDA , 1, 0, "CUDA")
LANGOPT(OpenMP , 32, 0, "OpenMP support and version of OpenMP (31, 40 or 45)")
+LANGOPT(OpenMPSimd , 1, 0, "OpenMP support for simd and declare simd directives only")
LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls")
LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device")
LANGOPT(RenderScript , 1, 0, "RenderScript")
Index: docs/UsersManual.rst
===================================================================
--- docs/UsersManual.rst
+++ docs/UsersManual.rst
@@ -1988,6 +1988,11 @@
Use `-fopenmp` to enable OpenMP. Support for OpenMP can be disabled with
`-fno-openmp`.
+Use `-fopenmp-simd` to enable OpenMP simd features only, without linking
+the runtime library; for combined constructs
+(e.g. ``#pragma omp parallel for simd``) the non-simd directives and clauses
+will be ignored. This can be disabled with `-fno-openmp-simd`.
+
Controlling implementation limits
---------------------------------
Index: docs/ClangCommandLineReference.rst
===================================================================
--- docs/ClangCommandLineReference.rst
+++ docs/ClangCommandLineReference.rst
@@ -1451,6 +1451,8 @@
.. option:: -fopenmp, -fno-openmp
+.. option:: -fopenmp-simd, -fno-openmp-simd
+
.. option:: -fopenmp-dump-offload-linker-script
.. option:: -fopenmp-use-tls
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits