hishiguro created this revision.
hishiguro added a reviewer: ABataev.
hishiguro added a project: clang.
Herald added subscribers: cfe-commits, guansong, yaxunl.
Herald added a reviewer: jdoerfert.
Fixes PR45753
When a loop has both an "omp parallel for" pragma and a "clang loop" pragma
attached to it and the program is compiled with the -fopenmp option, the
"clang loop" pragma does not take effect.
Because of its "clang loop" pragma, the example below should not be vectorized,
but it actually is vectorized.
The cause is that "llvm.loop.vectorize.width" is not output to the IR when
-fopenmp is specified.
The fix passes the loop's attributes, if any are attached, to LoopStack.push so that they are emitted.
[example.c]
int a[100], b[100]; /* global arrays read and updated by the loop in foo */
int foo() { /* reproducer: one loop carrying both an OpenMP pragma and a clang loop hint */
#pragma omp parallel for
#pragma clang loop vectorize(disable)
for (int i=0; i<100; i++) /* per the report, vectorize(disable) above should keep this loop from being vectorized */
a[i]+=b[i]*i;
} /* NOTE(review): foo is declared int but has no return statement */
[compile]
clang -O2 -fopenmp a.c -c -Rpass=vect
a.c:3:1: remark: vectorized loop (vectorization width: 4, interleaved count: 2) [-Rpass=loop-vectorize]
#pragma omp parallel for
^
[IR]
- -fopenmp
$ clang -O2 a.c -S -emit-llvm -mllvm -disable-llvm-optzns -o - -fopenmp | grep "vectorize\.width"
$
- -fno-openmp
$ clang -O2 a.c -S -emit-llvm -mllvm -disable-llvm-optzns -o - -fno-openmp | grep "vectorize\.width"
!7 = !{!"llvm.loop.vectorize.width", i32 1}
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D79921
Files:
clang/lib/CodeGen/CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1730,9 +1730,22 @@
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
const SourceRange R = S.getSourceRange();
- LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
- SourceLocToDebugLoc(R.getEnd()));
+ // If attributes are attached, push to the basic block with them.
+ const AttributedStmt *AS = nullptr;
+ if (auto *OMPD = dyn_cast<OMPParallelForDirective>(&S)) {
+ const CapturedStmt *CS = OMPD->getCapturedStmt(OMPD_parallel);
+ const Stmt *SS = CS->getCapturedStmt();
+ AS = dyn_cast_or_null<AttributedStmt>(SS);
+ }
+ if (AS)
+ LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
+ AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
+ else
+ LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
+
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1730,9 +1730,22 @@
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
const SourceRange R = S.getSourceRange();
- LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
- SourceLocToDebugLoc(R.getEnd()));
+ // If attributes are attached, push to the basic block with them.
+ const AttributedStmt *AS = nullptr;
+ if (auto *OMPD = dyn_cast<OMPParallelForDirective>(&S)) {
+ const CapturedStmt *CS = OMPD->getCapturedStmt(OMPD_parallel);
+ const Stmt *SS = CS->getCapturedStmt();
+ AS = dyn_cast_or_null<AttributedStmt>(SS);
+ }
+ if (AS)
+ LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
+ AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
+ else
+ LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
+
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits