arnamoy10 updated this revision to Diff 392189. arnamoy10 added a comment. Addressing reviewers comments Major changes are as follows:
1. Skipping unsupported clauses and skip the case when there is an ordered directive inside the simd construct 2. Traversing all the blocks in the Canonical loop body (not only the first block of the loop body) to look for locations to insert metadata 3. Making metadata unique for each Canonical loop 4. Update test case to reflect changes. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D114379/new/ https://reviews.llvm.org/D114379 Files: clang/lib/CodeGen/CGStmtOpenMP.cpp clang/test/OpenMP/irbuilder_simd.cpp llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/AssumptionCache.h" @@ -2116,6 +2117,19 @@ Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID); } +/// Attach metadata access.group to the load and store instructions of \p block +static void addSimdMetadata(BasicBlock *Block, + ArrayRef<Metadata *> Properties) { + for (auto &I : *Block) { + if (I.mayReadFromMemory() || I.mayWriteToMemory()) { + Instruction *instr = dyn_cast<Instruction>(&I); + LLVMContext &C = instr->getContext(); + MDNode *LoopID = MDNode::get(C, Properties); + instr->setMetadata("llvm.access.group", LoopID); + } + } +} + void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) { LLVMContext &Ctx = Builder.getContext(); addLoopMetadata( @@ -2131,6 +2145,52 @@ }); } +void OpenMPIRBuilder::applySimd(DebugLoc, CanonicalLoopInfo *Loop) { + LLVMContext &Ctx = Builder.getContext(); + addLoopMetadata( + Loop, + {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.parallel_accesses")), + MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.vectorize.enable"))}); + + // Find the set of basic blocks reachable from the body unto the + // exit block. May have to enhance this collection for nested loops. + BasicBlock *body = Loop->getBody(); + BasicBlock *exit = Loop->getExit(); + + FunctionAnalysisManager FAM; + FAM.registerPass([]() { return DominatorTreeAnalysis(); }); + DominatorTreeAnalysis DTA; + DominatorTree &&DT = DTA.run(*(Loop->getBody()->getParent()), FAM); + + llvm::SmallSet<BasicBlock *, 8> reachable; + llvm::SmallVector<BasicBlock *, 8> worklist; + + llvm::SmallSet<BasicBlock *, 8> skipBBs; + skipBBs.insert(Loop->getCond()); + skipBBs.insert(Loop->getHeader()); + + worklist.push_back(body); + reachable.insert(body); + while (!worklist.empty()) { + BasicBlock *front = worklist.pop_back_val(); + for (BasicBlock *succ : successors(front)) { + if (reachable.count(succ) == 0) { + /// We need the check here to ensure that we don't run + /// infinitely if the CFG has a loop in it + /// i.e. the BB reaches itself directly or indirectly + worklist.push_back(succ); + if (!DT.dominates(exit, succ) && skipBBs.count(succ) == 0) + reachable.insert(succ); + } + } + } + + MDNode *N = MDNode::getDistinct(Ctx, MDString::get(Ctx, "llvm.access.group")); + for (auto BB : reachable) { + addSimdMetadata(BB, {N}); + } +} + /// Create the TargetMachine object to query the backend for optimization /// preferences. /// Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -517,6 +517,12 @@ void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI); + /// Add metadata to simd-ize a loop. + /// + /// \param DL Debug location for instructions added by unrolling. + /// \param Loop The loop to simd-ize. + void applySimd(DebugLoc DL, CanonicalLoopInfo *Loop); + /// Generator for '#omp flush' /// /// \param Loc The location where the flush directive was encountered Index: clang/test/OpenMP/irbuilder_simd.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/irbuilder_simd.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics +// RUN: %clang_cc1 -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECKTWOLOOPS +// expected-no-diagnostics + +struct S { + int a, b; +}; + +struct P { + int a, b; +}; + +void simple(float *a, float *b, int *c) { + S s, *p; + P pp; +#pragma omp simd + for (int i = 3; i < 32; i += 5) { + // llvm.access.group test + // CHECK: %[[A_ADDR:.+]] = alloca float*, align 8 + // CHECK: %[[B_ADDR:.+]] = alloca float*, align 8 + // CHECK: %[[S:.+]] = alloca %struct.S, align 4 + // CHECK: %[[P:.+]] = alloca %struct.S*, align 8 + // CHECK: %[[I:.+]] = alloca i32, align 4 + // CHECK: %[[TMP3:.+]] = load float*, float** %[[B_ADDR:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[TMP4:.+]] = load i32, i32* %[[I:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4:.+]] to i64 + // CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP3:.+]], i64 %[[IDXPROM:.+]] + // CHECK-NEXT: %[[TMP5:.+]] = load float, float* %[[ARRAYIDX:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[A2:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[S:.+]], i32 0, i32 0 + // CHECK-NEXT: %[[TMP6:.+]] = load i32, i32* %[[A2:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[CONV:.+]] = sitofp i32 %[[TMP6:.+]] to float + // CHECK-NEXT: %[[ADD:.+]] = fadd float %[[TMP5:.+]], %[[CONV:.+]] + // CHECK-NEXT: %[[TMP7:.+]] = load %struct.S*, %struct.S** %[[P:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[A3:.+]] = getelementptr inbounds %struct.S, %struct.S* %[[TMP7:.+]], i32 0, i32 0 + // CHECK-NEXT: %[[TMP8:.+]] = load i32, i32* %[[A3:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[CONV4:.+]] = sitofp i32 %[[TMP8:.+]] to float + // CHECK-NEXT: %[[ADD5:.+]] = fadd float %[[ADD:.+]], %[[CONV4:.+]] + // CHECK-NEXT: %[[TMP9:.+]] = load float*, float** %[[A_ADDR:.+]], align 8, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[TMP10:.+]] = load i32, i32* %[[I:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // CHECK-NEXT: %[[IDXPROM6:.+]] = sext i32 %[[TMP10:.+]] to i64 + // CHECK-NEXT: %[[ARRAYIDX7:.+]] = getelementptr inbounds float, float* %[[TMP9:.+]], i64 %[[IDXPROM6:.+]] + // CHECK-NEXT: store float %[[ADD5:.+]], float* %[[ARRAYIDX7:.+]], align 4, !llvm.access.group ![[META3:[0-9]+]] + // llvm.loop test + // CHECK: %[[OMP_LOOPDOTNEXT:.+]] = add nuw i32 %[[OMP_LOOPDOTIV:.+]], 1 + // CHECK-NEXT: br label %omp_loop.header, !llvm.loop !5 + a[i] = b[i] + s.a + p->a; + } + +#pragma omp simd + for (int j = 3; j < 32; j += 5) { + // test if unique access groups were used for a second loop + // CHECKTWOLOOPS: %[[A22:.+]] = getelementptr inbounds %struct.P, %struct.P* %[[PP:.+]], i32 0, i32 0 + // CHECKTWOLOOPS-NEXT: %[[TMP14:.+]] = load i32, i32* %[[A22:.+]], align 4, !llvm.access.group ![[META8:[0-9]+]] + // CHECKTWOLOOPS-NEXT: %[[TMP15:.+]] = load i32*, i32** %[[C_ADDR:.+]], align 8, !llvm.access.group ![[META8:[0-9]+]] + // CHECKTWOLOOPS-NEXT: %[[TMP16:.+]] = load i32, i32* %[[J:.+]], align 4, !llvm.access.group ![[META8:[0-9]+]] + // CHECKTWOLOOPS-NEXT: %[[IDXPROM23:.+]] = sext i32 %[[TMP16:.+]] to i64 + // CHECKTWOLOOPS-NEXT: %[[ARRAYIDX24:.+]] = getelementptr inbounds i32, i32* %[[TMP15:.+]], i64 %[[IDXPROM23:.+]] + // CHECKTWOLOOPS-NEXT: store i32 %[[TMP14:.+]], i32* %[[ARRAYIDX24:.+]], align 4, !llvm.access.group ![[META8:[0-9]+]] + // check llvm.loop metadata + // CHECKTWOLOOPS: %[[OMP_LOOPDOTNEXT:.+]] = add nuw i32 %[[OMP_LOOPDOTIV:.+]], 1 + // CHECKTWOLOOPS-NEXT: br label %[[OMP_LLOP_BODY:.*]], !llvm.loop ![[META10:[0-9]+]] + c[j] = pp.a; + } +} + +// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK-NEXT: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} +// CHECK-NEXT: ![[META2:[0-9]+]] = +// CHECK-NEXT: ![[META3:[0-9]+]] = !{![[META4:[0-9]+]]} +// CHECK-NEXT: ![[META4:[0-9]+]] = distinct !{!"llvm.access.group"} +// CHECK-NEXT: ![[META5:[0-9]+]] = distinct !{![[META5:[0-9]+]], ![[META6:[0-9]+]], ![[META7:[0-9]+]]} +// CHECK-NEXT: ![[META6:[0-9]+]] = !{!"llvm.loop.parallel_accesses"} +// CHECK-NEXT: ![[META7:[0-9]+]] = !{!"llvm.loop.vectorize.enable"} +// CHECK-NEXT: ![[META8:[0-9]+]] = !{![[META9:[0-9]+]]} +// CHECK-NEXT: ![[META9:[0-9]+]] = distinct !{!"llvm.access.group"} +// CHECK-NEXT: ![[META10:[0-9]+]] = distinct !{![[META10:[0-9]+]], ![[META6:[0-9]+]], ![[META7:[0-9]+]]} Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2582,7 +2582,70 @@ } } +static bool isSupportedByOpenMPIRBuilder(const OMPExecutableDirective &S) { + // Check for unsupported clauses + for (OMPClause *C : S.clauses()) + if (isa<OMPIfClause>(C) || isa<OMPSafelenClause>(C) || + isa<OMPSimdlenClause>(C) || isa<OMPLinearClause>(C) || + isa<OMPAlignedClause>(C) || isa<OMPNontemporalClause>(C) || + isa<OMPPrivateClause>(C) || isa<OMPLastprivateClause>(C) || + isa<OMPReductionClause>(C) || isa<OMPCollapseClause>(C) || + isa<OMPOrderClause>(C)) + return false; + // Check if we have a statement with the ordered directive. + // Visit the statement hierarchy to find a compound statement + // with a ordered directive in it. + if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) { + if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { + for (const Stmt *SubStmt : SyntacticalLoop->children()) { + if (!SubStmt) + continue; + if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) { + for (const Stmt *CSSubStmt : CS->children()) { + if (!CSSubStmt) + continue; + if (isa<OMPOrderedDirective>(CSSubStmt)) { + return false; + } + } + } + } + } + } + return true; +} + void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + bool UseOMPIRBuilder = + CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); + if (UseOMPIRBuilder) { + auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF, + PrePostActionTy &) { + // Use the OpenMPIRBuilder if enabled. + if (UseOMPIRBuilder) { + // Emit the associated statement and get its loop representation. + llvm::DebugLoc DL = SourceLocToDebugLoc(S.getBeginLoc()); + const Stmt *Inner = S.getRawStmt(); + llvm::CanonicalLoopInfo *CLI = + EmitOMPCollapsedCanonicalLoopNest(Inner, 1); + + llvm::OpenMPIRBuilder &OMPBuilder = + CGM.getOpenMPRuntime().getOMPBuilder(); + // Add SIMD specific metadata + OMPBuilder.applySimd(DL, CLI); + return; + } + }; + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, + CodeGenIRBuilder); + } + return; + } + ParentLoopDirectiveForScanRegion ScanRegion(*this, S); OMPFirstScanLoop = true; auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits