Looks like the new tests don't pass on Windows: http://lab.llvm.org:8011/builders/clang-x64-ninja-win7/builds/10365 On Mar 4, 2016 12:29 PM, "Carlo Bertolli via cfe-commits" < cfe-commits@lists.llvm.org> wrote:
> Author: cbertol > Date: Fri Mar 4 14:24:58 2016 > New Revision: 262741 > > URL: http://llvm.org/viewvc/llvm-project?rev=262741&view=rev > Log: > [OPENMP] Codegen for distribute directive > > This patch provide basic implementation of codegen for teams directive, > excluding all clauses except dist_schedule. It also fixes parts of AST > reader/writer to enable correct pre-compiled header handling. > > http://reviews.llvm.org/D17170 > > > Added: > cfe/trunk/test/OpenMP/distribute_codegen.cpp > Modified: > cfe/trunk/include/clang/AST/StmtOpenMP.h > cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp > cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h > cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp > cfe/trunk/lib/CodeGen/CodeGenFunction.h > cfe/trunk/lib/Serialization/ASTReaderStmt.cpp > cfe/trunk/lib/Serialization/ASTWriterStmt.cpp > > Modified: cfe/trunk/include/clang/AST/StmtOpenMP.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/StmtOpenMP.h?rev=262741&r1=262740&r2=262741&view=diff > > ============================================================================== > --- cfe/trunk/include/clang/AST/StmtOpenMP.h (original) > +++ cfe/trunk/include/clang/AST/StmtOpenMP.h Fri Mar 4 14:24:58 2016 > @@ -595,49 +595,56 @@ public: > } > Expr *getIsLastIterVariable() const { > assert((isOpenMPWorksharingDirective(getDirectiveKind()) || > - isOpenMPTaskLoopDirective(getDirectiveKind())) && > + isOpenMPTaskLoopDirective(getDirectiveKind()) || > + isOpenMPDistributeDirective(getDirectiveKind())) && > "expected worksharing loop directive"); > return const_cast<Expr *>(reinterpret_cast<const Expr *>( > *std::next(child_begin(), IsLastIterVariableOffset))); > } > Expr *getLowerBoundVariable() const { > assert((isOpenMPWorksharingDirective(getDirectiveKind()) || > - isOpenMPTaskLoopDirective(getDirectiveKind())) && > + isOpenMPTaskLoopDirective(getDirectiveKind()) || > + isOpenMPDistributeDirective(getDirectiveKind())) && > "expected worksharing loop directive"); > return const_cast<Expr *>(reinterpret_cast<const Expr *>( > *std::next(child_begin(), LowerBoundVariableOffset))); > } > Expr *getUpperBoundVariable() const { > assert((isOpenMPWorksharingDirective(getDirectiveKind()) || > - isOpenMPTaskLoopDirective(getDirectiveKind())) && > + isOpenMPTaskLoopDirective(getDirectiveKind()) || > + isOpenMPDistributeDirective(getDirectiveKind())) && > "expected worksharing loop directive"); > return const_cast<Expr *>(reinterpret_cast<const Expr *>( > *std::next(child_begin(), UpperBoundVariableOffset))); > } > Expr *getStrideVariable() const { > assert((isOpenMPWorksharingDirective(getDirectiveKind()) || > - isOpenMPTaskLoopDirective(getDirectiveKind())) && > + isOpenMPTaskLoopDirective(getDirectiveKind()) || > + isOpenMPDistributeDirective(getDirectiveKind())) && > "expected worksharing loop directive"); > return const_cast<Expr *>(reinterpret_cast<const Expr *>( > *std::next(child_begin(), StrideVariableOffset))); > } > Expr *getEnsureUpperBound() const { > assert((isOpenMPWorksharingDirective(getDirectiveKind()) || > - isOpenMPTaskLoopDirective(getDirectiveKind())) && > + isOpenMPTaskLoopDirective(getDirectiveKind()) || > + isOpenMPDistributeDirective(getDirectiveKind())) && > "expected worksharing loop directive"); > return const_cast<Expr *>(reinterpret_cast<const Expr *>( > *std::next(child_begin(), EnsureUpperBoundOffset))); > } > Expr *getNextLowerBound() const { > assert((isOpenMPWorksharingDirective(getDirectiveKind()) || > - isOpenMPTaskLoopDirective(getDirectiveKind())) && > + isOpenMPTaskLoopDirective(getDirectiveKind()) || > + isOpenMPDistributeDirective(getDirectiveKind())) && > "expected worksharing loop directive"); > return const_cast<Expr *>(reinterpret_cast<const Expr *>( > *std::next(child_begin(), NextLowerBoundOffset))); > } > Expr *getNextUpperBound() const { > assert((isOpenMPWorksharingDirective(getDirectiveKind()) || > - isOpenMPTaskLoopDirective(getDirectiveKind())) && > + isOpenMPTaskLoopDirective(getDirectiveKind()) || > + isOpenMPDistributeDirective(getDirectiveKind())) && > "expected worksharing loop directive"); > return const_cast<Expr *>(reinterpret_cast<const Expr *>( > *std::next(child_begin(), NextUpperBoundOffset))); > > Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=262741&r1=262740&r2=262741&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original) > +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Fri Mar 4 14:24:58 2016 > @@ -425,6 +425,9 @@ enum OpenMPSchedType { > OMP_ord_runtime = 69, > OMP_ord_auto = 70, > OMP_sch_default = OMP_sch_static, > + /// \brief dist_schedule types > + OMP_dist_sch_static_chunked = 91, > + OMP_dist_sch_static = 92, > }; > > enum OpenMPRTLFunction { > @@ -2148,12 +2151,26 @@ static OpenMPSchedType getRuntimeSchedul > llvm_unreachable("Unexpected runtime schedule"); > } > > +/// \brief Map the OpenMP distribute schedule to the runtime enumeration. > +static OpenMPSchedType > +getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool > Chunked) { > + // only static is allowed for dist_schedule > + return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; > +} > + > bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind > ScheduleKind, > bool Chunked) const { > auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, > /*Ordered=*/false); > return Schedule == OMP_sch_static; > } > > +bool CGOpenMPRuntime::isStaticNonchunked( > + OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { > + auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked); > + return Schedule == OMP_dist_sch_static; > +} > + > + > bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) > const { > auto Schedule = > getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, > /*Ordered=*/false); > @@ -2194,6 +2211,55 @@ void CGOpenMPRuntime::emitForDispatchIni > CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); > } > > +static void emitForStaticInitCall(CodeGenFunction &CGF, > + SourceLocation Loc, > + llvm::Value * UpdateLocation, > + llvm::Value * ThreadId, > + llvm::Constant * ForStaticInitFunction, > + OpenMPSchedType Schedule, > + unsigned IVSize, bool IVSigned, bool > Ordered, > + Address IL, Address LB, Address UB, > + Address ST, llvm::Value *Chunk) { > + if (!CGF.HaveInsertPoint()) > + return; > + > + assert(!Ordered); > + assert(Schedule == OMP_sch_static || Schedule == > OMP_sch_static_chunked || > + Schedule == OMP_ord_static || Schedule == > OMP_ord_static_chunked || > + Schedule == OMP_dist_sch_static || > + Schedule == OMP_dist_sch_static_chunked); > + > + // Call __kmpc_for_static_init( > + // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, > + // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, > + // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, > + // kmp_int[32|64] incr, kmp_int[32|64] chunk); > + if (Chunk == nullptr) { > + assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || > + Schedule == OMP_dist_sch_static) && > + "expected static non-chunked schedule"); > + // If the Chunk was not specified in the clause - use default value > 1. > + Chunk = CGF.Builder.getIntN(IVSize, 1); > + } else { > + assert((Schedule == OMP_sch_static_chunked || > + Schedule == OMP_ord_static_chunked || > + Schedule == OMP_dist_sch_static_chunked) && > + "expected static chunked schedule"); > + } > + llvm::Value *Args[] = { > + UpdateLocation, > + ThreadId, > + CGF.Builder.getInt32(Schedule), // Schedule type > + IL.getPointer(), // &isLastIter > + LB.getPointer(), // &LB > + UB.getPointer(), // &UB > + ST.getPointer(), // &Stride > + CGF.Builder.getIntN(IVSize, 1), // Incr > + Chunk // Chunk > + }; > + CGF.EmitRuntimeCall(ForStaticInitFunction, Args); > +} > + > void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, > SourceLocation Loc, > OpenMPScheduleClauseKind > ScheduleKind, > @@ -2201,41 +2267,27 @@ void CGOpenMPRuntime::emitForStaticInit( > bool Ordered, Address IL, Address > LB, > Address UB, Address ST, > llvm::Value *Chunk) { > - if (!CGF.HaveInsertPoint()) > - return; > - OpenMPSchedType Schedule = > - getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered); > - assert(!Ordered); > - assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked > || > - Schedule == OMP_ord_static || Schedule == > OMP_ord_static_chunked); > - > - // Call __kmpc_for_static_init( > - // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, > - // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, > - // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, > - // kmp_int[32|64] incr, kmp_int[32|64] chunk); > - if (Chunk == nullptr) { > - assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) && > - "expected static non-chunked schedule"); > - // If the Chunk was not specified in the clause - use default value 1. > - Chunk = CGF.Builder.getIntN(IVSize, 1); > - } else { > - assert((Schedule == OMP_sch_static_chunked || > - Schedule == OMP_ord_static_chunked) && > - "expected static chunked schedule"); > - } > - llvm::Value *Args[] = { > - emitUpdateLocation(CGF, Loc), > - getThreadID(CGF, Loc), > - CGF.Builder.getInt32(Schedule), // Schedule type > - IL.getPointer(), // &isLastIter > - LB.getPointer(), // &LB > - UB.getPointer(), // &UB > - ST.getPointer(), // &Stride > - CGF.Builder.getIntN(IVSize, 1), // Incr > - Chunk // Chunk > - }; > - CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), > Args); > + OpenMPSchedType ScheduleNum = getRuntimeSchedule(ScheduleKind, Chunk != > nullptr, > + Ordered); > + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); > + auto *ThreadId = getThreadID(CGF, Loc); > + auto *StaticInitFunction = createForStaticInitFunction(IVSize, > IVSigned); > + emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, > StaticInitFunction, > + ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk); > +} > + > +void CGOpenMPRuntime::emitDistributeStaticInit(CodeGenFunction &CGF, > + SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, > + unsigned IVSize, bool IVSigned, > + bool Ordered, Address IL, Address LB, > + Address UB, Address ST, > + llvm::Value *Chunk) { > + OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != > nullptr); > + auto *UpdatedLocation = emitUpdateLocation(CGF, Loc); > + auto *ThreadId = getThreadID(CGF, Loc); > + auto *StaticInitFunction = createForStaticInitFunction(IVSize, > IVSigned); > + emitForStaticInitCall(CGF, Loc, UpdatedLocation, ThreadId, > StaticInitFunction, > + ScheduleNum, IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk); > } > > void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, > > Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=262741&r1=262740&r2=262741&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original) > +++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Fri Mar 4 14:24:58 2016 > @@ -493,6 +493,14 @@ public: > virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, > bool Chunked) const; > > + /// \brief Check if the specified \a ScheduleKind is static non-chunked. > + /// This kind of distribute directive is emitted without outer loop. > + /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' > clause. > + /// \param Chunked True if chunk is specified in the clause. > + /// > + virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind > ScheduleKind, > + bool Chunked) const; > + > /// \brief Check if the specified \a ScheduleKind is dynamic. > /// This kind of worksharing directive is emitted without outer loop. > /// \param ScheduleKind Schedule Kind specified in the 'schedule' > clause. > @@ -536,6 +544,31 @@ public: > Address UB, Address ST, > llvm::Value *Chunk = nullptr); > > + /// > + /// \param CGF Reference to current CodeGenFunction. > + /// \param Loc Clang source location. > + /// \param SchedKind Schedule kind, specified by the 'dist_schedule' > clause. > + /// \param IVSize Size of the iteration variable in bits. > + /// \param IVSigned Sign of the interation variable. > + /// \param Ordered true if loop is ordered, false otherwise. > + /// \param IL Address of the output variable in which the flag of the > + /// last iteration is returned. > + /// \param LB Address of the output variable in which the lower > iteration > + /// number is returned. > + /// \param UB Address of the output variable in which the upper > iteration > + /// number is returned. > + /// \param ST Address of the output variable in which the stride value > is > + /// returned nesessary to generated the static_chunked scheduled loop. > + /// \param Chunk Value of the chunk for the static_chunked scheduled > loop. > + /// For the default (nullptr) value, the chunk 1 will be used. > + /// > + virtual void emitDistributeStaticInit(CodeGenFunction &CGF, > SourceLocation Loc, > + OpenMPDistScheduleClauseKind > SchedKind, > + unsigned IVSize, bool IVSigned, > + bool Ordered, Address IL, Address > LB, > + Address UB, Address ST, > + llvm::Value *Chunk = nullptr); > + > /// \brief Call the appropriate runtime routine to notify that we > finished > /// iteration of the ordered loop with the dynamic scheduling. > /// > > Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=262741&r1=262740&r2=262741&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original) > +++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Fri Mar 4 14:24:58 2016 > @@ -1410,82 +1410,15 @@ void CodeGenFunction::EmitOMPSimdDirecti > CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); > } > > -void CodeGenFunction::EmitOMPForOuterLoop( > - OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, > +void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool > IsMonotonic, > const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, > Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { > auto &RT = CGM.getOpenMPRuntime(); > > - // Dynamic scheduling of the outer loop (dynamic, guided, auto, > runtime). > - const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); > - > - assert((Ordered || > - !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != > nullptr)) && > - "static non-chunked schedule does not need outer loop"); > - > - // Emit outer loop. > - // > - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] > - // When schedule(dynamic,chunk_size) is specified, the iterations are > - // distributed to threads in the team in chunks as the threads request > them. > - // Each thread executes a chunk of iterations, then requests another > chunk, > - // until no chunks remain to be distributed. Each chunk contains > chunk_size > - // iterations, except for the last chunk to be distributed, which may > have > - // fewer iterations. When no chunk_size is specified, it defaults to 1. > - // > - // When schedule(guided,chunk_size) is specified, the iterations are > assigned > - // to threads in the team in chunks as the executing threads request > them. > - // Each thread executes a chunk of iterations, then requests another > chunk, > - // until no chunks remain to be assigned. For a chunk_size of 1, the > size of > - // each chunk is proportional to the number of unassigned iterations > divided > - // by the number of threads in the team, decreasing to 1. For a > chunk_size > - // with value k (greater than 1), the size of each chunk is determined > in the > - // same way, with the restriction that the chunks do not contain fewer > than k > - // iterations (except for the last chunk to be assigned, which may have > fewer > - // than k iterations). > - // > - // When schedule(auto) is specified, the decision regarding scheduling > is > - // delegated to the compiler and/or runtime system. The programmer > gives the > - // implementation the freedom to choose any possible mapping of > iterations to > - // threads in the team. > - // > - // When schedule(runtime) is specified, the decision regarding > scheduling is > - // deferred until run time, and the schedule and chunk size are taken > from the > - // run-sched-var ICV. If the ICV is set to auto, the schedule is > - // implementation defined > - // > - // while(__kmpc_dispatch_next(&LB, &UB)) { > - // idx = LB; > - // while (idx <= UB) { BODY; ++idx; > - // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. > - // } // inner loop > - // } > - // > - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] > - // When schedule(static, chunk_size) is specified, iterations are > divided into > - // chunks of size chunk_size, and the chunks are assigned to the > threads in > - // the team in a round-robin fashion in the order of the thread number. > - // > - // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { > - // while (idx <= UB) { BODY; ++idx; } // inner loop > - // LB = LB + ST; > - // UB = UB + ST; > - // } > - // > - > const Expr *IVExpr = S.getIterationVariable(); > const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); > const bool IVSigned = > IVExpr->getType()->hasSignedIntegerRepresentation(); > > - if (DynamicOrOrdered) { > - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); > - RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, > - IVSize, IVSigned, Ordered, UBVal, Chunk); > - } else { > - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, > - IVSize, IVSigned, Ordered, IL, LB, UB, ST, > Chunk); > - } > - > auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); > > // Start the loop with a block that tests the condition. > @@ -1565,6 +1498,111 @@ void CodeGenFunction::EmitOMPForOuterLoo > // Tell the runtime we are done. > if (!DynamicOrOrdered) > RT.emitForStaticFinish(*this, S.getLocEnd()); > + > +} > + > +void CodeGenFunction::EmitOMPForOuterLoop( > + OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, > + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, > + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { > + auto &RT = CGM.getOpenMPRuntime(); > + > + // Dynamic scheduling of the outer loop (dynamic, guided, auto, > runtime). > + const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); > + > + assert((Ordered || > + !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != > nullptr)) && > + "static non-chunked schedule does not need outer loop"); > + > + // Emit outer loop. > + // > + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] > + // When schedule(dynamic,chunk_size) is specified, the iterations are > + // distributed to threads in the team in chunks as the threads request > them. > + // Each thread executes a chunk of iterations, then requests another > chunk, > + // until no chunks remain to be distributed. Each chunk contains > chunk_size > + // iterations, except for the last chunk to be distributed, which may > have > + // fewer iterations. When no chunk_size is specified, it defaults to 1. > + // > + // When schedule(guided,chunk_size) is specified, the iterations are > assigned > + // to threads in the team in chunks as the executing threads request > them. > + // Each thread executes a chunk of iterations, then requests another > chunk, > + // until no chunks remain to be assigned. For a chunk_size of 1, the > size of > + // each chunk is proportional to the number of unassigned iterations > divided > + // by the number of threads in the team, decreasing to 1. For a > chunk_size > + // with value k (greater than 1), the size of each chunk is determined > in the > + // same way, with the restriction that the chunks do not contain fewer > than k > + // iterations (except for the last chunk to be assigned, which may have > fewer > + // than k iterations). > + // > + // When schedule(auto) is specified, the decision regarding scheduling > is > + // delegated to the compiler and/or runtime system. The programmer > gives the > + // implementation the freedom to choose any possible mapping of > iterations to > + // threads in the team. > + // > + // When schedule(runtime) is specified, the decision regarding > scheduling is > + // deferred until run time, and the schedule and chunk size are taken > from the > + // run-sched-var ICV. If the ICV is set to auto, the schedule is > + // implementation defined > + // > + // while(__kmpc_dispatch_next(&LB, &UB)) { > + // idx = LB; > + // while (idx <= UB) { BODY; ++idx; > + // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. > + // } // inner loop > + // } > + // > + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] > + // When schedule(static, chunk_size) is specified, iterations are > divided into > + // chunks of size chunk_size, and the chunks are assigned to the > threads in > + // the team in a round-robin fashion in the order of the thread number. > + // > + // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { > + // while (idx <= UB) { BODY; ++idx; } // inner loop > + // LB = LB + ST; > + // UB = UB + ST; > + // } > + // > + > + const Expr *IVExpr = S.getIterationVariable(); > + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); > + const bool IVSigned = > IVExpr->getType()->hasSignedIntegerRepresentation(); > + > + if (DynamicOrOrdered) { > + llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); > + RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, > + IVSize, IVSigned, Ordered, UBVal, Chunk); > + } else { > + RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, > IVSigned, > + Ordered, IL, LB, UB, ST, Chunk); > + } > + > + EmitOMPOuterLoop(IsMonotonic, DynamicOrOrdered, S, LoopScope, Ordered, > LB, UB, > + ST, IL, Chunk); > +} > + > +void CodeGenFunction::EmitOMPDistributeOuterLoop( > + OpenMPDistScheduleClauseKind ScheduleKind, > + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, > + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { > + > + auto &RT = CGM.getOpenMPRuntime(); > + > + // Emit outer loop. > + // Same behavior as a OMPForOuterLoop, except that schedule cannot be > + // dynamic > + // > + > + const Expr *IVExpr = S.getIterationVariable(); > + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); > + const bool IVSigned = > IVExpr->getType()->hasSignedIntegerRepresentation(); > + > + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, > + IVSize, IVSigned, /* Ordered = */ false, > + IL, LB, UB, ST, Chunk); > + > + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ > false, > + S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, > Chunk); > } > > /// \brief Emit a helper variable and return corresponding lvalue. > @@ -2191,9 +2229,130 @@ void CodeGenFunction::EmitOMPFlushDirect > }(), S.getLocStart()); > } > > +void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective > &S) { > + // Emit the loop iteration variable. > + auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); > + auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); > + EmitVarDecl(*IVDecl); > + > + // Emit the iterations count variable. > + // If it is not a variable, Sema decided to calculate iterations count > on each > + // iteration (e.g., it is foldable into a constant). > + if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { > + EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); > + // Emit calculation of the iterations count. > + EmitIgnoredExpr(S.getCalcLastIteration()); > + } > + > + auto &RT = CGM.getOpenMPRuntime(); > + > + // Check pre-condition. > + { > + // Skip the entire loop if we don't meet the precondition. > + // If the condition constant folds and can be elided, avoid emitting > the > + // whole loop. > + bool CondConstant; > + llvm::BasicBlock *ContBlock = nullptr; > + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { > + if (!CondConstant) > + return; > + } else { > + auto *ThenBlock = createBasicBlock("omp.precond.then"); > + ContBlock = createBasicBlock("omp.precond.end"); > + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, > + getProfileCount(&S)); > + EmitBlock(ThenBlock); > + incrementProfileCounter(&S); > + } > + > + // Emit 'then' code. > + { > + // Emit helper vars inits. > + LValue LB = > + EmitOMPHelperVar(*this, > cast<DeclRefExpr>(S.getLowerBoundVariable())); > + LValue UB = > + EmitOMPHelperVar(*this, > cast<DeclRefExpr>(S.getUpperBoundVariable())); > + LValue ST = > + EmitOMPHelperVar(*this, > cast<DeclRefExpr>(S.getStrideVariable())); > + LValue IL = > + EmitOMPHelperVar(*this, > cast<DeclRefExpr>(S.getIsLastIterVariable())); > + > + OMPPrivateScope LoopScope(*this); > + emitPrivateLoopCounters(*this, LoopScope, S.counters(), > + S.private_counters()); > + (void)LoopScope.Privatize(); > + > + // Detect the distribute schedule kind and chunk. > + llvm::Value *Chunk = nullptr; > + OpenMPDistScheduleClauseKind ScheduleKind = > OMPC_DIST_SCHEDULE_unknown; > + if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { > + ScheduleKind = C->getDistScheduleKind(); > + if (const auto *Ch = C->getChunkSize()) { > + Chunk = EmitScalarExpr(Ch); > + Chunk = EmitScalarConversion(Chunk, Ch->getType(), > + S.getIterationVariable()->getType(), > + S.getLocStart()); > + } > + } > + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); > + const bool IVSigned = > IVExpr->getType()->hasSignedIntegerRepresentation(); > + > + // OpenMP [2.10.8, distribute Construct, Description] > + // If dist_schedule is specified, kind must be static. If specified, > + // iterations are divided into chunks of size chunk_size, chunks are > + // assigned to the teams of the league in a round-robin fashion in > the > + // order of the team number. When no chunk_size is specified, the > + // iteration space is divided into chunks that are approximately > equal > + // in size, and at most one chunk is distributed to each team of the > + // league. The size of the chunks is unspecified in this case. > + if (RT.isStaticNonchunked(ScheduleKind, > + /* Chunked */ Chunk != nullptr)) { > + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, > + IVSize, IVSigned, /* Ordered = */ false, > + IL.getAddress(), LB.getAddress(), > + UB.getAddress(), ST.getAddress()); > + auto LoopExit = > + getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); > + // UB = min(UB, GlobalUB); > + EmitIgnoredExpr(S.getEnsureUpperBound()); > + // IV = LB; > + EmitIgnoredExpr(S.getInit()); > + // while (idx <= UB) { BODY; ++idx; } > + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), > + S.getInc(), > + [&S, LoopExit](CodeGenFunction &CGF) { > + CGF.EmitOMPLoopBody(S, LoopExit); > + CGF.EmitStopPoint(&S); > + }, > + [](CodeGenFunction &) {}); > + EmitBlock(LoopExit.getBlock()); > + // Tell the runtime we are done. > + RT.emitForStaticFinish(*this, S.getLocStart()); > + } else { > + // Emit the outer loop, which requests its work chunk [LB..UB] > from > + // runtime and runs the inner loop to process it. > + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, > + LB.getAddress(), UB.getAddress(), > ST.getAddress(), > + IL.getAddress(), Chunk); > + } > + } > + > + // We're now done with the loop, so jump to the continuation block. > + if (ContBlock) { > + EmitBranch(ContBlock); > + EmitBlock(ContBlock, true); > + } > + } > +} > + > void CodeGenFunction::EmitOMPDistributeDirective( > const OMPDistributeDirective &S) { > - llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); > + LexicalScope Scope(*this, S.getSourceRange()); > + auto &&CodeGen = [&S](CodeGenFunction &CGF) { > + CGF.EmitOMPDistributeLoop(S); > + }; > + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, > CodeGen, > + false); > } > > static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, > > Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=262741&r1=262740&r2=262741&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original) > +++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Fri Mar 4 14:24:58 2016 > @@ -2364,6 +2364,7 @@ public: > void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); > void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); > void EmitOMPDistributeDirective(const OMPDistributeDirective &S); > + void EmitOMPDistributeLoop(const OMPDistributeDirective &S); > > /// \brief Emit inner loop of the worksharing/simd construct. > /// > @@ -2393,11 +2394,18 @@ private: > /// \return true, if this construct has any lastprivate clause, false - > /// otherwise. > bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); > + void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, > + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, > + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); > void EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, > bool IsMonotonic, const OMPLoopDirective &S, > OMPPrivateScope &LoopScope, bool Ordered, > Address LB, > Address UB, Address ST, Address IL, > llvm::Value *Chunk); > + void EmitOMPDistributeOuterLoop( > + OpenMPDistScheduleClauseKind ScheduleKind, > + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, > + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); > /// \brief Emit code for sections directive. > void EmitSections(const OMPExecutableDirective &S); > > > Modified: cfe/trunk/lib/Serialization/ASTReaderStmt.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReaderStmt.cpp?rev=262741&r1=262740&r2=262741&view=diff > > ============================================================================== > --- cfe/trunk/lib/Serialization/ASTReaderStmt.cpp (original) > +++ cfe/trunk/lib/Serialization/ASTReaderStmt.cpp Fri Mar 4 14:24:58 2016 > @@ -2307,7 +2307,8 @@ void ASTStmtReader::VisitOMPLoopDirectiv > D->setInit(Reader.ReadSubExpr()); > D->setInc(Reader.ReadSubExpr()); > if (isOpenMPWorksharingDirective(D->getDirectiveKind()) || > - isOpenMPTaskLoopDirective(D->getDirectiveKind())) { > + isOpenMPTaskLoopDirective(D->getDirectiveKind()) || > + isOpenMPDistributeDirective(D->getDirectiveKind())) { > D->setIsLastIterVariable(Reader.ReadSubExpr()); > D->setLowerBoundVariable(Reader.ReadSubExpr()); > D->setUpperBoundVariable(Reader.ReadSubExpr()); > > Modified: cfe/trunk/lib/Serialization/ASTWriterStmt.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriterStmt.cpp?rev=262741&r1=262740&r2=262741&view=diff > > ============================================================================== > --- cfe/trunk/lib/Serialization/ASTWriterStmt.cpp (original) > +++ cfe/trunk/lib/Serialization/ASTWriterStmt.cpp Fri Mar 4 14:24:58 2016 > @@ -2095,7 +2095,8 @@ void ASTStmtWriter::VisitOMPLoopDirectiv > Writer.AddStmt(D->getInit()); > Writer.AddStmt(D->getInc()); > if (isOpenMPWorksharingDirective(D->getDirectiveKind()) || > - isOpenMPTaskLoopDirective(D->getDirectiveKind())) { > + isOpenMPTaskLoopDirective(D->getDirectiveKind()) || > + isOpenMPDistributeDirective(D->getDirectiveKind())) { > Writer.AddStmt(D->getIsLastIterVariable()); > Writer.AddStmt(D->getLowerBoundVariable()); > Writer.AddStmt(D->getUpperBoundVariable()); > > Added: cfe/trunk/test/OpenMP/distribute_codegen.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_codegen.cpp?rev=262741&view=auto > > ============================================================================== > --- cfe/trunk/test/OpenMP/distribute_codegen.cpp (added) > +++ cfe/trunk/test/OpenMP/distribute_codegen.cpp Fri Mar 4 14:24:58 2016 > @@ -0,0 +1,239 @@ > +// Test host codegen. > +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple > powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu > -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix > CHECK-64 > +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple > powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch > -o %t %s > +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown > -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s > -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix > CHECK-64 --check-prefix HCHECK > +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown > -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s > --check-prefix CHECK --check-prefix CHECK-32 --check-prefix HCHECK > +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple > i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s > +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown > -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s > -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 > --check-prefix HCHECK > + > +// Test target codegen - host bc file has to be created first. (no > significant differences with host version of target region) > +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple > powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu > -emit-llvm-bc %s -o %t-ppc-host.bc > +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple > powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu > -emit-llvm %s -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o - > | FileCheck %s > +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple > powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch > -fopenmp-is-device -omp-host-ir-file-path %t-ppc-host.bc -o %t %s > +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown > -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device > -omp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm > -o - | FileCheck %s > +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown > -omptargets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc > +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown > -omptargets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device > -omp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s > +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple > i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch > -fopenmp-is-device -omp-host-ir-file-path %t-x86-host.bc -o %t %s > +// RUN: %clang_cc1 -fopenmp -x c++ -triple i386-unknown-unknown > -omptargets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device > -omp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm > -o - | FileCheck %s > + > +// expected-no-diagnostics > +#ifndef HEADER > +#define HEADER > + > +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } > +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] > c";unknown;unknown;0;0;;\00" > +// CHECK-DAG: [[DEF_LOC_0:@.+]] = private unnamed_addr constant %ident_t > { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x > i8]* [[STR]], i32 0, i32 0) } > + > +// CHECK-LABEL: define {{.*void}} > @{{.*}}without_schedule_clause{{.*}}(float* {{.+}}, float* {{.+}}, float* > {{.+}}, float* {{.+}}) > +void without_schedule_clause(float *a, float *b, float *c, float *d) { > + #pragma omp target > + #pragma omp teams > + #pragma omp distribute > + for (int i = 33; i < 32000000; i += 7) { > + a[i] = b[i] * c[i] * d[i]; > + } > +} > + > +// CHECK: define {{.*}}void @.omp_outlined.(i32* noalias > [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** > dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) > +// CHECK: [[TID_ADDR:%.+]] = alloca i32* > +// CHECK: [[IV:%.+iv]] = alloca i32 > +// CHECK: [[LB:%.+lb]] = alloca i32 > +// CHECK: [[UB:%.+ub]] = alloca i32 > +// CHECK: [[ST:%.+stride]] = alloca i32 > +// CHECK: [[LAST:%.+last]] = alloca i32 > +// CHECK-DAG: store i32* [[GBL_TIDP]], i32** [[TID_ADDR]] > +// CHECK-DAG: store i32 0, i32* [[LB]] > +// CHECK-DAG: store i32 4571423, i32* [[UB]] > +// CHECK-DAG: store i32 1, i32* [[ST]] > +// CHECK-DAG: store i32 0, i32* [[LAST]] > +// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] > +// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] > +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* > [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, > i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) > +// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] > +// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423 > +// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] > +// CHECK-DAG: [[BBCT]]: > +// CHECK-DAG: br label %[[BBCE:.+]] > +// CHECK-DAG: [[BBCF]]: > +// CHECK-DAG: [[UBV1:%.+]] = load i32, i32* [[UB]] > +// CHECK-DAG: br label %[[BBCE]] > +// CHECK: [[BBCE]]: > +// CHECK: [[SELUB:%.+]] = phi i32 [ 4571423, %[[BBCT]] ], [ [[UBV1]], > %[[BBCF]] ] > +// CHECK: store i32 [[SELUB]], i32* [[UB]] > +// CHECK: [[LBV0:%.+]] = load i32, i32* [[LB]] > +// CHECK: store i32 [[LBV0]], i32* [[IV]] > +// CHECK: br label %[[BBINNFOR:.+]] > +// CHECK: [[BBINNFOR]]: > +// CHECK: [[IVVAL0:%.+]] = load i32, i32* [[IV]] > +// CHECK: [[UBV2:%.+]] = load i32, i32* [[UB]] > +// CHECK: [[IVLEUB:%.+]] = icmp sle i32 [[IVVAL0]], [[UBV2]] > +// CHECK: br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label > %[[BBINNEND:.+]] > +// CHECK: [[BBINNBODY]]: > +// CHECK: {{.+}} = load i32, i32* [[IV]] > +// ... loop body ... > +// CHECK: br label %[[BBBODYCONT:.+]] > +// CHECK: [[BBBODYCONT]]: > +// CHECK: br label %[[BBINNINC:.+]] > +// CHECK: [[BBINNINC]]: > +// CHECK: [[IVVAL1:%.+]] = load i32, i32* [[IV]] > +// CHECK: [[IVINC:%.+]] = add nsw i32 [[IVVAL1]], 1 > +// CHECK: store i32 [[IVINC]], i32* [[IV]] > +// CHECK: br label %[[BBINNFOR]] > +// CHECK: [[BBINNEND]]: > +// CHECK: br label %[[LPEXIT:.+]] > +// CHECK: [[LPEXIT]]: > +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_0]], i32 > [[GBL_TIDV]]) > +// CHECK: ret void > + > + > +// CHECK-LABEL: define {{.*void}} @{{.*}}static_not_chunked{{.*}}(float* > {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) > +void static_not_chunked(float *a, float *b, float *c, float *d) { > + #pragma omp target > + #pragma omp teams > + #pragma omp distribute dist_schedule(static) > + for (int i = 32000000; i > 33; i += -7) { > + a[i] = b[i] * c[i] * d[i]; > + } > +} > + > +// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias > [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** > dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) > +// CHECK: [[TID_ADDR:%.+]] = alloca i32* > +// CHECK: [[IV:%.+iv]] = alloca i32 > +// CHECK: [[LB:%.+lb]] = alloca i32 > +// CHECK: [[UB:%.+ub]] = alloca i32 > +// CHECK: [[ST:%.+stride]] = alloca i32 > +// CHECK: [[LAST:%.+last]] = alloca i32 > +// CHECK-DAG: store i32* [[GBL_TIDP]], i32** [[TID_ADDR]] > +// CHECK-DAG: store i32 0, i32* [[LB]] > +// CHECK-DAG: store i32 4571423, i32* [[UB]] > +// CHECK-DAG: store i32 1, i32* [[ST]] > +// CHECK-DAG: store i32 0, i32* [[LAST]] > +// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] > +// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] > +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* > [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 92, i32* %.omp.is_last, i32* %.omp.lb, > i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 1) > +// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] > +// CHECK-DAG: [[USWITCH:%.+]] = icmp sgt i32 [[UBV0]], 4571423 > +// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] > +// CHECK-DAG: [[BBCT]]: > +// CHECK-DAG: br label %[[BBCE:.+]] > +// CHECK-DAG: [[BBCF]]: > +// CHECK-DAG: [[UBV1:%.+]] = load i32, i32* [[UB]] > +// CHECK-DAG: br label %[[BBCE]] > +// CHECK: [[BBCE]]: > +// CHECK: [[SELUB:%.+]] = phi i32 [ 4571423, %[[BBCT]] ], [ [[UBV1]], > %[[BBCF]] ] > +// CHECK: store i32 [[SELUB]], i32* [[UB]] > +// CHECK: [[LBV0:%.+]] = load i32, i32* [[LB]] > +// CHECK: store i32 [[LBV0]], i32* [[IV]] > +// CHECK: br label %[[BBINNFOR:.+]] > +// CHECK: [[BBINNFOR]]: > +// CHECK: [[IVVAL0:%.+]] = load i32, i32* [[IV]] > +// CHECK: [[UBV2:%.+]] = load i32, i32* [[UB]] > +// CHECK: [[IVLEUB:%.+]] = icmp sle i32 [[IVVAL0]], [[UBV2]] > +// CHECK: br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label > %[[BBINNEND:.+]] > +// CHECK: [[BBINNBODY]]: > +// CHECK: {{.+}} = load i32, i32* [[IV]] > +// ... loop body ... > +// CHECK: br label %[[BBBODYCONT:.+]] > +// CHECK: [[BBBODYCONT]]: > +// CHECK: br label %[[BBINNINC:.+]] > +// CHECK: [[BBINNINC]]: > +// CHECK: [[IVVAL1:%.+]] = load i32, i32* [[IV]] > +// CHECK: [[IVINC:%.+]] = add nsw i32 [[IVVAL1]], 1 > +// CHECK: store i32 [[IVINC]], i32* [[IV]] > +// CHECK: br label %[[BBINNFOR]] > +// CHECK: [[BBINNEND]]: > +// CHECK: br label %[[LPEXIT:.+]] > +// CHECK: [[LPEXIT]]: > +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_0]], i32 > [[GBL_TIDV]]) > +// CHECK: ret void > + > + > +// CHECK-LABEL: define {{.*void}} @{{.*}}static_chunked{{.*}}(float* > {{.+}}, float* {{.+}}, float* {{.+}}, float* {{.+}}) > +void static_chunked(float *a, float *b, float *c, float *d) { > + #pragma omp target > + #pragma omp teams > +#pragma omp distribute dist_schedule(static, 5) > + for (unsigned i = 131071; i <= 2147483647; i += 127) { > + a[i] = b[i] * c[i] * d[i]; > + } > +} > + > +// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias > [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], float** > dereferenceable({{[0-9]+}}) [[APTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[BPTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[CPTR:%.+]], float** > dereferenceable({{[0-9]+}}) [[DPTR:%.+]]) > +// CHECK: [[TID_ADDR:%.+]] = alloca i32* > +// CHECK: [[IV:%.+iv]] = alloca i32 > +// CHECK: [[LB:%.+lb]] = alloca i32 > +// CHECK: [[UB:%.+ub]] = alloca i32 > +// CHECK: [[ST:%.+stride]] = alloca i32 > +// CHECK: [[LAST:%.+last]] = alloca i32 > +// CHECK-DAG: store i32* [[GBL_TIDP]], i32** [[TID_ADDR]] > +// CHECK-DAG: store i32 0, i32* [[LB]] > +// CHECK-DAG: store i32 16908288, i32* [[UB]] > +// CHECK-DAG: store i32 1, i32* [[ST]] > +// CHECK-DAG: store i32 0, i32* [[LAST]] > +// CHECK-DAG: [[GBL_TID:%.+]] = load i32*, i32** [[TID_ADDR]] > +// CHECK-DAG: [[GBL_TIDV:%.+]] = load i32, i32* [[GBL_TID]] > +// CHECK: call void @__kmpc_for_static_init_{{.+}}(%ident_t* > [[DEF_LOC_0]], i32 [[GBL_TIDV]], i32 91, i32* %.omp.is_last, i32* %.omp.lb, > i32* %.omp.ub, i32* %.omp.stride, i32 1, i32 5) > +// CHECK-DAG: [[UBV0:%.+]] = load i32, i32* [[UB]] > +// CHECK-DAG: [[USWITCH:%.+]] = icmp ugt i32 [[UBV0]], 16908288 > +// CHECK: br i1 [[USWITCH]], label %[[BBCT:.+]], label %[[BBCF:.+]] > +// CHECK-DAG: [[BBCT]]: > +// CHECK-DAG: br label %[[BBCE:.+]] > +// CHECK-DAG: [[BBCF]]: > +// CHECK-DAG: [[UBV1:%.+]] = load i32, i32* [[UB]] > +// CHECK-DAG: br label %[[BBCE]] > +// CHECK: [[BBCE]]: > +// CHECK: [[SELUB:%.+]] = phi i32 [ 16908288, %[[BBCT]] ], [ [[UBV1]], > %[[BBCF]] ] > +// CHECK: store i32 [[SELUB]], i32* [[UB]] > +// CHECK: [[LBV0:%.+]] = load i32, i32* [[LB]] > +// CHECK: store i32 [[LBV0]], i32* [[IV]] > +// CHECK: br label %[[BBINNFOR:.+]] > +// CHECK: [[BBINNFOR]]: > +// CHECK: [[IVVAL0:%.+]] = load i32, i32* [[IV]] > +// CHECK: [[UBV2:%.+]] = load i32, i32* [[UB]] > +// CHECK: [[IVLEUB:%.+]] = icmp ule i32 [[IVVAL0]], [[UBV2]] > +// CHECK: br i1 [[IVLEUB]], label %[[BBINNBODY:.+]], label > %[[BBINNEND:.+]] > +// CHECK: [[BBINNBODY]]: > +// CHECK: {{.+}} = load i32, i32* [[IV]] > +// ... loop body ... > +// CHECK: br label %[[BBBODYCONT:.+]] > +// CHECK: [[BBBODYCONT]]: > +// CHECK: br label %[[BBINNINC:.+]] > +// CHECK: [[BBINNINC]]: > +// CHECK: [[IVVAL1:%.+]] = load i32, i32* [[IV]] > +// CHECK: [[IVINC:%.+]] = add i32 [[IVVAL1]], 1 > +// CHECK: store i32 [[IVINC]], i32* [[IV]] > +// CHECK: br label %[[BBINNFOR]] > +// CHECK: [[BBINNEND]]: > +// CHECK: br label %[[LPEXIT:.+]] > +// CHECK: [[LPEXIT]]: > +// CHECK: call void @__kmpc_for_static_fini(%ident_t* [[DEF_LOC_0]], i32 > [[GBL_TIDV]]) > +// CHECK: ret void > + > +// CHECK-LABEL: test_precond > +void test_precond() { > + char a = 0; > + #pragma omp target > + #pragma omp teams > + #pragma omp distribute > + for(char i = a; i < 10; ++i); > +} > + > +// a is passed as a parameter to the outlined functions > +// CHECK: define {{.*}}void @.omp_outlined.{{.*}}(i32* noalias > [[GBL_TIDP:%.+]], i32* noalias [[BND_TID:%.+]], i8* > dereferenceable({{[0-9]+}}) [[APARM:%.+]]) > +// CHECK: store i8* [[APARM]], i8** [[APTRADDR:%.+]] > +// ..many loads of %0.. > +// CHECK: [[A2:%.+]] = load i8*, i8** [[APTRADDR]] > +// CHECK: [[AVAL0:%.+]] = load i8, i8* [[A2]] > +// CHECK: [[AVAL1:%.+]] = load i8, i8* [[A2]] > +// CHECK: [[AVAL2:%.+]] = load i8, i8* [[A2]] > +// CHECK: [[ACONV:%.+]] = sext i8 [[AVAL2]] to i32 > +// CHECK: [[ACMP:%.+]] = icmp slt i32 [[ACONV]], 10 > +// CHECK: br i1 [[ACMP]], label %[[PRECOND_THEN:.+]], label > %[[PRECOND_END:.+]] > +// CHECK: [[PRECOND_THEN]] > +// CHECK: call void @__kmpc_for_static_init_4 > +// CHECK: call void @__kmpc_for_static_fini > +// CHECK: [[PRECOND_END]] > + > +// no templates for now, as these require special handling in target > regions and/or declare target > + > +#endif > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits