llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-mlir-llvm Author: Amit Tiwari (amitamd7) <details> <summary>Changes</summary> This PR canonicalizes the intra-tile loops generated by loop tiling. --- Patch is 672.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/191114.diff 23 Files Affected: - (modified) clang/lib/Sema/SemaOpenMP.cpp (+121-46) - (modified) clang/test/OpenMP/interchange_codegen.cpp (+1734-2449) - (modified) clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c (+34-33) - (modified) clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c (+44-43) - (modified) clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c (+45-44) - (modified) clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c (+33-32) - (modified) clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c (+34-33) - (modified) clang/test/OpenMP/tile_codegen.cpp (+1099-1405) - (modified) clang/test/OpenMP/tile_codegen_for_dependent.cpp (+146-162) - (modified) clang/test/OpenMP/tile_codegen_tile_for.cpp (+193-224) - (modified) clang/test/OpenMP/tile_messages.cpp (+1-1) - (added) clang/test/OpenMP/tile_rect_codegen.cpp (+50) - (added) clang/test/OpenMP/tile_rect_codegen_ir.cpp (+84) - (modified) clang/test/OpenMP/unroll_codegen_tile_for.cpp (+190-214) - (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+39-25) - (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+30-5) - (modified) mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir (+10-8) - (modified) mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir (+16-14) - (modified) mlir/test/Target/LLVMIR/openmp-cli-tile03.mlir (+63-57) - (modified) openmp/runtime/test/transform/tile/foreach.cpp (+36) - (modified) openmp/runtime/test/transform/tile/intfor.c (+39-39) - (modified) openmp/runtime/test/transform/tile/iterfor.cpp (+27) - (modified) openmp/runtime/test/transform/tile/parallel-wsloop-collapse-foreach.cpp (+108) ``````````diff diff --git a/clang/lib/Sema/SemaOpenMP.cpp 
b/clang/lib/Sema/SemaOpenMP.cpp index fada37ba45755..0aece2f027fe3 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -14957,8 +14957,10 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, // Create iteration variables for the generated loops. SmallVector<VarDecl *, 4> FloorIndVars; SmallVector<VarDecl *, 4> TileIndVars; + SmallVector<VarDecl *, 4> TileCntVars; FloorIndVars.resize(NumLoops); TileIndVars.resize(NumLoops); + TileCntVars.resize(NumLoops); for (unsigned I = 0; I < NumLoops; ++I) { OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; @@ -14978,27 +14980,101 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, FloorIndVars[I] = FloorCntDecl; } - // Iteration variable for the tile (i.e. inner) loop. + // Logical iteration variable for the tile loop. Retains the meaning of + // the original logical iteration number (floor_iv + tile_cnt) so that + // LoopHelper.Updates can derive the original loop variable unchanged. { - std::string TileCntName = + std::string TileIVName = (Twine(".tile_") + llvm::utostr(I) + ".iv." + OrigVarName).str(); - // Reuse the iteration variable created by checkOpenMPLoop. It is also - // used by the expressions to derive the original iteration variable's - // value from the logical iteration number. - auto *TileCntDecl = cast<VarDecl>(IterVarRef->getDecl()); - TileCntDecl->setDeclName( - &SemaRef.PP.getIdentifierTable().get(TileCntName)); - TileIndVars[I] = TileCntDecl; + auto *TileIVDecl = cast<VarDecl>(IterVarRef->getDecl()); + TileIVDecl->setDeclName(&SemaRef.PP.getIdentifierTable().get(TileIVName)); + TileIndVars[I] = TileIVDecl; + } + + // Loop counter for the rectangular tile loop [0, TileSize). + { + std::string TileCntName = + (Twine(".tile.cnt.") + llvm::utostr(I) + ".iv." 
+ OrigVarName).str(); + VarDecl *TileCntDecl = + buildVarDecl(SemaRef, {}, CntTy, TileCntName, nullptr, OrigCntVar); + TileCntVars[I] = TileCntDecl; } addLoopPreInits(Context, LoopHelper, LoopStmts[I], OriginalInits[I], PreInits); + + // Declare the logical tile IV in PreInits so it is in scope for the + // entire loop nest (it will be assigned in each tile loop body). + Decl *TileIVDeclPtr = TileIndVars[I]; + PreInits.push_back(new (Context) DeclStmt( + DeclGroupRef::Create(Context, &TileIVDeclPtr, 1), {}, {})); } // Once the original iteration values are set, append the innermost body. Stmt *Inner = Body; + // Build a combined validity predicate that guards the innermost body. + // For each tiled dimension, check that the logical iteration number + // (.tile.iv) is within the original trip count. This is required because the + // tile loop now has rectangular (constant) bounds and may overshoot on the + // remainder tile. The predicate is: .tile.iv.0 < N0 && .tile.iv.1 < N1 ... + // + // Optimization: if every dimension's trip count is a compile-time constant + // that is evenly divisible by the corresponding tile size (also a constant), + // then the remainder tile is empty and the predicate is trivially true. 
+ { + bool PredicateNeeded = false; + for (unsigned I = 0; I < NumLoops; ++I) { + Expr *TSExpr = SizesClause->getSizesRefs()[I]; + Expr *NExpr = LoopHelpers[I].NumIterations; + llvm::APSInt TileVal, TripVal; + bool TSConst = + !TSExpr->containsErrors() && TSExpr->isIntegerConstantExpr(Context); + bool NConst = NExpr->isIntegerConstantExpr(Context); + if (TSConst && NConst) { + Expr::EvalResult TSResult; + TSExpr->EvaluateAsInt(TSResult, Context); + TileVal = TSResult.Val.getInt(); + Expr::EvalResult NResult; + NExpr->EvaluateAsInt(NResult, Context); + TripVal = NResult.Val.getInt(); + if (TileVal.isStrictlyPositive() && (TripVal.srem(TileVal)).isZero()) + continue; + } + PredicateNeeded = true; + break; + } + + if (PredicateNeeded) { + Expr *CombinedPred = nullptr; + for (unsigned I = 0; I < NumLoops; ++I) { + auto *OrigCntVar = cast<DeclRefExpr>(LoopHelpers[I].Counters[0]); + QualType IVTy = LoopHelpers[I].NumIterations->getType(); + Expr *TileIVRef = buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy, + OrigCntVar->getExprLoc()); + ExprResult DimPred = + SemaRef.BuildBinOp(CurScope, OrigCntVar->getExprLoc(), BO_LT, + TileIVRef, LoopHelpers[I].NumIterations); + if (!DimPred.isUsable()) + return StmtError(); + if (CombinedPred) { + ExprResult Combined = + SemaRef.BuildBinOp(CurScope, OrigCntVar->getExprLoc(), BO_LAnd, + CombinedPred, DimPred.get()); + if (!Combined.isUsable()) + return StmtError(); + CombinedPred = Combined.get(); + } else { + CombinedPred = DimPred.get(); + } + } + Inner = IfStmt::Create( + Context, SourceLocation(), IfStatementKind::Ordinary, nullptr, + nullptr, CombinedPred, SourceLocation(), SourceLocation(), Inner); + } + } + auto MakeDimTileSize = [&SemaRef = this->SemaRef, &CopyTransformer, &Context, SizesClause, CurScope](int I) -> Expr * { Expr *DimTileSizeExpr = SizesClause->getSizesRefs()[I]; @@ -15006,7 +15082,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, if (DimTileSizeExpr->containsErrors()) return 
nullptr; - if (isa<ConstantExpr>(DimTileSizeExpr)) + if (DimTileSizeExpr->isIntegerConstantExpr(Context)) return AssertSuccess(CopyTransformer.TransformExpr(DimTileSizeExpr)); // When the tile size is not a constant but a variable, it is possible to @@ -15042,6 +15118,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, }; // Create tile loops from the inside to the outside. + // Each tile loop uses .tile.cnt as its counter with rectangular bounds + // [0, TileSize), and computes .tile.iv = .floor.iv + .tile.cnt to set + // the logical iteration number for LoopHelper.Updates. for (int I = NumLoops - 1; I >= 0; --I) { OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I]; Expr *NumIterations = LoopHelper.NumIterations; @@ -15052,70 +15131,65 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, // Commonly used variables. One of the constraints of an AST is that every // node object must appear at most once, hence we define a lambda that // creates a new AST node at every use. 
+ auto MakeTileCntRef = [&SemaRef = this->SemaRef, &TileCntVars, I, IVTy, + OrigCntVar]() { + return buildDeclRefExpr(SemaRef, TileCntVars[I], IVTy, + OrigCntVar->getExprLoc()); + }; auto MakeTileIVRef = [&SemaRef = this->SemaRef, &TileIndVars, I, IVTy, OrigCntVar]() { return buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy, OrigCntVar->getExprLoc()); }; - // For init-statement: auto .tile.iv = .floor.iv + // For init-statement: auto .tile.cnt = 0 SemaRef.AddInitializerToDecl( - TileIndVars[I], - SemaRef - .DefaultLvalueConversion( - makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar)) - .get(), + TileCntVars[I], + SemaRef.ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(), /*DirectInit=*/false); - Decl *CounterDecl = TileIndVars[I]; + Decl *CounterDecl = TileCntVars[I]; StmtResult InitStmt = new (Context) DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1), OrigCntVar->getBeginLoc(), OrigCntVar->getEndLoc()); if (!InitStmt.isUsable()) return StmtError(); - // For cond-expression: - // .tile.iv < min(.floor.iv + DimTileSize, NumIterations) + // For cond-expression: .tile.cnt < DimTileSize (rectangular bound) Expr *DimTileSize = MakeDimTileSize(I); if (!DimTileSize) return StmtError(); - ExprResult EndOfTile = SemaRef.BuildBinOp( - CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, - makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), - DimTileSize); - if (!EndOfTile.isUsable()) - return StmtError(); - ExprResult IsPartialTile = - SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, - NumIterations, EndOfTile.get()); - if (!IsPartialTile.isUsable()) - return StmtError(); - ExprResult MinTileAndIterSpace = SemaRef.ActOnConditionalOp( - LoopHelper.Cond->getBeginLoc(), LoopHelper.Cond->getEndLoc(), - IsPartialTile.get(), NumIterations, EndOfTile.get()); - if (!MinTileAndIterSpace.isUsable()) - return StmtError(); ExprResult CondExpr = SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, - MakeTileIVRef(), 
MinTileAndIterSpace.get()); + MakeTileCntRef(), DimTileSize); if (!CondExpr.isUsable()) return StmtError(); - // For incr-statement: ++.tile.iv + // For incr-statement: ++.tile.cnt ExprResult IncrStmt = SemaRef.BuildUnaryOp( - CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeTileIVRef()); + CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeTileCntRef()); if (!IncrStmt.isUsable()) return StmtError(); - // Statements to set the original iteration variable's value from the - // logical iteration number. + // Compute the logical iteration number: + // .tile.iv = .floor.iv + .tile.cnt + ExprResult FloorPlusCnt = SemaRef.BuildBinOp( + CurScope, OrigCntVar->getExprLoc(), BO_Add, + makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar), + MakeTileCntRef()); + if (!FloorPlusCnt.isUsable()) + return StmtError(); + ExprResult TileIVAssign = + SemaRef.BuildBinOp(CurScope, OrigCntVar->getExprLoc(), BO_Assign, + MakeTileIVRef(), FloorPlusCnt.get()); + if (!TileIVAssign.isUsable()) + return StmtError(); + // Generated for loop is: // \code - // Original_for_init; - // for (auto .tile.iv = .floor.iv; - // .tile.iv < min(.floor.iv + DimTileSize, NumIterations); - // ++.tile.iv) { - // Original_Body; - // Original_counter_update; + // for (auto .tile.cnt = 0; .tile.cnt < DimTileSize; ++.tile.cnt) { + // .tile.iv = .floor.iv + .tile.cnt; + // Original_counter_update; // derives orig var from .tile.iv + // Inner; // predicated body or inner tile loops // } // \endcode // FIXME: If the innermost body is an loop itself, inserting these @@ -15123,6 +15197,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses, // for applying tiling again). If this is the case, sink the expressions // further into the inner loop. 
SmallVector<Stmt *, 4> BodyParts; + BodyParts.push_back(TileIVAssign.get()); BodyParts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end()); if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmt)) BodyParts.push_back(SourceCXXFor->getLoopVarStmt()); diff --git a/clang/test/OpenMP/interchange_codegen.cpp b/clang/test/OpenMP/interchange_codegen.cpp index 8e833c9df324c..b062d42c9f162 100644 --- a/clang/test/OpenMP/interchange_codegen.cpp +++ b/clang/test/OpenMP/interchange_codegen.cpp @@ -123,6 +123,7 @@ extern "C" void foo10() { #endif /* HEADER */ + // CHECK1-LABEL: define {{[^@]+}}@body // CHECK1-SAME: (...) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: @@ -156,7 +157,7 @@ extern "C" void foo10() { // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: ret void // @@ -262,14 +263,14 @@ extern "C" void foo10() { // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC22:%.*]] // CHECK1: for.inc22: // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 // CHECK1-NEXT: [[INC23:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[INC23]], ptr [[DOTPERMUTED_0_IV_J]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: for.end24: // CHECK1-NEXT: ret void // @@ -342,7 +343,7 @@ extern "C" void foo10() { // 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -439,7 +440,7 @@ extern "C" void foo10() { // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: @@ -754,28 +755,28 @@ extern "C" void foo10() { // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPERMUTED_3_IV_I]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_3_IV_I]], align 4 -// CHECK1-NEXT: br label [[FOR_COND11]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND11]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: br label [[FOR_INC16:%.*]] // CHECK1: for.inc16: // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPERMUTED_2_IV_L]], align 4 // CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[INC17]], ptr [[DOTPERMUTED_2_IV_L]], align 4 -// CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: for.end18: // CHECK1-NEXT: br label [[FOR_INC19:%.*]] // CHECK1: for.inc19: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_K]], align 4 // CHECK1-NEXT: [[INC20:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: 
store i32 [[INC20]], ptr [[DOTPERMUTED_1_IV_K]], align 4 -// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: for.end21: // CHECK1-NEXT: br label [[FOR_INC22:%.*]] // CHECK1: for.inc22: // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4 // CHECK1-NEXT: [[INC23:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[INC23]], ptr [[DOTPERMUTED_0_IV_J]], align 4 -// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: for.end24: // CHECK1-NEXT: ret void // @@ -810,22 +811,21 @@ extern "C" void foo10() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTNEW_STEP10:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTILE_0_IV_K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_14:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_16:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_16:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTFLOOR_0_IV_K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTILE_0_IV_K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTILE_CNT_0_IV_K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I49:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J50:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTFLOOR_0_IV_K51:%.*]] = alloca i32, align 4 
-// CHECK1-NEXT: [[DOTTILE_0_IV_K52:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I35:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J36:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTFLOOR_0_IV_K37:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTILE_CNT_0_IV_K38:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4 // CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4 @@ -863,630 +863,452 @@ extern "C" void foo10() { // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 // CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTCAPTURE_EXPR_14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[_TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_16]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP17]], 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[_TMP2]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP18]], 32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD18]], [[ADD19]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4 -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP19]], 1 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[_TMP2]], align 4 -// CHECK1-NEXT: [[ADD21:%.*]] = add i32 [[TMP20]], 32 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD20]], [[COND_TRUE]] ], [ [[ADD21]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTCAPTURE_EXPR_17]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP23]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4 -// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP24]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i32 [[DIV26]] to i64 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32,... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/191114 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
