https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/76090
>From 7c31c8bc2acf60bd50cb6d63944ee8d4946b9638 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Thu, 4 May 2023 21:33:24 +0100 Subject: [PATCH] [VPlan] Replace VPRecipeOrVPValue with VP2VP recipe simplification. Move simplification of VPBlendRecipes from early VPlan construction to VPlan-to-VPlan based recipe simplification. This simplifies initial construction. Note that some in-loop reduction tests are failing at the moment, due to the reduction predicate being created after the reduction recipe. I will provide a patch for that soon. --- .../Transforms/Vectorize/LoopVectorize.cpp | 99 +++++++------------ .../Transforms/Vectorize/VPRecipeBuilder.h | 23 ++--- .../Transforms/Vectorize/VPlanTransforms.cpp | 34 ++++++- .../Transforms/Vectorize/VPlanTransforms.h | 1 - 4 files changed, 74 insertions(+), 83 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f82e161fb846d1..609a927d23754b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8256,31 +8256,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate( return nullptr; } -VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, - ArrayRef<VPValue *> Operands, - VPlanPtr &Plan) { - // If all incoming values are equal, the incoming VPValue can be used directly - // instead of creating a new VPBlendRecipe. - if (llvm::all_equal(Operands)) - return Operands[0]; - +VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, + ArrayRef<VPValue *> Operands, + VPlanPtr &Plan) { unsigned NumIncoming = Phi->getNumIncomingValues(); - // For in-loop reductions, we do not need to create an additional select. 
- VPValue *InLoopVal = nullptr; - for (unsigned In = 0; In < NumIncoming; In++) { - PHINode *PhiOp = - dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue()); - if (PhiOp && CM.isInLoopReduction(PhiOp)) { - assert(!InLoopVal && "Found more than one in-loop reduction!"); - InLoopVal = Operands[In]; - } - } - - assert((!InLoopVal || NumIncoming == 2) && - "Found an in-loop reduction for PHI with unexpected number of " - "incoming values"); - if (InLoopVal) - return Operands[Operands[0] == InLoopVal ? 1 : 0]; // We know that all PHIs in non-header blocks are converted into selects, so // we don't have to worry about the insertion order and we can just use the @@ -8292,13 +8271,13 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi, for (unsigned In = 0; In < NumIncoming; In++) { VPValue *EdgeMask = createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan); - assert((EdgeMask || NumIncoming == 1) && + assert((EdgeMask || NumIncoming == 1 || Operands[In] == Operands[0]) && "Multiple predecessors with one having a full mask"); OperandsWithMask.push_back(Operands[In]); if (EdgeMask) OperandsWithMask.push_back(EdgeMask); } - return toVPRecipeResult(new VPBlendRecipe(Phi, OperandsWithMask)); + return new VPBlendRecipe(Phi, OperandsWithMask); } VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, @@ -8464,9 +8443,8 @@ void VPRecipeBuilder::fixHeaderPhis() { } } -VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, - VFRange &Range, - VPlan &Plan) { +VPRecipeBase *VPRecipeBuilder::handleReplication(Instruction *I, VFRange &Range, + VPlan &Plan) { bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange( [&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); @@ -8518,14 +8496,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I, auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()), IsUniform, BlockInMask); - return toVPRecipeResult(Recipe); 
+ return Recipe; } -VPRecipeOrVPValueTy -VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, - ArrayRef<VPValue *> Operands, - VFRange &Range, VPBasicBlock *VPBB, - VPlanPtr &Plan) { +VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe( + Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range, + VPBasicBlock *VPBB, VPlanPtr &Plan) { // First, check for specific widening recipes that deal with inductions, Phi // nodes, calls and memory operations. VPRecipeBase *Recipe; @@ -8538,7 +8514,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, recordRecipeOf(Phi); if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range))) - return toVPRecipeResult(Recipe); + return Recipe; VPHeaderPHIRecipe *PhiRecipe = nullptr; assert((Legal->isReductionVariable(Phi) || @@ -8570,13 +8546,13 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, recordRecipeOf(Inc); PhisToFix.push_back(PhiRecipe); - return toVPRecipeResult(PhiRecipe); + return PhiRecipe; } if (isa<TruncInst>(Instr) && (Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands, Range, *Plan))) - return toVPRecipeResult(Recipe); + return Recipe; // All widen recipes below deal only with VF > 1. 
if (LoopVectorizationPlanner::getDecisionAndClampRange( @@ -8584,29 +8560,29 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr, return nullptr; if (auto *CI = dyn_cast<CallInst>(Instr)) - return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan)); + return tryToWidenCall(CI, Operands, Range, Plan); if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr)) - return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan)); + return tryToWidenMemory(Instr, Operands, Range, Plan); if (!shouldWiden(Instr, Range)) return nullptr; if (auto GEP = dyn_cast<GetElementPtrInst>(Instr)) - return toVPRecipeResult(new VPWidenGEPRecipe( - GEP, make_range(Operands.begin(), Operands.end()))); + return new VPWidenGEPRecipe(GEP, + make_range(Operands.begin(), Operands.end())); if (auto *SI = dyn_cast<SelectInst>(Instr)) { - return toVPRecipeResult(new VPWidenSelectRecipe( - *SI, make_range(Operands.begin(), Operands.end()))); + return new VPWidenSelectRecipe( + *SI, make_range(Operands.begin(), Operands.end())); } if (auto *CI = dyn_cast<CastInst>(Instr)) { - return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0], - CI->getType(), *CI)); + return new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), + *CI); } - return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan)); + return tryToWiden(Instr, Operands, VPBB, Plan); } void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, @@ -8786,22 +8762,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) continue; - auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe( + VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe( Instr, Operands, Range, VPBB, Plan); - if (!RecipeOrValue) - RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan); - // If Instr can be simplified to an existing VPValue, use it. 
- if (isa<VPValue *>(RecipeOrValue)) { - auto *VPV = cast<VPValue *>(RecipeOrValue); - Plan->addVPValue(Instr, VPV); - // If the re-used value is a recipe, register the recipe for the - // instruction, in case the recipe for Instr needs to be recorded. - if (VPRecipeBase *R = VPV->getDefiningRecipe()) - RecipeBuilder.setRecipe(Instr, R); - continue; - } - // Otherwise, add the new recipe. - VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue); + if (!Recipe) + Recipe = RecipeBuilder.handleReplication(Instr, Range, *Plan); for (auto *Def : Recipe->definedValues()) { auto *UV = Def->getUnderlyingValue(); Plan->addVPValue(UV, Def); @@ -8966,10 +8930,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { } // Adjust the recipes for reductions. For in-loop reductions the chain of -// instructions leading from the loop exit instr to the phi need to be converted -// to reductions, with one operand being vector and the other being the scalar -// reduction chain. For other reductions, a select is introduced between the phi -// and live-out recipes when folding the tail. +// instructions leading from the loop exit instr to the phi need to be +// converted to reductions, with one operand being vector and the other +// being the scalar reduction chain. For other reductions, a select is +// introduced between the phi and live-out recipes when folding the tail. 
void LoopVectorizationPlanner::adjustRecipesForReductions( VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) { @@ -9079,6 +9043,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator()); VecOp = FMulRecipe; } else { + if (PhiR->isInLoop() && isa<VPBlendRecipe>(CurrentLink)) + continue; + if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { if (isa<VPWidenRecipe>(CurrentLink)) { assert(isa<CmpInst>(CurrentLinkI) && diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index 7ff6749a09089e..0d8d7fc6f81770 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -21,8 +21,6 @@ class LoopVectorizationLegality; class LoopVectorizationCostModel; class TargetLibraryInfo; -using VPRecipeOrVPValueTy = PointerUnion<VPRecipeBase *, VPValue *>; - /// Helper class to create VPRecipies from IR instructions. class VPRecipeBuilder { /// The loop that we evaluate. @@ -88,8 +86,8 @@ class VPRecipeBuilder { /// or a new VPBlendRecipe otherwise. Currently all such phi nodes are turned /// into a sequence of select instructions as the vectorizer currently /// performs full if-conversion. - VPRecipeOrVPValueTy tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands, - VPlanPtr &Plan); + VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands, + VPlanPtr &Plan); /// Handle call instructions. If \p CI can be widened for \p Range.Start, /// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same @@ -103,9 +101,6 @@ class VPRecipeBuilder { VPRecipeBase *tryToWiden(Instruction *I, ArrayRef<VPValue *> Operands, VPBasicBlock *VPBB, VPlanPtr &Plan); - /// Return a VPRecipeOrValueTy with VPRecipeBase * being set. This can be used to force the use as VPRecipeBase* for recipe sub-types that also inherit from VPValue. 
- VPRecipeOrVPValueTy toVPRecipeResult(VPRecipeBase *R) const { return R; } - public: VPRecipeBuilder(Loop *OrigLoop, const TargetLibraryInfo *TLI, LoopVectorizationLegality *Legal, @@ -116,12 +111,11 @@ class VPRecipeBuilder { /// Check if an existing VPValue can be used for \p Instr or a recipe can be /// create for \p I withing the given VF \p Range. If an existing VPValue can - /// be used or if a recipe can be created, return it. Otherwise return a - /// VPRecipeOrVPValueTy with nullptr. - VPRecipeOrVPValueTy tryToCreateWidenRecipe(Instruction *Instr, - ArrayRef<VPValue *> Operands, - VFRange &Range, VPBasicBlock *VPBB, - VPlanPtr &Plan); + /// be used or if a recipe can be created, return it. + VPRecipeBase *tryToCreateWidenRecipe(Instruction *Instr, + ArrayRef<VPValue *> Operands, + VFRange &Range, VPBasicBlock *VPBB, + VPlanPtr &Plan); /// Set the recipe created for given ingredient. This operation is a no-op for /// ingredients that were not marked using a nullptr entry in the map. @@ -165,8 +159,7 @@ class VPRecipeBuilder { /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as /// last operand. Range.End may be decreased to ensure same recipe behavior /// from \p Range.Start to \p Range.End. - VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range, - VPlan &Plan); + VPRecipeBase *handleReplication(Instruction *I, VFRange &Range, VPlan &Plan); /// Add the incoming values from the backedge to reduction & first-order /// recurrence cross-iteration phis. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 33132880d5a444..317d7806bf5511 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -806,6 +806,38 @@ static unsigned getOpcodeForRecipe(VPRecipeBase &R) { /// Try to simplify recipe \p R. 
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { + // Try to remove redundant blend recipes. + if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) { + if (Blend->getNumIncomingValues() == 1) { + Blend->replaceAllUsesWith(Blend->getIncomingValue(0)); + Blend->eraseFromParent(); + return; + } + + bool AllEqual = true; + for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I) + AllEqual &= Blend->getIncomingValue(0) == Blend->getIncomingValue(I); + if (AllEqual) { + Blend->replaceAllUsesWith(Blend->getIncomingValue(0)); + Blend->eraseFromParent(); + return; + } + if (Blend->getNumIncomingValues() != 2) + return; + auto IsInLoopReduction = [](VPValue *VPV) { + auto *PhiR = dyn_cast<VPReductionPHIRecipe>(VPV); + return PhiR && PhiR->isInLoop(); + }; + if (IsInLoopReduction(Blend->getIncomingValue(0))) { + Blend->replaceAllUsesWith(Blend->getIncomingValue(1)); + Blend->eraseFromParent(); + } else if (IsInLoopReduction(Blend->getIncomingValue(1))) { + Blend->replaceAllUsesWith(Blend->getIncomingValue(0)); + Blend->eraseFromParent(); + } + return; + } + switch (getOpcodeForRecipe(R)) { case Instruction::Mul: { VPValue *A = R.getOperand(0); @@ -996,8 +1028,8 @@ void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) { removeRedundantCanonicalIVs(Plan); removeRedundantInductionCasts(Plan); - optimizeInductions(Plan, SE); simplifyRecipes(Plan, SE.getContext()); + optimizeInductions(Plan, SE); removeDeadRecipes(Plan); createAndOptimizeReplicateRegions(Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 3bf91115debb7d..7ce2e5974f60b0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -112,7 +112,6 @@ struct VPlanTransforms { /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing /// them with already existing recipes expanding the same SCEV expression. 
static void removeRedundantExpandSCEVRecipes(VPlan &Plan); - }; } // namespace llvm _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits