https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/78113
>From 36b085f21b76d7bf7c9965a86a09d1cef4fe9329 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Sun, 14 Jan 2024 14:13:08 +0000 Subject: [PATCH 1/7] [VPlan] Add new VPUniformPerUFRecipe, use for step truncation. Add a new recipe to model uniform-per-UF instructions, without relying on an underlying instruction. Initially, it supports uniform cast-ops and therefore stores the result type. Not relying on an underlying instruction (like the current VPReplicateRecipe) allows creating instances without a corresponding instruction. In the future, the plan is to extend this recipe to handle all opcodes needed to replace the uniform part of VPReplicateRecipe. --- llvm/lib/Transforms/Vectorize/VPlan.h | 30 ++++++++++++ .../Transforms/Vectorize/VPlanAnalysis.cpp | 6 ++- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 49 ++++++++++++++++--- .../Transforms/Vectorize/VPlanTransforms.cpp | 9 ++++ llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + .../LoopVectorize/cast-induction.ll | 4 +- .../interleave-and-scalarize-only.ll | 3 +- .../pr46525-expander-insertpoint.ll | 2 +- 8 files changed, 93 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4b4f4911eb6415..d5985224cccc48 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1945,6 +1945,36 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue { } }; +/// VPUniformPerUFRecipe represents an instruction with Opcode that is uniform +/// per UF, i.e. it generates a single scalar instance per UF. +/// TODO: at the moment, only Cast opcodes are supported, extend to support +/// missing opcodes to replace uniform part of VPReplicateRecipe. +class VPUniformPerUFRecipe : public VPRecipeBase, public VPValue { + unsigned Opcode; + + /// Result type for the cast. 
+ Type *ResultTy; + + Value *generate(VPTransformState &State, unsigned Part); + +public: + VPUniformPerUFRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) + : VPRecipeBase(VPDef::VPUniformPerUFSC, {Op}), VPValue(this), + Opcode(Opcode), ResultTy(ResultTy) {} + + ~VPUniformPerUFRecipe() override = default; + + VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for generating conditional branches on the bits of a mask. class VPBranchOnMaskRecipe : public VPRecipeBase { public: diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 97a8a1803bbf5a..d71b0703994450 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -230,7 +230,11 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { return V->getUnderlyingValue()->getType(); }) .Case<VPWidenCastRecipe>( - [](const VPWidenCastRecipe *R) { return R->getResultType(); }); + [](const VPWidenCastRecipe *R) { return R->getResultType(); }) + .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) { + return R->getSCEV()->getType(); + }); + assert(ResultTy && "could not infer type for the given VPValue"); CachedTypes[V] = ResultTy; return ResultTy; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1f844bce23102e..423504e8f7e05e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -164,6 +164,8 @@ bool VPRecipeBase::mayHaveSideEffects() const { auto *R = cast<VPReplicateRecipe>(this); return R->getUnderlyingInstr()->mayHaveSideEffects(); } + case VPUniformPerUFSC: + return false; default: return true; } @@ 
-1117,13 +1119,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { // Ensure step has the same type as that of scalar IV. Type *BaseIVTy = BaseIV->getType()->getScalarType(); - if (BaseIVTy != Step->getType()) { - // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to - // avoid separate truncate here. - assert(Step->getType()->isIntegerTy() && - "Truncation requires an integer step"); - Step = State.Builder.CreateTrunc(Step, BaseIVTy); - } + assert(BaseIVTy == Step->getType()); // We build scalar steps for both integer and floating-point induction // variables. Here, we determine the kind of arithmetic we will perform. @@ -1469,6 +1465,45 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) { + switch (Opcode) { + case Instruction::SExt: + case Instruction::ZExt: + case Instruction::Trunc: { + Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); + return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); + } + default: + llvm_unreachable("opcode not implemented yet"); + } +} + +void VPUniformPerUFRecipe ::execute(VPTransformState &State) { + bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) { + return Op->isDefinedOutsideVectorRegions(); + }); + for (unsigned Part = 0; Part != State.UF; ++Part) { + Value *Res; + // Only generate a single instance, if the recipe is uniform across all UFs. 
+ if (Part > 0 && UniformAcrossUFs) + Res = State.get(this, VPIteration(0, 0)); + else + Res = generate(State, Part); + State.set(this, Res, VPIteration(Part, 0)); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPUniformPerUFRecipe ::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "UNIFORM-PER-UF "; + printAsOperand(O, SlotTracker); + O << " = " << Instruction::getOpcodeName(Opcode) << " "; + printOperands(O, SlotTracker); + O << " to " << *ResultTy; +} +#endif + void VPBranchOnMaskRecipe::execute(VPTransformState &State) { assert(State.Instance && "Branch on Mask works only on single instance."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b3694e74a38509..6ba8901e76aa50 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -505,6 +505,15 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); } + VPTypeAnalysis TypeInfo(SE.getContext()); + if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) { + Step = new VPUniformPerUFRecipe(Instruction::Trunc, Step, + TypeInfo.inferScalarType(BaseIV)); + auto *VecPreheader = + cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()); + VecPreheader->appendRecipe(Step->getDefiningRecipe()); + } + VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); HeaderVPBB->insert(Steps, IP); return Steps; diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 8cc98f4abf933e..009edea39a3c43 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -362,6 +362,7 @@ class VPDef { // START: Phi-like recipes. Need to be kept together. 
VPBlendSC, VPPredInstPHISC, + VPUniformPerUFSC, // START: SubclassID for recipes that inherit VPHeaderPHIRecipe. // VPHeaderPHIRecipe need to be kept together. VPCanonicalIVPHISC, diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index c5edf9831d7d90..4121a1399c47f5 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -83,12 +83,14 @@ define void @cast_variable_step(i64 %step) { ; VF4: middle.block: ; ; IC2-LABEL: @cast_variable_step( +; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32 +; IC2: br label %vector.body + ; IC2-LABEL: vector.body: ; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ] ; IC2-NEXT: [[MUL:%.+]] = mul i64 %index, %step ; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]] ; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32 -; IC2-NEXT: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32 ; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]] ; IC2-NEXT: [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]] ; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 297cd2a7c12f9a..6410a556589f94 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -184,6 +184,7 @@ exit: ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: vector.ph: +; DBG-NEXT: UNIFORM-PER-UF vp<[[CAST:%.+]]> = trunc ir<1> to i32 ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: ; DBG-NEXT: <x1> vector loop: { @@ -191,7 +192,7 @@ exit: ; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]> ; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32) -; DBG-NEXT: 
vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> +; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[CAST]]> ; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]> ; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst> ; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index ea3de4a0fbb363..f0220f5e766b23 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -43,7 +43,7 @@ define void @test(i16 %x, i64 %y, ptr %ptr) { ; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1 ; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]] -; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.exit: ; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]] ; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1 >From 6b3e52eebb0bc89e802c6d83afc2b2f79e5123a9 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Sun, 21 Jan 2024 21:05:36 +0000 Subject: [PATCH 2/7] !fixup specialize to VPScalarCastRecipe for now. 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 59 +++++++++---------- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 14 +++-- .../Transforms/Vectorize/VPlanTransforms.cpp | 4 +- llvm/lib/Transforms/Vectorize/VPlanValue.h | 2 +- .../interleave-and-scalarize-only.ll | 2 +- 5 files changed, 41 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index ebdc4678853894..59eccf135dbe43 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -859,6 +859,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenIntOrFpInductionSC: case VPRecipeBase::VPWidenPointerInductionSC: case VPRecipeBase::VPReductionPHISC: + case VPRecipeBase::VPScalarCastSC: return true; case VPRecipeBase::VPInterleaveSC: case VPRecipeBase::VPBranchOnMaskSC: @@ -1338,6 +1339,34 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags { Type *getResultType() const { return ResultTy; } }; +/// VPScalarCastRecipe is a recipe o create scalar cast instructions. +class VPScalarCastRecipe : public VPRecipeBase, public VPValue { + /// Cast instruction opcode. + Instruction::CastOps Opcode; + + /// Result type for the cast. + Type *ResultTy; + + Value *generate(VPTransformState &State, unsigned Part); + +public: + VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) + : VPRecipeBase(VPDef::VPScalarCastSC, {Op}), VPValue(this), + Opcode(Opcode), ResultTy(ResultTy) {} + + ~VPScalarCastRecipe() override = default; + + VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for widening Call instructions. 
class VPWidenCallRecipe : public VPSingleDefRecipe { /// ID of the vector intrinsic to call when widening the call. If set the @@ -2010,36 +2039,6 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags { } }; -/// VPUniformPerUFRecipe represents an instruction with Opcode that is uniform -/// per UF, i.e. it generates a single scalar instance per UF. -/// TODO: at the moment, only Cast opcodes are supported, extend to support -/// missing opcodes to replace uniform part of VPReplicateRecipe. -class VPUniformPerUFRecipe : public VPRecipeBase, public VPValue { - unsigned Opcode; - - /// Result type for the cast. - Type *ResultTy; - - Value *generate(VPTransformState &State, unsigned Part); - -public: - VPUniformPerUFRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) - : VPRecipeBase(VPDef::VPUniformPerUFSC, {Op}), VPValue(this), - Opcode(Opcode), ResultTy(ResultTy) {} - - ~VPUniformPerUFRecipe() override = default; - - VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) - - void execute(VPTransformState &State) override; - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. - void print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const override; -#endif -}; - /// A recipe for generating conditional branches on the bits of a mask. 
class VPBranchOnMaskRecipe : public VPRecipeBase { public: diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f6dd13c6375cfd..fe93bae09f0d42 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -164,7 +164,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { auto *R = cast<VPReplicateRecipe>(this); return R->getUnderlyingInstr()->mayHaveSideEffects(); } - case VPUniformPerUFSC: + case VPScalarCastSC: return false; default: return true; @@ -1465,7 +1465,9 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) { +Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { + assert(vputils::onlyFirstLaneUsed(this) && + "Codegen only implemented for first lane only."); switch (Opcode) { case Instruction::SExt: case Instruction::ZExt: @@ -1478,7 +1480,7 @@ Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) { } } -void VPUniformPerUFRecipe ::execute(VPTransformState &State) { +void VPScalarCastRecipe ::execute(VPTransformState &State) { bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) { return Op->isDefinedOutsideVectorRegions(); }); @@ -1494,9 +1496,9 @@ void VPUniformPerUFRecipe ::execute(VPTransformState &State) { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void VPUniformPerUFRecipe ::print(raw_ostream &O, const Twine &Indent, - VPSlotTracker &SlotTracker) const { - O << Indent << "UNIFORM-PER-UF "; +void VPScalarCastRecipe ::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "SCALAR-CAST "; printAsOperand(O, SlotTracker); O << " = " << Instruction::getOpcodeName(Opcode) << " "; printOperands(O, SlotTracker); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 
c73058a24eb155..c485eadf9e0f36 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -506,8 +506,8 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, VPTypeAnalysis TypeInfo(SE.getContext()); if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) { - Step = new VPUniformPerUFRecipe(Instruction::Trunc, Step, - TypeInfo.inferScalarType(BaseIV)); + Step = new VPScalarCastRecipe(Instruction::Trunc, Step, + TypeInfo.inferScalarType(BaseIV)); auto *VecPreheader = cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()); VecPreheader->appendRecipe(Step->getDefiningRecipe()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 009edea39a3c43..bbbf2d3a965dbf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -362,7 +362,7 @@ class VPDef { // START: Phi-like recipes. Need to be kept together. VPBlendSC, VPPredInstPHISC, - VPUniformPerUFSC, + VPScalarCastSC, // START: SubclassID for recipes that inherit VPHeaderPHIRecipe. // VPHeaderPHIRecipe need to be kept together. 
VPCanonicalIVPHISC, diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 6410a556589f94..c3bf2eee1dfc88 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -184,7 +184,7 @@ exit: ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: vector.ph: -; DBG-NEXT: UNIFORM-PER-UF vp<[[CAST:%.+]]> = trunc ir<1> to i32 +; DBG-NEXT: SCALAR-CAST vp<[[CAST:%.+]]> = trunc ir<1> to i32 ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: ; DBG-NEXT: <x1> vector loop: { >From 9331a454be3ca943244ddd02c934192eda98ec39 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Mon, 22 Jan 2024 12:49:52 +0000 Subject: [PATCH 3/7] !fixup address latest comments, thanks! --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 1 + llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++------ llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 7 +++---- llvm/lib/Transforms/Vectorize/VPlanValue.h | 2 +- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 6ca93e15719fb2..7d1708b36a8786 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2297,6 +2297,7 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue, ? 
B.CreateSExtOrTrunc(Index, StepTy) : B.CreateCast(Instruction::SIToFP, Index, StepTy); if (CastedIndex != Index) { + assert(!isa<SExtInst>(CastedIndex)); CastedIndex->setName(CastedIndex->getName() + ".cast"); Index = CastedIndex; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 59eccf135dbe43..4350b6f81e5580 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1339,9 +1339,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags { Type *getResultType() const { return ResultTy; } }; -/// VPScalarCastRecipe is a recipe o create scalar cast instructions. -class VPScalarCastRecipe : public VPRecipeBase, public VPValue { - /// Cast instruction opcode. +/// VPScalarCastRecipe is a recipe to create scalar cast instructions. +class VPScalarCastRecipe : public VPSingleDefRecipe { Instruction::CastOps Opcode; /// Result type for the cast. @@ -1351,17 +1350,16 @@ class VPScalarCastRecipe : public VPRecipeBase, public VPValue { public: VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) - : VPRecipeBase(VPDef::VPScalarCastSC, {Op}), VPValue(this), + : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode), ResultTy(ResultTy) {} ~VPScalarCastRecipe() override = default; - VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC) + VP_CLASSOF_IMPL(VPDef::VPScalarCastSC) void execute(VPTransformState &State) override; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// Print the recipe. 
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index fe93bae09f0d42..36c8f8e77e9353 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -117,6 +117,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { switch (getVPDefID()) { case VPDerivedIVSC: case VPPredInstPHISC: + case VPScalarCastSC: return false; case VPInstructionSC: switch (cast<VPInstruction>(this)->getOpcode()) { @@ -164,8 +165,6 @@ bool VPRecipeBase::mayHaveSideEffects() const { auto *R = cast<VPReplicateRecipe>(this); return R->getUnderlyingInstr()->mayHaveSideEffects(); } - case VPScalarCastSC: - return false; default: return true; } @@ -1119,7 +1118,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { // Ensure step has the same type as that of scalar IV. Type *BaseIVTy = BaseIV->getType()->getScalarType(); - assert(BaseIVTy == Step->getType()); + assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!"); // We build scalar steps for both integer and floating-point induction // variables. Here, we determine the kind of arithmetic we will perform. 
@@ -1467,7 +1466,7 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { assert(vputils::onlyFirstLaneUsed(this) && - "Codegen only implemented for first lane only."); + "Codegen only implemented for first lane."); switch (Opcode) { case Instruction::SExt: case Instruction::ZExt: diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index bbbf2d3a965dbf..c85f7715feaa2a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -350,6 +350,7 @@ class VPDef { VPInterleaveSC, VPReductionSC, VPReplicateSC, + VPScalarCastSC, VPScalarIVStepsSC, VPVectorPointerSC, VPWidenCallSC, @@ -362,7 +363,6 @@ class VPDef { // START: Phi-like recipes. Need to be kept together. VPBlendSC, VPPredInstPHISC, - VPScalarCastSC, // START: SubclassID for recipes that inherit VPHeaderPHIRecipe. // VPHeaderPHIRecipe need to be kept together. 
VPCanonicalIVPHISC, >From 9988d78ee278fc6664c2f3c6073cddf88ca50755 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Mon, 22 Jan 2024 12:55:35 +0000 Subject: [PATCH 4/7] !fixup fix formatting --- llvm/lib/Transforms/Vectorize/VPlan.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4350b6f81e5580..6192dc09fe231d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1350,8 +1350,8 @@ class VPScalarCastRecipe : public VPSingleDefRecipe { public: VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) - : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), - Opcode(Opcode), ResultTy(ResultTy) {} + : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode), + ResultTy(ResultTy) {} ~VPScalarCastRecipe() override = default; >From 5500bdbe8c3576ad2b9f3f17166c4457d94bcb74 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Mon, 22 Jan 2024 21:51:21 +0000 Subject: [PATCH 5/7] !fixup address comments, use to truncate VPDerivedIVRecipe. --- .../Transforms/Vectorize/LoopVectorize.cpp | 7 ----- llvm/lib/Transforms/Vectorize/VPlan.h | 16 ++++------ .../Transforms/Vectorize/VPlanAnalysis.cpp | 2 ++ .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 18 +++++++----- .../Transforms/Vectorize/VPlanTransforms.cpp | 29 ++++++++++++++----- 5 files changed, 40 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 7d1708b36a8786..cf8e98cbc38a86 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2297,7 +2297,6 @@ emitTransformedIndex(IRBuilderBase &B, Value *Index, Value *StartValue, ? 
B.CreateSExtOrTrunc(Index, StepTy) : B.CreateCast(Instruction::SIToFP, Index, StepTy); if (CastedIndex != Index) { - assert(!isa<SExtInst>(CastedIndex)); CastedIndex->setName(CastedIndex->getName() + ".cast"); Index = CastedIndex; } @@ -9285,12 +9284,6 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present<BinaryOperator>(FPBinOp)); DerivedIV->setName("offset.idx"); - if (TruncResultTy) { - assert(TruncResultTy != DerivedIV->getType() && - Step->getType()->isIntegerTy() && - "Truncation requires an integer step"); - DerivedIV = State.Builder.CreateTrunc(DerivedIV, TruncResultTy); - } assert(DerivedIV != CanonicalIV && "IV didn't need transforming?"); State.set(this, DerivedIV, VPIteration(0, 0)); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 6192dc09fe231d..e6cbd81f062782 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1343,7 +1343,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags { class VPScalarCastRecipe : public VPSingleDefRecipe { Instruction::CastOps Opcode; - /// Result type for the cast. Type *ResultTy; Value *generate(VPTransformState &State, unsigned Part); @@ -1363,6 +1362,9 @@ class VPScalarCastRecipe : public VPSingleDefRecipe { void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override; #endif + + /// Returns the result type of the cast. + Type *getResultType() const { return ResultTy; } }; /// A recipe for widening Call instructions. @@ -2347,10 +2349,6 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe { /// an IV with different start and step values, using Start + CanonicalIV * /// Step. class VPDerivedIVRecipe : public VPSingleDefRecipe { - /// If not nullptr, the result of the induction will get truncated to - /// TruncResultTy. - Type *TruncResultTy; - /// Kind of the induction. 
const InductionDescriptor::InductionKind Kind; /// If not nullptr, the floating point induction binary operator. Must be set @@ -2359,10 +2357,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { public: VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, - VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, - Type *TruncResultTy) + VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), - TruncResultTy(TruncResultTy), Kind(IndDesc.getKind()), + Kind(IndDesc.getKind()), FPBinOp(dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())) { } @@ -2381,8 +2378,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe { #endif Type *getScalarType() const { - return TruncResultTy ? TruncResultTy - : getStartValue()->getLiveInIRValue()->getType(); + return getStartValue()->getLiveInIRValue()->getType(); } VPValue *getStartValue() const { return getOperand(0); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index d71b0703994450..515dc41a55ea1b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -231,6 +231,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { }) .Case<VPWidenCastRecipe>( [](const VPWidenCastRecipe *R) { return R->getResultType(); }) + .Case<VPScalarCastRecipe>( + [](const VPScalarCastRecipe *R) { return R->getResultType(); }) .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) { return R->getSCEV()->getType(); }); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 36c8f8e77e9353..afad4f068dd80f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1097,9 +1097,6 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, getCanonicalIV()->printAsOperand(O, SlotTracker); O << " * "; 
getStepValue()->printAsOperand(O, SlotTracker); - - if (TruncResultTy) - O << " (truncated to " << *TruncResultTy << ")"; } #endif @@ -1464,6 +1461,12 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) { + return C->isDefinedOutsideVectorRegions() || + isa<VPDerivedIVRecipe>(C->getOperand(0)) || + isa<VPCanonicalIVPHIRecipe>(C->getOperand(0)); +} + Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { assert(vputils::onlyFirstLaneUsed(this) && "Codegen only implemented for first lane."); @@ -1480,13 +1483,12 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { } void VPScalarCastRecipe ::execute(VPTransformState &State) { - bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) { - return Op->isDefinedOutsideVectorRegions(); - }); + bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(this); for (unsigned Part = 0; Part != State.UF; ++Part) { Value *Res; - // Only generate a single instance, if the recipe is uniform across all UFs. - if (Part > 0 && UniformAcrossUFs) + // Only generate a single instance, if the recipe is uniform across UFs and + // VFs. + if (Part > 0 && IsUniformAcrossVFsAndUFs) Res = State.get(this, VPIteration(0, 0)); else Res = generate(State, Part); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index c485eadf9e0f36..49fc6431bb2b44 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -498,16 +498,31 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); Type *TruncTy = TruncI ? 
TruncI->getType() : IVTy; VPValue *BaseIV = CanonicalIV; + VPTypeAnalysis TypeInfo(SE.getContext()); + Type *StepTy = TypeInfo.inferScalarType(Step); if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, TruncTy)) { - BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step, - TruncI ? TruncI->getType() : nullptr); - HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); + // If the induction needs transforming besides truncating, create a + // VPDerivedIVRecipe. + if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, IVTy)) { + BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); + HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); + } + if (TypeInfo.inferScalarType(BaseIV) != TruncTy) { + assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() > + TruncTy->getScalarSizeInBits() && + StepTy->isIntegerTy() && "Truncation requires an integer step"); + auto *T = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy); + HeaderVPBB->insert(T, IP); + BaseIV = T; + } } - VPTypeAnalysis TypeInfo(SE.getContext()); - if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) { - Step = new VPScalarCastRecipe(Instruction::Trunc, Step, - TypeInfo.inferScalarType(BaseIV)); + Type *BaseIVTy = TypeInfo.inferScalarType(BaseIV); + if (BaseIVTy != StepTy) { + assert(StepTy->getScalarSizeInBits() > BaseIVTy->getScalarSizeInBits() && + "Not truncating."); + + Step = new VPScalarCastRecipe(Instruction::Trunc, Step, BaseIVTy); auto *VecPreheader = cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()); VecPreheader->appendRecipe(Step->getDefiningRecipe()); >From f1f1eff0dfe84b2fc34d65460fb62480fbb2b9fb Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Thu, 25 Jan 2024 11:28:48 +0000 Subject: [PATCH 6/7] !fixup address latest comments, thanks! 
--- llvm/lib/Transforms/Vectorize/VPlan.h | 5 +-- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 13 ++++-- .../Transforms/Vectorize/VPlanTransforms.cpp | 40 +++++++++---------- .../interleave-and-scalarize-only.ll | 4 +- 4 files changed, 32 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index e6cbd81f062782..6715f73e3fa20d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2283,10 +2283,9 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { } /// Check if the induction described by \p Kind, /p Start and \p Step is - /// canonical, i.e. has the same start, step (of 1), and type as the - /// canonical IV. + /// canonical, i.e. has the same start and step (of 1) as the canonical IV. bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, - VPValue *Step, Type *Ty) const; + VPValue *Step) const; }; /// A recipe for generating the active lane mask for the vector loop that is diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index afad4f068dd80f..f993844c3d898b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1461,6 +1461,11 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +/// Checks if \p C is uniform across all VFs and UFs. It is considered as such +/// if it is either defined outside the vector region or its operand is known to +/// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI). +/// TODO: Uniformity should be associated with a VPValue and there should be a +/// generic way to check. 
static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) { return C->isDefinedOutsideVectorRegions() || isa<VPDerivedIVRecipe>(C->getOperand(0)) || @@ -1625,10 +1630,10 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, #endif bool VPCanonicalIVPHIRecipe::isCanonical( - InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step, - Type *Ty) const { - // The types must match and it must be an integer induction. - if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction) + InductionDescriptor::InductionKind Kind, VPValue *Start, + VPValue *Step) const { + // Must be an integer induction. + if (Kind != InductionDescriptor::IK_IntInduction) return false; // Start must match the start value of this canonical induction. if (Start != getStartValue()) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 49fc6431bb2b44..a50e0c8bcd8296 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -491,32 +491,30 @@ void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, ScalarEvolution &SE, Instruction *TruncI, - Type *IVTy, VPValue *StartV, - VPValue *Step) { + VPValue *StartV, VPValue *Step) { VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); auto IP = HeaderVPBB->getFirstNonPhi(); VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); - Type *TruncTy = TruncI ? TruncI->getType() : IVTy; VPValue *BaseIV = CanonicalIV; VPTypeAnalysis TypeInfo(SE.getContext()); Type *StepTy = TypeInfo.inferScalarType(Step); - if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, TruncTy)) { - // If the induction needs transforming besides truncating, create a - // VPDerivedIVRecipe. 
- if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step, IVTy)) { - BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); - HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); - } - if (TypeInfo.inferScalarType(BaseIV) != TruncTy) { - assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() > - TruncTy->getScalarSizeInBits() && - StepTy->isIntegerTy() && "Truncation requires an integer step"); - auto *T = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy); - HeaderVPBB->insert(T, IP); - BaseIV = T; - } + if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) { + BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); + HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); + } + + // Truncate base induction if needed. + if (TruncI) { + Type *TruncTy = TruncI->getType(); + assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() > + TruncTy->getScalarSizeInBits() && + StepTy->isIntegerTy() && "Truncation requires an integer step"); + auto *T = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy); + HeaderVPBB->insert(T, IP); + BaseIV = T; } + // Truncate step if needed. Type *BaseIVTy = TypeInfo.inferScalarType(BaseIV); if (BaseIVTy != StepTy) { assert(StepTy->getScalarSizeInBits() > BaseIVTy->getScalarSizeInBits() && @@ -547,9 +545,9 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) { continue; const InductionDescriptor &ID = WideIV->getInductionDescriptor(); - VPValue *Steps = createScalarIVSteps( - Plan, ID, SE, WideIV->getTruncInst(), WideIV->getPHINode()->getType(), - WideIV->getStartValue(), WideIV->getStepValue()); + VPValue *Steps = + createScalarIVSteps(Plan, ID, SE, WideIV->getTruncInst(), + WideIV->getStartValue(), WideIV->getStepValue()); // Update scalar users of IV to use Step instead. 
if (!HasOnlyVectorVFs) diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index c3bf2eee1dfc88..81cc2024bb31a5 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -191,8 +191,8 @@ exit: ; DBG-NEXT: vector.body: ; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]> -; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32) -; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[CAST]]> +; DBG-NEXT: SCALAR-CAST vp<[[TRUNC_IV:%.+]]> = trunc vp<[[CAN_IV]]> to i32 +; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[TRUNC_IV]]>, vp<[[CAST]]> ; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]> ; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst> ; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> >From d38223249db3da1dc85b19c29a2488a3532f44eb Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Thu, 25 Jan 2024 17:56:40 +0000 Subject: [PATCH 7/7] !fixup address latest comments, thanks! --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 1 + .../Transforms/Vectorize/VPlanTransforms.cpp | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index f993844c3d898b..dd9d211ef87bc2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1479,6 +1479,7 @@ Value *VPScalarCastRecipe ::generate(VPTransformState &State, unsigned Part) { case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: { + // Note: SExt/ZExt not used yet. 
Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a50e0c8bcd8296..4173e11380cbb6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -495,15 +495,15 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock(); auto IP = HeaderVPBB->getFirstNonPhi(); VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV(); - VPValue *BaseIV = CanonicalIV; - VPTypeAnalysis TypeInfo(SE.getContext()); - Type *StepTy = TypeInfo.inferScalarType(Step); + VPSingleDefRecipe *BaseIV = CanonicalIV; if (!CanonicalIV->isCanonical(ID.getKind(), StartV, Step)) { BaseIV = new VPDerivedIVRecipe(ID, StartV, CanonicalIV, Step); - HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); + HeaderVPBB->insert(BaseIV, IP); } // Truncate base induction if needed. + VPTypeAnalysis TypeInfo(SE.getContext()); + Type *StepTy = TypeInfo.inferScalarType(Step); if (TruncI) { Type *TruncTy = TruncI->getType(); assert(TypeInfo.inferScalarType(BaseIV)->getScalarSizeInBits() > @@ -515,12 +515,12 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, } // Truncate step if needed. 
- Type *BaseIVTy = TypeInfo.inferScalarType(BaseIV); - if (BaseIVTy != StepTy) { - assert(StepTy->getScalarSizeInBits() > BaseIVTy->getScalarSizeInBits() && - "Not truncating."); + Type *ResultTy = TypeInfo.inferScalarType(BaseIV); + if (ResultTy != StepTy) { + assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() && + StepTy->isIntegerTy() && "Truncation requires an integer step"); - Step = new VPScalarCastRecipe(Instruction::Trunc, Step, BaseIVTy); + Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy); auto *VecPreheader = cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()); VecPreheader->appendRecipe(Step->getDefiningRecipe()); _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits