https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/78113
>From 36b085f21b76d7bf7c9965a86a09d1cef4fe9329 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Sun, 14 Jan 2024 14:13:08 +0000 Subject: [PATCH] [VPlan] Add new VPUniformPerUFRecipe, use for step truncation. Add a new recipe to model uniform-per-UF instructions, without relying on an underlying instruction. Initially, it supports uniform cast-ops and therefore stores the result type. Not relying on an underlying instruction (as the current VPReplicateRecipe does) allows creating instances without a corresponding instruction. In the future, the plan is to extend this recipe to handle all opcodes needed to replace the uniform part of VPReplicateRecipe. --- llvm/lib/Transforms/Vectorize/VPlan.h | 30 ++++++++++++ .../Transforms/Vectorize/VPlanAnalysis.cpp | 6 ++- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 49 ++++++++++++++++--- .../Transforms/Vectorize/VPlanTransforms.cpp | 9 ++++ llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + .../LoopVectorize/cast-induction.ll | 4 +- .../interleave-and-scalarize-only.ll | 3 +- .../pr46525-expander-insertpoint.ll | 2 +- 8 files changed, 93 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4b4f4911eb6415..d5985224cccc48 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1945,6 +1945,36 @@ class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue { } }; +/// VPUniformPerUFRecipe represents an instruction with Opcode that is uniform +/// per UF, i.e. it generates a single scalar instance per UF. +/// TODO: at the moment, only Cast opcodes are supported, extend to support +/// missing opcodes to replace uniform part of VPReplicateRecipe. +class VPUniformPerUFRecipe : public VPRecipeBase, public VPValue { + unsigned Opcode; + + /// Result type for the cast. 
+ Type *ResultTy; + + Value *generate(VPTransformState &State, unsigned Part); + +public: + VPUniformPerUFRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) + : VPRecipeBase(VPDef::VPUniformPerUFSC, {Op}), VPValue(this), + Opcode(Opcode), ResultTy(ResultTy) {} + + ~VPUniformPerUFRecipe() override = default; + + VP_CLASSOF_IMPL(VPDef::VPUniformPerUFSC) + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A recipe for generating conditional branches on the bits of a mask. class VPBranchOnMaskRecipe : public VPRecipeBase { public: diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 97a8a1803bbf5a..d71b0703994450 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -230,7 +230,11 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { return V->getUnderlyingValue()->getType(); }) .Case<VPWidenCastRecipe>( - [](const VPWidenCastRecipe *R) { return R->getResultType(); }); + [](const VPWidenCastRecipe *R) { return R->getResultType(); }) + .Case<VPExpandSCEVRecipe>([](const VPExpandSCEVRecipe *R) { + return R->getSCEV()->getType(); + }); + assert(ResultTy && "could not infer type for the given VPValue"); CachedTypes[V] = ResultTy; return ResultTy; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1f844bce23102e..423504e8f7e05e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -164,6 +164,8 @@ bool VPRecipeBase::mayHaveSideEffects() const { auto *R = cast<VPReplicateRecipe>(this); return R->getUnderlyingInstr()->mayHaveSideEffects(); } + case VPUniformPerUFSC: + return false; default: return true; } @@ 
-1117,13 +1119,7 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) { // Ensure step has the same type as that of scalar IV. Type *BaseIVTy = BaseIV->getType()->getScalarType(); - if (BaseIVTy != Step->getType()) { - // TODO: Also use VPDerivedIVRecipe when only the step needs truncating, to - // avoid separate truncate here. - assert(Step->getType()->isIntegerTy() && - "Truncation requires an integer step"); - Step = State.Builder.CreateTrunc(Step, BaseIVTy); - } + assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!"); // We build scalar steps for both integer and floating-point induction // variables. Here, we determine the kind of arithmetic we will perform. @@ -1469,6 +1465,45 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +Value *VPUniformPerUFRecipe ::generate(VPTransformState &State, unsigned Part) { + switch (Opcode) { + case Instruction::SExt: + case Instruction::ZExt: + case Instruction::Trunc: { + Value *Op = State.get(getOperand(0), VPIteration(Part, 0)); + return State.Builder.CreateCast(Instruction::CastOps(Opcode), Op, ResultTy); + } + default: + llvm_unreachable("opcode not implemented yet"); + } +} + +void VPUniformPerUFRecipe ::execute(VPTransformState &State) { + bool UniformAcrossUFs = all_of(operands(), [](VPValue *Op) { + return Op->isDefinedOutsideVectorRegions(); + }); + for (unsigned Part = 0; Part != State.UF; ++Part) { + Value *Res; + // Only generate a single instance, if the recipe is uniform across all UFs. 
+ if (Part > 0 && UniformAcrossUFs) + Res = State.get(this, VPIteration(0, 0)); + else + Res = generate(State, Part); + State.set(this, Res, VPIteration(Part, 0)); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPUniformPerUFRecipe ::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "UNIFORM-PER-UF "; + printAsOperand(O, SlotTracker); + O << " = " << Instruction::getOpcodeName(Opcode) << " "; + printOperands(O, SlotTracker); + O << " to " << *ResultTy; +} +#endif + void VPBranchOnMaskRecipe::execute(VPTransformState &State) { assert(State.Instance && "Branch on Mask works only on single instance."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b3694e74a38509..6ba8901e76aa50 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -505,6 +505,15 @@ static VPValue *createScalarIVSteps(VPlan &Plan, const InductionDescriptor &ID, HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP); } + VPTypeAnalysis TypeInfo(SE.getContext()); + if (TypeInfo.inferScalarType(BaseIV) != TypeInfo.inferScalarType(Step)) { + Step = new VPUniformPerUFRecipe(Instruction::Trunc, Step, + TypeInfo.inferScalarType(BaseIV)); + auto *VecPreheader = + cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor()); + VecPreheader->appendRecipe(Step->getDefiningRecipe()); + } + VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(ID, BaseIV, Step); HeaderVPBB->insert(Steps, IP); return Steps; diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 8cc98f4abf933e..009edea39a3c43 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -362,6 +362,7 @@ class VPDef { // START: Phi-like recipes. Need to be kept together. 
VPBlendSC, VPPredInstPHISC, + VPUniformPerUFSC, // START: SubclassID for recipes that inherit VPHeaderPHIRecipe. // VPHeaderPHIRecipe need to be kept together. VPCanonicalIVPHISC, diff --git a/llvm/test/Transforms/LoopVectorize/cast-induction.ll b/llvm/test/Transforms/LoopVectorize/cast-induction.ll index c5edf9831d7d90..4121a1399c47f5 100644 --- a/llvm/test/Transforms/LoopVectorize/cast-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/cast-induction.ll @@ -83,12 +83,14 @@ define void @cast_variable_step(i64 %step) { ; VF4: middle.block: ; ; IC2-LABEL: @cast_variable_step( +; IC2: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32 +; IC2: br label %vector.body + ; IC2-LABEL: vector.body: ; IC2-NEXT: [[INDEX:%.+]] = phi i64 [ 0, %vector.ph ] ; IC2-NEXT: [[MUL:%.+]] = mul i64 %index, %step ; IC2-NEXT: [[OFFSET_IDX:%.+]] = add i64 10, [[MUL]] ; IC2-NEXT: [[TRUNC_OFF:%.+]] = trunc i64 [[OFFSET_IDX]] to i32 -; IC2-NEXT: [[TRUNC_STEP:%.+]] = trunc i64 %step to i32 ; IC2-NEXT: [[STEP0:%.+]] = mul i32 0, [[TRUNC_STEP]] ; IC2-NEXT: [[T0:%.+]] = add i32 [[TRUNC_OFF]], [[STEP0]] ; IC2-NEXT: [[STEP1:%.+]] = mul i32 1, [[TRUNC_STEP]] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 297cd2a7c12f9a..6410a556589f94 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -184,6 +184,7 @@ exit: ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: vector.ph: +; DBG-NEXT: UNIFORM-PER-UF vp<[[CAST:%.+]]> = trunc ir<1> to i32 ; DBG-NEXT: Successor(s): vector loop ; DBG-EMPTY: ; DBG-NEXT: <x1> vector loop: { @@ -191,7 +192,7 @@ exit: ; DBG-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ; DBG-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, vp<[[SCALAR_STEPS:.+]]> ; DBG-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<1> (truncated to i32) -; DBG-NEXT: 
vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, ir<1> +; DBG-NEXT: vp<[[SCALAR_STEPS]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[CAST]]> ; DBG-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[SCALAR_STEPS]]> ; DBG-NEXT: CLONE store vp<[[SPLICE]]>, ir<%dst> ; DBG-NEXT: EMIT vp<[[IV_INC:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> diff --git a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll index ea3de4a0fbb363..f0220f5e766b23 100644 --- a/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll +++ b/llvm/test/Transforms/LoopVectorize/pr46525-expander-insertpoint.ll @@ -43,7 +43,7 @@ define void @test(i16 %x, i64 %y, ptr %ptr) { ; CHECK-NEXT: [[V3:%.*]] = add i8 [[V2]], 1 ; CHECK-NEXT: [[CMP15:%.*]] = icmp slt i8 [[V3]], 5 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], [[INC]] -; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP15]], label [[LOOP]], label [[LOOP_EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.exit: ; CHECK-NEXT: [[DIV_1:%.*]] = udiv i64 [[Y]], [[ADD]] ; CHECK-NEXT: [[V1:%.*]] = add i64 [[DIV_1]], 1 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits