Author: David Green Date: 2025-02-13T16:05:00Z New Revision: b2165f214efab833a4b1a9e8268b1030fc5ebaeb
URL: https://github.com/llvm/llvm-project/commit/b2165f214efab833a4b1a9e8268b1030fc5ebaeb DIFF: https://github.com/llvm/llvm-project/commit/b2165f214efab833a4b1a9e8268b1030fc5ebaeb.diff LOG: [CostModel] Account for power-2 urem in funnel shift costs (#127037) As can be seen in https://godbolt.org/z/qvMqY79cK, a urem by a power-2 constant will be code-generated as an And of a mask. The cost model for funnel shifts tries to account for that by passing OP_PowerOf2 as the operand info for the second operand. As far as I can tell, returning a lower cost for a urem with an OP_PowerOf2 operand is only implemented on X86, though. This patch short-cuts that by calling getArithmeticInstrCost(And, ..) directly when we know the typesize will be a power-of-2. This is an alternative to the patch in #126912, which is a more general solution for power-2 udiv/urem costs; this one more narrowly fixes just funnel shifts. Added: Modified: llvm/include/llvm/CodeGen/BasicTTIImpl.h llvm/test/Analysis/CostModel/AArch64/fshl.ll llvm/test/Analysis/CostModel/AArch64/fshr.ll llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll Removed: ################################################################################ diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 339b83637fa8f..c63d288ad1579 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1891,10 +1891,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X); const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y); const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z); - const TTI::OperandValueInfo OpInfoBW = - {TTI::OK_UniformConstantValue, - isPowerOf2_32(RetTy->getScalarSizeInBits()) ? 
TTI::OP_PowerOf2 - : TTI::OP_None}; // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) @@ -1909,10 +1905,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { Cost += thisT()->getArithmeticInstrCost( BinaryOperator::LShr, RetTy, CostKind, OpInfoY, {OpInfoZ.Kind, TTI::OP_None}); - // Non-constant shift amounts requires a modulo. + // Non-constant shift amounts requires a modulo. If the typesize is a + // power-2 then this will be converted to an and, otherwise it will use a + // urem. if (!OpInfoZ.isConstant()) - Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy, - CostKind, OpInfoZ, OpInfoBW); + Cost += thisT()->getArithmeticInstrCost( + isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And + : BinaryOperator::URem, + RetTy, CostKind, OpInfoZ, + {TTI::OK_UniformConstantValue, TTI::OP_None}); // For non-rotates (X != Y) we must add shift-by-zero handling costs. if (X != Y) { Type *CondTy = RetTy->getWithNewBitWidth(1); @@ -2611,8 +2612,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, CostKind); Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy, CostKind); - Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy, - CostKind); + // Non-constant shift amounts requires a modulo. If the typesize is a + // power-2 then this will be converted to an and, otherwise it will use a + // urem. + Cost += thisT()->getArithmeticInstrCost( + isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And + : BinaryOperator::URem, + RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None}, + {TTI::OK_UniformConstantValue, TTI::OP_None}); // Shift-by-zero handling. 
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, CmpInst::ICMP_EQ, CostKind); diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll index 632f26dfa5382..317adc96a74b6 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll @@ -15,7 +15,7 @@ entry: define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: 'fshl_i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshl ; entry: @@ -49,7 +49,7 @@ entry: define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: 'fshl_i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshl ; entry: @@ -71,7 +71,7 @@ entry: define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: 'fshl_i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshl ; entry: @@ -116,7 +116,7 @@ entry: define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 118 
for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl ; entry: @@ -148,7 +148,7 @@ entry: define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl ; entry: @@ -180,7 +180,7 @@ entry: define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl ; entry: @@ -212,7 +212,7 @@ entry: define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) ; CHECK-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll index a0a579ae96a9b..14f1f996fa174 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll @@ -15,7 +15,7 @@ entry: define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: 'fshr_i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i8 %fshr ; entry: @@ -49,7 +49,7 @@ entry: define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: 'fshr_i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %fshr ; entry: @@ -71,7 +71,7 @@ entry: define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: 'fshr_i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %fshr ; entry: @@ -116,7 +116,7 @@ entry: define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 118 for 
instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr ; entry: @@ -148,7 +148,7 @@ entry: define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr ; entry: @@ -180,7 +180,7 @@ entry: define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr ; entry: @@ -212,7 +212,7 @@ entry: define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var' -; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) ; CHECK-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr ; entry: diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 696dec91d93d2..0bf776b5c97e3 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -1011,24 +1011,24 @@ define void @get_lane_mask() #0 { define void @fshr() #0 { ; CHECK-VSCALE-1-LABEL: 'fshr' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'fshr' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'fshr' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) @@ -1040,24 +1040,24 @@ 
define void @fshr() #0 { define void @fshl() #0 { ; CHECK-VSCALE-1-LABEL: 'fshl' -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-VSCALE-2-LABEL: 'fshl' -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = call <vscale x 16 x i8> 
@llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'fshl' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %2 = call <vscale x 8 x 
i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll index 2823ab4b4f78e..66240c8255ad7 100644 --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -231,22 +231,22 @@ define void @ctlz(i32 %a, <16 x i32> %va) { define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x 
i32> %vb, <16 x i32> %vc) { ; THRU-LABEL: 'fshl' ; THRU-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; THRU-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; THRU-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'fshl' ; LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'fshl' ; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE-NEXT: Cost Model: Found an estimated cost of 229 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; SIZE-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'fshl' ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) +; 
SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits