[llvm-branch-commits] [llvm] b2165f2 - [CostModel] Account for power-2 urem in funnel shift costs (#127037)

via llvm-branch-commits Tue, 18 Feb 2025 23:23:03 -0800

Author: David Green
Date: 2025-02-13T16:05:00Z
New Revision: b2165f214efab833a4b1a9e8268b1030fc5ebaeb


URL: 
https://github.com/llvm/llvm-project/commit/b2165f214efab833a4b1a9e8268b1030fc5ebaeb
DIFF: 
https://github.com/llvm/llvm-project/commit/b2165f214efab833a4b1a9e8268b1030fc5ebaeb.diff

LOG: [CostModel] Account for power-2 urem in funnel shift costs (#127037)

As can be seen in https://godbolt.org/z/qvMqY79cK, a urem by a
power-of-2 constant is code-generated as an And with a mask. The cost
model for funnel shifts tries to account for that by passing OP_PowerOf2
as the operand info for the second operand. As far as I can tell,
though, returning a lower cost for a urem with an OP_PowerOf2 operand is
only implemented on X86.

This patch short-cuts that by calling getArithmeticInstrCost(And, ..)
directly when the type size is known to be a power of 2. This is an
alternative to the patch in #126912, which is a more general solution
for power-of-2 udiv/urem costs; this patch more narrowly fixes just
funnel shifts.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/test/Analysis/CostModel/AArch64/fshl.ll
    llvm/test/Analysis/CostModel/AArch64/fshr.ll
    llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
    llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 339b83637fa8f..c63d288ad1579 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1891,10 +1891,6 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase<T> {
       const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X);
       const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y);
       const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);
-      const TTI::OperandValueInfo OpInfoBW =
-        {TTI::OK_UniformConstantValue,
-         isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
-         : TTI::OP_None};
 
       // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
       // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
@@ -1909,10 +1905,15 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase<T> {
       Cost += thisT()->getArithmeticInstrCost(
           BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
           {OpInfoZ.Kind, TTI::OP_None});
-      // Non-constant shift amounts requires a modulo.
+      // Non-constant shift amounts requires a modulo. If the typesize is a
+      // power-2 then this will be converted to an and, otherwise it will use a
+      // urem.
       if (!OpInfoZ.isConstant())
-        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
-                                                CostKind, OpInfoZ, OpInfoBW);
+        Cost += thisT()->getArithmeticInstrCost(
+            isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And
+                                                        : BinaryOperator::URem,
+            RetTy, CostKind, OpInfoZ,
+            {TTI::OK_UniformConstantValue, TTI::OP_None});
       // For non-rotates (X != Y) we must add shift-by-zero handling costs.
       if (X != Y) {
         Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -2611,8 +2612,14 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase<T> {
           thisT()->getArithmeticInstrCost(BinaryOperator::Shl, RetTy, 
CostKind);
       Cost += thisT()->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
                                               CostKind);
-      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
-                                              CostKind);
+      // Non-constant shift amounts requires a modulo. If the typesize is a
+      // power-2 then this will be converted to an and, otherwise it will use a
+      // urem.
+      Cost += thisT()->getArithmeticInstrCost(
+          isPowerOf2_32(RetTy->getScalarSizeInBits()) ? BinaryOperator::And
+                                                      : BinaryOperator::URem,
+          RetTy, CostKind, {TTI::OK_AnyValue, TTI::OP_None},
+          {TTI::OK_UniformConstantValue, TTI::OP_None});
       // Shift-by-zero handling.
       Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                           CmpInst::ICMP_EQ, CostKind);

diff  --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll 
b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index 632f26dfa5382..317adc96a74b6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -15,7 +15,7 @@ entry:
 
 define i8 @fshl_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: 'fshl_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshl 
= tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl 
= tail call i8 @llvm.fshl.i8(i8 %a, i8 %b, i8 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i8 %fshl
 ;
 entry:
@@ -49,7 +49,7 @@ entry:
 
 define i32 @fshl_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: 'fshl_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshl 
= tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl 
= tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i32 %fshl
 ;
 entry:
@@ -71,7 +71,7 @@ entry:
 
 define i64 @fshl_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: 'fshl_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: 
%fshl = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl 
= tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i64 %fshl
 ;
 entry:
@@ -116,7 +116,7 @@ entry:
 
 define <16 x i8> @fshl_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> 
%c) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: 
%fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x 
i8> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl 
= tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<16 x i8> %fshl
 ;
 entry:
@@ -148,7 +148,7 @@ entry:
 
 define <8 x i16> @fshl_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> 
%c) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: 
%fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x 
i16> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl 
= tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<8 x i16> %fshl
 ;
 entry:
@@ -180,7 +180,7 @@ entry:
 
 define <4 x i32> @fshl_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> 
%c) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x 
i32> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl 
= tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<4 x i32> %fshl
 ;
 entry:
@@ -212,7 +212,7 @@ entry:
 
 define <2 x i64> @fshl_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> 
%c) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: 
%fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x 
i64> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshl 
= tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<2 x i64> %fshl
 ;
 entry:

diff  --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll 
b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index a0a579ae96a9b..14f1f996fa174 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -15,7 +15,7 @@ entry:
 
 define i8 @fshr_i8_3rd_arg_var(i8 %a, i8 %b, i8 %c) {
 ; CHECK-LABEL: 'fshr_i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshr 
= tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr 
= tail call i8 @llvm.fshr.i8(i8 %a, i8 %b, i8 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i8 %fshr
 ;
 entry:
@@ -49,7 +49,7 @@ entry:
 
 define i32 @fshr_i32_3rd_arg_var(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: 'fshr_i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %fshr 
= tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr 
= tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i32 %fshr
 ;
 entry:
@@ -71,7 +71,7 @@ entry:
 
 define i64 @fshr_i64_3rd_arg_var(i64 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: 'fshr_i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: 
%fshr = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr 
= tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
i64 %fshr
 ;
 entry:
@@ -116,7 +116,7 @@ entry:
 
 define <16 x i8> @fshr_v16i8_3rd_arg_var(<16 x i8> %a, <16 x i8> %b, <16 x i8> 
%c) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: 
%fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x 
i8> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr 
= tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<16 x i8> %fshr
 ;
 entry:
@@ -148,7 +148,7 @@ entry:
 
 define <8 x i16> @fshr_v8i16_3rd_arg_var(<8 x i16> %a, <8 x i16> %b, <8 x i16> 
%c) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: 
%fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x 
i16> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr 
= tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<8 x i16> %fshr
 ;
 entry:
@@ -180,7 +180,7 @@ entry:
 
 define <4 x i32> @fshr_v4i32_3rd_arg_var(<4 x i32> %a, <4 x i32> %b, <4 x i32> 
%c) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: 
%fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x 
i32> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr 
= tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<4 x i32> %fshr
 ;
 entry:
@@ -212,7 +212,7 @@ entry:
 
 define <2 x i64> @fshr_v2i64_3rd_arg_var(<2 x i64> %a, <2 x i64> %b, <2 x i64> 
%c) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_var'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: 
%fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x 
i64> %c)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %fshr 
= tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
<2 x i64> %fshr
 ;
 entry:

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll 
b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 696dec91d93d2..0bf776b5c97e3 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -1011,24 +1011,24 @@ define void @get_lane_mask() #0 {
 
 define void @fshr() #0 {
 ; CHECK-VSCALE-1-LABEL: 'fshr'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for 
instruction: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'fshr'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for 
instruction: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'fshr'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 24 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 16 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 10 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 10 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for 
instruction: ret void
 ;
   call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale 
x 16 x i8> undef, <vscale x 16 x i8> undef)
@@ -1040,24 +1040,24 @@ define void @fshr() #0 {
 
 define void @fshl() #0 {
 ; CHECK-VSCALE-1-LABEL: 'fshl'
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-1-NEXT:  Cost Model: Found an estimated cost of 0 for 
instruction: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'fshl'
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 13 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 5 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; CHECK-VSCALE-2-NEXT:  Cost Model: Found an estimated cost of 0 for 
instruction: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'fshl'
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 24 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 16 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 10 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
-; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 10 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> 
undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> 
undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> 
undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef)
+; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 7 for 
instruction: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> 
undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef)
 ; TYPE_BASED_ONLY-NEXT:  Cost Model: Found an estimated cost of 0 for 
instruction: ret void
 ;
   call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale 
x 16 x i8> undef, <vscale x 16 x i8> undef)

diff  --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll 
b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 2823ab4b4f78e..66240c8255ad7 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -231,22 +231,22 @@ define void @ctlz(i32 %a, <16 x i32> %va) {
 define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 
x i32> %vc) {
 ; THRU-LABEL: 'fshl'
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = 
call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; THRU-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %v = 
call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> 
%vc)
+; THRU-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %v = 
call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> 
%vc)
 ; THRU-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret 
void
 ;
 ; LATE-LABEL: 'fshl'
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %s = 
call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; LATE-NEXT:  Cost Model: Found an estimated cost of 250 for instruction: %v = 
call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> 
%vc)
+; LATE-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v = 
call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> 
%vc)
 ; LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
 ;
 ; SIZE-LABEL: 'fshl'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %s = 
call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 229 for instruction: %v = 
call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> 
%vc)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 89 for instruction: %v = 
call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> 
%vc)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret 
void
 ;
 ; SIZE_LATE-LABEL: 'fshl'
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: 
%s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
-; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 250 for instruction: 
%v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x 
i32> %vc)
+; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: 
%v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x 
i32> %vc)
 ; SIZE_LATE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: 
ret void
 ;
   %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)


        
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] b2165f2 - [CostModel] Account for power-2 urem in funnel shift costs (#127037)

Reply via email to