[llvm-branch-commits] [llvm] 36710c3 - [NFC]Migrate VectorCombine.cpp to use InstructionCost

2021-01-18 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2021-01-18T13:37:21Z
New Revision: 36710c38c1b741ff9cc70060893d53fc24c07833

URL: 
https://github.com/llvm/llvm-project/commit/36710c38c1b741ff9cc70060893d53fc24c07833
DIFF: 
https://github.com/llvm/llvm-project/commit/36710c38c1b741ff9cc70060893d53fc24c07833.diff

LOG: [NFC]Migrate VectorCombine.cpp to use InstructionCost

This patch changes these functions:
vectorizeLoadInsert
isExtractExtractCheap
foldExtractedCmps
scalarizeBinopOrCmp
getShuffleExtract
foldBitcastShuf
to use the class InstructionCost when calling TTI.getCost().

This patch is part of a series of patches to use InstructionCost instead of
 unsigned/int for the cost model functions.
See this thread for context:
http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html
See this patch for the introduction of the type:
https://reviews.llvm.org/D91174

P.S.: This patch adds the check || !NewCost.isValid(), because we want to
return false when:
 !NewCost.isValid() && !OldCost.isValid() -> the cost of the transform is expensive
and
 !NewCost.isValid() && OldCost.isValid()
Therefore, for simplification, we only add a test for !NewCost.isValid()

Differential Revision: https://reviews.llvm.org/D94069

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index a865f88cba74..787f146bdddc 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -182,20 +182,22 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // Use the greater of the alignment on the load or its source pointer.
   Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
   Type *LoadTy = Load->getType();
-  int OldCost = TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
+  InstructionCost OldCost =
+  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
   APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
   OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
   /* Insert */ true, HasExtract);
 
   // New pattern: load VecPtr
-  int NewCost = TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, 
AS);
+  InstructionCost NewCost =
+  TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
   // Optionally, we are shuffling the loaded vector element(s) into place.
   if (OffsetEltIndex)
 NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy);
 
   // We can aggressively convert to the vector form because the backend can
   // invert this transform if it does not result in a performance win.
-  if (OldCost < NewCost)
+  if (OldCost < NewCost || !NewCost.isValid())
 return false;
 
   // It is safe and potentially profitable to load a vector directly:
@@ -239,8 +241,14 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
 
   Type *VecTy = Ext0->getVectorOperand()->getType();
   assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching 
types");
-  int Cost0 = TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
-  int Cost1 = TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
+  InstructionCost Cost0 =
+  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
+  InstructionCost Cost1 =
+  TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
+
+  // If both costs are invalid no shuffle is needed
+  if (!Cost0.isValid() && !Cost1.isValid())
+return nullptr;
 
   // We are extracting from 2 
diff erent indexes, so one operand must be shuffled
   // before performing a vector operation and/or extract. The more expensive
@@ -276,7 +284,7 @@ bool 
VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
  "Expected constant extract indexes");
   Type *ScalarTy = Ext0->getType();
   auto *VecTy = cast(Ext0->getOperand(0)->getType());
-  int ScalarOpCost, VectorOpCost;
+  InstructionCost ScalarOpCost, VectorOpCost;
 
   // Get cost estimates for scalar and vector versions of the operation.
   bool IsBinOp = Instruction::isBinaryOp(Opcode);
@@ -297,9 +305,9 @@ bool 
VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
   unsigned Ext0Index = cast(Ext0->getOperand(1))->getZExtValue();
   unsigned Ext1Index = cast(Ext1->getOperand(1))->getZExtValue();
 
-  int Extract0Cost =
+  InstructionCost Extract0Cost =
   TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
-  int Extract1Cost =
+  InstructionCost Extract1Cost =
   TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
 
   // A more expensive extract will always be replaced by a splat shuffle.
@@ -309,11 +317,11 @@ bool 
VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
   // TODO: Evaluate whether that always results in lowest cost. Alternati

[llvm-branch-commits] [llvm] 172f1f8 - [AArch64][SVE]Add cost model for vector reduce for scalable vector

2021-01-19 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2021-01-19T11:54:16Z
New Revision: 172f1f8952c977c0101ba19e6ecb9474aa3bdd4b

URL: 
https://github.com/llvm/llvm-project/commit/172f1f8952c977c0101ba19e6ecb9474aa3bdd4b
DIFF: 
https://github.com/llvm/llvm-project/commit/172f1f8952c977c0101ba19e6ecb9474aa3bdd4b.diff

LOG: [AArch64][SVE]Add cost model for vector reduce for scalable vector

This patch computes the cost for vector.reduce for scalable vectors.
The cost is split into two parts:  the legalization cost and the horizontal
reduction.

Differential Revision: https://reviews.llvm.org/D93639

Added: 

llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vector-reduce.ll

Modified: 
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 9776c20400d6..3f016d85d8ed 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1288,15 +1288,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fmin:
 case Intrinsic::vector_reduce_umax:
 case Intrinsic::vector_reduce_umin: {
-  if (isa(RetTy))
-return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
   return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
 }
 case Intrinsic::vector_reduce_fadd:
 case Intrinsic::vector_reduce_fmul: {
-  if (isa(RetTy))
-return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   IntrinsicCostAttributes Attrs(
   IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
   return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp 
b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 68d382fb784b..ffa045846e59 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1096,11 +1096,70 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned 
Opcode, Type *Ty,
   return false;
 }
 
+int AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+   bool IsPairwise, bool IsUnsigned,
+   TTI::TargetCostKind CostKind) {
+  if (!isa(Ty))
+return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned,
+ CostKind);
+  assert((isa(Ty) && isa(CondTy)) &&
+ "Both vector needs to be scalable");
+
+  std::pair LT = TLI->getTypeLegalizationCost(DL, Ty);
+  int LegalizationCost = 0;
+  if (LT.first > 1) {
+Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
+unsigned CmpOpcode =
+Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
+LegalizationCost =
+getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
+   CmpInst::BAD_ICMP_PREDICATE, CostKind) +
+getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
+   CmpInst::BAD_ICMP_PREDICATE, CostKind);
+LegalizationCost *= LT.first - 1;
+  }
+
+  return LegalizationCost + /*Cost of horizontal reduction*/ 2;
+}
+
+int AArch64TTIImpl::getArithmeticReductionCostSVE(
+unsigned Opcode, VectorType *ValTy, bool IsPairwise,
+TTI::TargetCostKind CostKind) {
+  assert(!IsPairwise && "Cannot be pair wise to continue");
+
+  std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy);
+  int LegalizationCost = 0;
+  if (LT.first > 1) {
+Type *LegalVTy = EVT(LT.second).getTypeForEVT(ValTy->getContext());
+LegalizationCost = getArithmeticInstrCost(Opcode, LegalVTy, CostKind);
+LegalizationCost *= LT.first - 1;
+  }
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  assert(ISD && "Invalid opcode");
+  // Add the final reduction cost for the legal horizontal reduction
+  switch (ISD) {
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::FADD:
+return LegalizationCost + 2;
+  default:
+// TODO: Replace for invalid when InstructionCost is used
+// cases not supported by SVE
+return 16;
+  }
+}
+
 int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
VectorType *ValTy,
bool IsPairwiseForm,
TTI::TargetCostKind CostKind) {
 
+  if (isa(ValTy))
+return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm,
+ CostKind);
   if (IsPairwiseForm)
 return BaseT::getArithmeticReductionCost(Opcode, ValTy,

[llvm-branch-commits] [llvm] 060cfd9 - [AArch64][SVE]Add cost model for masked gather and scatter for scalable vector.

2021-01-04 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2021-01-04T13:59:58Z
New Revision: 060cfd97954835c3be18e47c631d3efb3e374439

URL: 
https://github.com/llvm/llvm-project/commit/060cfd97954835c3be18e47c631d3efb3e374439
DIFF: 
https://github.com/llvm/llvm-project/commit/060cfd97954835c3be18e47c631d3efb3e374439.diff

LOG: [AArch64][SVE]Add cost model for masked gather and scatter for scalable 
vector.

A new TTI interface has been added, 'Optional<unsigned> getMaxVScale()', that
returns the maximum vscale for a given target.
When known, getMaxVScale is used to compute the cost of masked gather/scatter
for scalable vectors.

Depends on D92094

Differential Revision: https://reviews.llvm.org/D93030

Added: 
llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll
llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll

Modified: 
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Removed: 




diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h 
b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0953a3b3f451..d9d04429b181 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -928,6 +928,10 @@ class TargetTransformInfo {
   /// \return The width of the smallest vector register type.
   unsigned getMinVectorRegisterBitWidth() const;
 
+  /// \return The maximum value of vscale if the target specifies an
+  ///  architectural maximum vector length, and None otherwise.
+  Optional getMaxVScale() const;
+
   /// \return True if the vectorization factor should be chosen to
   /// make the vector of the smallest element type match the size of a
   /// vector register. For wider element types, this could result in
@@ -1504,6 +1508,7 @@ class TargetTransformInfo::Concept {
   virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
   virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
   virtual unsigned getMinVectorRegisterBitWidth() = 0;
+  virtual Optional getMaxVScale() const = 0;
   virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
   virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
   virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
@@ -1921,6 +1926,9 @@ class TargetTransformInfo::Model final : public 
TargetTransformInfo::Concept {
   unsigned getMinVectorRegisterBitWidth() override {
 return Impl.getMinVectorRegisterBitWidth();
   }
+  Optional getMaxVScale() const override {
+return Impl.getMaxVScale();
+  }
   bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
 return Impl.shouldMaximizeVectorBandwidth(OptSize);
   }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 
b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 620bfb885b54..ef0653d0d9f4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -369,6 +369,8 @@ class TargetTransformInfoImplBase {
 
   unsigned getMinVectorRegisterBitWidth() const { return 128; }
 
+  Optional getMaxVScale() const { return None; }
+
   bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
 
   unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 02f1b73226fc..9776c20400d6 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -571,6 +571,8 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 
   unsigned getRegisterBitWidth(bool Vector) const { return 32; }
 
+  Optional getMaxVScale() const { return None; }
+
   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
   /// are set if the demanded result elements need to be inserted and/or
   /// extracted from vectors.
@@ -1239,8 +1241,6 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   return thisT()->getMemcpyCost(ICA.getInst());
 
 case Intrinsic::masked_scatter: {
-  if (isa(RetTy))
-return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   assert(VF.isScalar() && "Can't vectorize types here.");
   const Value *Mask = Args[3];
   bool VarMask = !isa(Mask);
@@ -1250,8 +1250,6 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
  VarMask, Alignment, CostKind, I);
 }
 case Intrinsic::masked_gather: {
-  if (isa(RetTy))
-return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   assert(VF.isScalar() && "Can't v

[llvm-branch-commits] [llvm] 01c190e - [AArch64][CostModel]Fix gather scatter cost model

2021-01-07 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2021-01-07T14:02:08Z
New Revision: 01c190e907ca4752f7ba2a1390a8c91a48b322d1

URL: 
https://github.com/llvm/llvm-project/commit/01c190e907ca4752f7ba2a1390a8c91a48b322d1
DIFF: 
https://github.com/llvm/llvm-project/commit/01c190e907ca4752f7ba2a1390a8c91a48b322d1.diff

LOG: [AArch64][CostModel]Fix gather scatter cost model

This patch fixes a bug introduced in the patch:
https://reviews.llvm.org/D93030

This patch moves the check for scalable vectors so that it is the first thing
to be checked. This prevents the AArch64 gather/scatter cost model from
computing the number of vector elements for something that is not a vector,
which would otherwise crash.

Added: 


Modified: 
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll
llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll

Removed: 




diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp 
b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aaf7371c7933..68d382fb784b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -773,13 +773,13 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool 
IsZeroCmp) const {
 unsigned AArch64TTIImpl::getGatherScatterOpCost(
 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
 Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
+
+  if (!isa(DataTy))
+return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment, CostKind, I);
   auto *VT = cast(DataTy);
   auto LT = TLI->getTypeLegalizationCost(DL, DataTy);
   ElementCount LegalVF = LT.second.getVectorElementCount();
-  if (!LegalVF.isScalable())
-return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
-
   Optional MaxNumVScale = getMaxVScale();
   assert(MaxNumVScale && "Expected valid max vscale value");
 

diff  --git 
a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll 
b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll
index 38b41b731dd0..83e6ab9932b0 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll
@@ -32,6 +32,18 @@ define <4 x i32> @masked_gather_v4i32(<4 x i32*> %ld, <4 x 
i1> %masks, <4 x i32>
   ret <4 x i32> %res
 }
 
+; Check it properly falls back to BasicTTIImpl when legalized MVT is not a 
vector
+define <1 x i128> @masked_gather_v1i128(<1 x i128*> %ld, <1 x i1> %masks, <1 x 
i128> %passthru) {
+; CHECK-LABEL: 'masked_gather_v1i128'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   %res 
= call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> %ld, i32 0, 
<1 x i1> %masks, <1 x i128> %passthru)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret 
<1 x i128> %res
+
+  %res = call <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*> %ld, 
i32 0, <1 x i1> %masks, <1 x i128> %passthru)
+  ret <1 x i128> %res
+}
+
 declare  @llvm.masked.gather.nxv4i32( 
%ptrs, i32 %align,  %masks,  %passthru)
 declare  @llvm.masked.gather.nxv8i32( 
%ptrs, i32 %align,  %masks,  %passthru)
 declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 %align, <4 x 
i1> %masks, <4 x i32> %passthru)
+declare <1 x i128> @llvm.masked.gather.v1i128.v1p0i128(<1 x i128*>, i32, <1 x 
i1>, <1 x i128>)
+

diff  --git 
a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll 
b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll
index 4370922e4bf7..fa0002483a1f 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll
@@ -35,6 +35,17 @@ define void @masked_scatter_v4i32(<4 x i32> %data, <4 x 
i32*> %ptrs, <4 x i1> %m
   ret void
 }
 
+; Check it properly falls back to BasicTTIImpl when legalized MVT is not a 
vector
+define void @masked_scatter_v1i128(<1 x i128> %data, <1 x i128*> %ptrs, <1 x 
i1> %masks) {
+; CHECK-LABEL: 'masked_scatter_v1i128'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction:   call 
void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> %ptrs, 
i32 0, <1 x i1> %masks)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:   ret 
void
+
+  call void @llvm.masked.scatter.v1i128.v1p0i128(<1 x i128> %data, <1 x i128*> 
%ptrs, i32 0, <1 x i1> %masks)
+  ret void
+}
+
 declare void @llvm.masked.scatter.nxv4i32( %data,  %ptrs, i32 %align,  %masks)
 declare void @llvm.masked.scatter.nxv8i32( %data,  %ptrs, i32 %align,  %ma

[llvm-branch-commits] [llvm] 60e4698 - [CostModel]Replace FixedVectorType by VectorType in costgetIntrinsicInstrCost

2020-12-16 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2020-12-16T13:06:23Z
New Revision: 60e4698b9aba8d9a2b27ac8a636c95ad1f7d94e0

URL: 
https://github.com/llvm/llvm-project/commit/60e4698b9aba8d9a2b27ac8a636c95ad1f7d94e0
DIFF: 
https://github.com/llvm/llvm-project/commit/60e4698b9aba8d9a2b27ac8a636c95ad1f7d94e0.diff

LOG: [CostModel]Replace FixedVectorType by VectorType in 
costgetIntrinsicInstrCost

This patch replaces FixedVectorType with VectorType in getIntrinsicInstrCost
in BasicTTIImpl.h. It re-arranges the scalable-type early-return checks
and adds tests for scalable types.

Depends on D91532

Differential Revision: https://reviews.llvm.org/D92094

Added: 
llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll

Modified: 
llvm/include/llvm/CodeGen/BasicTTIImpl.h

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 05c5c835d74a..7dca7cd291c9 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1202,14 +1202,11 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 if (ICA.isTypeBasedOnly())
   return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
 
-// TODO: Handle scalable vectors?
 Type *RetTy = ICA.getReturnType();
-if (isa(RetTy))
-  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 
 ElementCount VF = ICA.getVectorFactor();
 ElementCount RetVF =
-(RetTy->isVectorTy() ? cast(RetTy)->getElementCount()
+(RetTy->isVectorTy() ? cast(RetTy)->getElementCount()
  : ElementCount::getFixed(1));
 assert((RetVF.isScalar() || VF.isScalar()) &&
"VF > 1 and RetVF is a vector type");
@@ -1238,6 +1235,8 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   return thisT()->getMemcpyCost(ICA.getInst());
 
 case Intrinsic::masked_scatter: {
+  if (isa(RetTy))
+return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   assert(VF.isScalar() && "Can't vectorize types here.");
   const Value *Mask = Args[3];
   bool VarMask = !isa(Mask);
@@ -1247,6 +1246,8 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
  VarMask, Alignment, CostKind, I);
 }
 case Intrinsic::masked_gather: {
+  if (isa(RetTy))
+return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   assert(VF.isScalar() && "Can't vectorize types here.");
   const Value *Mask = Args[2];
   bool VarMask = !isa(Mask);
@@ -1265,17 +1266,23 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 case Intrinsic::vector_reduce_fmin:
 case Intrinsic::vector_reduce_umax:
 case Intrinsic::vector_reduce_umin: {
+  if (isa(RetTy))
+return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
   return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
 }
 case Intrinsic::vector_reduce_fadd:
 case Intrinsic::vector_reduce_fmul: {
+  if (isa(RetTy))
+return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   IntrinsicCostAttributes Attrs(
   IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
   return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
 }
 case Intrinsic::fshl:
 case Intrinsic::fshr: {
+  if (isa(RetTy))
+return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   const Value *X = Args[0];
   const Value *Y = Args[1];
   const Value *Z = Args[2];
@@ -1316,6 +1323,9 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   return Cost;
 }
 }
+// TODO: Handle the remaining intrinsic with scalable vector type
+if (isa(RetTy))
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 
 // Assume that we need to scalarize this intrinsic.
 SmallVector Types;

diff  --git 
a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll 
b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
new file mode 100644
index ..484aa2a01130
--- /dev/null
+++ 
b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-cctz-ctlz.ll
@@ -0,0 +1,33 @@
+; Checks getIntrinsicInstrCost in BasicTTIImpl.h with SVE for CTLZ and CCTZ
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 
2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions 
on how to resolve it.
+; WARN-NOT: warning
+
+; Check for CTLZ
+
+define void  @ctlz_nxv4i32( %A) {
+; CHECK-LABEL: 'ctlz_nxv4i32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:   %1 = 
tail call  @llvm.ctlz.nxv4i32( %A, i1 true)
+; CHECK-NEXT: Cost Model:

[llvm-branch-commits] [llvm] 07217e0 - [CostModel]Migrate getTreeCost() to use InstructionCost

2020-12-16 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2020-12-16T13:08:37Z
New Revision: 07217e0a1b7e40bb9c4e9953f9c7366c84d30c93

URL: 
https://github.com/llvm/llvm-project/commit/07217e0a1b7e40bb9c4e9953f9c7366c84d30c93
DIFF: 
https://github.com/llvm/llvm-project/commit/07217e0a1b7e40bb9c4e9953f9c7366c84d30c93.diff

LOG: [CostModel]Migrate getTreeCost() to use InstructionCost

This patch changes the type of cost variables (for instance: Cost, ExtractCost,
SpillCost) to use InstructionCost.
This patch also changes the type of cost variables to InstructionCost in other
functions that use the result of getTreeCost()
This patch is part of a series of patches to use InstructionCost instead of
unsigned/int for the cost model functions.

See this thread for context:
http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

Depends on D91174

Differential Revision: https://reviews.llvm.org/D93049

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cda43521e9ba..c0d7d078a385 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -587,11 +587,11 @@ class BoUpSLP {
 
   /// \returns the cost incurred by unwanted spills and fills, caused by
   /// holding live values over call sites.
-  int getSpillCost() const;
+  InstructionCost getSpillCost() const;
 
   /// \returns the vectorization cost of the subtree that starts at \p VL.
   /// A negative number means that this is profitable.
-  int getTreeCost();
+  InstructionCost getTreeCost();
 
   /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
   /// the purpose of scheduling and extraction in the \p UserIgnoreLst.
@@ -3949,13 +3949,13 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const 
{
   return true;
 }
 
-int BoUpSLP::getSpillCost() const {
+InstructionCost BoUpSLP::getSpillCost() const {
   // Walk from the bottom of the tree to the top, tracking which values are
   // live. When we see a call instruction that is not part of our tree,
   // query TTI to see if there is a cost to keeping values live over it
   // (for example, if spills and fills are required).
   unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
-  int Cost = 0;
+  InstructionCost Cost = 0;
 
   SmallPtrSet LiveValues;
   Instruction *PrevInst = nullptr;
@@ -4031,8 +4031,8 @@ int BoUpSLP::getSpillCost() const {
   return Cost;
 }
 
-int BoUpSLP::getTreeCost() {
-  int Cost = 0;
+InstructionCost BoUpSLP::getTreeCost() {
+  InstructionCost Cost = 0;
   LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
 << VectorizableTree.size() << ".\n");
 
@@ -4062,7 +4062,7 @@ int BoUpSLP::getTreeCost() {
 }))
   continue;
 
-int C = getEntryCost(&TE);
+InstructionCost C = getEntryCost(&TE);
 Cost += C;
 LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
   << " for bundle that starts with " << *TE.Scalars[0]
@@ -4071,7 +4071,7 @@ int BoUpSLP::getTreeCost() {
   }
 
   SmallPtrSet ExtractCostCalculated;
-  int ExtractCost = 0;
+  InstructionCost ExtractCost = 0;
   for (ExternalUser &EU : ExternalUses) {
 // We only add extract cost once for the same scalar.
 if (!ExtractCostCalculated.insert(EU.Scalar).second)
@@ -4101,7 +4101,7 @@ int BoUpSLP::getTreeCost() {
 }
   }
 
-  int SpillCost = getSpillCost();
+  InstructionCost SpillCost = getSpillCost();
   Cost += SpillCost + ExtractCost;
 
 #ifndef NDEBUG
@@ -6009,10 +6009,10 @@ bool 
SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R,
 
   R.computeMinimumValueSizes();
 
-  int Cost = R.getTreeCost();
+  InstructionCost Cost = R.getTreeCost();
 
   LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << 
"\n");
-  if (Cost < -SLPCostThreshold) {
+  if (Cost.isValid() && Cost < -SLPCostThreshold) {
 LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n");
 
 using namespace ore;
@@ -6213,7 +6213,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R,
 
   bool Changed = false;
   bool CandidateFound = false;
-  int MinCost = SLPCostThreshold;
+  InstructionCost MinCost = SLPCostThreshold.getValue();
 
   bool CompensateUseCost =
   !InsertUses.empty() && llvm::all_of(InsertUses, [](const Value *V) {
@@ -6269,7 +6269,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R,
 continue;
 
   R.computeMinimumValueSizes();
-  int Cost = R.getTreeCost();
+  InstructionCost Cost = R.getTreeCost();
   CandidateFound = true;
   if (CompensateUseCost) {
 // TODO: Use TTI's getScalarizationOverhead for sequence of inserts
@@ -6299,7 +6299,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSL

[llvm-branch-commits] [llvm] be9184b - [SLPVectorizer]Migrate getEntryCost to return InstructionCost

2020-12-16 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2020-12-16T14:18:40Z
New Revision: be9184bc557ae4000cd785fe369347817e5cbad4

URL: 
https://github.com/llvm/llvm-project/commit/be9184bc557ae4000cd785fe369347817e5cbad4
DIFF: 
https://github.com/llvm/llvm-project/commit/be9184bc557ae4000cd785fe369347817e5cbad4.diff

LOG: [SLPVectorizer]Migrate getEntryCost to return InstructionCost

This patch also changes:
  the return type of getGatherCost and
  the signature of the debug function dumpTreeCosts
to use InstructionCost.

This patch is part of a series of patches to use InstructionCost instead of
unsigned/int for the cost model functions.

See this thread for context:
http://lists.llvm.org/pipermail/llvm-dev/2020-November/146408.html

See this patch for the introduction of the type:
https://reviews.llvm.org/D91174

Depends on D93049

Differential Revision: https://reviews.llvm.org/D93127

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c0d7d078a385..9ab89e091596 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -79,6 +79,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/InstructionCost.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1493,7 +1494,7 @@ class BoUpSLP {
   bool areAllUsersVectorized(Instruction *I) const;
 
   /// \returns the cost of the vectorizable entry.
-  int getEntryCost(TreeEntry *E);
+  InstructionCost getEntryCost(TreeEntry *E);
 
   /// This is the recursive part of buildTree.
   void buildTree_rec(ArrayRef Roots, unsigned Depth,
@@ -1515,13 +1516,14 @@ class BoUpSLP {
 
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars.
-  int getGatherCost(FixedVectorType *Ty,
-const DenseSet &ShuffledIndices) const;
+  InstructionCost
+  getGatherCost(FixedVectorType *Ty,
+const DenseSet &ShuffledIndices) const;
 
   /// \returns the scalarization cost for this list of values. Assuming that
   /// this subtree gets vectorized, we may need to extract the values from the
   /// roots. This method calculates the cost of extracting the values.
-  int getGatherCost(ArrayRef VL) const;
+  InstructionCost getGatherCost(ArrayRef VL) const;
 
   /// Set the Builder insert point to one after the last instruction in
   /// the bundle
@@ -1755,8 +1757,9 @@ class BoUpSLP {
   };
 
 #ifndef NDEBUG
-  void dumpTreeCosts(TreeEntry *E, int ReuseShuffleCost, int VecCost,
- int ScalarCost) const {
+  void dumpTreeCosts(TreeEntry *E, InstructionCost ReuseShuffleCost,
+ InstructionCost VecCost,
+ InstructionCost ScalarCost) const {
 dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
 dbgs() << "SLP: Costs:\n";
 dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
@@ -3423,7 +3426,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
   return {IntrinsicCost, LibCost};
 }
 
-int BoUpSLP::getEntryCost(TreeEntry *E) {
+InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
   ArrayRef VL = E->Scalars;
 
   Type *ScalarTy = VL[0]->getType();
@@ -3442,7 +3445,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
 
   unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
-  int ReuseShuffleCost = 0;
+  InstructionCost ReuseShuffleCost = 0;
   if (NeedToShuffleReuses) {
 ReuseShuffleCost =
 TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
@@ -3458,7 +3461,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
 allSameType(VL) && allSameBlock(VL)) {
   Optional ShuffleKind = isShuffle(VL);
   if (ShuffleKind.hasValue()) {
-int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
+InstructionCost Cost =
+TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
 for (auto *V : VL) {
   // If all users of instruction are going to be vectorized and this
   // instruction itself is not going to be vectorized, consider this
@@ -3490,7 +3494,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
 
 case Instruction::ExtractValue:
 case Instruction::ExtractElement: {
-  int DeadCost = 0;
+  InstructionCost DeadCost = 0;
   if (NeedToShuffleReuses) {
 unsigned Idx = 0;
 for (unsigned I : E->ReuseShuffleIndices) {
@@ -3565,7 +3569,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
 case Instruction::FPTrunc:
 case Instruction::BitCast: {
   Type *SrcTy = VL0->getO

[llvm-branch-commits] [llvm] 4b0ef2b - [NFC][CostModel]Extend class IntrinsicCostAttributes to use ElementCount Type

2020-12-01 Thread Caroline Concatto via llvm-branch-commits

Author: Caroline Concatto
Date: 2020-12-01T11:12:51Z
New Revision: 4b0ef2b075002f94e37dc2f28caf6b167052f93f

URL: 
https://github.com/llvm/llvm-project/commit/4b0ef2b075002f94e37dc2f28caf6b167052f93f
DIFF: 
https://github.com/llvm/llvm-project/commit/4b0ef2b075002f94e37dc2f28caf6b167052f93f.diff

LOG: [NFC][CostModel]Extend class IntrinsicCostAttributes to use ElementCount 
Type

This patch replaces the attribute `unsigned VF` in the class
IntrinsicCostAttributes with `ElementCount VF`.
This is a non-functional change that helps upcoming patches compute the cost
model for scalable vectors inside this class.

Differential Revision: https://reviews.llvm.org/D91532

Added: 


Modified: 
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h 
b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 9cb5fe78f418..af57176401b4 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -117,7 +117,7 @@ class IntrinsicCostAttributes {
   SmallVector ParamTys;
   SmallVector Arguments;
   FastMathFlags FMF;
-  unsigned VF = 1;
+  ElementCount VF = ElementCount::getFixed(1);
   // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
   // arguments and the return value will be computed based on types.
   unsigned ScalarizationCost = std::numeric_limits::max();
@@ -128,15 +128,10 @@ class IntrinsicCostAttributes {
   IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
 
   IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
-  unsigned Factor);
-  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
-  ElementCount Factor)
-  : IntrinsicCostAttributes(Id, CI, Factor.getKnownMinValue()) {
-assert(!Factor.isScalable());
-  }
+  ElementCount Factor);
 
   IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
-  unsigned Factor, unsigned ScalarCost);
+  ElementCount Factor, unsigned ScalarCost);
 
   IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
   ArrayRef Tys, FastMathFlags Flags);
@@ -159,7 +154,7 @@ class IntrinsicCostAttributes {
   Intrinsic::ID getID() const { return IID; }
   const IntrinsicInst *getInst() const { return II; }
   Type *getReturnType() const { return RetTy; }
-  unsigned getVectorFactor() const { return VF; }
+  ElementCount getVectorFactor() const { return VF; }
   FastMathFlags getFlags() const { return FMF; }
   unsigned getScalarizationCost() const { return ScalarizationCost; }
   const SmallVectorImpl &getArgs() const { return Arguments; }

diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h 
b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 0b6b2655e0d5..05c5c835d74a 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1207,11 +1207,12 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 if (isa(RetTy))
   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 
-unsigned VF = ICA.getVectorFactor();
-unsigned RetVF =
-(RetTy->isVectorTy() ? cast(RetTy)->getNumElements()
- : 1);
-assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+ElementCount VF = ICA.getVectorFactor();
+ElementCount RetVF =
+(RetTy->isVectorTy() ? cast(RetTy)->getElementCount()
+ : ElementCount::getFixed(1));
+assert((RetVF.isScalar() || VF.isScalar()) &&
+   "VF > 1 and RetVF is a vector type");
 const IntrinsicInst *I = ICA.getInst();
 const SmallVectorImpl &Args = ICA.getArgs();
 FastMathFlags FMF = ICA.getFlags();
@@ -1221,13 +1222,15 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
 
 case Intrinsic::cttz:
   // FIXME: If necessary, this should go in target-specific overrides.
-  if (VF == 1 && RetVF == 1 && getTLI()->isCheapToSpeculateCttz())
+  if (VF.isScalar() && RetVF.isScalar() &&
+  getTLI()->isCheapToSpeculateCttz())
 return TargetTransformInfo::TCC_Basic;
   break;
 
 case Intrinsic::ctlz:
   // FIXME: If necessary, this should go in target-specific overrides.
-  if (VF == 1 && RetVF == 1 && getTLI()->isCheapToSpeculateCtlz())
+  if (VF.isScalar() && RetVF.isScalar() &&
+  getTLI()->isCheapToSpeculateCtlz())
 return TargetTransformInfo::TCC_Basic;
   break;
 
@@ -1235,7 +1238,7 @@ class BasicTTIImplBase : public 
TargetTransformInfoImplCRTPBase {
   return thisT()->getMemcpyCost(ICA.get