[llvm-branch-commits] [llvm] 3122c66 - [AArch64][SVE] Remove chains of unnecessary SVE reinterpret intrinsics

2021-01-13 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2021-01-13T09:44:09Z
New Revision: 3122c66aee7b709046753873c4e94db73742b3de

URL: 
https://github.com/llvm/llvm-project/commit/3122c66aee7b709046753873c4e94db73742b3de
DIFF: 
https://github.com/llvm/llvm-project/commit/3122c66aee7b709046753873c4e94db73742b3de.diff

LOG: [AArch64][SVE] Remove chains of unnecessary SVE reinterpret intrinsics

This commit extends SVEIntrinsicOpts::optimizeConvertFromSVBool to
identify and remove longer chains of redundant SVE reinterpret
intrinsics. For example, the following chain of SVE reinterprets is now
recognised as redundant and removed:

%a = <vscale x 2 x i1> ...
%1 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 2 x i1> %a)
%2 = <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %1)
%3 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 4 x i1> %2)
%4 = <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %3)
%5 = <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool(<vscale x 4 x i1> %4)
%6 = <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool(<vscale x 16 x i1> %5)
ret <vscale x 2 x i1> %6

and will be replaced with:

ret <vscale x 2 x i1> %a

Eliminating these can sometimes mean emitting fewer unnecessary
loads/stores when lowering to assembly.
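
For contrast, a chain like the sketch below must not be folded, because the
intermediate <vscale x 2 x i1> value has fewer lanes than the final result, so
the extra lanes of the result are zeroed rather than taken from %a. This is an
illustrative example (the function name and exact types are not taken from the
patch); the new walk stops as soon as it sees such a narrower cursor:

    define <vscale x 4 x i1> @not_redundant(<vscale x 4 x i1> %a) {
      %1 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a)
      %2 = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %1)
      %3 = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %2)
      %4 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
      ret <vscale x 4 x i1> %4
    }

    declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
    declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)
    declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
    declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)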

Differential Revision: https://reviews.llvm.org/D94074

Added: 


Modified: 
llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll

Removed: 




diff  --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp 
b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 8e8b12c07bbf..9911f33371c6 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -177,22 +177,50 @@ bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) {
   if (isa<PHINode>(I->getArgOperand(0)))
 return processPhiNode(I);
 
-  // If we have a reinterpret intrinsic I of type A which is converting from
-  // another reinterpret Y of type B, and the source type of Y is A, then we can
-  // elide away both reinterprets if there are no other users of Y.
-  auto *Y = isReinterpretToSVBool(I->getArgOperand(0));
-  if (!Y)
-return false;
+  SmallVector CandidatesForRemoval;
+  Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr;
+
+  const auto *IVTy = cast(I->getType());
+
+  // Walk the chain of conversions.
+  while (Cursor) {
+// If the type of the cursor has fewer lanes than the final result, zeroing
+// must take place, which breaks the equivalence chain.
+const auto *CursorVTy = cast(Cursor->getType());
+if (CursorVTy->getElementCount().getKnownMinValue() <
+IVTy->getElementCount().getKnownMinValue())
+  break;
+
+// If the cursor has the same type as I, it is a viable replacement.
+if (Cursor->getType() == IVTy)
+  EarliestReplacement = Cursor;
 
-  Value *SourceVal = Y->getArgOperand(0);
-  if (I->getType() != SourceVal->getType())
+auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor);
+
+// If this is not an SVE conversion intrinsic, this is the end of the chain.
+if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
+  Intrinsic::aarch64_sve_convert_to_svbool ||
+  IntrinsicCursor->getIntrinsicID() ==
+  Intrinsic::aarch64_sve_convert_from_svbool))
+  break;
+
+CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor);
+Cursor = IntrinsicCursor->getOperand(0);
+  }
+
+  // If no viable replacement in the conversion chain was found, there is
+  // nothing to do.
+  if (!EarliestReplacement)
 return false;
 
-  I->replaceAllUsesWith(SourceVal);
+  I->replaceAllUsesWith(EarliestReplacement);
   I->eraseFromParent();
-  if (Y->use_empty())
-Y->eraseFromParent();
 
+  while (!CandidatesForRemoval.empty()) {
+Instruction *Candidate = CandidatesForRemoval.pop_back_val();
+if (Candidate->use_empty())
+  Candidate->eraseFromParent();
+  }
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll 
b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
index 47e0ff8f19c7..22c61d0565af 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsic-opts-reinterpret.ll
@@ -67,6 +67,62 @@ define  @reinterpret_test_d_rev( %a) {
   ret  %2
 }
 
+define <vscale x 2 x i1> @reinterpret_test_full_chain(<vscale x 2 x i1> %a) {
+; OPT-LABEL: @reinterpret_test_full_chain(
+; OPT: ret <vscale x 2 x i1> %a
+  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %a)
+  %2 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1)
+  %3 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %2)
+  %4 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %3)
+  %5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %4)
+  %6 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %5)
+  ret <vscale x 2 x i1> %6
+}
+
+; The last two reinterprets are not necessary, since they are doing the same
+; work as the first two.
+define  @rei

[llvm-branch-commits] [clang] 3d5b18a - [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments

2021-01-05 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2021-01-05T15:18:21Z
New Revision: 3d5b18a3fdf47ae2286642131e4a92968dd01c2a

URL: 
https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a
DIFF: 
https://github.com/llvm/llvm-project/commit/3d5b18a3fdf47ae2286642131e4a92968dd01c2a.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST arguments

VLST arguments are coerced to VLATs at the function boundary for
consistency with the VLAT ABI. They are then bitcast back to VLSTs in
the function prolog. Previously, this conversion was done through memory.
With the introduction of the llvm.vector.{insert,extract} intrinsics, we
can avoid going through memory here.
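
As an illustration, with __ARM_FEATURE_SVE_BITS=512 the prolog for a VLST
argument coerced to a VLAT now looks roughly like the sketch below. The
function name and the 512-bit width are assumptions for illustration, not
taken from the patch; the pattern follows the updated CHECK lines in the
tests:

    define <16 x i32> @use_vlst_arg(<vscale x 4 x i32> %x.coerce) #0 {
    entry:
      ; Previously %x.coerce was stored to a stack temporary and reloaded as <16 x i32>.
      %x = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %x.coerce, i64 0)
      ret <16 x i32> %x
    }

    declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64)

    attributes #0 = { "target-features"="+sve" }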

Depends on D92761

Differential Revision: https://reviews.llvm.org/D92762

Added: 


Modified: 
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e28736bd3d2f..f1987408165b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2688,6 +2688,27 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
 break;
   }
 
+  // VLST arguments are coerced to VLATs at the function boundary for
+  // ABI consistency. If this is a VLST that was coerced to
+  // a VLAT at the function boundary and the types match up, use
+  // llvm.experimental.vector.extract to convert back to the original
+  // VLST.
+  if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
+    auto *Coerced = Fn->getArg(FirstIRArg);
+    if (auto *VecTyFrom =
+            dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
+  if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
+llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
+
+assert(NumIRArgs == 1);
+Coerced->setName(Arg->getName() + ".coerce");
+        ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
+            VecTyTo, Coerced, Zero, "castFixedSve")));
+break;
+  }
+}
+  }
+
   Address Alloca = CreateMemTemp(Ty, getContext().getDeclAlign(Arg),
  Arg->getName());
 

diff  --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index 9b733d21dbbb..9d3206a75f0e 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -59,14 +59,14 @@ typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 
 // CHECK-LABEL: define{{.*}} void @f2(
 // CHECK-SAME:   <[[#div(VBITS,8)]] x i8>* noalias nocapture 
sret(<[[#div(VBITS,8)]] x i8>) align 16 %agg.result, <[[#div(VBITS,8)]] x i8>* 
nocapture readonly %0)
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:[[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, 
<[[#div(VBITS,8)]] x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
-// CHECK-NEXT:[[TMP1:%.*]] = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8( undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
-// CHECK-NEXT:[[TMP2:%.*]] = call  
@llvm.aarch64.sve.asrd.nxv16i8( [[TMP1]],  
[[CASTSCALABLESVE]], i32 1)
-// CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> 
@llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8( [[TMP2]], i64 0)
-// CHECK-NEXT:store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], 
<[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
-// CHECK-NEXT:ret void
+// CHECK-NEXT: entry:
+// CHECK-NEXT:   [[X:%.*]] = load <[[#div(VBITS,8)]] x i8>, <[[#div(VBITS,8)]] 
x i8>* [[TMP0:%.*]], align 16, [[TBAA6:!tbaa !.*]]
+// CHECK-NEXT:   [[TMP1:%.*]] = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK-NEXT:   [[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv16i8.v[[#div(VBITS,8)]]i8( undef, <[[#div(VBITS,8)]] x i8> [[X]], i64 0)
+// CHECK-NEXT:   [[TMP2:%.*]] = call  
@llvm.aarch64.sve.asrd.nxv16i8( [[TMP1]],  
[[CASTSCALABLESVE]], i32 1)
+// CHECK-NEXT:   [[CASTFIXEDSVE:%.*]] = call <[[#div(VBITS,8)]] x i8> 
@llvm.experimental.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8( [[TMP2]], i64 0)
+// CHECK-NEXT:   store <[[#div(VBITS,8)]] x i8> [[CASTFIXEDSVE]], 
<[[#div(VBITS,8)]] x i8>* [[AGG_RESULT:%.*]], align 16, [[TBAA6]]
+// CHECK-NEXT:   ret void
 vec_int8 f2(vec_int8 x) { return svasrd_x(svptrue_b8(), x, 1); }
 #endif
 
@@ -78,24 +78,18 @@ void f3(vec1);
 typedef svint8_t vec2 __attrib

[llvm-branch-commits] [clang] 8ea72b3 - [clang][AArch64][SVE] Avoid going through memory for coerced VLST return values

2021-01-11 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2021-01-11T12:10:59Z
New Revision: 8ea72b388734ce660f861e0dfbe53d203e94876a

URL: 
https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a
DIFF: 
https://github.com/llvm/llvm-project/commit/8ea72b388734ce660f861e0dfbe53d203e94876a.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for coerced VLST return 
values

VLST return values are coerced to VLATs in the function epilog for
consistency with the VLAT ABI. Previously, this coercion was done
through memory. It is preferable to use the
llvm.experimental.vector.insert intrinsic to avoid going through memory
here.
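
As a sketch of the epilog change (again assuming __ARM_FEATURE_SVE_BITS=512;
the function name is illustrative, not taken from the patch), a coerced VLST
return value is now produced with a single insert instead of an
alloca/store/load sequence:

    define <vscale x 4 x i32> @return_vlst(<16 x i32> %add) #0 {
    entry:
      ; Previously %add was stored into an alloca of <vscale x 4 x i32> and reloaded.
      %cast = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> %add, i64 0)
      ret <vscale x 4 x i32> %cast
    }

    declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64)

    attributes #0 = { "target-features"="+sve" }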

Reviewed By: c-rhodes

Differential Revision: https://reviews.llvm.org/D94290

Added: 


Modified: 
clang/lib/CodeGen/CGCall.cpp
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index f1987408165b..2cc7203d1194 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -1265,6 +1265,21 @@ static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty,
 return CGF.Builder.CreateLoad(Src);
   }
 
+  // If coercing a fixed vector to a scalable vector for ABI compatibility, and
+  // the types match, use the llvm.experimental.vector.insert intrinsic to
+  // perform the conversion.
+  if (auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(Ty)) {
+    if (auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (ScalableDst->getElementType() == FixedSrc->getElementType()) {
+        auto *Load = CGF.Builder.CreateLoad(Src);
+        auto *UndefVec = llvm::UndefValue::get(ScalableDst);
+        auto *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+        return CGF.Builder.CreateInsertVector(ScalableDst, UndefVec, Load, Zero,
+                                              "castScalableSve");
+      }
+    }
+  }
+
   // Otherwise do coercion through memory. This is stupid, but simple.
   Address Tmp =
   CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment(), Src.getName());

diff  --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
index 6fafc2ca2db9..a808d50884ea 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -48,14 +48,11 @@ void test02() {
 // CHECK-SAME:[[#VBITS]]
 // CHECK-SAME:EES_( %x.coerce,  
%y.coerce)
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   [[RETVAL_COERCE:%.*]] = alloca , align 16
 // CHECK-NEXT:   [[X:%.*]] = call <[[#div(VBITS, 32)]] x i32> 
@llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32( [[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:   [[Y:%.*]] = call <[[#div(VBITS, 32)]] x i32> 
@llvm.experimental.vector.extract.v[[#div(VBITS, 32)]]i32.nxv4i32( [[X_COERCE1:%.*]], i64 0)
 // CHECK-NEXT:   [[ADD:%.*]] = add <[[#div(VBITS, 32)]] x i32> [[Y]], [[X]]
-// CHECK-NEXT:   [[RETVAL_0__SROA_CAST:%.*]] = bitcast * 
[[RETVAL_COERCE]] to <[[#div(VBITS, 32)]] x i32>*
-// CHECK-NEXT:   store <[[#div(VBITS, 32)]] x i32> [[ADD]], <[[#div(VBITS, 
32)]] x i32>* [[RETVAL_0__SROA_CAST]], align 16
-// CHECK-NEXT:   [[TMP0:%.*]] = load , * 
[[RETVAL_COERCE]], align 16
-// CHECK-NEXT:   ret  [[TMP0]]
+// CHECK-NEXT:   [[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v[[#div(VBITS, 32)]]i32( undef, <[[#div(VBITS, 32)]] x i32> [[ADD]], i64 0)
+// CHECK-NEXT:   ret  [[CASTSCALABLESVE]]
 typedef svint32_t vec __attribute__((arm_sve_vector_bits(N)));
 auto f(vec x, vec y) { return x + y; } // Returns a vec.
 #endif

diff  --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c 
b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
index f909b7164622..f988d54bacd4 100644
--- a/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
+++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
@@ -24,14 +24,11 @@ svint32_t sizeless_callee(svint32_t x) {
 
 // CHECK-LABEL: @fixed_caller(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[RETVAL_COERCE:%.*]] = alloca , align 16
 // CHECK-NEXT:[[X:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[X_COERCE:%.*]], i64 0)
 // CHECK-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv4i32.v16i32( undef, <16 x 
i32> [[X]], i64 0)
 // CHECK-NEXT:[[CASTFIXEDSVE:%.*]] = call <16 x i32> 
@llvm.experimental.vector.extract.v16i32.nxv4i32( 
[[CASTSCALABLESVE]], i64 0)
-// CHECK-NEXT:[[RETVAL_0__SROA_CAST:%.*]] = bitcast * 
[[RETVAL_COERCE]] to <16 x i32>*
-// CHECK-NEXT:store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* 
[[RETVAL_0__SROA_CAST]], align 16
-// C

[llvm-branch-commits] [llvm] 0073582 - [DAGCombiner] Use getVectorElementCount inside visitINSERT_SUBVECTOR

2021-01-11 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2021-01-11T14:15:11Z
New Revision: 007358239decd45819a6fa44eb2a2e07fd85e796

URL: 
https://github.com/llvm/llvm-project/commit/007358239decd45819a6fa44eb2a2e07fd85e796
DIFF: 
https://github.com/llvm/llvm-project/commit/007358239decd45819a6fa44eb2a2e07fd85e796.diff

LOG: [DAGCombiner] Use getVectorElementCount inside visitINSERT_SUBVECTOR

This avoids TypeSize-/ElementCount-related warnings.

Differential Revision: https://reviews.llvm.org/D92747

Added: 
llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll

Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index be57d9250db7..75dbc2227f6a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21008,8 +21008,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
   N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
   N1.getOperand(0).getOperand(1) == N2 &&
-  N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
-  VT.getVectorNumElements() &&
+  N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
+  VT.getVectorElementCount() &&
   N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
   VT.getSizeInBits()) {
 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
@@ -21026,7 +21026,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
 EVT CN1VT = CN1.getValueType();
 if (CN0VT.isVector() && CN1VT.isVector() &&
 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
-CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
+CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
   SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
   CN0.getValueType(), CN0, CN1, N2);
   return DAG.getBitcast(VT, NewINSERT);
@@ -21107,8 +21107,10 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   // If the input vector is a concatenation, and the insert replaces
   // one of the pieces, we can optimize into a single concat_vectors.
   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
-  N0.getOperand(0).getValueType() == N1.getValueType()) {
-unsigned Factor = N1.getValueType().getVectorNumElements();
+  N0.getOperand(0).getValueType() == N1.getValueType() &&
+  N0.getOperand(0).getValueType().isScalableVector() ==
+  N1.getValueType().isScalableVector()) {
+unsigned Factor = N1.getValueType().getVectorMinNumElements();
 SmallVector Ops(N0->op_begin(), N0->op_end());
 Ops[InsIdx / Factor] = N1;
 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);

diff  --git a/llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll 
b/llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll
new file mode 100644
index ..a89e3a09c1f1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dag-combine-insert-subvector.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -o /dev/null 2>&1 | FileCheck --allow-empty %s
+
+; This regression test is defending against a ElementCount warning 'Possible incorrect use of
+; EVT::getVectorNumElements() for scalable vector'. This warning appeared in
+; DAGCombiner::visitINSERT_SUBVECTOR because of the use of getVectorNumElements() on scalable
+; types.
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; CHECK-NOT: warning:
+
+target triple = "aarch64-unknown-linux-gnu"
+attributes #0 = {"target-features"="+sve"}
+
+declare <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float>, i64)
+declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64)
+
+define <vscale x 2 x double> @reproducer_one(<vscale x 4 x float> %vec_a) #0 {
+  %a = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> %vec_a, i64 0)
+  %b = bitcast <16 x float> %a to <8 x double>
+  %retval = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> %b, i64 0)
+  ret <vscale x 2 x double> %retval
+}
+
+define <vscale x 2 x double> @reproducer_two(<4 x double> %a, <4 x double> %b) #0 {
+  %concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %retval = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> %concat, i64 0)
+  ret <vscale x 2 x double> %retval
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 5a2a836 - [AArch64][NEON] Remove undocumented vceqz{, q}_p16, vml{a, s}q_n_f64 intrinsics

2020-12-15 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2020-12-15T17:19:16Z
New Revision: 5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf

URL: 
https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf
DIFF: 
https://github.com/llvm/llvm-project/commit/5a2a8369e82cea9689b0ff60f3e9baa7fc79fbcf.diff

LOG: [AArch64][NEON] Remove undocumented vceqz{,q}_p16, vml{a,s}q_n_f64 
intrinsics

Prior to this patch, Clang supported the following C/C++ intrinsics:

vceqz_p16
vceqzq_p16
vmlaq_n_f64
vmlsq_n_f64

... exposed through arm_neon.h. However, these intrinsics are not part
of the ACLE, allowing developers to write code that is not compatible
with other toolchains.

This patch removes these intrinsics.

There is a bug report capturing this issue here:

https://bugs.llvm.org/show_bug.cgi?id=47471

Reviewed By: bsmith

Differential Revision: https://reviews.llvm.org/D93206

Added: 


Modified: 
clang/include/clang/Basic/arm_neon.td
clang/test/CodeGen/aarch64-neon-fma.c
clang/test/CodeGen/aarch64-neon-misc.c

Removed: 




diff  --git a/clang/include/clang/Basic/arm_neon.td 
b/clang/include/clang/Basic/arm_neon.td
index 4d4e42dd514b..6f1380d58c16 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -786,9 +786,6 @@ def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>;
 def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>;
 def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>;
 
-def MLA_N : SOpInst<"vmla_n", "...1", "Qd", OP_MLA_N>;
-def MLS_N : SOpInst<"vmls_n", "...1", "Qd", OP_MLS_N>;
-
 

 // Logical operations
 def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">;
@@ -868,7 +865,7 @@ def CFMGT  : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
 def CFMLT  : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
 
 def CMEQ  : SInst<"vceqz", "U.",
-  "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">;
+  "csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
 def CMGE  : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
 def CMLE  : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
 def CMGT  : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;

diff  --git a/clang/test/CodeGen/aarch64-neon-fma.c 
b/clang/test/CodeGen/aarch64-neon-fma.c
index c2dd315ed9fc..0726218d2e15 100644
--- a/clang/test/CodeGen/aarch64-neon-fma.c
+++ b/clang/test/CodeGen/aarch64-neon-fma.c
@@ -26,16 +26,6 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, 
float32_t c) {
   return vmlaq_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x 
double> %b, double %c) #1 {
-// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, 
i32 0
-// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], 
double %c, i32 1
-// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
-// CHECK:   [[ADD_I:%.*]] = fadd <2 x double> %a, [[MUL_I]]
-// CHECK:   ret <2 x double> [[ADD_I]]
-float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
-  return vmlaq_n_f64(a, b, c);
-}
-
 // CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x 
float> %b, float %c) #1 {
 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], 
float %c, i32 1
@@ -58,16 +48,6 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, 
float32_t c) {
   return vmls_n_f32(a, b, c);
 }
 
-// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x 
double> %b, double %c) #1 {
-// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, 
i32 0
-// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], 
double %c, i32 1
-// CHECK:   [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
-// CHECK:   [[SUB_I:%.*]] = fsub <2 x double> %a, [[MUL_I]]
-// CHECK:   ret <2 x double> [[SUB_I]]
-float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
-  return vmlsq_n_f64(a, b, c);
-}
-
 // CHECK-LABEL: define <2 x float> @test_vmla_lane_f32_0(<2 x float> %a, <2 x 
float> %b, <2 x float> %v) #0 {
 // CHECK:[[TMP0:%.*]] = bitcast <2 x float> [[V:%.*]] to <8 x i8>
 // CHECK:[[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>

diff  --git a/clang/test/CodeGen/aarch64-neon-misc.c 
b/clang/test/CodeGen/aarch64-neon-misc.c
index 88020a1a69c2..5517fe3dc411 100644
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -198,24 +198,6 @@ uint8x16_t test_vceqzq_p8(poly8x16_t a) {
   return vceqzq_p8(a);
 }
 
-// CHECK-LABEL: @test_vceqz_p16(
-// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
-// CHECK:   [[TMP1:%.*]] = icmp eq <4 x i16> %a, zeroinitializer
-// CHECK:   [[VCEQZ_I:%.*]] = sext <4 x i1> [[TMP1]] to <4

[llvm-branch-commits] [clang] dad07ba - [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts

2020-12-16 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2020-12-16T12:24:32Z
New Revision: dad07baf123e672b1d5d5e7c21e73b92399d5a0c

URL: 
https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c
DIFF: 
https://github.com/llvm/llvm-project/commit/dad07baf123e672b1d5d5e7c21e73b92399d5a0c.diff

LOG: [clang][AArch64][SVE] Avoid going through memory for VLAT <-> VLST casts

This change makes use of the llvm.experimental.vector.{insert,extract}
intrinsics to avoid going through memory when performing bitcasts between
vector-length agnostic types and vector-length specific types.
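
A minimal sketch of the IR now emitted for the two cast directions, assuming
__ARM_FEATURE_SVE_BITS=512 and float elements (the function names are
illustrative, not taken from the patch):

    define <vscale x 4 x float> @fixed_to_scalable(<16 x float> %f) #0 {
      %cast = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float> undef, <16 x float> %f, i64 0)
      ret <vscale x 4 x float> %cast
    }

    define <16 x float> @scalable_to_fixed(<vscale x 4 x float> %s) #0 {
      %cast = call <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float> %s, i64 0)
      ret <16 x float> %cast
    }

    declare <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v16f32(<vscale x 4 x float>, <16 x float>, i64)
    declare <16 x float> @llvm.experimental.vector.extract.v16f32.nxv4f32(<vscale x 4 x float>, i64)

    attributes #0 = { "target-features"="+sve" }

Casts where the element types differ (for example between predicates) still go
through memory, as noted in the TODO in the diff below.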

Depends on D91362

Reviewed By: c-rhodes

Differential Revision: https://reviews.llvm.org/D92761

Added: 


Modified: 
clang/lib/CodeGen/CGExprScalar.cpp
clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-call.c
clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c
clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c
clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c
llvm/include/llvm/IR/IRBuilder.h

Removed: 




diff  --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 973cefd831e6..c9cf1d0dfd89 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1996,7 +1996,39 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
   }
 }
 
+// If Src is a fixed vector and Dst is a scalable vector, and both have the
+// same element type, use the llvm.experimental.vector.insert intrinsic to
+// perform the bitcast.
+    if (const auto *FixedSrc = dyn_cast<llvm::FixedVectorType>(SrcTy)) {
+      if (const auto *ScalableDst = dyn_cast<llvm::ScalableVectorType>(DstTy)) {
+        if (FixedSrc->getElementType() == ScalableDst->getElementType()) {
+  llvm::Value *UndefVec = llvm::UndefValue::get(DstTy);
+  llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+  return Builder.CreateInsertVector(DstTy, UndefVec, Src, Zero,
+"castScalableSve");
+}
+  }
+}
+
+// If Src is a scalable vector and Dst is a fixed vector, and both have the
+// same element type, use the llvm.experimental.vector.extract intrinsic to
+// perform the bitcast.
+    if (const auto *ScalableSrc = dyn_cast<llvm::ScalableVectorType>(SrcTy)) {
+      if (const auto *FixedDst = dyn_cast<llvm::FixedVectorType>(DstTy)) {
+        if (ScalableSrc->getElementType() == FixedDst->getElementType()) {
+  llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+  return Builder.CreateExtractVector(DstTy, Src, Zero, "castFixedSve");
+}
+  }
+}
+
 // Perform VLAT <-> VLST bitcast through memory.
+// TODO: since the llvm.experimental.vector.{insert,extract} intrinsics
+//   require the element types of the vectors to be the same, we
+//   need to keep this around for casting between predicates, or more
+//   generally for bitcasts between VLAT <-> VLST where the element
+//   types of the vectors are not the same, until we figure out a better
+//   way of doing these casts.
 if ((isa(SrcTy) &&
  isa(DstTy)) ||
 (isa(SrcTy) &&

diff  --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index fed7708c6893..beba6a3f0199 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -51,34 +51,22 @@ vec2048 x2048 = {0, 1, 2, 3, 3 , 2 , 1, 0, 0, 1, 2, 3, 3 , 
2 , 1, 0,
 typedef int8_t vec_int8 __attribute__((vector_size(N / 8)));
 // CHECK128-LABEL: define <16 x i8> @f2(<16 x i8> %x)
 // CHECK128-NEXT:  entry:
-// CHECK128-NEXT:%x.addr = alloca <16 x i8>, align 16
-// CHECK128-NEXT:%saved-call-rvalue = alloca , align 16
-// CHECK128-NEXT:store <16 x i8> %x, <16 x i8>* %x.addr, align 16
-// CHECK128-NEXT:%0 = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-// CHECK128-NEXT:%1 = bitcast <16 x i8>* %x.addr to *
-// CHECK128-NEXT:%2 = load , * %1, 
align 16
-// CHECK128-NEXT:%3 = call  
@llvm.aarch64.sve.asrd.nxv16i8( %0,  %2, 
i32 1)
-// CHECK128-NEXT:store  %3, * 
%saved-call-rvalue, align 16
-// CHECK128-NEXT:%castFixedSve = bitcast * 
%saved-call-rvalue to <16 x i8>*
-// CHECK128-NEXT:%4 = load <16 x i8>, <16 x i8>* %castFixedSve, align 16
-// CHECK128-NEXT:ret <16 x i8> %4
+// CHECK128-NEXT:[[TMP0:%.*]] = call  
@llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+// CHECK128-NEXT:[[CASTSCALABLESVE:%.*]] = call  
@llvm.experimental.vector.insert.nxv16i8.v16i8( undef, <16 x 
i8> [[X:%.*]], i64 0)
+// CHECK128-NEXT:[[TMP1:%.*]] = call  
@llvm.aarch64.sve.asrd.nxv16i8( [[TMP0]],  
[[CASTSCALABLESVE]], i32 1)
+// CHECK128-NEXT:

[llvm-branch-commits] [clang] 3c696a2 - [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors

2020-11-23 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2020-11-23T10:47:17Z
New Revision: 3c696a212ba4328e4f8f92136bc4d728a6490ef7

URL: 
https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7
DIFF: 
https://github.com/llvm/llvm-project/commit/3c696a212ba4328e4f8f92136bc4d728a6490ef7.diff

LOG: [AArch64][SVE] Allow lax conversion between VLATs and GNU vectors

Previously, lax conversions were only allowed between SVE vector-length
agnostic types and vector-length specific types. This meant that code
such as the following:

#include <arm_sve.h>
#define N __ARM_FEATURE_SVE_BITS
#define FIXED_ATTR __attribute__ ((vector_size (N/8)))
typedef float fixed_float32_t FIXED_ATTR;

void foo() {
fixed_float32_t fs32;
svfloat64_t s64;
fs32 = s64;
}

was not allowed.

This patch makes a minor change to areLaxCompatibleSveTypes to allow for
lax conversions to be performed between SVE vector-length agnostic types
and GNU vectors.

Differential Revision: https://reviews.llvm.org/D91696

Added: 


Modified: 
clang/lib/AST/ASTContext.cpp
clang/test/Sema/aarch64-sve-lax-vector-conversions.c
clang/test/Sema/attr-arm-sve-vector-bits.c
clang/test/SemaCXX/aarch64-sve-lax-vector-conversions.cpp

Removed: 




diff  --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index f54916babed7..67ee8c0956d6 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -8586,10 +8586,20 @@ bool ASTContext::areLaxCompatibleSveTypes(QualType FirstType,
 
const auto *VecTy = SecondType->getAs<VectorType>();
 if (VecTy &&
-VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector) {
+(VecTy->getVectorKind() == VectorType::SveFixedLengthDataVector ||
+ VecTy->getVectorKind() == VectorType::GenericVector)) {
   const LangOptions::LaxVectorConversionKind LVCKind =
   getLangOpts().getLaxVectorConversions();
 
+  // If __ARM_FEATURE_SVE_BITS != N do not allow GNU vector lax conversion.
+  // "Whenever __ARM_FEATURE_SVE_BITS==N, GNUT implicitly
+  // converts to VLAT and VLAT implicitly converts to GNUT."
+  // ACLE Spec Version 00bet6, 3.7.3.2. Behavior common to vectors and
+  // predicates.
+  if (VecTy->getVectorKind() == VectorType::GenericVector &&
+  getTypeSize(SecondType) != getLangOpts().ArmSveVectorBits)
+return false;
+
   // If -flax-vector-conversions=all is specified, the types are
   // certainly compatible.
   if (LVCKind == LangOptions::LaxVectorConversionKind::All)

diff  --git a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c 
b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
index e2fe87f7dd20..1a1addcf1c1b 100644
--- a/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
+++ b/clang/test/Sema/aarch64-sve-lax-vector-conversions.c
@@ -7,32 +7,61 @@
 #include 
 
 #define N __ARM_FEATURE_SVE_BITS
-#define FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))
+#define SVE_FIXED_ATTR __attribute__((arm_sve_vector_bits(N)))
+#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8)))
 
-typedef svfloat32_t fixed_float32_t FIXED_ATTR;
-typedef svint32_t fixed_int32_t FIXED_ATTR;
+typedef svfloat32_t sve_fixed_float32_t SVE_FIXED_ATTR;
+typedef svint32_t sve_fixed_int32_t SVE_FIXED_ATTR;
+typedef float gnu_fixed_float32_t GNU_FIXED_ATTR;
+typedef int gnu_fixed_int32_t GNU_FIXED_ATTR;
 
-void allowed_with_integer_lax_conversions() {
-  fixed_int32_t fi32;
+void sve_allowed_with_integer_lax_conversions() {
+  sve_fixed_int32_t fi32;
   svint64_t si64;
 
   // The implicit cast here should fail if -flax-vector-conversions=none, but 
pass if
   // -flax-vector-conversions={integer,all}.
   fi32 = si64;
-  // lax-vector-none-error@-1 {{assigning to 'fixed_int32_t' (vector of 16 
'int' values) from incompatible type}}
+  // lax-vector-none-error@-1 {{assigning to 'sve_fixed_int32_t' (vector of 16 
'int' values) from incompatible type}}
   si64 = fi32;
   // lax-vector-none-error@-1 {{assigning to 'svint64_t' (aka '__SVInt64_t') 
from incompatible type}}
 }
 
-void allowed_with_all_lax_conversions() {
-  fixed_float32_t ff32;
+void sve_allowed_with_all_lax_conversions() {
+  sve_fixed_float32_t ff32;
   svfloat64_t sf64;
 
   // The implicit cast here should fail if 
-flax-vector-conversions={none,integer}, but pass if
   // -flax-vector-conversions=all.
   ff32 = sf64;
-  // lax-vector-none-error@-1 {{assigning to 'fixed_float32_t' (vector of 16 
'float' values) from incompatible type}}
-  // lax-vector-integer-error@-2 {{assigning to 'fixed_float32_t' (vector of 
16 'float' values) from incompatible type}}
+  // lax-vector-none-error@-1 {{assigning to 'sve_fixed_float32_t' (vector of 
16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'sve_fixed_float32_t' (vector 
of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  //

[llvm-branch-commits] [llvm] 06654a5 - [SVE] Fix TypeSize warning in RuntimePointerChecking::insert

2020-11-25 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2020-11-25T16:59:03Z
New Revision: 06654a5348bfc510208514a30c552f4f2c4c0ee7

URL: 
https://github.com/llvm/llvm-project/commit/06654a5348bfc510208514a30c552f4f2c4c0ee7
DIFF: 
https://github.com/llvm/llvm-project/commit/06654a5348bfc510208514a30c552f4f2c4c0ee7.diff

LOG: [SVE] Fix TypeSize warning in RuntimePointerChecking::insert

The TypeSize warning would occur because RuntimePointerChecking::insert
was not scalable vector aware. The fix is to use
ScalarEvolution::getSizeOfExpr to grab the size of types.
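
The size of a scalable type cannot be expressed as a plain constant, so
getSizeOfScalableVectorExpr models it with the GEP-of-null idiom shown in the
diff, which folds to a multiple of vscale at runtime. A hedged illustration of
the resulting expression (the element type here is an arbitrary example):

    ; sizeof(<vscale x 4 x i32>) == vscale * 16 bytes
    define i64 @sizeof_nxv4i32() {
      ret i64 ptrtoint (<vscale x 4 x i32>* getelementptr (<vscale x 4 x i32>, <vscale x 4 x i32>* null, i64 1) to i64)
    }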

Differential Revision: https://reviews.llvm.org/D90171

Added: 
llvm/test/Analysis/LoopAccessAnalysis/memcheck-store-vs-alloc-size.ll

llvm/test/Analysis/LoopAccessAnalysis/runtime-pointer-checking-insert-typesize.ll

Modified: 
llvm/include/llvm/Analysis/ScalarEvolution.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
llvm/lib/Analysis/ScalarEvolution.cpp

Removed: 




diff  --git a/llvm/include/llvm/Analysis/ScalarEvolution.h 
b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 9c19ec986444..a7a24f086fbe 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -600,9 +600,17 @@ class ScalarEvolution {
 return getConstant(Ty, -1, /*isSigned=*/true);
   }
 
-  /// Return an expression for sizeof AllocTy that is type IntTy
+  /// Return an expression for sizeof ScalableTy that is type IntTy, where
+  /// ScalableTy is a scalable vector type.
+  const SCEV *getSizeOfScalableVectorExpr(Type *IntTy,
+  ScalableVectorType *ScalableTy);
+
+  /// Return an expression for the alloc size of AllocTy that is type IntTy
   const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);
 
+  /// Return an expression for the store size of StoreTy that is type IntTy
+  const SCEV *getStoreSizeOfExpr(Type *IntTy, Type *StoreTy);
+
   /// Return an expression for offsetof on the given field with type IntTy
   const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo);
 

diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp 
b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 19a8ea23b70b..d37c07801b2e 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -224,9 +224,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
 }
 // Add the size of the pointed element to ScEnd.
 auto &DL = Lp->getHeader()->getModule()->getDataLayout();
-unsigned EltSize =
-DL.getTypeStoreSizeInBits(Ptr->getType()->getPointerElementType()) / 8;
-const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
+Type *IdxTy = DL.getIndexType(Ptr->getType());
+const SCEV *EltSizeSCEV =
+SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
 ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
   }
 

diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp
index 5f77f4aa05c2..5a7f1b94a4e8 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3660,28 +3660,42 @@ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
   return getMinMaxExpr(scUMinExpr, Ops);
 }
 
+const SCEV *
+ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
+ ScalableVectorType *ScalableTy) {
+  Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
+  Constant *One = ConstantInt::get(IntTy, 1);
+  Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
+  // Note that the expression we created is the final expression, we don't
+  // want to simplify it any further Also, if we call a normal getSCEV(),
+  // we'll end up in an endless recursion. So just create an SCEVUnknown.
+  return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
+}
+
 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
-  if (isa<ScalableVectorType>(AllocTy)) {
-Constant *NullPtr = Constant::getNullValue(AllocTy->getPointerTo());
-Constant *One = ConstantInt::get(IntTy, 1);
-Constant *GEP = ConstantExpr::getGetElementPtr(AllocTy, NullPtr, One);
-// Note that the expression we created is the final expression, we don't
-// want to simplify it any further Also, if we call a normal getSCEV(),
-// we'll end up in an endless recursion. So just create an SCEVUnknown.
-return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
-  }
-  // We can bypass creating a target-independent
-  // constant expression and then folding it back into a ConstantInt.
-  // This is just a compile-time optimization.
+  if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
+return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
+  // We can bypass creating a target-independent constant expression and then
+  // folding it back into a ConstantInt. This is just a compile

[llvm-branch-commits] [llvm] f6150aa - [SelectionDAGBuilder] Update signature of `getRegsAndSizes()`.

2020-11-30 Thread Joe Ellis via llvm-branch-commits

Author: Francesco Petrogalli
Date: 2020-11-30T17:38:51Z
New Revision: f6150aa41a48ac8b5372fe4d6ccdfff96e432431

URL: 
https://github.com/llvm/llvm-project/commit/f6150aa41a48ac8b5372fe4d6ccdfff96e432431
DIFF: 
https://github.com/llvm/llvm-project/commit/f6150aa41a48ac8b5372fe4d6ccdfff96e432431.diff

LOG: [SelectionDAGBuilder] Update signature of `getRegsAndSizes()`.

The mapping between registers and their respective sizes has been updated
to use TypeSize, to account for the size of scalable EVTs.

The patch is an NFCI, except that with this change the function
`getUnderlyingArgRegs` no longer raises a warning for an implicit
conversion of `TypeSize` to `unsigned` when generating machine code
for the test added in this patch.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D92096

Added: 
llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll

Modified: 
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index dd5beb33ecce..9d2174f4b85a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -980,14 +980,14 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, 
bool HasMatching,
   }
 }
 
-SmallVector<std::pair<unsigned, unsigned>, 4>
+SmallVector<std::pair<unsigned, TypeSize>, 4>
 RegsForValue::getRegsAndSizes() const {
-  SmallVector<std::pair<unsigned, unsigned>, 4> OutVec;
+  SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
   unsigned I = 0;
   for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
 unsigned RegCount = std::get<0>(CountAndVT);
 MVT RegisterVT = std::get<1>(CountAndVT);
-unsigned RegisterSize = RegisterVT.getSizeInBits();
+TypeSize RegisterSize = RegisterVT.getSizeInBits();
 for (unsigned E = I + RegCount; I != E; ++I)
   OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
   }
@@ -5317,7 +5317,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc 
&DL,
 // getUnderlyingArgRegs - Find underlying registers used for a truncated,
 // bitcasted, or split argument. Returns a list of 
 static void
-getUnderlyingArgRegs(SmallVectorImpl> &Regs,
+getUnderlyingArgRegs(SmallVectorImpl> &Regs,
  const SDValue &N) {
   switch (N.getOpcode()) {
   case ISD::CopyFromReg: {
@@ -5428,7 +5428,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   if (FI != std::numeric_limits::max())
 Op = MachineOperand::CreateFI(FI);
 
-  SmallVector, 8> ArgRegsAndSizes;
+  SmallVector, 8> ArgRegsAndSizes;
   if (!Op && N.getNode()) {
 getUnderlyingArgRegs(ArgRegsAndSizes, N);
 Register Reg;
@@ -5458,8 +5458,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
 
   if (!Op) {
 // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
-auto splitMultiRegDbgValue
-  = [&](ArrayRef> SplitRegs) {
+auto splitMultiRegDbgValue = [&](ArrayRef>
+ SplitRegs) {
   unsigned Offset = 0;
   for (auto RegAndSize : SplitRegs) {
 // If the expression is already a fragment, the current register

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 89b5de0a9f21..bf2023674342 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -895,7 +895,7 @@ struct RegsForValue {
   }
 
   /// Return a list of registers and their sizes.
-  SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const;
+  SmallVector<std::pair<unsigned, TypeSize>, 4> getRegsAndSizes() const;
 };
 
 } // end namespace llvm

diff  --git 
a/llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll 
b/llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll
new file mode 100644
index ..5a519bea2a70
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sdag-no-typesize-warnings-regandsizes.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sve < %s 2>%t | 
FileCheck %s
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read
+; clang/test/CodeGen/aarch64-sve-intrinsics/README for instructions on
+; how to resolve it.
+
+; WARN-NOT: warning
+
+; CHECK-LABEL: do_something:
+define  @do_something( %vx) {
+entry:
+  call void @llvm.dbg.value(metadata  %vx, metadata !3, 
metadata !DIExpression()), !dbg !5
+  %0 = tail call  @f( %vx)
+  ret  %0
+}
+
+declare  @f()
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1)
+!1 = !DIFile(filename: "file.c", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocalVariable(scope: !4)
+!4 = distinct !DISubprogram(unit: !0)
+!5 = !DILocation(scope: !4)




[llvm-branch-commits] [llvm] 78c0ea5 - [DAGCombine] Fix TypeSize warning in DAGCombine::visitLIFETIME_END

2020-12-03 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2020-12-03T12:12:41Z
New Revision: 78c0ea54a22aea5c3ff9030b66d2e1c50ca2c1a3

URL: 
https://github.com/llvm/llvm-project/commit/78c0ea54a22aea5c3ff9030b66d2e1c50ca2c1a3
DIFF: 
https://github.com/llvm/llvm-project/commit/78c0ea54a22aea5c3ff9030b66d2e1c50ca2c1a3.diff

LOG: [DAGCombine] Fix TypeSize warning in DAGCombine::visitLIFETIME_END

Bail out early if we encounter a scalable store.
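
A hedged sketch of the situation being handled (the element type below is an
assumption; the committed test may use a different one): the store has a
scalable size, so DAGCombiner cannot prove it lies within the bounds of the
object whose lifetime ends, and now simply bails out instead of warning.

    define void @scalable_store_before_lifetime_end(<vscale x 2 x i64>* %ptr) {
    entry:
      %tmp = alloca <vscale x 2 x i64>, align 16
      %tmp_ptr = bitcast <vscale x 2 x i64>* %tmp to i8*
      call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp_ptr)
      store <vscale x 2 x i64> undef, <vscale x 2 x i64>* %ptr
      call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp_ptr)
      ret void
    }

    declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
    declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)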

Reviewed By: peterwaller-arm

Differential Revision: https://reviews.llvm.org/D92392

Added: 
llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll

Modified: 
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 661e05bdd579..6ce6e1093dc6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17607,11 +17607,16 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
   // TODO: Can relax for unordered atomics (see D66309)
   if (!ST->isSimple() || ST->isIndexed())
 continue;
+  const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
+  // The bounds of a scalable store are not known until runtime, so this
+  // store cannot be elided.
+  if (StoreSize.isScalable())
+continue;
   const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
   // If we store purely within object bounds just before its lifetime ends,
   // we can remove the store.
   if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
-   ST->getMemoryVT().getStoreSizeInBits())) {
+   StoreSize.getFixedSize() * 8)) {
 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
dbgs() << "\nwithin LIFETIME_END of : ";
LifetimeEndBase.dump(); dbgs() << "\n");

diff  --git 
a/llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll 
b/llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll
new file mode 100644
index ..fd5b85a57de1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dag-combine-lifetime-end-store-typesize.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=aarch64-- < %s 2>&1 | FileCheck --allow-empty %s
+
+; This regression test is defending against a TypeSize warning 'assumption that TypeSize is not
+; scalable'. This warning appeared in DAGCombiner::visitLIFETIME_END when visiting a LIFETIME_END
+; node linked to a scalable store.
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; CHECK-NOT: warning: {{.*}}TypeSize is not scalable
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @foo(* nocapture dereferenceable(16) %ptr) {
+entry:
+  %tmp = alloca , align 8
+  %tmp_ptr = bitcast * %tmp to i8*
+  call void @llvm.lifetime.start.p0i8(i64 32, i8* %tmp_ptr)
+  store  undef, * %ptr
+  call void @llvm.lifetime.end.p0i8(i64 32, i8* %tmp_ptr)
+  ret void
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 80c33de - [SelectionDAG] Add llvm.vector.{extract, insert} intrinsics

2020-12-09 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2020-12-09T11:08:41Z
New Revision: 80c33de2d3c59ca357c67b2b2475d27f79dd8a8b

URL: 
https://github.com/llvm/llvm-project/commit/80c33de2d3c59ca357c67b2b2475d27f79dd8a8b
DIFF: 
https://github.com/llvm/llvm-project/commit/80c33de2d3c59ca357c67b2b2475d27f79dd8a8b.diff

LOG: [SelectionDAG] Add llvm.vector.{extract,insert} intrinsics

This commit adds two new intrinsics.

- llvm.experimental.vector.insert: used to insert a vector into another
  vector starting at a given index.

- llvm.experimental.vector.extract: used to extract a subvector from a
  larger vector starting from a given index.

The codegen work for these intrinsics has already been completed; this
commit is simply exposing the existing ISD nodes to LLVM IR.
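
A short usage sketch of the two intrinsics (the function names are
illustrative): insert a fixed-width vector into a scalable vector at element 0,
then extract a fixed-width slice back out.

    define <vscale x 4 x i32> @build_scalable(<4 x i32> %sub) {
      %v = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> %sub, i64 0)
      ret <vscale x 4 x i32> %v
    }

    define <4 x i32> @take_fixed_slice(<vscale x 4 x i32> %vec) {
      %s = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
      ret <4 x i32> %s
    }

    declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
    declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64)

In both cases the index must be a constant multiple of the known minimum
length of the fixed-width type, as described in the LangRef changes below.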

Reviewed By: cameron.mcinally

Differential Revision: https://reviews.llvm.org/D91362

Added: 
llvm/test/CodeGen/AArch64/sve-extract-vector.ll
llvm/test/CodeGen/AArch64/sve-insert-vector.ll
llvm/test/Transforms/InstCombine/canonicalize-vector-extract.ll
llvm/test/Transforms/InstCombine/canonicalize-vector-insert.ll
llvm/test/Verifier/extract-vector-mismatched-element-types.ll
llvm/test/Verifier/insert-vector-mismatched-element-types.ll

Modified: 
llvm/docs/LangRef.rst
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/IR/Verifier.cpp
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Removed: 




diff  --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 6724a4019030..b29eb589e2d7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16095,6 +16095,81 @@ Arguments:
 ""
 The argument to this intrinsic must be a vector of floating-point values.
 
+'``llvm.experimental.vector.insert``' Intrinsic
+^^^
+
+Syntax:
+"""
+This is an overloaded intrinsic. You can use 
``llvm.experimental.vector.insert``
+to insert a fixed-width vector into a scalable vector, but not the other way
+around.
+
+::
+
+  declare <vscale x 4 x float> @llvm.experimental.vector.insert.v4f32(<vscale x 4 x float> %vec, <4 x float> %subvec, i64 %idx)
+  declare <vscale x 2 x double> @llvm.experimental.vector.insert.v2f64(<vscale x 2 x double> %vec, <2 x double> %subvec, i64 %idx)
+
+Overview:
+"
+
+The '``llvm.experimental.vector.insert.*``' intrinsics insert a vector into 
another vector
+starting from a given index. The return type matches the type of the vector we
+insert into. Conceptually, this can be used to build a scalable vector out of
+non-scalable vectors.
+
+Arguments:
+""
+
+The ``vec`` is the vector which ``subvec`` will be inserted into.
+The ``subvec`` is the vector that will be inserted.
+
+``idx`` represents the starting element number at which ``subvec`` will be
+inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum
+vector length. If ``subvec`` is a scalable vector, ``idx`` is first scaled by
+the runtime scaling factor of ``subvec``. The elements of ``vec`` starting at
+``idx`` are overwritten with ``subvec``. Elements ``idx`` through (``idx`` +
+num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition
+cannot be determined statically but is false at runtime, then the result vector
+is undefined.
+
+
+'``llvm.experimental.vector.extract``' Intrinsic
+
+
+Syntax:
+"""
+This is an overloaded intrinsic. You can use
+``llvm.experimental.vector.extract`` to extract a fixed-width vector from a
+scalable vector, but not the other way around.
+
+::
+
+  declare <4 x float> @llvm.experimental.vector.extract.v4f32(<vscale x 4 x float> %vec, i64 %idx)
+  declare <2 x double> @llvm.experimental.vector.extract.v2f64(<vscale x 2 x double> %vec, i64 %idx)
+
+Overview:
+"
+
+The '``llvm.experimental.vector.extract.*``' intrinsics extract a vector from
+within another vector starting from a given index. The return type must be
+explicitly specified. Conceptually, this can be used to decompose a scalable
+vector into non-scalable parts.
+
+Arguments:
+""
+
+The ``vec`` is the vector from which we will extract a subvector.
+
+The ``idx`` specifies the starting element number within ``vec`` from which a
+subvector is extracted. ``idx`` must be a constant multiple of the 
known-minimum
+vector length of the result type. If the result type is a scalable vector,
+``idx`` is first scaled by the result type's runtime scaling factor. Elements
+``idx`` through (``idx`` + num_elements(result_type) - 1) must be valid vector
+indices. If this condition cannot be determined statically but is false at
+runtime, then the result vector is undefined. The ``idx`` parameter must be a
+vector index constant type (for most targets this will be an integer pointer
+type).
+
 Matrix Intrinsics
 -
 

diff  --git a/llvm/include/llvm/IR/Intrinsics.td 
b/llvm/include/llvm/IR/Intrinsics.td
index 710479103459..eb6c408b4f85 100644
--- a/llv

[llvm-branch-commits] [llvm] d863a0d - [SelectionDAG] Implement SplitVecOp_INSERT_SUBVECTOR

2020-12-11 Thread Joe Ellis via llvm-branch-commits

Author: Joe Ellis
Date: 2020-12-11T11:07:59Z
New Revision: d863a0ddebc889af31b8f729103e9d965a40a495

URL: 
https://github.com/llvm/llvm-project/commit/d863a0ddebc889af31b8f729103e9d965a40a495
DIFF: 
https://github.com/llvm/llvm-project/commit/d863a0ddebc889af31b8f729103e9d965a40a495.diff

LOG: [SelectionDAG] Implement SplitVecOp_INSERT_SUBVECTOR

This function is needed when it is necessary to split the subvector
operand of an llvm.experimental.vector.insert call. Splitting the
subvector operand means performing two insertions: one inserting the
lower part of the split subvector into the destination vector, and
another inserting the upper part.

Through experimenting, it seems quite rare to need to split the
subvector operand, but this is necessary to avoid assertion errors.
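
A hedged example of a call that exercises this path, modelled on the test
added below (the function name is illustrative): the <8 x i64> subvector
operand is illegal for AArch64 and gets split, so the new code emits one
insertion for the lower part and one for the upper part, recursively if the
halves themselves still need splitting.

    define <vscale x 2 x i64> @insert_needs_split(<vscale x 2 x i64> %vec, <8 x i64> %sub) #0 {
      %r = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %vec, <8 x i64> %sub, i64 0)
      ret <vscale x 2 x i64> %r
    }

    declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64)

    attributes #0 = { "target-features"="+sve" }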

Differential Revision: https://reviews.llvm.org/D92760

Added: 
llvm/test/CodeGen/AArch64/split-vector-insert.ll

Modified: 
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index fed111f4d64e..aea2e9ba2bd5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -841,6 +841,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SplitVecOp_TruncateHelper(SDNode *N);
 
   SDValue SplitVecOp_BITCAST(SDNode *N);
+  SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
   SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp 
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 86a1f6bff9f7..3c642df7ba11 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2062,6 +2062,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
   case ISD::BITCAST:   Res = SplitVecOp_BITCAST(N); break;
   case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+  case ISD::INSERT_SUBVECTOR:  Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
   case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::CONCAT_VECTORS:Res = SplitVecOp_CONCAT_VECTORS(N); break;
   case ISD::TRUNCATE:
@@ -2278,6 +2279,32 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
  JoinIntegers(Lo, Hi));
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
+  unsigned OpNo) {
+  assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");
+  // We know that the result type is legal.
+  EVT ResVT = N->getValueType(0);
+
+  SDValue Vec = N->getOperand(0);
+  SDValue SubVec = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+  SDLoc dl(N);
+
+  SDValue Lo, Hi;
+  GetSplitVector(SubVec, Lo, Hi);
+
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+
+  SDValue FirstInsertion =
+  DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx);
+  SDValue SecondInsertion =
+  DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi,
+  DAG.getVectorIdxConstant(IdxVal + LoElts, dl));
+
+  return SecondInsertion;
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   // We know that the extracted result type is legal.
   EVT SubVT = N->getValueType(0);

diff  --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll 
b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
new file mode 100644
index ..3fb86ae6b963
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
@@ -0,0 +1,115 @@
+; RUN: llc < %s -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION
+; RUN: llc < %s | FileCheck %s
+; REQUIRES: asserts
+
+target triple = "aarch64-unknown-linux-gnu"
+attributes #0 = {"target-features"="+sve"}
+
+declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64)
+declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64)
+
+define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %b) #0 {
+; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = 
insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0>
+; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2i64 = 
insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2>
+; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = 
insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4>
+; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2i64 = 
insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64