kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, c-rhodes, efriedma, cameron.mcinally.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.
This patch maps IR operations for sdiv & udiv to the
@llvm.aarch64.sve.[s|u]div intrinsics.
A ptrue must be created during lowering as the div instructions
have only a predicated form.
Patch contains changes by Andrzej Warzynski.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D78569
Files:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
Index: llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -0,0 +1,87 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SDIV
+;
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @sdiv_i32
+; CHECK: ptrue p0.s
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %div = sdiv <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @sdiv_i64
+; CHECK: ptrue p0.d
+; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %div = sdiv <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i64> %div
+}
+
+define <vscale x 8 x i32> @sdiv_narrow(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @sdiv_narrow
+; CHECK: ptrue p0.s
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+ %div = sdiv <vscale x 8 x i32> %a, %b
+ ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @sdiv_widen(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @sdiv_widen
+; CHECK: ptrue p0.d
+; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %div = sdiv <vscale x 2 x i32> %a, %b
+ ret <vscale x 2 x i32> %div
+}
+
+;
+; UDIV
+;
+
+define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @udiv_i32
+; CHECK: ptrue p0.s
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %div = udiv <vscale x 4 x i32> %a, %b
+ ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @udiv_i64
+; CHECK: ptrue p0.d
+; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %div = udiv <vscale x 2 x i64> %a, %b
+ ret <vscale x 2 x i64> %div
+}
+
+define <vscale x 8 x i32> @udiv_narrow(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @udiv_narrow
+; CHECK: ptrue p0.s
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+ %div = udiv <vscale x 8 x i32> %a, %b
+ ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @udiv_widen(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @udiv_widen
+; CHECK: ptrue p0.d
+; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %div = udiv <vscale x 2 x i32> %a, %b
+ ret <vscale x 2 x i32> %div
+}
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -776,6 +776,8 @@
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDIV(SDValue Op, SelectionDAG &DAG,
+ unsigned IntrID) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -883,8 +883,11 @@
// splat of 0 or undef) once vector selects supported in SVE codegen. See
// D68877 for more details.
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
- if (isTypeLegal(VT))
+ if (isTypeLegal(VT)) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SDIV, VT, Custom);
+ setOperationAction(ISD::UDIV, VT, Custom);
+ }
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
@@ -3337,6 +3340,10 @@
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::SDIV:
+ return LowerDIV(Op, DAG, Intrinsic::aarch64_sve_sdiv);
+ case ISD::UDIV:
+ return LowerDIV(Op, DAG, Intrinsic::aarch64_sve_udiv);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -7643,6 +7650,25 @@
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
+SDValue AArch64TargetLowering::LowerDIV(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned IntrID) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ assert(Op.getOperand(0).getValueType().isScalableVector() &&
+ Op.getOperand(1).getValueType().isScalableVector() &&
+ "Only scalable vectors are supported");
+
+ auto PredTy = VT.getVectorVT(*DAG.getContext(), MVT::i1,
+ VT.getVectorNumElements(), true);
+ SDValue Mask = getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(IntrID, DL, MVT::i64),
+ Mask, Op.getOperand(0), Op.getOperand(1));
+}
+
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
Index: llvm/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1393,6 +1393,7 @@
unsigned &NumIntermediates,
MVT &RegisterVT) const {
unsigned NumElts = VT.getVectorNumElements();
+ bool IsScalable = VT.isScalableVector();
// If there is a wider vector type with the same element type as this one,
// or a promoted vector type that has the same number of elements which
@@ -1424,15 +1425,15 @@
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
- while (NumElts > 1 && !isTypeLegal(
- EVT::getVectorVT(Context, EltTy, NumElts))) {
+ while (NumElts > 1 &&
+ !isTypeLegal(EVT::getVectorVT(Context, EltTy, NumElts, IsScalable))) {
NumElts >>= 1;
NumVectorRegs <<= 1;
}
NumIntermediates = NumVectorRegs;
- EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+ EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts, IsScalable);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
IntermediateVT = NewVT;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -735,7 +735,8 @@
unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
EVT BuiltVectorTy = EVT::getVectorVT(
- *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts,
+ ValueVT.isScalableVector());
if (ValueVT != BuiltVectorTy) {
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
Val = Widened;
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2604,9 +2604,9 @@
SDLoc DL(N);
GetSplitVector(N->getOperand(0), Lo0, Hi0);
GetSplitVector(N->getOperand(1), Lo1, Hi1);
- unsigned PartElements = Lo0.getValueType().getVectorNumElements();
- EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
- EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+ auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
+ EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartEltCnt);
+ EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartEltCnt*2);
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits