[PATCH] D78569: [SVE][CodeGen] Lower SDIV & UDIV to SVE intrinsics

Kerry McLaughlin via Phabricator via cfe-commits Tue, 21 Apr 2020 09:13:08 -0700

kmclaughlin created this revision.
kmclaughlin added reviewers: sdesmalen, c-rhodes, efriedma, cameron.mcinally.
Herald added subscribers: psnobl, rkruppe, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: rengolin.
Herald added a project: LLVM.


This patch maps the IR sdiv & udiv operations on scalable vector types to the
@llvm.aarch64.sve.sdiv and @llvm.aarch64.sve.udiv intrinsics respectively.

Because the SVE div instructions have only a predicated form, an all-active
predicate (ptrue) must be created during lowering.

Patch contains changes by Andrzej Warzynski.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78569

Files:
  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/CodeGen/TargetLoweringBase.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll

Index: llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll
@@ -0,0 +1,87 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SDIV
+;
+
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @sdiv_i32
+; CHECK: ptrue p0.s
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @sdiv_i64
+; CHECK: ptrue p0.d
+; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %div
+}
+
+define <vscale x 8 x i32> @sdiv_narrow(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @sdiv_narrow
+; CHECK: ptrue p0.s
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 8 x i32> %a, %b
+  ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @sdiv_widen(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @sdiv_widen
+; CHECK: ptrue p0.d
+; CHECK-NEXT: sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT: sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = sdiv <vscale x 2 x i32> %a, %b
+  ret <vscale x 2 x i32> %div
+}
+
+;
+; UDIV
+;
+
+define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: @udiv_i32
+; CHECK: ptrue p0.s
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %div
+}
+
+define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: @udiv_i64
+; CHECK: ptrue p0.d
+; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %div
+}
+
+define <vscale x 8 x i32> @udiv_narrow(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: @udiv_narrow
+; CHECK: ptrue p0.s
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 8 x i32> %a, %b
+  ret <vscale x 8 x i32> %div
+}
+
+define <vscale x 2 x i32> @udiv_widen(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
+; CHECK-LABEL: @udiv_widen
+; CHECK: ptrue p0.d
+; CHECK-NEXT: and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %div = udiv <vscale x 2 x i32> %a, %b
+  ret <vscale x 2 x i32> %div
+}
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -776,6 +776,8 @@
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG,
+                   unsigned IntrID) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -883,8 +883,11 @@
     // splat of 0 or undef) once vector selects supported in SVE codegen. See
     // D68877 for more details.
     for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
-      if (isTypeLegal(VT))
+      if (isTypeLegal(VT)) {
         setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+        setOperationAction(ISD::SDIV, VT, Custom);
+        setOperationAction(ISD::UDIV, VT, Custom);
+      }
     }
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
@@ -3337,6 +3340,10 @@
     return LowerSPLAT_VECTOR(Op, DAG);
   case ISD::EXTRACT_SUBVECTOR:
     return LowerEXTRACT_SUBVECTOR(Op, DAG);
+  case ISD::SDIV:
+    return LowerDIV(Op, DAG, Intrinsic::aarch64_sve_sdiv);
+  case ISD::UDIV:
+    return LowerDIV(Op, DAG, Intrinsic::aarch64_sve_udiv);
   case ISD::SRA:
   case ISD::SRL:
   case ISD::SHL:
@@ -7643,6 +7650,25 @@
   return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
 }
 
+SDValue AArch64TargetLowering::LowerDIV(SDValue Op,
+                                        SelectionDAG &DAG,
+                                        unsigned IntrID) const {
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+
+  assert(Op.getOperand(0).getValueType().isScalableVector() &&
+         Op.getOperand(1).getValueType().isScalableVector() &&
+         "Only scalable vectors are supported");
+
+  auto PredTy = VT.getVectorVT(*DAG.getContext(), MVT::i1,
+                               VT.getVectorNumElements(), true);
+  SDValue Mask = getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
+
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                     DAG.getConstant(IntrID, DL, MVT::i64),
+                     Mask, Op.getOperand(0), Op.getOperand(1));
+}
+
 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
                                APInt &UndefBits) {
   EVT VT = BVN->getValueType(0);
Index: llvm/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1393,6 +1393,7 @@
                                                 unsigned &NumIntermediates,
                                                 MVT &RegisterVT) const {
   unsigned NumElts = VT.getVectorNumElements();
+  bool IsScalable = VT.isScalableVector();
 
   // If there is a wider vector type with the same element type as this one,
   // or a promoted vector type that has the same number of elements which
@@ -1424,15 +1425,15 @@
 
   // Divide the input until we get to a supported size.  This will always
   // end with a scalar if the target doesn't support vectors.
-  while (NumElts > 1 && !isTypeLegal(
-                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
+  while (NumElts > 1 &&
+         !isTypeLegal(EVT::getVectorVT(Context, EltTy, NumElts, IsScalable))) {
     NumElts >>= 1;
     NumVectorRegs <<= 1;
   }
 
   NumIntermediates = NumVectorRegs;
 
-  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
+  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts, IsScalable);
   if (!isTypeLegal(NewVT))
     NewVT = EltTy;
   IntermediateVT = NewVT;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -735,7 +735,8 @@
   unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
 
   EVT BuiltVectorTy = EVT::getVectorVT(
-      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts,
+      ValueVT.isScalableVector());
   if (ValueVT != BuiltVectorTy) {
     if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
       Val = Widened;
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2604,9 +2604,9 @@
   SDLoc DL(N);
   GetSplitVector(N->getOperand(0), Lo0, Hi0);
   GetSplitVector(N->getOperand(1), Lo1, Hi1);
-  unsigned PartElements = Lo0.getValueType().getVectorNumElements();
-  EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements);
-  EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements);
+  auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
+  EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartEltCnt);
+  EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartEltCnt*2);
 
   LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
   HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D78569: [SVE][CodeGen] Lower SDIV & UDIV to SVE intrinsics

Reply via email to