[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

Albion Fung via Phabricator via cfe-commits Mon, 13 Jul 2020 06:57:51 -0700

Conanap updated this revision to Diff 277415.
Conanap marked 2 inline comments as done.
Conanap added a comment.


Removed unecessary comments and unintended changes.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82502/new/

https://reviews.llvm.org/D82502

Files:
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
  llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s

Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===================================================================
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -429,3 +429,15 @@
 # CHECK-BE: stxvrdx 35, 3, 1                      # encoding: [0x7c,0x63,0x09,0xdb]
 # CHECK-LE: stxvrdx 35, 3, 1                      # encoding: [0xdb,0x09,0x63,0x7c]
             stxvrdx 35, 3, 1
+# CHECK-BE: lxvrbx 32, 1, 2                       # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2                       # encoding: [0x1b,0x10,0x01,0x7c]
+            lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2                       # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2                       # encoding: [0x5b,0x10,0x21,0x7c]
+            lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2                       # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2                       # encoding: [0xdb,0x10,0x41,0x7c]
+            lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2                       # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2                       # encoding: [0x9b,0x10,0x61,0x7c]
+            lxvrwx 35, 1, 2
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -302,3 +302,15 @@
 
 # CHECK: stxvrdx 35, 3, 1
 0x7c 0x63 0x09 0xdb
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+                       [SDNPHasChain, SDNPMayLoad]>;
+
 // Top-level class for prefixed instructions.
 class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
          InstrItinClass itin> : Instruction {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -493,6 +493,12 @@
     /// an xxswapd.
     LXVD2X,
 
+    /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+    /// This node represents v1i128 BUILD_VECTOR of a zero extending load
+    /// instruction from <byte, halfword, word, or doubleword> to i128.
+    /// Allows utilization of the Load VSX Vector Rightmost Instructions.
+    LXVRZX,
+
     /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
     /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
     /// the vector type to load vector in big-endian element order.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1572,6 +1572,7 @@
   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
+  case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
   }
   return nullptr;
 }
@@ -14125,6 +14126,44 @@
   return SDValue();
 }
 
+// Look for the pattern of a load from a narrow width to i128, feeding
+// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
+// (LXVRZX). This node represents a zero extending load that will be matched
+// to the Load VSX Vector Rightmost instructions.
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+
+  // This combine is only eligible for a BUILD_VECTOR of v1i128.
+  // Other return types are not valid for the LXVRZX replacement.
+  if (N->getValueType(0) != MVT::v1i128)
+    return SDValue();
+
+  SDValue Operand = N->getOperand(0);
+  // Proceed with the transformation if the operand to the BUILD_VECTOR
+  // is a load instruction.
+  if (Operand.getOpcode() != ISD::LOAD)
+    return SDValue();
+
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
+  EVT MemoryType = LD->getMemoryVT();
+
+  // This transformation is only valid if the we are loading either a byte,
+  // halfword, word, or doubleword.
+  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
+                     MemoryType == MVT::i32 || MemoryType == MVT::i64;
+
+  // Ensure that the load from the narrow width is being zero extended to i128.
+  if (!ValidLDType || (LD->getValueType(0) != MVT::i128) ||
+      (LD->getExtensionType() != ISD::ZEXTLOAD))
+    return SDValue();
+
+  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr(), DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), dl)};
+
+ return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, dl,
+                                 DAG.getVTList(MVT::v1i128, MVT::Other),
+                                 LoadOps, MemoryType, LD->getMemOperand());
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -14162,6 +14201,14 @@
       return Reduced;
   }
 
+  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
+  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
+  // is a load from <valid narrow width> to i128.
+  if (Subtarget.isISA3_1()) {
+    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
+    if (BVOfZLoad)
+      return BVOfZLoad;
+  }
 
   if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -24,10 +24,14 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
-unsigned int uia, uib;
-unsigned char uca;
-unsigned short usa;
-unsigned long long ulla;
+signed int *iap;
+unsigned int uia, uib, *uiap;
+signed char *cap;
+unsigned char uca, *ucap;
+signed short *sap;
+unsigned short usa, *usap;
+signed long long *llap, llb;
+unsigned long long ulla, *ullap;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -581,3 +585,59 @@
   // CHECK: ret <4 x float>
   return vec_splati_ins(vfa, 0, 1.0f);
 }
+
+vector signed __int128 test_vec_xl_sext_i8(void) {
+  // CHECK: load i8
+  // CHECK: sext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, cap);
+}
+
+vector signed __int128 test_vec_xl_sext_i16(void) {
+  // CHECK: load i16
+  // CHECK: sext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, sap);
+}
+
+vector signed __int128 test_vec_xl_sext_i32(void) {
+  // CHECK: load i32
+  // CHECK: sext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, iap);
+}
+
+vector signed __int128 test_vec_xl_sext_i64(void) {
+  // CHECK: load i64
+  // CHECK: sext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, llap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i8(void) {
+  // CHECK: load i8
+  // CHECK: zext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, ucap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i16(void) {
+  // CHECK: load i16
+  // CHECK: zext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, usap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i32(void) {
+  // CHECK: load i32
+  // CHECK: zext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, uiap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i64(void) {
+  // CHECK: load i64
+  // CHECK: zext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, ullap);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16509,6 +16509,54 @@
   #define vec_xl_be vec_xl
 #endif
 
+#if defined(__POWER10_VECTOR__) && defined(__VSX__)
+
+/* vect_xl_sext */
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed char *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed short *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed int *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed long long *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+/* vec_xl_zext */
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned char *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned short *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned int *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned long long *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+#endif
+
 /* vec_xst */
 
 static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D82502: [PowerPC][Power10] Implement Load VSX Vector and Sign Extend and Zero Extend

Reply via email to