timshen created this revision.
timshen added reviewers: kbarton, hfinkel, iteratee, echristo, bogner.
timshen added subscribers: llvm-commits, cfe-commits.
Herald added subscribers: amehsan, nemanjai, mehdi_amini.
For a << b (as the original vec_sl does), the behavior is undefined in C
if b >= sizeof(a) * 8. The Power shift instructions, however, do define the
behavior: they compute a << (b % (sizeof(a) * 8)).
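As a scalar illustration of that semantics (shl_modulo is a hypothetical
standalone helper, not part of the patch):

  #include <cstdint>

  // Well-defined for any b: the modulo keeps the shift amount in range,
  // matching the reduction Power's vector shifts apply in hardware.
  static uint8_t shl_modulo(uint8_t a, uint8_t b) {
    return (uint8_t)(a << (b % (sizeof(a) * 8)));
  }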
This patch changes altivec.h to emit a << (b % (sizeof(a) * 8)) explicitly,
so that the source-level semantics match those of the instructions. A new
DAG combine then folds the resulting mask-and-shift sequence back into a
single shift instruction.
This patch handles left shifts only. Right shifts are more complicated,
since arithmetic and logical right shifts must be handled separately.
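The fold is sound because masking the amount with numbits - 1 is exactly
the reduction a modulo-shift instruction applies, so the mask is redundant.
A minimal standalone check of that equivalence (a hypothetical test, not
part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t x = 0xDEADBEEFu;
    for (uint32_t y = 0; y < 256; ++y)
      // The IR before the fold computes x << (y & 31); an slw-style
      // instruction computes x << (y % 32) for any y. The two agree.
      assert((x << (y & 31)) == (x << (y % 32)));
    return 0;
  }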
https://reviews.llvm.org/D28037
Files:
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-altivec.c
llvm/include/llvm/Target/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/PowerPC/shift_mask.ll
Index: llvm/test/CodeGen/PowerPC/shift_mask.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/shift_mask.ll
+++ llvm/test/CodeGen/PowerPC/shift_mask.ll
@@ -5,7 +5,6 @@
define i8 @test000(i8 %a, i8 %b) {
; CHECK-LABEL: test000:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31
; CHECK-NEXT: slw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i8 %b, 7
@@ -16,7 +15,6 @@
define i16 @test001(i16 %a, i16 %b) {
; CHECK-LABEL: test001:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: slw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i16 %b, 15
@@ -27,7 +25,6 @@
define i32 @test002(i32 %a, i32 %b) {
; CHECK-LABEL: test002:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31
; CHECK-NEXT: slw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i32 %b, 31
@@ -38,7 +35,6 @@
define i64 @test003(i64 %a, i64 %b) {
; CHECK-LABEL: test003:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
; CHECK-NEXT: sld 3, 3, 4
; CHECK-NEXT: blr
%rem = and i64 %b, 63
@@ -49,8 +45,6 @@
define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test010:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslb 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -61,8 +55,6 @@
define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test011:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslh 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -73,10 +65,6 @@
define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test012:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -87,11 +75,6 @@
define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test013:
; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI7_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsld 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -103,7 +86,6 @@
; CHECK-LABEL: test100:
; CHECK: # BB#0:
; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31
-; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i8 %b, 7
@@ -115,7 +97,6 @@
; CHECK-LABEL: test101:
; CHECK: # BB#0:
; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31
-; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i16 %b, 15
@@ -126,7 +107,6 @@
define i32 @test102(i32 %a, i32 %b) {
; CHECK-LABEL: test102:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i32 %b, 31
@@ -137,7 +117,6 @@
define i64 @test103(i64 %a, i64 %b) {
; CHECK-LABEL: test103:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
; CHECK-NEXT: srd 3, 3, 4
; CHECK-NEXT: blr
%rem = and i64 %b, 63
@@ -148,8 +127,6 @@
define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test110:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrb 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -160,8 +137,6 @@
define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test111:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrh 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -172,10 +147,6 @@
define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test112:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -186,11 +157,6 @@
define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test113:
; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrd 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -202,7 +168,6 @@
; CHECK-LABEL: test200:
; CHECK: # BB#0:
; CHECK-NEXT: extsb 3, 3
-; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31
; CHECK-NEXT: sraw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i8 %b, 7
@@ -214,7 +179,6 @@
; CHECK-LABEL: test201:
; CHECK: # BB#0:
; CHECK-NEXT: extsh 3, 3
-; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: sraw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i16 %b, 15
@@ -225,7 +189,6 @@
define i32 @test202(i32 %a, i32 %b) {
; CHECK-LABEL: test202:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31
; CHECK-NEXT: sraw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i32 %b, 31
@@ -236,7 +199,6 @@
define i64 @test203(i64 %a, i64 %b) {
; CHECK-LABEL: test203:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
; CHECK-NEXT: srad 3, 3, 4
; CHECK-NEXT: blr
%rem = and i64 %b, 63
@@ -247,8 +209,6 @@
define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test210:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrab 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -259,8 +219,6 @@
define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test211:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrah 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -271,10 +229,6 @@
define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test212:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsraw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -285,11 +239,6 @@
define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test213:
; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI23_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI23_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrad 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -996,6 +996,13 @@
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
+
+ bool supportsModuloShift(ISD::NodeType Inst,
+ EVT ReturnType) const override {
+ assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+ "Expect a shift instruction");
+ return true;
+ }
};
namespace PPC {
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4651,6 +4651,17 @@
}
}
+  // If the target implicitly masks y in (shl x, y),
+  // fold (shl x, (and y, (numbits(x) - 1))) -> (shl x, y)
+ if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) &&
+ N1->getOpcode() == ISD::AND) {
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+ if (Mask->getZExtValue() == OpSizeInBits - 1) {
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
+ }
+ }
+ }
+
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
@@ -4846,6 +4857,17 @@
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+  // If the target implicitly masks y in (sra x, y),
+  // fold (sra x, (and y, (numbits(x) - 1))) -> (sra x, y)
+ if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) &&
+ N1->getOpcode() == ISD::AND) {
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+ if (Mask->getZExtValue() == OpSizeInBits - 1) {
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
+ }
+ }
+ }
+
// Arithmetic shifting an all-sign-bit value is a no-op.
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
return N0;
@@ -5000,6 +5022,17 @@
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+  // If the target implicitly masks y in (srl x, y),
+  // fold (srl x, (and y, (numbits(x) - 1))) -> (srl x, y)
+ if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) &&
+ N1->getOpcode() == ISD::AND) {
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+ if (Mask->getZExtValue() == OpSizeInBits - 1) {
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
+ }
+ }
+ }
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
Index: llvm/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/include/llvm/Target/TargetLowering.h
+++ llvm/include/llvm/Target/TargetLowering.h
@@ -1903,6 +1903,14 @@
return false;
}
+  // Return true if the target's shift instruction implicitly reduces the
+  // amount, i.e. a << b actually performs a << (b % (sizeof(a) * 8)).
+ virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const {
+ assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+ "Expect a shift instruction");
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
Index: clang/test/CodeGen/builtins-ppc-altivec.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-altivec.c
+++ clang/test/CodeGen/builtins-ppc-altivec.c
@@ -3419,28 +3419,40 @@
/* vec_sl */
res_vsc = vec_sl(vsc, vuc);
-// CHECK: shl <16 x i8>
-// CHECK-LE: shl <16 x i8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vuc = vec_sl(vuc, vuc);
-// CHECK: shl <16 x i8>
-// CHECK-LE: shl <16 x i8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vs = vec_sl(vs, vus);
-// CHECK: shl <8 x i16>
-// CHECK-LE: shl <8 x i16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vus = vec_sl(vus, vus);
-// CHECK: shl <8 x i16>
-// CHECK-LE: shl <8 x i16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vi = vec_sl(vi, vui);
-// CHECK: shl <4 x i32>
-// CHECK-LE: shl <4 x i32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vui = vec_sl(vui, vui);
-// CHECK: shl <4 x i32>
-// CHECK-LE: shl <4 x i32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vsc = vec_vslb(vsc, vuc);
// CHECK: shl <16 x i8>
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -8043,45 +8043,49 @@
/* vec_sl */
-static __inline__ vector signed char __ATTRS_o_ai
-vec_sl(vector signed char __a, vector unsigned char __b) {
- return __a << (vector signed char)__b;
-}
-
static __inline__ vector unsigned char __ATTRS_o_ai
vec_sl(vector unsigned char __a, vector unsigned char __b) {
- return __a << __b;
+ return __a << (__b %
+ (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__));
}
-static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a,
- vector unsigned short __b) {
- return __a << (vector short)__b;
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sl(vector signed char __a, vector unsigned char __b) {
+ return (vector signed char)vec_sl((vector unsigned char)__a, __b);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_sl(vector unsigned short __a, vector unsigned short __b) {
- return __a << __b;
+ return __a << (__b % (vector unsigned short)(sizeof(unsigned short) *
+ __CHAR_BIT__));
}
-static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a,
- vector unsigned int __b) {
- return __a << (vector int)__b;
+static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a,
+ vector unsigned short __b) {
+ return (vector short)vec_sl((vector unsigned short)__a, __b);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_sl(vector unsigned int __a, vector unsigned int __b) {
- return __a << __b;
+ return __a << (__b %
+ (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__));
}
-#ifdef __POWER8_VECTOR__
-static __inline__ vector signed long long __ATTRS_o_ai
-vec_sl(vector signed long long __a, vector unsigned long long __b) {
- return __a << (vector long long)__b;
+static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a,
+ vector unsigned int __b) {
+ return (vector int)vec_sl((vector unsigned int)__a, __b);
}
+#ifdef __POWER8_VECTOR__
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sl(vector unsigned long long __a, vector unsigned long long __b) {
- return __a << __b;
+ return __a << (__b % (vector unsigned long long)(sizeof(unsigned long long) *
+ __CHAR_BIT__));
+}
+
+static __inline__ vector long long __ATTRS_o_ai
+vec_sl(vector long long __a, vector unsigned long long __b) {
+ return (vector long long)vec_sl((vector unsigned long long)__a, __b);
}
#endif