timshen created this revision.
timshen added reviewers: kbarton, hfinkel, iteratee, echristo, bogner.
timshen added subscribers: llvm-commits, cfe-commits.
Herald added subscribers: amehsan, nemanjai, mehdi_amini.
For a << b (as the original vec_sl does), the behavior is undefined in C
if b >= sizeof(a) * 8. The Power shift instructions, however, do define the
behavior: they compute a << (b % (sizeof(a) * 8)).
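As a scalar illustration of that semantics (shl_modulo is a hypothetical
standalone helper, not part of the patch):

  #include <cstdint>

  // Well-defined for any b: the modulo keeps the shift amount in range,
  // matching the reduction Power's vector shifts apply in hardware.
  static uint8_t shl_modulo(uint8_t a, uint8_t b) {
    return (uint8_t)(a << (b % (sizeof(a) * 8)));
  }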
This patch changes altivec.h to emit a << (b % (sizeof(a) * 8)) explicitly,
so that the source-level semantics match those of the instructions. A new
DAG combine then folds the resulting mask-and-shift sequence back into a
single shift instruction.
This patch handles left shifts only. Right shifts are more complicated,
since arithmetic and logical right shifts must be handled separately.
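The fold is sound because masking the amount with numbits - 1 is exactly
the reduction a modulo-shift instruction applies, so the mask is redundant.
A minimal standalone check of that equivalence (a hypothetical test, not
part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t x = 0xDEADBEEFu;
    for (uint32_t y = 0; y < 256; ++y)
      // The IR before the fold computes x << (y & 31); an slw-style
      // instruction computes x << (y % 32) for any y. The two agree.
      assert((x << (y & 31)) == (x << (y % 32)));
    return 0;
  }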
https://reviews.llvm.org/D28037
Files:
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-altivec.c
llvm/include/llvm/Target/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/PowerPC/shift_mask.ll
Index: llvm/test/CodeGen/PowerPC/shift_mask.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/shift_mask.ll
+++ llvm/test/CodeGen/PowerPC/shift_mask.ll
@@ -5,7 +5,6 @@
define i8 @test000(i8 %a, i8 %b) {
; CHECK-LABEL: test000:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31
; CHECK-NEXT: slw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i8 %b, 7
@@ -16,7 +15,6 @@
define i16 @test001(i16 %a, i16 %b) {
; CHECK-LABEL: test001:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: slw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i16 %b, 15
@@ -27,7 +25,6 @@
define i32 @test002(i32 %a, i32 %b) {
; CHECK-LABEL: test002:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31
; CHECK-NEXT: slw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i32 %b, 31
@@ -38,7 +35,6 @@
define i64 @test003(i64 %a, i64 %b) {
; CHECK-LABEL: test003:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
; CHECK-NEXT: sld 3, 3, 4
; CHECK-NEXT: blr
%rem = and i64 %b, 63
@@ -49,8 +45,6 @@
define <16 x i8> @test010(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test010:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslb 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -61,8 +55,6 @@
define <8 x i16> @test011(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test011:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslh 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -73,10 +65,6 @@
define <4 x i32> @test012(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test012:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vslw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -87,11 +75,6 @@
define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test013:
; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI7_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI7_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsld 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -103,7 +86,6 @@
; CHECK-LABEL: test100:
; CHECK: # BB#0:
; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31
-; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i8 %b, 7
@@ -115,7 +97,6 @@
; CHECK-LABEL: test101:
; CHECK: # BB#0:
; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31
-; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i16 %b, 15
@@ -126,7 +107,6 @@
define i32 @test102(i32 %a, i32 %b) {
; CHECK-LABEL: test102:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31
; CHECK-NEXT: srw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i32 %b, 31
@@ -137,7 +117,6 @@
define i64 @test103(i64 %a, i64 %b) {
; CHECK-LABEL: test103:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
; CHECK-NEXT: srd 3, 3, 4
; CHECK-NEXT: blr
%rem = and i64 %b, 63
@@ -148,8 +127,6 @@
define <16 x i8> @test110(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test110:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrb 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -160,8 +137,6 @@
define <8 x i16> @test111(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test111:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrh 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -172,10 +147,6 @@
define <4 x i32> @test112(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test112:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -186,11 +157,6 @@
define <2 x i64> @test113(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test113:
; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI15_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI15_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrd 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
@@ -202,7 +168,6 @@
; CHECK-LABEL: test200:
; CHECK: # BB#0:
; CHECK-NEXT: extsb 3, 3
-; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31
; CHECK-NEXT: sraw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i8 %b, 7
@@ -214,7 +179,6 @@
; CHECK-LABEL: test201:
; CHECK: # BB#0:
; CHECK-NEXT: extsh 3, 3
-; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31
; CHECK-NEXT: sraw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i16 %b, 15
@@ -225,7 +189,6 @@
define i32 @test202(i32 %a, i32 %b) {
; CHECK-LABEL: test202:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31
; CHECK-NEXT: sraw 3, 3, 4
; CHECK-NEXT: blr
%rem = and i32 %b, 31
@@ -236,7 +199,6 @@
define i64 @test203(i64 %a, i64 %b) {
; CHECK-LABEL: test203:
; CHECK: # BB#0:
-; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31
; CHECK-NEXT: srad 3, 3, 4
; CHECK-NEXT: blr
%rem = and i64 %b, 63
@@ -247,8 +209,6 @@
define <16 x i8> @test210(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test210:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisb 4, 7
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrab 2, 2, 3
; CHECK-NEXT: blr
%rem = and <16 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
@@ -259,8 +219,6 @@
define <8 x i16> @test211(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test211:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltish 4, 15
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrah 2, 2, 3
; CHECK-NEXT: blr
%rem = and <8 x i16> %b, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
@@ -271,10 +229,6 @@
define <4 x i32> @test212(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test212:
; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 4, -16
-; CHECK-NEXT: vspltisw 5, 15
-; CHECK-NEXT: vsubuwm 4, 5, 4
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsraw 2, 2, 3
; CHECK-NEXT: blr
%rem = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
@@ -285,11 +239,6 @@
define <2 x i64> @test213(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test213:
; CHECK: # BB#0:
-; CHECK-NEXT: addis 3, 2, .LCPI23_0@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI23_0@toc@l
-; CHECK-NEXT: lxvd2x 0, 0, 3
-; CHECK-NEXT: xxswapd 36, 0
-; CHECK-NEXT: xxland 35, 35, 36
; CHECK-NEXT: vsrad 2, 2, 3
; CHECK-NEXT: blr
%rem = and <2 x i64> %b, <i64 63, i64 63>
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -996,6 +996,13 @@
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
+
+ bool supportsModuloShift(ISD::NodeType Inst,
+ EVT ReturnType) const override {
+ assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+ "Expect a shift instruction");
+ return true;
+ }
};
namespace PPC {
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4651,6 +4651,17 @@
}
}
+  // If the target implicitly masks y in (shl x, y),
+  // fold (shl x, (and y, (numbits(x) - 1))) -> (shl x, y)
+ if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) &&
+ N1->getOpcode() == ISD::AND) {
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+ if (Mask->getZExtValue() == OpSizeInBits - 1) {
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1->getOperand(0));
+ }
+ }
+ }
+
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
@@ -4846,6 +4857,17 @@
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+  // If the target implicitly masks y in (sra x, y),
+  // fold (sra x, (and y, (numbits(x) - 1))) -> (sra x, y)
+ if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) &&
+ N1->getOpcode() == ISD::AND) {
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+ if (Mask->getZExtValue() == OpSizeInBits - 1) {
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, N1->getOperand(0));
+ }
+ }
+ }
+
// Arithmetic shifting an all-sign-bit value is a no-op.
if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
return N0;
@@ -5000,6 +5022,17 @@
EVT VT = N0.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
+  // If the target implicitly masks y in (srl x, y),
+  // fold (srl x, (and y, (numbits(x) - 1))) -> (srl x, y)
+ if (TLI.supportsModuloShift((ISD::NodeType)N->getOpcode(), VT) &&
+ N1->getOpcode() == ISD::AND) {
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) {
+ if (Mask->getZExtValue() == OpSizeInBits - 1) {
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1->getOperand(0));
+ }
+ }
+ }
+
// fold vector ops
if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
Index: llvm/include/llvm/Target/TargetLowering.h
===================================================================
--- llvm/include/llvm/Target/TargetLowering.h
+++ llvm/include/llvm/Target/TargetLowering.h
@@ -1903,6 +1903,14 @@
return false;
}
+  // Return true if the target's shift instruction implicitly reduces the
+  // amount, i.e. a << b actually performs a << (b % (sizeof(a) * 8)).
+ virtual bool supportsModuloShift(ISD::NodeType Inst, EVT ReturnType) const {
+ assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
+ "Expect a shift instruction");
+ return false;
+ }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
Index: clang/test/CodeGen/builtins-ppc-altivec.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-altivec.c
+++ clang/test/CodeGen/builtins-ppc-altivec.c
@@ -3419,28 +3419,40 @@
/* vec_sl */
res_vsc = vec_sl(vsc, vuc);
-// CHECK: shl <16 x i8>
-// CHECK-LE: shl <16 x i8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vuc = vec_sl(vuc, vuc);
-// CHECK: shl <16 x i8>
-// CHECK-LE: shl <16 x i8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vs = vec_sl(vs, vus);
-// CHECK: shl <8 x i16>
-// CHECK-LE: shl <8 x i16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vus = vec_sl(vus, vus);
-// CHECK: shl <8 x i16>
-// CHECK-LE: shl <8 x i16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vi = vec_sl(vi, vui);
-// CHECK: shl <4 x i32>
-// CHECK-LE: shl <4 x i32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vui = vec_sl(vui, vui);
-// CHECK: shl <4 x i32>
-// CHECK-LE: shl <4 x i32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
res_vsc = vec_vslb(vsc, vuc);
// CHECK: shl <16 x i8>
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -8043,45 +8043,49 @@
/* vec_sl */
-static __inline__ vector signed char __ATTRS_o_ai
-vec_sl(vector signed char __a, vector unsigned char __b) {
- return __a << (vector signed char)__b;
-}
-
static __inline__ vector unsigned char __ATTRS_o_ai
vec_sl(vector unsigned char __a, vector unsigned char __b) {
- return __a << __b;
+ return __a << (__b %
+ (vector unsigned char)(sizeof(unsigned char) * __CHAR_BIT__));
}
-static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a,
- vector unsigned short __b) {
- return __a << (vector short)__b;
+static __inline__ vector signed char __ATTRS_o_ai
+vec_sl(vector signed char __a, vector unsigned char __b) {
+ return (vector signed char)vec_sl((vector unsigned char)__a, __b);
}
static __inline__ vector unsigned short __ATTRS_o_ai
vec_sl(vector unsigned short __a, vector unsigned short __b) {
- return __a << __b;
+ return __a << (__b % (vector unsigned short)(sizeof(unsigned short) *
+ __CHAR_BIT__));
}
-static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a,
- vector unsigned int __b) {
- return __a << (vector int)__b;
+static __inline__ vector short __ATTRS_o_ai vec_sl(vector short __a,
+ vector unsigned short __b) {
+ return (vector short)vec_sl((vector unsigned short)__a, __b);
}
static __inline__ vector unsigned int __ATTRS_o_ai
vec_sl(vector unsigned int __a, vector unsigned int __b) {
- return __a << __b;
+ return __a << (__b %
+ (vector unsigned int)(sizeof(unsigned int) * __CHAR_BIT__));
}
-#ifdef __POWER8_VECTOR__
-static __inline__ vector signed long long __ATTRS_o_ai
-vec_sl(vector signed long long __a, vector unsigned long long __b) {
- return __a << (vector long long)__b;
+static __inline__ vector int __ATTRS_o_ai vec_sl(vector int __a,
+ vector unsigned int __b) {
+ return (vector int)vec_sl((vector unsigned int)__a, __b);
}
+#ifdef __POWER8_VECTOR__
static __inline__ vector unsigned long long __ATTRS_o_ai
vec_sl(vector unsigned long long __a, vector unsigned long long __b) {
- return __a << __b;
+ return __a << (__b % (vector unsigned long long)(sizeof(unsigned long long) *
+ __CHAR_BIT__));
+}
+
+static __inline__ vector long long __ATTRS_o_ai
+vec_sl(vector long long __a, vector unsigned long long __b) {
+ return (vector long long)vec_sl((vector unsigned long long)__a, __b);
}
#endif