Will, Segher:
Patch 4 adds support for the vector 128-bit integer shift instructions
for the V1TI type.
This patch also moves the VSX_TI iterator from vsx.md to vector.md and
renames it to VEC_TI. All uses of VSX_TI are updated to use VEC_TI.
Re-tested the patch on Power 9 with no regression errors.
Carl
------------------------------------------------
gcc/ChangeLog
2020-10-12 Carl Love <[email protected]>
* config/rs6000/altivec.md (altivec_vslq, altivec_vsrq):
Rename to altivec_vslq_<mode>, altivec_vsrq_<mode>, mode VEC_TI.
* config/rs6000/vector.md (VEC_TI): New mode iterator, moved from
vsx.md where it was named VSX_TI.
(vashlv1ti3): Change to vashl<mode>3, mode VEC_TI.
(vlshrv1ti3): Change to vlshr<mode>3, mode VEC_TI.
* config/rs6000/vsx.md (VSX_TI): Remove define_mode_iterator. Update
uses of VSX_TI to VEC_TI.
gcc/testsuite/ChangeLog
2020-10-12 Carl Love <[email protected]>
* gcc.target/powerpc/int_128bit-runnable.c: Add shift_right, shift_left
tests.
---
gcc/config/rs6000/altivec.md | 16 ++++-----
gcc/config/rs6000/vector.md | 27 ++++++++-------
gcc/config/rs6000/vsx.md | 33 +++++++++----------
.../gcc.target/powerpc/int_128bit-runnable.c | 16 +++++++--
4 files changed, 52 insertions(+), 40 deletions(-)
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index e9623bc3285..9b70830ae00 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2220,10 +2220,10 @@
"vsl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_vslq"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_insn "altivec_vslq_<mode>"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
+ (match_operand:VEC_TI 2 "vsx_register_operand" "v")))]
"TARGET_POWER10"
/* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
"vslq %0,%1,%2"
@@ -2237,10 +2237,10 @@
"vsr<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_vsrq"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_insn "altivec_vsrq_<mode>"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
+ (match_operand:VEC_TI 2 "vsx_register_operand" "v")))]
"TARGET_POWER10"
/* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
"vsrq %0,%1,%2"
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index c2ae74fbe92..b2f17063ac9 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
;; Vector int modes
(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
+;; 128-bit int modes
+(define_mode_iterator VEC_TI [V1TI TI])
+
;; Vector int modes for parity
(define_mode_iterator VEC_IP [V8HI
V4SI
@@ -1627,17 +1630,17 @@
"")
;; No immediate version of this 128-bit instruction
-(define_expand "vashlv1ti3"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_expand "vashl<mode>3"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand")
+ (match_operand:VEC_TI 2 "vsx_register_operand")))]
"TARGET_POWER10"
{
/* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */
- rtx tmp = gen_reg_rtx (V1TImode);
+ rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
- emit_insn (gen_altivec_vslq (operands[0], operands[1], tmp));
+ emit_insn(gen_altivec_vslq_<mode> (operands[0], operands[1], tmp));
DONE;
})
@@ -1650,17 +1653,17 @@
"")
;; No immediate version of this 128-bit instruction
-(define_expand "vlshrv1ti3"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_expand "vlshr<mode>3"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand")
+ (match_operand:VEC_TI 2 "vsx_register_operand")))]
"TARGET_POWER10"
{
/* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */
- rtx tmp = gen_reg_rtx (V1TImode);
+ rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
- emit_insn (gen_altivec_vsrq (operands[0], operands[1], tmp));
+ emit_insn(gen_altivec_vsrq_<mode> (operands[0], operands[1], tmp));
DONE;
})
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 5b6a0bd728a..87f96ffcc4c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -37,9 +37,6 @@
TI
V1TI])
-;; Iterator for 128-bit integer types that go in a single vector register.
-(define_mode_iterator VSX_TI [TI V1TI])
-
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])
@@ -944,9 +941,9 @@
;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type.
(define_insn "*vsx_le_permute_<mode>"
- [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
- (rotate:VSX_TI
- (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
+ [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
+ (rotate:VEC_TI
+ (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"@
@@ -960,10 +957,10 @@
(set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
- [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
- (rotate:VSX_TI
- (rotate:VSX_TI
- (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa")
+ (rotate:VEC_TI
+ (rotate:VEC_TI
+ (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa")
(const_int 64))
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -1031,11 +1028,11 @@
;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
- [(set (match_operand:VSX_TI 0 "int_reg_operand")
- (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
+ [(set (match_operand:VEC_TI 0 "int_reg_operand")
+ (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand")
(const_int 64)))
- (set (match_operand:VSX_TI 2 "int_reg_operand")
- (rotate:VSX_TI (match_dup 0)
+ (set (match_operand:VEC_TI 2 "int_reg_operand")
+ (rotate:VEC_TI (match_dup 0)
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
&& (rtx_equal_p (operands[0], operands[2])
@@ -1043,11 +1040,11 @@
[(set (match_dup 2) (match_dup 1))])
(define_peephole2
- [(set (match_operand:VSX_TI 0 "int_reg_operand")
- (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
+ [(set (match_operand:VEC_TI 0 "int_reg_operand")
+ (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand")
(const_int 64)))
- (set (match_operand:VSX_TI 2 "memory_operand")
- (rotate:VSX_TI (match_dup 0)
+ (set (match_operand:VEC_TI 2 "memory_operand")
+ (rotate:VEC_TI (match_dup 0)
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
&& peep2_reg_dead_p (2, operands[0])"
diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
index 9d281850ee3..15b4fbf0d95 100644
--- a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
@@ -53,6 +53,18 @@ void print_i128(__int128_t val)
void abort (void);
+__attribute__((noinline))
+__int128_t shift_right (__int128_t a, __uint128_t b)
+{
+ return a >> b;
+}
+
+__attribute__((noinline))
+__int128_t shift_left (__int128_t a, __uint128_t b)
+{
+ return a << b;
+}
+
int main ()
{
int i, result_int;
@@ -141,7 +153,7 @@ int main ()
#endif
}
- arg1 = 3;
+ arg1 = vec_result[0];
uarg2 = 4;
expected_result = arg1*16;
@@ -225,7 +237,7 @@ int main ()
#endif
}
- arg1 = 48;
+ arg1 = vec_uresult[0];
uarg2 = 4;
expected_result = arg1/16;
--
2.17.1