Will, Segher:
Patch 4 adds support for the vector 128-bit integer shift instructions for
the V1TI type.
The changes from the previous version include:
Fixed up the change log entry issues noted by Will.
Regression tests were rerun on Power 9 LE with no regressions.
Please let me know if it looks OK to commit to mainline.
Carl
---------------------------------------------
gcc/ChangeLog
2020-10-05 Carl Love <[email protected]>
* config/rs6000/altivec.md (altivec_vslq, altivec_vsrq):
Rename to altivec_vslq_<mode>, altivec_vsrq_<mode>, mode VEC_TI.
* config/rs6000/vector.md (VEC_TI): New name for VSX_TI iterator.
(vashlv1ti3): Change to vashl<mode>3, mode VEC_TI.
(vlshrv1ti3): Change to vlshr<mode>3, mode VEC_TI.
* config/rs6000/vsx.md (VSX_TI): Remove define_mode_iterator. Update
uses to VEC_TI.
gcc/testsuite/ChangeLog
2020-10-05 Carl Love <[email protected]>
* gcc.target/powerpc/int_128bit-runnable.c: Add shift_right, shift_left
tests.
---
gcc/config/rs6000/altivec.md | 16 ++++-----
gcc/config/rs6000/vector.md | 27 ++++++++-------
gcc/config/rs6000/vsx.md | 33 +++++++++----------
.../gcc.target/powerpc/int_128bit-runnable.c | 16 +++++++--
4 files changed, 52 insertions(+), 40 deletions(-)
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 34a4731342a..5db3de3cc9f 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2219,10 +2219,10 @@
"vsl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_vslq"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_insn "altivec_vslq_<mode>"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
+ (match_operand:VEC_TI 2 "vsx_register_operand" "v")))]
"TARGET_POWER10"
/* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
"vslq %0,%1,%2"
@@ -2236,10 +2236,10 @@
"vsr<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_vsrq"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_insn "altivec_vsrq_<mode>"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
+ (match_operand:VEC_TI 2 "vsx_register_operand" "v")))]
"TARGET_POWER10"
/* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
"vsrq %0,%1,%2"
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 0cca4232619..3ea3a91845a 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
;; Vector int modes
(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
+;; 128-bit int modes
+(define_mode_iterator VEC_TI [V1TI TI])
+
;; Vector int modes for parity
(define_mode_iterator VEC_IP [V8HI
V4SI
@@ -1627,17 +1630,17 @@
"")
;; No immediate version of this 128-bit instruction
-(define_expand "vashlv1ti3"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_expand "vashl<mode>3"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand")
+ (match_operand:VEC_TI 2 "vsx_register_operand")))]
"TARGET_POWER10"
{
/* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */
- rtx tmp = gen_reg_rtx (V1TImode);
+ rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn(gen_xxswapd_v1ti (tmp, operands[2]));
- emit_insn(gen_altivec_vslq (operands[0], operands[1], tmp));
+ emit_insn(gen_altivec_vslq_<mode> (operands[0], operands[1], tmp));
DONE;
})
@@ -1650,17 +1653,17 @@
"")
;; No immediate version of this 128-bit instruction
-(define_expand "vlshrv1ti3"
- [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
- (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
- (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_expand "vlshr<mode>3"
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+ (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand")
+ (match_operand:VEC_TI 2 "vsx_register_operand")))]
"TARGET_POWER10"
{
/* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */
- rtx tmp = gen_reg_rtx (V1TImode);
+ rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn(gen_xxswapd_v1ti (tmp, operands[2]));
- emit_insn(gen_altivec_vsrq (operands[0], operands[1], tmp));
+ emit_insn(gen_altivec_vsrq_<mode> (operands[0], operands[1], tmp));
DONE;
})
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 5b6a0bd728a..87f96ffcc4c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -37,9 +37,6 @@
TI
V1TI])
-;; Iterator for 128-bit integer types that go in a single vector register.
-(define_mode_iterator VSX_TI [TI V1TI])
-
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])
@@ -944,9 +941,9 @@
;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type.
(define_insn "*vsx_le_permute_<mode>"
- [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
- (rotate:VSX_TI
- (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
+ [(set (match_operand:VEC_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
+ (rotate:VEC_TI
+ (match_operand:VEC_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"@
@@ -960,10 +957,10 @@
(set_attr "type" "vecperm,vecload,vecstore,*,load,store")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
- [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
- (rotate:VSX_TI
- (rotate:VSX_TI
- (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
+ [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=wa,wa")
+ (rotate:VEC_TI
+ (rotate:VEC_TI
+ (match_operand:VEC_TI 1 "vsx_register_operand" "0,wa")
(const_int 64))
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -1031,11 +1028,11 @@
;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
- [(set (match_operand:VSX_TI 0 "int_reg_operand")
- (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
+ [(set (match_operand:VEC_TI 0 "int_reg_operand")
+ (rotate:VEC_TI (match_operand:VEC_TI 1 "memory_operand")
(const_int 64)))
- (set (match_operand:VSX_TI 2 "int_reg_operand")
- (rotate:VSX_TI (match_dup 0)
+ (set (match_operand:VEC_TI 2 "int_reg_operand")
+ (rotate:VEC_TI (match_dup 0)
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
&& (rtx_equal_p (operands[0], operands[2])
@@ -1043,11 +1040,11 @@
[(set (match_dup 2) (match_dup 1))])
(define_peephole2
- [(set (match_operand:VSX_TI 0 "int_reg_operand")
- (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
+ [(set (match_operand:VEC_TI 0 "int_reg_operand")
+ (rotate:VEC_TI (match_operand:VEC_TI 1 "int_reg_operand")
(const_int 64)))
- (set (match_operand:VSX_TI 2 "memory_operand")
- (rotate:VSX_TI (match_dup 0)
+ (set (match_operand:VEC_TI 2 "memory_operand")
+ (rotate:VEC_TI (match_dup 0)
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
&& peep2_reg_dead_p (2, operands[0])"
diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
index ec3dcf3dff1..25e2c9d1af4 100644
--- a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
@@ -53,6 +53,18 @@ void print_i128(__int128_t val)
void abort (void);
+__attribute__((noinline))
+__int128_t shift_right (__int128_t a, __uint128_t b)
+{
+ return a >> b;
+}
+
+__attribute__((noinline))
+__int128_t shift_left (__int128_t a, __uint128_t b)
+{
+ return a << b;
+}
+
int main ()
{
int i, result_int;
@@ -141,7 +153,7 @@ int main ()
#endif
}
- arg1 = 3;
+ arg1 = vec_result[0];
uarg2 = 4;
expected_result = arg1*16;
@@ -225,7 +237,7 @@ int main ()
#endif
}
- arg1 = 48;
+ arg1 = vec_uresult[0];
uarg2 = 4;
expected_result = arg1/16;
--
2.17.1