Hi!

BSWAP is documented as:

@item (bswap:@var{m} @var{x})
Represents the value @var{x} with the order of bytes reversed, carried out
in mode @var{m}, which must be a fixed-point machine mode.
The mode of @var{x} must be @var{m} or @code{VOIDmode}.

The fixed-point is something used widely in rtl.texi and is very confusing
now that we have FIXED_POINT_TYPE floats, I assume it talks about integral
modes or, because it is also used with vector modes, about INTEGRAL_MODE_P
modes.  My understanding is that bswap on a vector integral mode is meant to
be bswap of each element individually.

The rs6000 backend uses bswap not just on scalar integral modes and
vector integral modes, but also on V4SF and V2DF, but ICEs in simplify-rtx.c
where we don't expect bswap to be used on SF/DFmode (vector bswap is handled
as bswap of each element).

The following patch adjusts the rs6000 backend to use well defined bswaps
on corresponding integral modes instead (and also does what we've done in
i386 backend years ago, avoid subreg on the lhs because it breaks combine
attempts to optimize it).

Or do we want to change documentation and simplify-rtx.c and whatever else
in the middle-end to also make floating point bswaps well defined?  How
exactly, as bswaps of the underlying bits, i.e. for simplify-rtx.c as
subreg of the constant to a corresponding integral mode, bswap in it and
subreg back?  IMHO changing the rs6000 backend is easier and defining what
exactly means a floating point bswap may be hard to understand.

Bootstrapped/regtested on powerpc64{,le}-linux, on powerpc64-linux including
-m64/-m32, ok for trunk?

2018-02-09  Jakub Jelinek  <ja...@redhat.com>

        PR target/84226
        * config/rs6000/vsx.md (p9_xxbrq_v16qi): Change input operand
        constraint from =wa to wa.  Avoid a subreg on the output operand,
        instead use a pseudo and subreg it in a move.
        (p9_xxbrd_<mode>): Changed to ...
        (p9_xxbrd_v2di): ... this insn, without VSX_D iterator.
        (p9_xxbrd_v2df): New expander.
        (p9_xxbrw_<mode>): Changed to ...
        (p9_xxbrw_v4si): ... this insn, without VSX_W iterator.
        (p9_xxbrw_v4sf): New expander.

        * gcc.target/powerpc/pr84226.c: New test.

--- gcc/config/rs6000/vsx.md.jj 2018-01-22 23:57:21.299779544 +0100
+++ gcc/config/rs6000/vsx.md    2018-02-08 17:21:13.197642776 +0100
@@ -5311,35 +5311,60 @@ (define_insn "p9_xxbrq_v1ti"
 
 (define_expand "p9_xxbrq_v16qi"
   [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
-   (use (match_operand:V16QI 1 "vsx_register_operand" "=wa"))]
+   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
   "TARGET_P9_VECTOR"
 {
-  rtx op0 = gen_lowpart (V1TImode, operands[0]);
+  rtx op0 = gen_reg_rtx (V1TImode);
   rtx op1 = gen_lowpart (V1TImode, operands[1]);
   emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
+  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
   DONE;
 })
 
 ;; Swap all bytes in each 64-bit element
-(define_insn "p9_xxbrd_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-       (bswap:VSX_D (match_operand:VSX_D 1 "vsx_register_operand" "wa")))]
+(define_insn "p9_xxbrd_v2di"
+  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
+       (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
   "TARGET_P9_VECTOR"
   "xxbrd %x0,%x1"
   [(set_attr "type" "vecperm")])
 
+(define_expand "p9_xxbrd_v2df"
+  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
+   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
+  "TARGET_P9_VECTOR"
+{
+  rtx op0 = gen_reg_rtx (V2DImode);
+  rtx op1 = gen_lowpart (V2DImode, operands[1]);
+  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
+  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
+  DONE;
+})
+
 ;; Swap all bytes in each 32-bit element
-(define_insn "p9_xxbrw_<mode>"
-  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
-       (bswap:VSX_W (match_operand:VSX_W 1 "vsx_register_operand" "wa")))]
+(define_insn "p9_xxbrw_v4si"
+  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
+       (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
   "TARGET_P9_VECTOR"
   "xxbrw %x0,%x1"
   [(set_attr "type" "vecperm")])
 
+(define_expand "p9_xxbrw_v4sf"
+  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
+   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
+  "TARGET_P9_VECTOR"
+{
+  rtx op0 = gen_reg_rtx (V4SImode);
+  rtx op1 = gen_lowpart (V4SImode, operands[1]);
+  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
+  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
+  DONE;
+})
+
 ;; Swap all bytes in each element of vector
 (define_expand "revb_<mode>"
-  [(set (match_operand:VEC_REVB 0 "vsx_register_operand")
-       (bswap:VEC_REVB (match_operand:VEC_REVB 1 "vsx_register_operand")))]
+  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
+   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
   ""
 {
   if (TARGET_P9_VECTOR)
--- gcc/testsuite/gcc.target/powerpc/pr84226.c.jj       2018-02-08 
17:26:54.124611674 +0100
+++ gcc/testsuite/gcc.target/powerpc/pr84226.c  2018-02-08 17:26:43.315612659 
+0100
@@ -0,0 +1,6 @@
+/* PR target/84226 */
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-misc -O1" } */
+
+#include "builtins-revb-runnable.c"

        Jakub

Reply via email to