Off list, Segher asked that I break the patch eliminating a shift right when transferring SFmode from a vector register to a GPR register down into smaller chunks. The power7 and power8 instructions that convert values in the double precision format to single precision actually duplicate the 32 bits in the first word and second word (the ISA says the second word is undefined). We are in the process of issuing an update to ISA 3.0 to clarify that this will be the required behavior going forward.
I have broken the patches down to 8 chunks. Some of the patches are just cosmetic fixes for things I noticed while doing the main patch. One patch eliminates the shift. Another fixes up the peephole2 that optimizes putting a SFmode into a union and then doing masking on the value. And the final patch updates the tests that need to be changed. I have verified that each of these sub-patches builds, and after all 8 patches have been applied, I did the full bootstrap and regression test, and like the previous combination patch there were no regressions. If only some of the patches are applied, then there will be 3 regressions until the remaining patches are applied. This is patch #2. Can I check this into the trunk? Compared to the previous patch, I simplified this to use zero_extendsidi2 instead of using an UNSPEC, and I deleted the UNSPEC. 2017-09-25 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/rs6000.md (movsi_from_sf): Adjust code to eliminate doing a 32-bit shift right or vector extract after doing XSCVDPSPN. Use zero_extendsidi2 instead of p8_mfvsrd_4_disf to move the value to the GPRs. (movdi_from_sf_zero_ext): Likewise. (reload_gpr_from_vsxsf): Likewise. (p8_mfvsrd_4_disf): Delete, no longer used. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.md =================================================================== --- gcc/config/rs6000/rs6000.md (revision 253169) +++ gcc/config/rs6000/rs6000.md (working copy) @@ -6806,25 +6806,25 @@ (define_insn "*movsi_internal1_single" ;; needed. ;; MR LWZ LFIWZX LXSIWZX STW -;; STFS STXSSP STXSSPX VSX->GPR MTVSRWZ -;; VSX->VSX +;; STFS STXSSP STXSSPX VSX->GPR VSX->VSX, +;; MTVSRWZ (define_insn_and_split "movsi_from_sf" [(set (match_operand:SI 0 "nonimmediate_operand" "=r, r, ?*wI, ?*wH, m, - m, wY, Z, r, wIwH, - ?wK") + m, wY, Z, r, ?*wIwH, + wIwH") (unspec:SI [(match_operand:SF 1 "input_operand" "r, m, Z, Z, r, - f, wb, wu, wIwH, r, - wK")] + f, wb, wu, wIwH, wIwH, + r")] UNSPEC_SI_FROM_SF)) (clobber (match_scratch:V4SF 2 "=X, X, X, X, X, - X, X, X, wa, X, - wa"))] + X, X, X, wIwH, X, + X"))] "TARGET_NO_SF_SUBREG && (register_operand (operands[0], SImode) @@ -6839,10 +6839,10 @@ (define_insn_and_split "movsi_from_sf" stxssp %1,%0 stxsspx %x1,%y0 # - mtvsrwz %x0,%1 - #" + xscvdpspn %x0,%x1 + mtvsrwz %x0,%1" "&& reload_completed - && register_operand (operands[0], SImode) + && int_reg_operand (operands[0], SImode) && vsx_reg_sfsubreg_ok (operands[1], SFmode)" [(const_int 0)] { @@ -6850,52 +6850,41 @@ (define_insn_and_split "movsi_from_sf" rtx op1 = operands[1]; rtx op2 = operands[2]; rtx op0_di = gen_rtx_REG (DImode, REGNO (op0)); + rtx op2_si = gen_rtx_REG (SImode, REGNO (op2)); emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); - - if (int_reg_operand (op0, SImode)) - { - emit_insn (gen_p8_mfvsrd_4_disf (op0_di, op2)); - emit_insn (gen_lshrdi3 (op0_di, op0_di, GEN_INT (32))); - } - else - { - rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (op1)); - rtx byte_off = VECTOR_ELT_ORDER_BIG ? 
const0_rtx : GEN_INT (12); - emit_insn (gen_vextract4b (op0_di, op1_v16qi, byte_off)); - } - + emit_insn (gen_zero_extendsidi2 (op0_di, op2_si)); DONE; } [(set_attr "type" "*, load, fpload, fpload, store, - fpstore, fpstore, fpstore, mftgpr, mffgpr, - veclogical") + fpstore, fpstore, fpstore, mftgpr, fp, + mffgpr") (set_attr "length" "4, 4, 4, 4, 4, - 4, 4, 4, 12, 4, - 8")]) + 4, 4, 4, 8, 4, + 4")]) ;; movsi_from_sf with zero extension ;; ;; RLDICL LWZ LFIWZX LXSIWZX VSX->GPR -;; MTVSRWZ VSX->VSX +;; VSX->VSX MTVSRWZ (define_insn_and_split "*movdi_from_sf_zero_ext" [(set (match_operand:DI 0 "gpc_reg_operand" "=r, r, ?*wI, ?*wH, r, - wIwH, ?wK") + wK, wIwH") (zero_extend:DI (unspec:SI [(match_operand:SF 1 "input_operand" "r, m, Z, Z, wIwH, - r, wK")] + wIwH, r")] UNSPEC_SI_FROM_SF))) (clobber (match_scratch:V4SF 2 "=X, X, X, X, wa, - X, wa"))] + wIwH, X"))] "TARGET_DIRECT_MOVE_64BIT && (register_operand (operands[0], DImode) @@ -6906,40 +6895,29 @@ (define_insn_and_split "*movdi_from_sf_z lfiwzx %0,%y1 lxsiwzx %x0,%y1 # - mtvsrwz %x0,%1 - #" + # + mtvsrwz %x0,%1" "&& reload_completed + && register_operand (operands[0], DImode) && vsx_reg_sfsubreg_ok (operands[1], SFmode)" [(const_int 0)] { rtx op0 = operands[0]; rtx op1 = operands[1]; rtx op2 = operands[2]; + rtx op2_si = gen_rtx_REG (SImode, reg_or_subregno (op2)); emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); - - if (int_reg_operand (op0, DImode)) - { - emit_insn (gen_p8_mfvsrd_4_disf (op0, op2)); - emit_insn (gen_lshrdi3 (op0, op0, GEN_INT (32))); - } - else - { - rtx op0_si = gen_rtx_REG (SImode, REGNO (op0)); - rtx op1_v16qi = gen_rtx_REG (V16QImode, REGNO (op1)); - rtx byte_off = VECTOR_ELT_ORDER_BIG ? 
const0_rtx : GEN_INT (12); - emit_insn (gen_vextract4b (op0_si, op1_v16qi, byte_off)); - } - + emit_insn (gen_zero_extendsidi2 (op0, op2_si)); DONE; } [(set_attr "type" "*, load, fpload, fpload, mftgpr, - mffgpr, veclogical") + vecexts, mffgpr") (set_attr "length" - "4, 4, 4, 4, 12, - 4, 8")]) + "4, 4, 4, 4, 8, + 8, 4")]) ;; Split a load of a large constant into the appropriate two-insn ;; sequence. @@ -8439,9 +8417,9 @@ (define_insn_and_split "reload_gpr_from_ (define_insn_and_split "reload_gpr_from_vsxsf" [(set (match_operand:SF 0 "register_operand" "=r") - (unspec:SF [(match_operand:SF 1 "register_operand" "wa")] + (unspec:SF [(match_operand:SF 1 "register_operand" "ww")] UNSPEC_P8V_RELOAD_FROM_VSX)) - (clobber (match_operand:V4SF 2 "register_operand" "=wa"))] + (clobber (match_operand:V4SF 2 "register_operand" "=wIwH"))] "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" "#" "&& reload_completed" @@ -8450,23 +8428,15 @@ (define_insn_and_split "reload_gpr_from_ rtx op0 = operands[0]; rtx op1 = operands[1]; rtx op2 = operands[2]; - rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0); + rtx op0_di = gen_rtx_REG (DImode, reg_or_subregno (op0)); + rtx op2_si = gen_rtx_REG (SImode, reg_or_subregno (op2)); emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1)); - emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2)); - emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32))); + emit_insn (gen_zero_extendsidi2 (op0_di, op2_si)); DONE; } - [(set_attr "length" "12") - (set_attr "type" "three")]) - -(define_insn "p8_mfvsrd_4_disf" - [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")] - UNSPEC_P8V_RELOAD_FROM_VSX))] - "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" - "mfvsrd %0,%x1" - [(set_attr "type" "mftgpr")]) + [(set_attr "length" "8") + (set_attr "type" "two")]) ;; Next come the multi-word integer load and store and the load and store