[GCC8 patch], PowerPC PRs 79038, 79202, and 79203: Improve small integer conversions to/from floating point

Michael Meissner Wed, 26 Apr 2017 11:34:55 -0700

This patch is meant for GCC 8.  While GCC 7 has branched, I plan to wait
until it is actually released before applying these patches.  But if you would
prefer me to submit them sooner, I can do it.


This patch, addresses the remaining issues on 3 PRs (79038, 79202, and 79203).
Since these issues were inter-related, I am tackling them via a combined patch.
PR 79038 (integer <-> IEEE 128 fp conversions) is pretty much answered with
previous changes.

This patch addresses PR 79202 (use vector instructions for sign extension of
32-bit integers on pre-ISA 3.0 systems (ISA 3.0 has an appropriate sign
extension instruction).  It also addresses PR 79203 (have fp conversion know
that 32-bit integers can go in vector registers on ISA 2.07, and 8/16-bit
integers can go in vector registers on ISA 3.0), and replaces UNSPECs with more
normal moves, etc.

On Spec 2006, it gives minor (1%) gains on the int benchmark astar, and also 1%
gains on the floating point povray and sphinx3 benchmarks.  There were no
significant regressions in performance with these patches on the other
benchmarks in Spec 2006.

Once gcc7 is released, can I check this into the gcc8 trunk?  I would like to
back port these changes to gcc 7 for GCC 7.2 as well.  These patches depend on
the small integer support, which is not in GCC 6, so they would not be
appropriate for GCC 6.

[gcc]
2017-04-26  Michael Meissner  <[email protected]>

        PR target/79038
        PR target/79202
        PR target/79203
        * config/rs6000/rs6000.md (u code attribute): Add FIX and
        UNSIGNED_FIX.
        (extendsi<mode>2): Add support for doing sign extension via
        VUPKHSW and XXPERMDI if the value is in Altivec registers and we
        don't have ISA 3.0 instructions.
        (extendsi<mode>2 splitter): Likewise.
        (fix_trunc<mode>si2): If we are at ISA 2.07 (VSX small integer),
        generate the normal insns since SImode can now go in vector
        registers.  Disallow the special UNSPECs needed for previous
        machines to hide SImode being used.  Add new insns
        fctiw{,w}_<mode>_smallint if SImode can go in vector registers.
        (fix_trunc<mode>si2_stfiwx): Likewise.
        (fix_trunc<mode>si2_internal): Likewise.
        (fixuns_trunc<mode>si2): Likewise.
        (fixuns_trunc<mode>si2_stfiwx): Likewise.
        (fctiw<u>z_<mode>_smallint): Likewise.
        (fctiw<u>z_<mode>_mem): New combiner pattern to prevent conversion
        of floating point to 32-bit integer from doing a direct move to
        the GPR registers to do a store.
        (fctiwz_<mode>): Break long line.

[gcc/testsuite]
2017-04-26  Michael Meissner  <[email protected]>

        PR target/79038
        PR target/79202
        PR target/79203
        * gcc.target/powerpc/ppc-round3.c: New test.
        * gcc.target/powerpc/ppc-round2.c: Update expected code.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: [email protected], phone: +1 (978) 899-4797

Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 246711)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -566,7 +566,9 @@ (define_code_iterator any_fix               [fix unsi
 (define_code_iterator any_float                [float unsigned_float])
 
 (define_code_attr u  [(sign_extend     "")
-                     (zero_extend      "u")])
+                     (zero_extend      "u")
+                     (fix              "")
+                     (unsigned_fix     "u")])
 
 (define_code_attr su [(sign_extend     "s")
                      (zero_extend      "u")
@@ -1027,8 +1029,8 @@ (define_insn_and_split "*extendhi<mode>2
 
 
 (define_insn "extendsi<mode>2"
-  [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r,wl,wu,wj,wK")
-       (sign_extend:EXTSI (match_operand:SI 1 "lwa_operand" "Y,r,Z,Z,r,wK")))]
+  [(set (match_operand:EXTSI 0 "gpc_reg_operand" "=r,r,wl,wu,wj,wK,wH")
+       (sign_extend:EXTSI (match_operand:SI 1 "lwa_operand" 
"Y,r,Z,Z,r,wK,wH")))]
   ""
   "@
    lwa%U1%X1 %0,%1
@@ -1036,9 +1038,38 @@ (define_insn "extendsi<mode>2"
    lfiwax %0,%y1
    lxsiwax %x0,%y1
    mtvsrwa %x0,%1
-   vextsw2d %0,%1"
-  [(set_attr "type" "load,exts,fpload,fpload,mffgpr,vecexts")
-   (set_attr "sign_extend" "yes")])
+   vextsw2d %0,%1
+   #"
+  [(set_attr "type" "load,exts,fpload,fpload,mffgpr,vecexts,vecperm")
+   (set_attr "sign_extend" "yes")
+   (set_attr "length" "4,4,4,4,4,4,8")])
+
+(define_split
+  [(set (match_operand:DI 0 "altivec_register_operand")
+       (sign_extend:DI (match_operand:SI 1 "altivec_register_operand")))]
+  "TARGET_VSX_SMALL_INTEGER && TARGET_P8_VECTOR && !TARGET_P9_VECTOR
+   && reload_completed"
+  [(const_int 0)]
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  int dest_regno = REGNO (dest);
+  int src_regno = REGNO (src);
+  rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno);
+  rtx src_v4si = gen_rtx_REG (V4SImode, src_regno);
+
+  if (VECTOR_ELT_ORDER_BIG)
+    {
+      emit_insn (gen_altivec_vupkhsw (dest_v2di, src_v4si));
+      emit_insn (gen_vsx_xxspltd_v2di (dest_v2di, dest_v2di, const1_rtx));
+    }
+  else
+    {
+      emit_insn (gen_altivec_vupklsw (dest_v2di, src_v4si));
+      emit_insn (gen_vsx_xxspltd_v2di (dest_v2di, dest_v2di, const0_rtx));
+    }
+  DONE;
+})
 
 (define_insn_and_split "*extendsi<mode>2_dot"
   [(set (match_operand:CC 2 "cc_reg_operand" "=x,?y")
@@ -5570,7 +5601,7 @@ (define_expand "fix_trunc<mode>si2"
   "TARGET_HARD_FLOAT && ((TARGET_FPRS && <TARGET_FLOAT>) || <E500_CONVERT>)"
   "
 {
-  if (!<E500_CONVERT>)
+  if (!<E500_CONVERT> && !TARGET_VSX_SMALL_INTEGER)
     {
       rtx src = force_reg (<MODE>mode, operands[1]);
 
@@ -5596,7 +5627,8 @@ (define_insn_and_split "fix_trunc<mode>s
    (clobber (match_scratch:DI 2 "=d"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && (<MODE>mode != SFmode || TARGET_SINGLE_FLOAT)
-   && TARGET_STFIWX && can_create_pseudo_p ()"
+   && TARGET_STFIWX && can_create_pseudo_p ()
+   && !TARGET_VSX_SMALL_INTEGER"
   "#"
   ""
   [(pc)]
@@ -5637,7 +5669,8 @@ (define_insn_and_split "fix_trunc<mode>s
        (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d,<rreg>")))
    (clobber (match_operand:DI 2 "gpc_reg_operand" "=1,d"))
    (clobber (match_operand:DI 3 "offsettable_mem_operand" "=o,o"))]
-  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && !TARGET_VSX_SMALL_INTEGER"
   "#"
   ""
   [(pc)]
@@ -5721,7 +5754,7 @@ (define_expand "fixuns_trunc<mode>si2"
        || <E500_CONVERT>)"
   "
 {
-  if (!<E500_CONVERT>)
+  if (!<E500_CONVERT> && !TARGET_VSX_SMALL_INTEGER)
     {
       emit_insn (gen_fixuns_trunc<mode>si2_stfiwx (operands[0], operands[1]));
       DONE;
@@ -5733,7 +5766,8 @@ (define_insn_and_split "fixuns_trunc<mod
        (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "d")))
    (clobber (match_scratch:DI 2 "=d"))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && <TARGET_FLOAT> && TARGET_FCTIWUZ
-   && TARGET_STFIWX && can_create_pseudo_p ()"
+   && TARGET_STFIWX && can_create_pseudo_p ()
+   && !TARGET_VSX_SMALL_INTEGER"
   "#"
   ""
   [(pc)]
@@ -5818,13 +5852,43 @@ (define_insn_and_split "*fixuns_trunc<SF
     }
   DONE;
 })
-; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
-; rather than (set (subreg:SI (reg)) (fix:SI ...))
-; because the first makes it clear that operand 0 is not live
-; before the instruction.
+
+;; If -mvsx-small-integer, we can represent the FIX operation directly.  On
+;; older machines, we have to use an UNSPEC to produce a SImode and move it
+;; to another location, since SImode is not allowed in vector registers.
+(define_insn "*fctiw<u>z_<mode>_smallint"
+  [(set (match_operand:SI 0 "vsx_register_operand" "=d,wi")
+       (any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && TARGET_VSX_SMALL_INTEGER"
+  "@
+   fctiw<u>z %0,%1
+   xscvdp<su>xws %x0,%x1"
+  [(set_attr "type" "fp")])
+
+;; Combiner pattern to prevent moving the result of converting a floating point
+;; value to 32-bit integer to GPR in order to save it.
+(define_insn_and_split "*fctiw<u>z_<mode>_mem"
+  [(set (match_operand:SI 0 "memory_operand" "=Z")
+       (any_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "wa")))
+   (clobber (match_scratch:SI 2 "=wa"))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && TARGET_VSX_SMALL_INTEGER"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 2)
+       (any_fix:SI (match_dup 1)))
+   (set (match_dup 0)
+       (match_dup 2))])
+
+;; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
+;; rather than (set (subreg:SI (reg)) (fix:SI ...))
+;; because the first makes it clear that operand 0 is not live
+;; before the instruction.
 (define_insn "fctiwz_<mode>"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wi")
-       (unspec:DI [(fix:SI (match_operand:SFDF 1 "gpc_reg_operand" 
"<Ff>,<Fv>"))]
+       (unspec:DI [(fix:SI
+                    (match_operand:SFDF 1 "gpc_reg_operand" "<Ff>,<Fv>"))]
                   UNSPEC_FCTIWZ))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
   "@
Index: gcc/testsuite/gcc.target/powerpc/ppc-round2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/ppc-round2.c       (revision 246711)
+++ gcc/testsuite/gcc.target/powerpc/ppc-round2.c       (working copy)
@@ -3,18 +3,21 @@
 /* { dg-require-effective-target powerpc_p8vector_ok } */
 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power8" } } */
 /* { dg-options "-O2 -mcpu=power8" } */
-/* { dg-final { scan-assembler-times "fcfid "      2 } } */
-/* { dg-final { scan-assembler-times "fcfids "     2 } } */
-/* { dg-final { scan-assembler-times "fctiwuz \|xscvdpuxws " 2 } } */
+/* { dg-final { scan-assembler-times "fcfid \|xscvsxddp "    2 } } */
+/* { dg-final { scan-assembler-times "fcfids \|xscvsxdsp "   2 } } */
 /* { dg-final { scan-assembler-times "fctiwz \|xscvdpsxws "  2 } } */
-/* { dg-final { scan-assembler-times "mfvsrd "     4 } } */
-/* { dg-final { scan-assembler-times "mtvsrwa "    2 } } */
-/* { dg-final { scan-assembler-times "mtvsrwz "    2 } } */
-/* { dg-final { scan-assembler-not   "lwz"           } } */
-/* { dg-final { scan-assembler-not   "lfiwax "       } } */
-/* { dg-final { scan-assembler-not   "lfiwzx "       } } */
-/* { dg-final { scan-assembler-not   "stw"           } } */
-/* { dg-final { scan-assembler-not   "stfiwx "       } } */
+/* { dg-final { scan-assembler-times "fctiwuz \|xscvdpuxws " 2 } } */
+/* { dg-final { scan-assembler-times {\mmfvsrwz\M}           2 } } */
+/* { dg-final { scan-assembler-times {\mmtvsrwz\M}           2 } } */
+/* { dg-final { scan-assembler-times {\mvupkhsw\M}           2 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M}          2 } } */
+/* { dg-final { scan-assembler-not   {\mmfvsrd\M}              } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrwa\M}             } } */
+/* { dg-final { scan-assembler-not   {\mlwz\M}                 } } */
+/* { dg-final { scan-assembler-not   {\mlfiwax\M}              } } */
+/* { dg-final { scan-assembler-not   {\mlfiwzx\M}              } } */
+/* { dg-final { scan-assembler-not   {\mstw\M}                 } } */
+/* { dg-final { scan-assembler-not   {\mstfiwx\M}              } } */
 
 /* Make sure we don't have loads/stores to the GPR unit.  */
 double
Index: gcc/testsuite/gcc.target/powerpc/ppc-round3.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/ppc-round3.c       (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/ppc-round3.c       (revision 0)
@@ -0,0 +1,45 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power8" } } */
+/* { dg-options "-O2 -mcpu=power9" } */
+/* { dg-final { scan-assembler-times "fcfid \|xscvsxddp "    2 } } */
+/* { dg-final { scan-assembler-times "fcfids \|xscvsxdsp "   2 } } */
+/* { dg-final { scan-assembler-times "fctiwz \|xscvdpsxws "  2 } } */
+/* { dg-final { scan-assembler-times "fctiwuz \|xscvdpuxws " 2 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M}          2 } } */
+/* { dg-final { scan-assembler-times {\mxxextractuw\M}       2 } } */
+/* { dg-final { scan-assembler-not   {\mmfvsrd\M}              } } */
+/* { dg-final { scan-assembler-not   {\mmfvsrwz\M}             } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrwa\M}             } } */
+/* { dg-final { scan-assembler-not   {\mmtvsrwz\M}             } } */
+/* { dg-final { scan-assembler-not   {\mlwz\M}                 } } */
+/* { dg-final { scan-assembler-not   {\mlfiwax\M}              } } */
+/* { dg-final { scan-assembler-not   {\mlfiwzx\M}              } } */
+/* { dg-final { scan-assembler-not   {\mstw\M}                 } } */
+/* { dg-final { scan-assembler-not   {\mstfiwx\M}              } } */
+
+/* Make sure we don't have loads/stores to the GPR unit.  */
+double
+round_double_int (double a)
+{
+  return (double)(int)a;
+}
+
+float
+round_float_int (float a)
+{
+  return (float)(int)a;
+}
+
+double
+round_double_uint (double a)
+{
+  return (double)(unsigned int)a;
+}
+
+float
+round_float_uint (float a)
+{
+  return (float)(unsigned int)a;
+}

[GCC8 patch], PowerPC PRs 79038, 79202, and 79203: Improve small integer conversions to/from floating point

Reply via email to