PowerPC ISA 3.0 adds new instructions (LXSIHZX, LXSIBZX, STXSIHX, and STXSIBX)
that allow you to load and zero extend byte and half word values from memory
and to store them back.

This patch is similar in spirit to the patch I wrote years ago for power7 that
generates LFIWAX, LFIWZX, and STFIWX when loading up 32-bit integers to convert
to floating point, and converting floating point to 32-bit integers.

At some point it would be nice to allow various small integers directly into
the floating/vector registers, but I suspect that will take some amount of
effort to implement and tune.  So this patch adds support to avoid using direct
move when converting between small integers and floating point.

If you are curious, out of the 29 SPEC 2006 CPU benchmarks, there are 8
benchmarks (perlbench, cactusADM, gobmk, povray, h264ref, omnetpp, wrf, and
sphinx3) that load small integers from memory and convert them to
floating point.

There are 3 benchmarks (cactusADM, povray, and wrf) that convert floating point
to small integers and store the result.

I have done a bootstrap and make check on a power8 little endian system and
there were no regressions.  Are these patches ok to check
into the trunk, and after a burn-in period, check them into the GCC 6.2 branch?

[gcc]
2016-06-23  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/vsx.md (UNSPEC_P9_MEMORY): New unspec to support
        loading and storing byte/half-word values in the vector registers.
        (vsx_sign_extend_hi_<mode>): Enable the generator function.
        (p9_lxsi<wd>zx): New insns to load zero-extended bytes and
        half-words on ISA 3.0 to the vector registers.
        (p9_stxsi<wd>x): New insns to store bytes and half-words on
        ISA 3.0 from the vector registers.
        * config/rs6000/rs6000.md (FP_ISA3): New iterator to optimize
        converting char/half-word items to floating point on ISA 3.0.
        (float<QHI:mode><FP_ISA3:mode>2): On ISA 3.0 generate the lxsihzx
        and lxsibzx instructions if we are converting an 8-bit or 16-bit
        item from memory to floating point.
        (float<QHI:mode><FP_ISA3:mode>2_internal): Likewise.
        (floatuns<QHI:mode><FP_ISA3:mode>2): Likewise.
        (floatuns<QHI:mode><FP_ISA3:mode>2_internal): Likewise.
        (fix_trunc<SFDF:mode><QHI:mode>2): On ISA 3.0 generate the stxsihx
        and stxsibx instructions to store floating point values converted
        to 8 or 16-bit integers.
        (fixuns_trunc<SFDF:mode><QHI:mode>2): Likewise.

[gcc/testsuite]
2016-06-23  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * gcc.target/powerpc/p9-fpcvt-1.c: New test to test ISA 3.0 load
        byte/half-word to vector registers and store byte/half-word from
        vector register instructions.
        * gcc.target/powerpc/p9-fpcvt-2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md    (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)    (revision 237716)
+++ gcc/config/rs6000/vsx.md    (.../gcc/config/rs6000) (working copy)
@@ -293,6 +293,7 @@ (define_c_enum "unspec"
    UNSPEC_VSX_XVCVDPSXDS
    UNSPEC_VSX_XVCVDPUXDS
    UNSPEC_VSX_SIGN_EXTEND
+   UNSPEC_P9_MEMORY
   ])
 
 ;; VSX moves
@@ -2705,7 +2706,7 @@ (define_insn "vsx_sign_extend_qi_<mode>"
   "vextsb2<wd> %0,%1"
   [(set_attr "type" "vecsimple")])
 
-(define_insn "*vsx_sign_extend_hi_<mode>"
+(define_insn "vsx_sign_extend_hi_<mode>"
   [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
        (unspec:VSINT_84
         [(match_operand:V8HI 1 "vsx_register_operand" "v")]
@@ -2721,3 +2722,24 @@ (define_insn "*vsx_sign_extend_si_v2di"
   "TARGET_P9_VECTOR"
   "vextsw2d %0,%1"
   [(set_attr "type" "vecsimple")])
+
+
+;; ISA 3.0 memory operations
+(define_insn "p9_lxsi<wd>zx"
+  [(set (match_operand:DI 0 "vsx_register_operand" "=wi")
+       (unspec:DI [(zero_extend:DI
+                    (match_operand:QHI 1 "indexed_or_indirect_operand" "Z"))]
+                  UNSPEC_P9_MEMORY))]
+  "TARGET_P9_VECTOR"
+  "lxsi<wd>zx %x0,%y1"
+  [(set_attr "type" "fpload")])
+
+(define_insn "p9_stxsi<wd>x"
+  [(set (match_operand:QHI 0 "reg_or_indexed_operand" "=r,Z")
+       (unspec:QHI [(match_operand:DI 1 "vsx_register_operand" "wi,wi")]
+                   UNSPEC_P9_MEMORY))]
+  "TARGET_P9_VECTOR"
+  "@
+   mfvsrd %0,%x1
+   stxsi<wd>x %x1,%y0"
+  [(set_attr "type" "mffgpr,fpstore")])
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)    (revision 237716)
+++ gcc/config/rs6000/rs6000.md (.../gcc/config/rs6000) (working copy)
@@ -506,6 +506,12 @@ (define_mode_iterator FLOAT128 [(KF "TAR
                                (IF "TARGET_FLOAT128")
                                (TF "TARGET_LONG_DOUBLE_128")])
 
+; Iterator for ISA 3.0 supported floating point types
+(define_mode_iterator FP_ISA3 [SF
+                              DF
+                              (KF "FLOAT128_IEEE_P (KFmode)")
+                              (TF "FLOAT128_IEEE_P (TFmode)")])
+
 ; SF/DF suffix for traditional floating instructions
 (define_mode_attr Ftrad                [(SF "s") (DF "")])
 
@@ -872,7 +878,6 @@ (define_insn_and_split "*zero_extendsi<m
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
-
 (define_insn "extendqi<mode>2"
   [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r")
        (sign_extend:EXTQI (match_operand:QI 1 "gpc_reg_operand" "r")))]
@@ -5188,6 +5193,107 @@ (define_insn_and_split "*floatunssidf2_i
   [(set_attr "length" "20")
    (set_attr "type" "fp")])
 
+;; ISA 3.0 adds instructions lxsi[bh]zx to directly load QImode and HImode to
+;; vector registers.  At the moment, QI/HImode are not allowed in floating
+;; point or vector registers, so we use UNSPEC's to use the load byte and
+;; half-word instructions.
+
+(define_expand "float<QHI:mode><FP_ISA3:mode>2"
+  [(parallel [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "")
+                  (float:FP_ISA3
+                   (match_operand:QHI 1 "input_operand" "")))
+             (clobber (match_scratch:DI 2 ""))
+             (clobber (match_scratch:DI 3 ""))])]
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
+{
+  if (MEM_P (operands[1]))
+    operands[1] = rs6000_address_for_fpconvert (operands[1]);
+})
+
+(define_insn_and_split "*float<QHI:mode><FP_ISA3:mode>2_internal"
+  [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>")
+       (float:FP_ISA3
+        (match_operand:QHI 1 "reg_or_indexed_operand" "r,Z")))
+   (clobber (match_scratch:DI 2 "=wi,v"))
+   (clobber (match_scratch:DI 3 "=r,X"))]
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64
+   && TARGET_UPPER_REGS_DI"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx result = operands[0];
+  rtx input = operands[1];
+  rtx di = operands[2];
+
+  if (!MEM_P (input))
+    {
+      rtx tmp = operands[3];
+      emit_insn (gen_extend<QHI:mode>di2 (tmp, input));
+      emit_move_insn (di, tmp);
+    }
+  else
+    {
+      machine_mode vmode;
+      rtx di_vector;
+
+      emit_insn (gen_p9_lxsi<QHI:wd>zx (di, input));
+
+      if (<MODE>mode == QImode)
+       vmode = V16QImode;
+      else if (<MODE>mode == HImode)
+       vmode = V8HImode;
+      else
+       gcc_unreachable ();
+
+      di_vector = gen_rtx_REG (vmode, REGNO (di));
+      emit_insn (gen_vsx_sign_extend_<QHI:mode>_di (di, di_vector));
+    }
+
+  emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di));
+  DONE;
+})
+
+(define_expand "floatuns<QHI:mode><FP_ISA3:mode>2"
+  [(parallel [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "")
+                  (unsigned_float:FP_ISA3
+                   (match_operand:QHI 1 "input_operand" "")))
+             (clobber (match_scratch:DI 2 ""))
+             (clobber (match_scratch:DI 3 ""))])]
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
+{
+  if (MEM_P (operands[1]))
+    operands[1] = rs6000_address_for_fpconvert (operands[1]);
+})
+
+(define_insn_and_split "*floatuns<QHI:mode><FP_ISA3:mode>2_internal"
+  [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>")
+       (unsigned_float:FP_ISA3
+        (match_operand:QHI 1 "reg_or_indexed_operand" "r,Z")))
+   (clobber (match_scratch:DI 2 "=wi,wi"))
+   (clobber (match_scratch:DI 3 "=r,X"))]
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx result = operands[0];
+  rtx input = operands[1];
+  rtx di = operands[2];
+  rtx tmp = operands[3];
+
+  if (!MEM_P (input))
+    {
+      emit_insn (gen_zero_extend<QHI:mode>di2 (tmp, input));
+      emit_move_insn (di, tmp);
+    }
+  else
+    emit_insn (gen_p9_lxsi<QHI:wd>zx (di, input));
+
+  emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di));
+  DONE;
+})
+
 (define_expand "fix_trunc<mode>si2"
   [(set (match_operand:SI 0 "gpc_reg_operand" "")
        (fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))]
@@ -5296,6 +5402,23 @@ (define_insn "*fix_trunc<mode>di2_fctidz
    xscvdpsxds %x0,%x1"
   [(set_attr "type" "fp")])
 
+(define_expand "fix_trunc<SFDF:mode><QHI:mode>2"
+  [(use (match_operand:QHI 0 "rs6000_nonimmediate_operand" ""))
+   (use (match_operand:SFDF 1 "vsx_register_operand" ""))]
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx di_tmp = gen_reg_rtx (DImode);
+
+  if (MEM_P (op0))
+    op0 = rs6000_address_for_fpconvert (op0);
+
+  emit_insn (gen_fctiwz_<SFDF:mode> (di_tmp, op1));
+  emit_insn (gen_p9_stxsi<QHI:wd>x (op0, di_tmp));
+  DONE;
+})
+
 (define_expand "fixuns_trunc<mode>si2"
   [(set (match_operand:SI 0 "gpc_reg_operand" "")
        (unsigned_fix:SI (match_operand:SFDF 1 "gpc_reg_operand" "")))]
@@ -5368,6 +5491,23 @@ (define_insn "*fixuns_trunc<mode>di2_fct
    xscvdpuxds %x0,%x1"
   [(set_attr "type" "fp")])
 
+(define_expand "fixuns_trunc<SFDF:mode><QHI:mode>2"
+  [(use (match_operand:QHI 0 "rs6000_nonimmediate_operand" ""))
+   (use (match_operand:SFDF 1 "vsx_register_operand" ""))]
+  "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64"
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx di_tmp = gen_reg_rtx (DImode);
+
+  if (MEM_P (op0))
+    op0 = rs6000_address_for_fpconvert (op0);
+
+  emit_insn (gen_fctiwuz_<SFDF:mode> (di_tmp, op1));
+  emit_insn (gen_p9_stxsi<QHI:wd>x (op0, di_tmp));
+  DONE;
+})
+
 ; Here, we use (set (reg) (unspec:DI [(fix:SI ...)] UNSPEC_FCTIWZ))
 ; rather than (set (subreg:SI (reg)) (fix:SI ...))
 ; because the first makes it clear that operand 0 is not live
Index: gcc/testsuite/gcc.target/powerpc/p9-fpcvt-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-fpcvt-1.c       (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)     (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-fpcvt-1.c       (.../gcc/testsuite/gcc.target/powerpc)  (revision 237749)
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+void sc (signed char    *p, double x) { *p = x; }
+void uc (unsigned char  *p, double x) { *p = x; }
+void ss (signed short   *p, double x) { *p = x; }
+void us (unsigned short *p, double x) { *p = x; }
+
+/* { dg-final { scan-assembler     "stxsibx" } } */
+/* { dg-final { scan-assembler     "stxsihx" } } */
+/* { dg-final { scan-assembler-not "mfvsrd"  } } */
+/* { dg-final { scan-assembler-not "mfvsrwz" } } */
+/* { dg-final { scan-assembler-not "mtvsrd"  } } */
+/* { dg-final { scan-assembler-not "mtvsrwa" } } */
+/* { dg-final { scan-assembler-not "mtvsrwz" } } */
Index: gcc/testsuite/gcc.target/powerpc/p9-fpcvt-2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/p9-fpcvt-2.c       (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)     (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/p9-fpcvt-2.c       (.../gcc/testsuite/gcc.target/powerpc)  (revision 237749)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+double sc (signed char    *p) { return (double)*p; }
+double uc (unsigned char  *p) { return (double)*p; }
+double ss (signed short   *p) { return (double)*p; }
+double us (unsigned short *p) { return (double)*p; }
+
+/* { dg-final { scan-assembler     "lxsibzx"  } } */
+/* { dg-final { scan-assembler     "lxsihzx"  } } */
+/* { dg-final { scan-assembler     "vextsb2d" } } */
+/* { dg-final { scan-assembler     "vextsh2d" } } */
+/* { dg-final { scan-assembler-not "mfvsrd"   } } */
+/* { dg-final { scan-assembler-not "mfvsrwz"  } } */
+/* { dg-final { scan-assembler-not "mtvsrd"   } } */
+/* { dg-final { scan-assembler-not "mtvsrwa"  } } */
+/* { dg-final { scan-assembler-not "mtvsrwz"  } } */

Reply via email to