work182-test)] Add power9 and power10 float to logical optimizations.

Michael Meissner via Gcc-cvs Fri, 08 Nov 2024 10:18:50 -0800

https://gcc.gnu.org/g:b6cef51eaf142eb14e32544d982db0d186f348d9


commit b6cef51eaf142eb14e32544d982db0d186f348d9
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Fri Nov 8 13:17:45 2024 -0500

    Add power9 and power10 float to logical optimizations.
    
    2024-11-08  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            PR target/117487
            * config/rs6000/rs6000.cc (sf_logical_op_p): Delete.
            * config/rs6000/rs6000.h (sf_logical_op_p): Likewise.
            * config/rs6000/vsx.md (SFmode logical peephole): Update comments in
            the original code that supports power8.  Add a new define_peephole2 to
            do the optimization on power9/power10.

Diff:
---
 gcc/config/rs6000/rs6000.cc |  62 -----------------
 gcc/config/rs6000/rs6000.h  |   1 -
 gcc/config/rs6000/vsx.md    | 161 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 155 insertions(+), 69 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index e1ec9591a0eb..aa67e7256bb9 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -29564,68 +29564,6 @@ rs6000_opaque_type_invalid_use_p (gimple *stmt)
   return false;
 }
 
-bool
-sf_logical_op_p (rtx operands[])
-{
-  if (!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE)
-    {
-      fprintf (stderr, "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE\n");
-      return false;
-    }
-
-   /* The REG_P (xxx) tests prevents SUBREG's, which allows us to use REGNO
-      to compare registers, when the mode is different.  */
-  if (!REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D]))
-    {
-      fprintf (stderr, "REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D]))\n");
-      return false;
-    }
-
-  if (!REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D]))
-    {
-      fprintf (stderr, "!REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])\n");
-      return false;
-    }
-
-  if (!REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D]))
-    {
-      fprintf (stderr, "!REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])\n");
-      return false;
-    }
-
-  if (!REG_P (operands[SFBOOL_BOOL_A2])
-       && !CONST_INT_P (operands[SFBOOL_BOOL_A2]))
-    {
-      fprintf (stderr, "!REG_P (operands[SFBOOL_BOOL_A2]) && !CONST_INT_P (operands[SFBOOL_BOOL_A2])\n");
-      return false;
-    }
-
-  if (!REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
-       && !peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
-    {
-      fprintf (stderr, "!REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D]) && !peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D])\n");
-      return false;
-    }
-
-  if (((REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
-       || (REG_P (operands[SFBOOL_BOOL_A2])
-           && REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A2])))
-       && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
-       && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
-          || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
-       && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])))
-    {
-      fprintf (stderr, "last test passed\n");
-      return true;
-    }
-  else
-    {
-      fprintf (stderr, "last test failed\n");
-      return false;
-    }
-}
-    
-
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rs6000.h"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 499e80fda08d..197005af5195 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2526,4 +2526,3 @@ enum {
 #undef ARCH_EXPAND
 #endif /* GCC_HWINT_H.  */
 
-extern bool sf_logical_op_p (rtx operands[]);
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcf8e2a60462..bfa1516768bc 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6262,7 +6262,7 @@
    (SFBOOL_MFVSR_A              3)             ;; move to gpr src
    (SFBOOL_BOOL_D               4)             ;; and/ior/xor dest
    (SFBOOL_BOOL_A1              5)             ;; and/ior/xor arg1
-   (SFBOOL_BOOL_A2              6)             ;; and/ior/xor arg1
+   (SFBOOL_BOOL_A2              6)             ;; and/ior/xor arg2
    (SFBOOL_SHL_D                7)             ;; shift left dest
    (SFBOOL_SHL_A                8)             ;; shift left arg
    (SFBOOL_MTVSR_D              9)             ;; move to vecter dest
@@ -6302,18 +6302,18 @@
 ;; GPR, and instead move the integer mask value to the vector register after a
 ;; shift and do the VSX logical operation.
 
-;; The insns for dealing with SFmode in GPR registers looks like:
+;; On power8, the insns for dealing with SFmode in GPR registers look like:
 ;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
 ;;
-;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
 ;;
-;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
+;; (set (reg:DI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
 ;;
 ;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
 ;;
 ;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
 ;;
-;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
+;; (set (reg:SF reg7) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))
 
 (define_peephole2
   [(match_scratch:DI SFBOOL_TMP_GPR "r")
@@ -6338,7 +6338,24 @@
    (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
        (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]
 
-  "sf_logical_op_p (operands)"
+  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
+   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
+      to compare registers when the modes are different.  */
+   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
+   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
+   && REG_P (operands[SFBOOL_SHL_A])   && REG_P (operands[SFBOOL_MTVSR_D])
+   && (REG_P (operands[SFBOOL_BOOL_A2])
+       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
+   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
+       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
+   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
+       || (REG_P (operands[SFBOOL_BOOL_A2])
+          && REGNO (operands[SFBOOL_MFVSR_D])
+               == REGNO (operands[SFBOOL_BOOL_A2])))
+   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
+   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
+       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
+   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
   [(set (match_dup SFBOOL_TMP_GPR)
        (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
                   (const_int 32)))
@@ -6377,6 +6394,138 @@
   operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
 })
 
+;; Constants for SFbool optimization on power9/power10
+(define_constants
+  [(SFBOOL2_TMP_VSX_V4SI        0)             ;; vector temporary (V4SI)
+   (SFBOOL2_TMP_GPR_SI          1)             ;; GPR temporary (SI)
+   (SFBOOL2_MFVSR_D             2)             ;; move to gpr dest (DI)
+   (SFBOOL2_MFVSR_A             3)             ;; move to gpr src (SI)
+   (SFBOOL2_BOOL_D              4)             ;; and/ior/xor dest (SI)
+   (SFBOOL2_BOOL_A1             5)             ;; and/ior/xor arg1 (SI)
+   (SFBOOL2_BOOL_A2             6)             ;; and/ior/xor arg2 (SI)
+   (SFBOOL2_SPLAT_D             7)             ;; splat dest (V4SI)
+   (SFBOOL2_MTVSR_D             8)             ;; move/splat to VSX dest.
+   (SFBOOL2_MTVSR_A             9)             ;; move/splat to VSX arg.
+   (SFBOOL2_MFVSR_A_V4SI       10)             ;; MFVSR_A as V4SI
+   (SFBOOL2_MTVSR_D_V4SI       11)             ;; MTVSR_D as V4SI
+   (SFBOOL2_XXSPLTW            12)])           ;; 1 or 2 for XXSPLTW
+
+;; On power9/power10, the code is different because we have a splat 32-bit
+;; operation that does a direct move to the FPR/vector registers (MTVSRWS).
+;;
+;; On power9/power10, the insns for dealing with SFmode in GPR registers
+;; look like:
+;;
+;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
+;;
+;; (set (reg:DI reg3) (zero_extend:DI (reg:SI reg2)))
+;;
+;; (set (reg:SI reg4) (and:SI (reg:SI reg3) (reg:SI mask)))
+;;
+;; (set (reg:V4SI reg5) (vec_duplicate:V4SI (reg:SI reg4)))
+;;
+;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg5)] UNSPEC_VSX_CVSPDPN))
+
+;; The VSX temporary needs to be an Altivec register in case we are trying to
+;; do and/ior/xor of -16..15 and we want to use VSPLTISW to load the constant.
+;;
+;; The GPR temporary is only used if we are trying to do a logical operation
+;; with a constant outside of the -16..15 range on a power9.  Otherwise, we can
+;; load the constant directly into the VSX temporary register.
+
+(define_peephole2
+  [(match_scratch:V4SI SFBOOL2_TMP_VSX_V4SI "v")
+   (match_scratch:SI SFBOOL2_TMP_GPR_SI "r")
+
+   ;; Zero_extend and direct move
+   (set (match_operand:DI SFBOOL2_MFVSR_D "int_reg_operand")
+       (zero_extend:DI
+        (match_operand:SI SFBOOL2_MFVSR_A "vsx_register_operand")))
+
+   ;; AND/IOR/XOR operation on int
+   (set (match_operand:SI SFBOOL2_BOOL_D "int_reg_operand")
+       (and_ior_xor:SI
+        (match_operand:SI SFBOOL2_BOOL_A1 "int_reg_operand")
+        (match_operand:SI SFBOOL2_BOOL_A2 "reg_or_cint_operand")))
+
+   ;; Splat sfbool result to vector register
+   (set (match_operand:V4SI SFBOOL2_SPLAT_D "vsx_register_operand")
+       (vec_duplicate:V4SI
+        (match_dup SFBOOL2_BOOL_D)))]
+
+  "TARGET_POWERPC64 && TARGET_P9_VECTOR
+   && REG_P (operands[SFBOOL2_MFVSR_D])
+   && REG_P (operands[SFBOOL2_BOOL_A1])
+   && (REGNO (operands[SFBOOL2_MFVSR_D]) == REGNO (operands[SFBOOL2_BOOL_A1])
+       || (REG_P (operands[SFBOOL2_BOOL_A2])
+           && (REGNO (operands[SFBOOL2_MFVSR_D])
+               == REGNO (operands[SFBOOL2_BOOL_A2]))))
+   && peep2_reg_dead_p (3, operands[SFBOOL2_MFVSR_D])
+   && peep2_reg_dead_p (4, operands[SFBOOL2_BOOL_D])"
+
+  ;; Either (set (reg:SI xxx) (reg:SI yyy))    or
+  ;;        (set (reg:V4SI xxx) (const_vector (parallel [c, c, c, c])))
+  [(set (match_dup SFBOOL2_MTVSR_D)
+       (match_dup SFBOOL2_MTVSR_A))
+
+   ;; And/ior/xor on vector registers
+   (set (match_dup SFBOOL2_TMP_VSX_V4SI)
+       (and_ior_xor:V4SI
+        (match_dup SFBOOL2_MFVSR_A_V4SI)
+        (match_dup SFBOOL2_TMP_VSX_V4SI)))
+
+   ;; XXSPLTW t,r,r,1
+   (set (match_dup SFBOOL2_SPLAT_D)
+       (vec_duplicate:V4SI
+        (vec_select:SI
+         (match_dup SFBOOL2_TMP_VSX_V4SI)
+         (parallel [(match_dup SFBOOL2_XXSPLTW)]))))]
+{
+  rtx mfvsr_d = operands[SFBOOL2_MFVSR_D];
+  rtx bool_a1 = operands[SFBOOL2_BOOL_A1];
+  rtx bool_a2 = operands[SFBOOL2_BOOL_A2];
+  rtx bool_arg = (rtx_equal_p (mfvsr_d, bool_a1) ? bool_a2 : bool_a1);
+  int regno_mfvsr_a = REGNO (operands[SFBOOL2_MFVSR_A]);
+  int regno_tmp_vsx = REGNO (operands[SFBOOL2_TMP_VSX_V4SI]);
+
+  /* If the logical operation is a constant, form the constant in a vector
+     register.  */
+  if (CONST_INT_P (bool_arg))
+    {
+      HOST_WIDE_INT value = INTVAL (bool_arg);
+
+      /* See if we can directly load the constant, either by VSPLTISW or by
+         XXSPLTIW on power10.  */
+
+      if (IN_RANGE (value, -16, 15) || TARGET_PREFIXED)
+       {
+         rtvec cv = gen_rtvec (4, bool_arg, bool_arg, bool_arg, bool_arg);
+         operands[SFBOOL2_MTVSR_D] = gen_rtx_REG (V4SImode, regno_tmp_vsx);
+         operands[SFBOOL2_MTVSR_A] = gen_rtx_CONST_VECTOR (V4SImode, cv);
+       }
+
+      else
+       {
+         /* We need to load up the constant to a GPR and move it to a
+            vector register.  */
+         rtx tmp_gpr = operands[SFBOOL2_TMP_GPR_SI];
+         emit_move_insn (tmp_gpr, bool_arg);
+         operands[SFBOOL2_MTVSR_D] = gen_rtx_REG (SImode, regno_tmp_vsx);
+         operands[SFBOOL2_MTVSR_A] = tmp_gpr;
+       }
+    }
+  else
+    {
+      /* Mask is in a register, move it to a vector register.  */
+      operands[SFBOOL2_MTVSR_D] = gen_rtx_REG (SImode, regno_tmp_vsx);
+      operands[SFBOOL2_MTVSR_A] = bool_arg;
+    }
+
+    operands[SFBOOL2_TMP_VSX_V4SI] = gen_rtx_REG (V4SImode, regno_tmp_vsx);
+    operands[SFBOOL2_MFVSR_A_V4SI] = gen_rtx_REG (V4SImode, regno_mfvsr_a);
+    operands[SFBOOL2_XXSPLTW] = GEN_INT (BYTES_BIG_ENDIAN ? 1 : 2);
+})
+
 ;; Support signed/unsigned long long to float conversion vectorization.
 ;; Note that any_float (pc) here is just for code attribute <su>.
 (define_expand "vec_pack<su>_float_v2di"

[gcc(refs/users/meissner/heads/work182-test)] Add power9 and power10 float to logical optimizations.

Reply via email to