gcc-17-future)] Add miscellaneous -mcpu=future instructions

Michael Meissner via Gcc-cvs Wed, 01 Jul 2026 08:27:33 -0700

https://gcc.gnu.org/g:1710e94d9af8d36508f09de2077bf0d64db38888


commit 1710e94d9af8d36508f09de2077bf0d64db38888
Author: Michael Meissner <[email protected]>
Date:   Wed Jul 1 10:54:14 2026 -0400

    Add miscellaneous -mcpu=future instructions
    
    2026-07-01  Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): Enable using load
            vector pair and store vector pair instructions for memory copy
            operations.
            (POWERPC_MASKS): Make the option for enabling using load vector pair and
            store vector pair operations set and reset when the PowerPC processor is
            changed.
            * config/rs6000/rs6000.cc (rs6000_machine_from_flags): Disable
            -mblock-ops-vector-pair from influencing .machine selection.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/future-3.c: New test.
    
    2026-07-01   Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/rs6000.md (gtu_geu): New code iterator.
            (subfus<mode>3_<code>): New insns.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/saturate-subtract-1.c: New test.
            * gcc.target/powerpc/saturate-subtract-2.c: Likewise.
            * lib/target-supports.exp (check_effective_target_powerpc_future_ok):
            New target test.
    
    2026-07-01  Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/altivec.md (xvrlw): New insn.
            * config/rs6000/rs6000.h (TARGET_XVRLW): New macro.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vector-rotate-left.c: New test.
    
    2026-07-01   Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/rs6000-string.cc (expand_block_move): Do not generate
            lxvl and stxvl on 32-bit.
            * config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl with
            the shift count automatically used in the insn.
            (lxvrl): New insn for -mcpu=future.
            (lxvrll): Likewise.
            (stxvl): If -mcpu=future, generate the stxvl with the shift count
            automatically used in the insn.
            (stxvrl): New insn for -mcpu=future.
            (stxvrll): Likewise.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/lxvrl.c: New test.
    
    2026-07-01  Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/constraints.md (eU): New constraint.
            (eV): Likewise.
            * config/rs6000/predicates.md (paddis_operand): New predicate.
            (paddis_paddi_operand): Likewise.
            (add_cint_operand): Add paddis support.
            (reg_or_add_cint_operand): Add support for adds that can be done with
            paddis and paddi/addi.
            (add_operand): Add support for adds that can be done with paddis, but
            not paddis + paddi/addi.
            * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add support for adds
            that can be done with paddis and also paddis combined with paddi/addi.
            (print_operand): Add %B<n> for paddis support.
            * config/rs6000/rs6000.h (TARGET_PADDIS): New macro.
            (SIGNED_INTEGER_64BIT_P): Likewise.
            * config/rs6000/rs6000.md (add<mode>3 define_expand): Add paddis
            support.
            (*add<mode>3 define_insn): Likewise.
            (movdi_internal64): Likewise.
            (movdi splitter): New splitter for paddis + paddi/addi.
            * doc/md.texi (PowerPC constraints): Add eU and eV documentation.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/prefixed-addis.c: New test.

Diff:
---
 gcc/config/rs6000/altivec.md                       |  14 ++
 gcc/config/rs6000/constraints.md                   |  10 ++
 gcc/config/rs6000/predicates.md                    |  79 +++++++++++-
 gcc/config/rs6000/rs6000-cpus.def                  |  15 ++-
 gcc/config/rs6000/rs6000-string.cc                 |   1 +
 gcc/config/rs6000/rs6000.cc                        |  23 +++-
 gcc/config/rs6000/rs6000.h                         |  15 +++
 gcc/config/rs6000/rs6000.md                        | 143 ++++++++++++++++-----
 gcc/config/rs6000/vsx.md                           | 122 +++++++++++++++---
 gcc/doc/md.texi                                    |   6 +
 gcc/testsuite/gcc.target/powerpc/future-3.c        |  22 ++++
 gcc/testsuite/gcc.target/powerpc/lxvrl.c           |  32 +++++
 gcc/testsuite/gcc.target/powerpc/prefixed-addis.c  |  24 ++++
 .../gcc.target/powerpc/saturate-subtract-1.c       |  39 ++++++
 .../gcc.target/powerpc/saturate-subtract-2.c       |  40 ++++++
 .../gcc.target/powerpc/vector-rotate-left.c        |  34 +++++
 gcc/testsuite/lib/target-supports.exp              |  13 ++
 17 files changed, 570 insertions(+), 62 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index dbe24c450e14..95433ce500a4 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2006,6 +2006,20 @@
 }
   [(set_attr "type" "vecperm")])
 
+;; -mcpu=future adds a vector rotate left word variant.  There is no vector
+;; byte/half-word/double-word/quad-word rotate left.  This insn occurs before
+;; altivec_vrl<VI_char> and will match for -mcpu=future, while other cpus will
+;; match the generic insn.
+(define_insn "*xvrlw"
+  [(set (match_operand:V4SI 0 "register_operand" "=v,wa")
+       (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa")
+                    (match_operand:V4SI 2 "register_operand" "v,wa")))]
+  "TARGET_XVRLW"
+  "@
+   vrlw %0,%1,%2
+   xvrlw %x0,%x1,%x2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vrl<VI_char>"
   [(set (match_operand:VI2 0 "register_operand" "=v")
         (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 0d1cde5bd4de..0169a7b85222 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -222,6 +222,16 @@
   "An IEEE 128-bit constant that can be loaded into VSX registers."
   (match_operand 0 "easy_vector_constant_ieee128"))
 
+(define_constraint "eU"
+  "@internal integer constant that can be loaded with paddis"
+  (and (match_code "const_int")
+       (match_operand 0 "paddis_operand")))
+
+(define_constraint "eV"
+  "@A signed integer constant that paddis and paddi instructions generate."
+  (and (match_code "const_int")
+       (match_operand 0 "paddis_paddi_operand")))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 7f8d316648cc..73cc356e9834 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -383,6 +383,68 @@
   return SIGNED_INTEGER_34BIT_P (INTVAL (op));
 })
 
+;; Return 1 if op is a 64-bit constant that uses the paddis instruction
+(define_predicate "paddis_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS)
+    return false;
+
+  if (mode != VOIDmode && mode != DImode)
+    return false;
+
+  HOST_WIDE_INT value = INTVAL (op);
+
+  if (!SIGNED_INTEGER_64BIT_P (value))
+    return false;
+
+  /* If paddi alone can handle the number, don't return true.  */
+  if (SIGNED_INTEGER_34BIT_P (value))
+    return false;
+
+  /* If the bottom 32-bits are non-zero, paddis alone can't handle it.  */
+  if ((value & HOST_WIDE_INT_C(0xffffffff)) != 0)
+    return false;
+
+  return true;
+})
+
+;; Return 1 if op is a 64-bit constant that can be created with a
+;; combination of paddi and paddis.  Don't generate paddi and paddis if
+;; we can do it via addis and rldicl.
+(define_predicate "paddis_paddi_operand"
+  (match_code "const_int")
+{
+  if (!TARGET_PADDIS)
+    return false;
+
+  if (mode != VOIDmode && mode != DImode)
+    return false;
+
+  HOST_WIDE_INT value = INTVAL (op);
+
+  if (!SIGNED_INTEGER_64BIT_P (value))
+    return false;
+
+  /* Don't worry about negative values at the moment.  */
+  if (value < 0)
+    return false;
+
+  /* If paddi alone can handle the number, don't return true.  */
+  if (SIGNED_INTEGER_34BIT_P (value))
+    return false;
+
+  /* If we can do the add or generate the constant via addis/rldicl, fail.  */
+  if (rs6000_is_valid_and_mask (op, mode))
+    return false;
+
+  /* Only return true if we need both paddi and paddis.  */
+  if ((value & HOST_WIDE_INT_C(0xffffffff)) == 0)
+    return false;
+
+  return true;
+})
+
 ;; Return 1 if op is a register that is not special.
 ;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where
 ;; you need to be careful in moving a SFmode to SImode and vice versa due to
@@ -573,18 +635,22 @@
   (ior (match_operand 0 "zero_constant")
        (match_operand 0 "gpc_reg_operand")))
 
-;; Return 1 if op is a constant integer valid for addition with addis, addi.
+;; Return 1 if op is a constant integer valid for addition with addis,
+;; addi, paddi, or paddis.
 (define_predicate "add_cint_operand"
   (and (match_code "const_int")
-       (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)
-                      + (mode == SImode ? 0x80000000 : 0x80008000))
-                   < (unsigned HOST_WIDE_INT) 0x100000000ll")))
+       (ior (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)
+                          + (mode == SImode ? 0x80000000 : 0x80008000))
+                       < (unsigned HOST_WIDE_INT) 0x100000000ll")
+           (match_operand 0 "cint34_operand")
+           (match_operand 0 "paddis_operand"))))
 
 ;; Return 1 if op is a constant integer valid for addition
 ;; or non-special register.
 (define_predicate "reg_or_add_cint_operand"
   (if_then_else (match_code "const_int")
-    (match_operand 0 "add_cint_operand")
+    (ior (match_operand 0 "add_cint_operand")
+        (match_operand 0 "paddis_paddi_operand"))
     (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if op is a constant integer valid for subtraction
@@ -1127,7 +1193,8 @@
   (if_then_else (match_code "const_int")
     (match_test "satisfies_constraint_I (op)
                 || satisfies_constraint_L (op)
-                || satisfies_constraint_eI (op)")
+                || satisfies_constraint_eI (op)
+                || satisfies_constraint_eU (op)")
     (match_operand 0 "gpc_reg_operand")))
 
 ;; Return 1 if the operand is either a non-special register, or 0, or -1.
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 38e6bc880b25..d668953e6a39 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -83,10 +83,16 @@
 #define POWER11_MASKS_SERVER (ISA_3_1_MASKS_SERVER                     \
                              | OPTION_MASK_POWER11)
 
-/* -mcpu=future flags.  */
-#define FUTURE_MASKS_SERVER    (POWER11_MASKS_SERVER                   \
-                                | OPTION_MASK_DENSE_MATH               \
-                                | OPTION_MASK_FUTURE)
+/* -mcpu=future flags.
+
+   During the development of the power10 support for GCC, using load/store
+   vector pair instructions for string operations was turned off by default,
+   because there was a use case that had really bad performance.  Assume this
+   will be fixed in potential future machines.  */
+#define FUTURE_MASKS_SERVER    (POWER11_MASKS_SERVER                   \
+                               | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR     \
+                               | OPTION_MASK_DENSE_MATH                \
+                               | OPTION_MASK_FUTURE)
 
 /* Flags that need to be turned off if -mno-vsx.  */
 #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX    \
@@ -116,6 +122,7 @@
 
 /* Mask of all options to set the default isa flags based on -mcpu=<xxx>.  */
 #define POWERPC_MASKS          (OPTION_MASK_ALTIVEC                    \
+                                | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR    \
                                 | OPTION_MASK_CMPB                     \
                                 | OPTION_MASK_CRYPTO                   \
                                 | OPTION_MASK_DENSE_MATH               \
diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc
index 062ff1e2465e..0c14ba4cc3ea 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap)
 
       if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX
          && TARGET_BLOCK_OPS_VECTOR_PAIR
+         && TARGET_POWERPC64
          && bytes >= 32
          && (align >= 256 || !STRICT_ALIGNMENT))
        {
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 2da66de42196..21261839001e 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -6017,7 +6017,7 @@ rs6000_machine_from_flags (void)
 
   /* Disable the flags that should never influence the .machine selection.  */
   flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL
-            | OPTION_MASK_ALTIVEC);
+            | OPTION_MASK_ALTIVEC | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR);
 
   if ((flags & (FUTURE_MASKS_SERVER & ~POWER11_MASKS_SERVER)) != 0)
     return "future";
@@ -6167,7 +6167,18 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
   else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
     return 1;
 
-  else if (TARGET_POWERPC64)
+  /* PADDIS support.  */
+  else if (TARGET_PADDIS)
+    {
+      rtx num = GEN_INT (value);
+      if (paddis_operand (num, VOIDmode))
+       return 1;       /* paddis alone.  */
+
+      if (paddis_paddi_operand (num, VOIDmode))
+       return 2;       /* paddis + paddi/addi.  */
+    }
+
+  if (TARGET_POWERPC64)
     {
       int num_insns = 0;
       rs6000_emit_set_long_const (nullptr, value, &num_insns);
@@ -14277,6 +14288,14 @@ print_operand (FILE *file, rtx x, int code)
        fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
       return;
 
+    case 'B':
+      /* Upper 32-bits of a constant.  */
+      if (!CONST_INT_P (x))
+       output_operand_lossage ("Not a constant.");
+
+      fprintf (file, "%" HOST_LONG_FORMAT "d", INTVAL (x) >> 32);
+      return;
+
     case 'D':
       /* Like 'J' but get to the GT bit only.  */
       if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index eef25768b5af..cccb839b489e 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -571,6 +571,14 @@ extern int rs6000_vector_align[];
    below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
+/* Whether we have XVRLW support.  */
+#define TARGET_XVRLW                   TARGET_FUTURE
+
+/* Whether we have PADDIS support.  */
+#define TARGET_PADDIS                  (TARGET_FUTURE                  \
+                                        && TARGET_PREFIXED             \
+                                        && TARGET_POWERPC64)
+
 /* Whether the various reciprocal divide/square root estimate instructions
    exist, and whether we should automatically generate code for the instruction
    by default.  */
@@ -2494,6 +2502,13 @@ typedef struct GTY(()) machine_function
 #define SIGNED_INTEGER_16BIT_P(VALUE)  SIGNED_INTEGER_NBIT_P (VALUE, 16)
 #define SIGNED_INTEGER_34BIT_P(VALUE)  SIGNED_INTEGER_NBIT_P (VALUE, 34)
 
+#if HOST_BITS_PER_WIDE_INT > 64
+#define SIGNED_INTEGER_64BIT_P(VALUE)  SIGNED_INTEGER_NBIT_P (VALUE, 64)
+
+#else
+#define SIGNED_INTEGER_64BIT_P(VALUE)  1
+#endif
+
 /* Like SIGNED_INTEGER_16BIT_P and SIGNED_INTEGER_34BIT_P, but with an extra
    argument that gives a length to validate a range of addresses, to allow for
    splitting insns into several insns, each of which has an offsettable
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 5b590bc9b0d9..0dfe71ae0da9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -427,6 +427,7 @@
      (and (eq_attr "isa" "future")
           (match_test "TARGET_FUTURE"))
      (const_int 1)
+
     ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
@@ -1796,14 +1797,18 @@
                  (match_operand:SDI 2 "reg_or_add_cint_operand")))]
   ""
 {
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
   if (<MODE>mode == DImode && !TARGET_POWERPC64)
     {
-      rtx lo0 = gen_lowpart (SImode, operands[0]);
-      rtx lo1 = gen_lowpart (SImode, operands[1]);
-      rtx lo2 = gen_lowpart (SImode, operands[2]);
-      rtx hi0 = gen_highpart (SImode, operands[0]);
-      rtx hi1 = gen_highpart (SImode, operands[1]);
-      rtx hi2 = gen_highpart_mode (SImode, DImode, operands[2]);
+      rtx lo0 = gen_lowpart (SImode, op0);
+      rtx lo1 = gen_lowpart (SImode, op1);
+      rtx lo2 = gen_lowpart (SImode, op2);
+      rtx hi0 = gen_highpart (SImode, op0);
+      rtx hi1 = gen_highpart (SImode, op1);
+      rtx hi2 = gen_highpart_mode (SImode, DImode, op2);
 
       if (!reg_or_short_operand (lo2, SImode))
        lo2 = force_reg (SImode, lo2);
@@ -1815,24 +1820,40 @@
       DONE;
     }
 
-  if (CONST_INT_P (operands[2]) && !add_operand (operands[2], <MODE>mode))
+  if (CONST_INT_P (op2) && !add_operand (op2, <MODE>mode))
     {
-      rtx tmp = ((!can_create_pseudo_p ()
-                 || rtx_equal_p (operands[0], operands[1]))
-                ? operands[0] : gen_reg_rtx (<MODE>mode));
+      rtx tmp = ((!can_create_pseudo_p () || rtx_equal_p (op0, op1))
+                ? op0
+                : gen_reg_rtx (<MODE>mode));
 
       /* Adding a constant to r0 is not a valid insn, so use a different
         strategy in that case.  */
-      if (reg_or_subregno (operands[1]) == 0 || reg_or_subregno (tmp) == 0)
+      if (reg_or_subregno (op1) == 0 || reg_or_subregno (tmp) == 0)
        {
-         if (operands[0] == operands[1])
+         if (op0 == op1)
            FAIL;
-         rs6000_emit_move (operands[0], operands[2], <MODE>mode);
-         emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[0]));
+         rs6000_emit_move (op0, op2, <MODE>mode);
+         emit_insn (gen_add<mode>3 (op0, op1, op0));
+         DONE;
+       }
+
+      HOST_WIDE_INT val = INTVAL (op2);
+
+      /* If we have paddis, split the add into paddis and either addi or
+        paddi.  However, if we can generate addis and rldicl, do that
+        instead of doing paddis/paddi.  Emit the paddis first, just
+        in case this is a memory operation and we could fold the offset
+        into the memory operation.  */
+
+      if (TARGET_PADDIS && paddis_paddi_operand (op2, <MODE>mode))
+       {
+         const HOST_WIDE_INT mask = HOST_WIDE_INT_C(0xffffffff);
+
+         emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (val & ~mask)));
+         emit_insn (gen_add<mode>3 (op0, tmp, GEN_INT (val & mask)));
          DONE;
        }
 
-      HOST_WIDE_INT val = INTVAL (operands[2]);
       HOST_WIDE_INT low = sext_hwi (val, 16);
       HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
 
@@ -1842,24 +1863,28 @@
       /* The ordering here is important for the prolog expander.
         When space is allocated from the stack, adding 'low' first may
         produce a temporary deallocation (which would be bad).  */
-      emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (rest)));
-      emit_insn (gen_add<mode>3 (operands[0], tmp, GEN_INT (low)));
+      emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (rest)));
+      emit_insn (gen_add<mode>3 (op0, tmp, GEN_INT (low)));
       DONE;
     }
 })
 
 (define_insn "*add<mode>3"
-  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r")
-       (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b,b")
-                 (match_operand:GPR 2 "add_operand" "r,I,L,eI")))]
+  [(set (match_operand:GPR 0 "gpc_reg_operand"           "=r,r,r, r, r")
+       (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b, b, b")
+                 (match_operand:GPR 2 "add_operand"      "r,I,L,eI,eU")))]
   ""
   "@
    add %0,%1,%2
    addi %0,%1,%2
    addis %0,%1,%v2
-   addi %0,%1,%2"
+   addi %0,%1,%2
+   paddis %0,%1,%B2"
   [(set_attr "type" "add")
-   (set_attr "isa" "*,*,*,p10")])
+   (set_attr "isa" "*,*,*,p10,future")
+   (set_attr "length" "*,*,*,*,12")
+   (set_attr "prefixed" "*,*,*,*,yes")
+   (set_attr "maybe_prefixed" "*,*,*,*,no")])
 
 (define_insn "*addsi3_high"
   [(set (match_operand:SI 0 "gpc_reg_operand" "=b")
@@ -2401,6 +2426,20 @@
   ""
 )
 
+;; Saturating subtract
+(define_code_iterator gtu_geu [gtu geu])
+
+(define_insn "*subfus<mode>3_<code>"
+  [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+       (if_then_else:GPR (gtu_geu (match_operand:GPR 1 "gpc_reg_operand" "r")
+                                  (match_operand:GPR 2 "gpc_reg_operand" "r"))
+                         (minus:GPR (match_dup 1)
+                                    (match_dup 2))
+                         (const_int 0)))]
+  "TARGET_FUTURE"
+  "sub<wd>us %0,%1,%2"
+  [(set_attr "type" "add")])
+
 (define_insn "@neg<mode>2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
        (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))]
@@ -9868,7 +9907,7 @@
   DONE;
 })
 
-;;        GPR store   GPR load    GPR move
+;;        GPR store   GPR load    GPR move    GPR paddis   GPR paddis+paddi
 ;;        GPR li      GPR lis     GPR pli     GPR #
 ;;        FPR store   FPR load    FPR move
 ;;        AVX store   AVX store   AVX load    AVX load    VSX move
@@ -9878,7 +9917,7 @@
 ;;        VSX->GPR    GPR->VSX
 (define_insn "*movdi_internal64"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-         "=YZ,        r,          r,
+         "=YZ,        r,          r,          r,          b,
           r,          r,          r,          r,
           m,          ^d,         ^d,
           wY,         Z,          $v,         $v,         ^wa,
@@ -9887,7 +9926,7 @@
           r,          *h,         *h,
           ?r,         ?wa")
        (match_operand:DI 1 "input_operand"
-         "r,          YZ,         r,
+         "r,          YZ,         r,          eU,         eV,
           I,          L,          eI,         nF,
           ^d,         m,          ^d,
           ^v,         $v,         wY,         Z,          ^wa,
@@ -9902,6 +9941,8 @@
    std%U0%X0 %1,%0
    ld%U1%X1 %0,%1
    mr %0,%1
+   paddis %0,0,%B1
+   #
    li %0,%1
    lis %0,%v1
    li %0,%1
@@ -9927,7 +9968,7 @@
    mfvsrd %0,%x1
    mtvsrd %x0,%1"
   [(set_attr "type"
-         "store,      load,       *,
+         "store,      load,       *,          *,          *,
           *,          *,          *,          *,
           fpstore,    fpload,     fpsimple,
           fpstore,    fpstore,    fpload,     fpload,     veclogical,
@@ -9937,7 +9978,7 @@
           mfvsr,      mtvsr")
    (set_attr "size" "64")
    (set_attr "length"
-         "*,          *,          *,
+         "*,          *,          *,          12,         24,
           *,          *,          *,          20,
           *,          *,          *,
           *,          *,          *,          *,          *,
@@ -9946,14 +9987,32 @@
           *,          *,          *,
           *,          *")
    (set_attr "isa"
-         "*,          *,          *,
+         "*,          *,          *,          future,     future,
           *,          *,          p10,        *,
           *,          *,          *,
           p9v,        p7v,        p9v,        p7v,        *,
           p9v,        p9v,        p7v,        *,          *,
           p7v,        p7v,
           *,          *,          *,
-          p8v,        p8v")])
+          p8v,        p8v")
+   (set_attr "prefixed"
+         "*,          *,          *,          yes,        yes,
+          *,          *,          *,          *,
+          *,          *,          *,
+          *,          *,          *,          *,          *,
+          *,          *,          *,          *,          *,
+          *,          *,
+          *,          *,          *,
+          *,          *")
+   (set_attr "maybe_prefixed"
+         "*,          *,          *,          no,         no,
+          *,          *,          *,          *,
+          *,          *,          *,
+          *,          *,          *,          *,          *,
+          *,          *,          *,          *,          *,
+          *,          *,
+          *,          *,          *,
+          *,          *")])
 
 ; Some DImode loads are best done as a load of -1 followed by a mask
 ; instruction.
@@ -9971,6 +10030,32 @@
                (match_dup 1)))]
   "")
 
+;; Split a constant that can be generated by a paddis and paddi into 2
+;; instructions.  We can't split setting r0 since that would generate:
+;;     paddis r0,0,upper
+;;     paddi  r0,r0,lower
+;;
+;; which gives the wrong value.
+
+(define_split
+  [(set (match_operand:DI 0 "base_reg_operand")
+       (match_operand:DI 1 "paddis_paddi_operand"))]
+  "TARGET_PADDIS"
+  [(set (match_dup 2)
+       (match_dup 3))
+   (set (match_dup 0)
+       (plus:DI (match_dup 2)
+                (match_dup 4)))]
+{
+  HOST_WIDE_INT value = INTVAL (operands[1]);
+  const HOST_WIDE_INT mask = HOST_WIDE_INT_C (0xffffffff);
+  operands[2] = (can_create_pseudo_p ()
+                ? gen_reg_rtx (DImode)
+                : operands[0]);
+  operands[3] = GEN_INT (value & ~mask);
+  operands[4] = GEN_INT (value & mask);
+})
+
 ;; Split a load of a large constant into the appropriate five-instruction
 ;; sequence.  Handle anything in a constant number of insns.
 ;; When non-easy constants can go in the TOC, this should use
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 9863c476baca..05f066cf0c7a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5743,20 +5743,32 @@
   DONE;
 })
 
-;; Load VSX Vector with Length
+;; Load VSX Vector with Length.  If we have lxvrl, we don't have to do an
+;; explicit shift left into a pseudo.
 (define_expand "lxvl"
-  [(set (match_dup 3)
-        (ashift:DI (match_operand:DI 2 "register_operand")
-                   (const_int 56)))
-   (set (match_operand:V16QI 0 "vsx_register_operand")
-       (unspec:V16QI
-        [(match_operand:DI 1 "gpc_reg_operand")
-          (mem:V16QI (match_dup 1))
-         (match_dup 3)]
-        UNSPEC_LXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+    len = shift_len;
+  else
+    {
+      len = gen_reg_rtx (DImode);
+      emit_insn (gen_rtx_SET (len, shift_len));
+    }
+
+  rtx dest = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, addr, mem, len);
+  rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL);
+  emit_insn (gen_rtx_SET (dest, lxvl));
+  DONE;
 })
 
 (define_insn "*lxvl"
@@ -5780,6 +5792,34 @@
   "lxvll %x0,%1,%2"
   [(set_attr "type" "vecload")])
 
+;; For lxvrl and lxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for lxvl will already incorporate the shift in generating the
+;; insn.  The lxvll built-in function required the user to have already done
+;; the shift.  Defining lxvrll this way will optimize cases where the user has
+;; done the shift immediately before the built-in.
+(define_insn "*lxvrl"
+  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+       (unspec:V16QI
+        [(match_operand:DI 1 "gpc_reg_operand" "b")
+         (mem:V16QI (match_dup 1))
+         (ashift:DI (match_operand:DI 2 "register_operand" "r")
+                    (const_int 56))]
+        UNSPEC_LXVL))]
+  "TARGET_FUTURE && TARGET_64BIT"
+  "lxvrl %x0,%1,%2"
+  [(set_attr "type" "vecload")])
+
+(define_insn "*lxvrll"
+  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+       (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
+                       (mem:V16QI (match_dup 1))
+                      (ashift:DI (match_operand:DI 2 "register_operand" "r")
+                                 (const_int 56))]
+                     UNSPEC_LXVLL))]
+  "TARGET_FUTURE"
+  "lxvrll %x0,%1,%2"
+  [(set_attr "type" "vecload")])
+
 ;; Expand for builtin xl_len_r
 (define_expand "xl_len_r"
   [(match_operand:V16QI 0 "vsx_register_operand")
@@ -5811,18 +5851,29 @@
 
 ;; Store VSX Vector with Length
 (define_expand "stxvl"
-  [(set (match_dup 3)
-       (ashift:DI (match_operand:DI 2 "register_operand")
-                  (const_int 56)))
-   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
-       (unspec:V16QI
-        [(match_operand:V16QI 0 "vsx_register_operand")
-         (mem:V16QI (match_dup 1))
-         (match_dup 3)]
-        UNSPEC_STXVL))]
+  [(use (match_operand:V16QI 0 "vsx_register_operand"))
+   (use (match_operand:DI 1 "gpc_reg_operand"))
+   (use (match_operand:DI 2 "gpc_reg_operand"))]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
-  operands[3] = gen_reg_rtx (DImode);
+  rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56));
+  rtx len;
+
+  if (TARGET_FUTURE)
+    len = shift_len;
+  else
+    {
+      len = gen_reg_rtx (DImode);
+      emit_insn (gen_rtx_SET (len, shift_len));
+    }
+
+  rtx src = operands[0];
+  rtx addr = operands[1];
+  rtx mem = gen_rtx_MEM (V16QImode, addr);
+  rtvec rv = gen_rtvec (3, src, mem, len);
+  rtx stxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_STXVL);
+  emit_insn (gen_rtx_SET (mem, stxvl));
+  DONE;
 })
 
 ;; Define optab for vector access with length vectorization exploitation.
@@ -5867,6 +5918,35 @@
   "stxvl %x0,%1,%2"
   [(set_attr "type" "vecstore")])
 
+;; For stxvrl and stxvrll, use the combiner to eliminate the shift.  The
+;; define_expand for stxvl will already incorporate the shift in generating the
+;; insn.  The stxvll built-in function required the user to have already done
+;; the shift.  Defining stxvrll this way will optimize cases where the user
+;; has done the shift immediately before the built-in.
+
+(define_insn "*stxvrl"
+  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
+       (unspec:V16QI
+        [(match_operand:V16QI 0 "vsx_register_operand" "wa")
+         (mem:V16QI (match_dup 1))
+         (ashift:DI (match_operand:DI 2 "register_operand" "r")
+                    (const_int 56))]
+        UNSPEC_STXVL))]
+  "TARGET_FUTURE && TARGET_64BIT"
+  "stxvrl %x0,%1,%2"
+  [(set_attr "type" "vecstore")])
+
+(define_insn "*stxvrll"
+  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
+       (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
+                      (mem:V16QI (match_dup 1))
+                      (ashift:DI (match_operand:DI 2 "register_operand" "r")
+                                 (const_int 56))]
+                     UNSPEC_STXVLL))]
+  "TARGET_FUTURE"
+  "stxvrll %x0,%1,%2"
+  [(set_attr "type" "vecstore")])
+
 ;; Expand for builtin xst_len_r
 (define_expand "xst_len_r"
   [(match_operand:V16QI 0 "vsx_register_operand" "=wa")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index f227353bd82c..b22d9092ea2d 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3368,6 +3368,12 @@ loaded to a VSX register with one prefixed instruction.
 An IEEE 128-bit constant that can be loaded into a VSX register with
 the @code{lxvkq} instruction.
 
+@item eU
+A signed integer constant that can be used with the paddis instruction.
+
+@item eV
+A signed integer constant that paddis and paddi instructions generate.
+
 @ifset INTERNALS
 @item G
 A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/future-3.c b/gcc/testsuite/gcc.target/powerpc/future-3.c
new file mode 100644
index 000000000000..afa22228b96d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/future-3.c
@@ -0,0 +1,22 @@
+/* 32-bit doesn't generate vector pair instructions.  */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test to see that memcpy will use load/store vector pair with
+   -mcpu=future.  */
+
+#ifndef SIZE
+#define SIZE 4
+#endif
+
+extern vector double to[SIZE], from[SIZE];
+
+void
+copy (void)
+{
+  __builtin_memcpy (to, from, sizeof (to));
+  return;
+}
+
+/* { dg-final { scan-assembler {\mlxvpx?\M}  } } */
+/* { dg-final { scan-assembler {\mstxvpx?\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/lxvrl.c 
b/gcc/testsuite/gcc.target/powerpc/lxvrl.c
new file mode 100644
index 000000000000..71854c50c911
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/lxvrl.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the lxvrl and stxvrl instructions are generated for
+   -mcpu=future on memory copy operations.  */
+
+#ifndef VSIZE
+#define VSIZE 2
+#endif
+
+#ifndef LSIZE
+#define LSIZE 5
+#endif
+
+struct foo {
+  vector unsigned char vc[VSIZE];
+  unsigned char leftover[LSIZE];
+};
+
+void memcpy_ptr (struct foo *p, struct foo *q)
+{
+  __builtin_memcpy ((void *) p,                /* lxvrl and stxvrl.  */
+                   (void *) q,
+                   (sizeof (vector unsigned char) * VSIZE) + LSIZE);
+}
+
+/* { dg-final { scan-assembler     {\mlxvrl\M}  } } */
+/* { dg-final { scan-assembler     {\mstxvrl\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvl\M}   } } */
+/* { dg-final { scan-assembler-not {\mstxvl\M}  } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c 
b/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c
new file mode 100644
index 000000000000..d08e3675f94c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the paddis and paddi instructions are generated when adding
+   large constants with -mcpu=future.  */
+
+#include <stddef.h>
+
+size_t
+prefix_addis_addi (size_t x)
+{
+  return x + 0x123456789ABCDEUL;       /* paddis + paddi.  */
+}
+
+size_t
+prefix_addis (size_t x)
+{
+  return x + 0x12345600000000UL;       /* paddis.  */
+}
+
+/* { dg-final { scan-assembler-times {\mpaddis\M} 2  } } */
+/* { dg-final { scan-assembler-times {\mpaddi\M}  1  } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/saturate-subtract-1.c 
b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-1.c
new file mode 100644
index 000000000000..c32a70a5e898
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-1.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+/* { dg-require-effective-target powerpc_future_ok } */
+
+/* Check that saturating subtract (subwus) is generated.  Check that all
+   combinations of >, >=, <, and <= are optimized.  */
+
+#ifndef TYPE
+#define TYPE   unsigned int
+#endif
+
+void
+saturated_subtract_gt (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a > b) ? a - b : 0;
+}
+
+void
+saturated_subtract_ge (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a >= b) ? a - b : 0;
+}
+
+void
+saturated_subtract_lt (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a < b) ? 0 : a - b;
+}
+
+void
+saturated_subtract_le (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a <= b) ? 0 : a - b;
+}
+
+/* { dg-final { scan-assembler-times {\msubwus\M} 4 } } */
+/* { dg-final { scan-assembler-not   {\mcmplw\M}    } } */
+/* { dg-final { scan-assembler-not   {\misel\M}     } } */
+/* { dg-final { scan-assembler-not   {\msubf\M}     } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/saturate-subtract-2.c 
b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-2.c
new file mode 100644
index 000000000000..482d7384c172
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-2.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Check that saturating subtract (subdus) is generated.  Check that all
+   combinations of >, >=, <, and <= are optimized.  */
+
+#ifndef TYPE
+#define TYPE   unsigned long long
+#endif
+
+void
+saturated_subtract_gt (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a > b) ? a - b : 0;
+}
+
+void
+saturated_subtract_ge (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a >= b) ? a - b : 0;
+}
+
+void
+saturated_subtract_lt (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a < b) ? 0 : a - b;
+}
+
+void
+saturated_subtract_le (TYPE a, TYPE b, TYPE *p)
+{
+  *p = (a <= b) ? 0 : a - b;
+}
+
+/* { dg-final { scan-assembler-times {\msubdus\M} 4 } } */
+/* { dg-final { scan-assembler-not   {\mcmpld\M}    } } */
+/* { dg-final { scan-assembler-not   {\misel\M}     } } */
+/* { dg-final { scan-assembler-not   {\msubf\M}     } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c 
b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
new file mode 100644
index 000000000000..f9e87ad4bfcf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_future_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+/* Test whether the xvrlw instruction (vector word rotate left using VSX
+   registers instead of Altivec registers) is generated.  */
+
+#include <altivec.h>
+
+typedef vector unsigned int  v4si_t;
+
+v4si_t
+rotl_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x << n) | (x >> (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotr_v4si_scalar (v4si_t x, unsigned long n)
+{
+  __asm__ (" # %x0" : "+f" (x));
+  return (x >> n) | (x << (32 - n));   /* xvrlw.  */
+}
+
+v4si_t
+rotl_v4si_vector (v4si_t x, v4si_t y)
+{
+  __asm__ (" # %x0" : "+f" (x));       /* xvrlw.  */
+  return vec_rl (x, y);
+}
+
+/* { dg-final { scan-assembler-times {\mxvrlw\M} 3  } } */
+/* { dg-final { scan-assembler-not   {\mvrlw\M}     } } */
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index fab707f07fd9..b5e1acf39ae9 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8282,6 +8282,19 @@ proc check_htm_hw_available { } {
        }
     }]
 }
+
+# Return 1 if this is a PowerPC target supporting -mcpu=future.
+
+proc check_effective_target_powerpc_future_ok { } {
+    return [check_no_compiler_messages powerpc_future_ok object {
+       unsigned long a, b, c;
+       int main (void) {
+           asm ("subdus %0,%1,%2" : "=r" (a) : "r" (b), "r" (c));
+           return 0;
+       }
+    } "-mcpu=future"]
+}
+
 # Return 1 if this is a PowerPC target supporting -mcpu=cell.
 
 proc check_effective_target_powerpc_ppu_ok { } {

[gcc(refs/vendors/ibm/heads/gcc-17-future)] Add miscellaneous -mcpu=future instructions

Reply via email to