https://gcc.gnu.org/g:1710e94d9af8d36508f09de2077bf0d64db38888
commit 1710e94d9af8d36508f09de2077bf0d64db38888 Author: Michael Meissner <[email protected]> Date: Wed Jul 1 10:54:14 2026 -0400 Add miscellaneous -mcpu=future instructions 2026-07-01 Michael Meissner <[email protected]> gcc/ * config/rs6000/rs6000-cpus.def (FUTURE_MASKS_SERVER): Enable using load vector pair and store vector pair instructions for memory copy operations. (POWERPC_MASKS): Make the option for enabling using load vector pair and store vector pair operations set and reset when the PowerPC processor is changed. * config/rs6000/rs6000.cc (rs6000_machine_from_flags): Disable -mblock-ops-vector-pair from influencing .machine selection. gcc/testsuite/ * gcc.target/powerpc/future-3.c: New test. 2026-07-01 Michael Meissner <[email protected]> gcc/ * config/rs6000/rs6000.md (gtu_geu): New code iterator. (subfus<mode>3_<code>): New insns. gcc/testsuite/ * gcc.target/powerpc/saturate-subtract-1.c: New test. * gcc.target/powerpc/saturate-subtract-2.c: Likewise. * lib/target-supports.exp (check_effective_target_powerpc_future_ok): New target test. 2026-07-01 Michael Meissner <[email protected]> gcc/ * config/rs6000/altivec.md (xvrlw): New insn. * config/rs6000/rs6000.h (TARGET_XVRLW): New macro. gcc/testsuite/ * gcc.target/powerpc/vector-rotate-left.c: New test. 2026-07-01 Michael Meissner <[email protected]> gcc/ * config/rs6000/rs6000-string.cc (expand_block_move): Do not generate lxvl and stxvl on 32-bit. * config/rs6000/vsx.md (lxvl): If -mcpu=future, generate the lxvl with the shift count automatically used in the insn. (lxvrl): New insn for -mcpu=future. (lxvrll): Likewise. (stxvl): If -mcpu=future, generate the stxvl with the shift count automatically used in the insn. (stxvrl): New insn for -mcpu=future. (stxvrll): Likewise. gcc/testsuite/ * gcc.target/powerpc/lxvrl.c: New test. 2026-07-01 Michael Meissner <[email protected]> gcc/ * config/rs6000/constraints.md (eU): New constraint. (eV): Likewise. 
* config/rs6000/predicates.md (paddis_operand): New predicate. (paddis_paddi_operand): Likewise. (add_cint_operand): Add paddis support. (reg_or_add_cint_operand): Add support for adds that can be done with paddis and paddi/addi. (add_operand): Add support for adds that can be done with paddis, but not paddis + paddi/addi.. * config/rs6000/rs6000.cc (num_insns_constant_gpr): Add support for adds that can be done with paddis and also paddis combined with paddi/addi. (print_operand): Add %B<n> for paddis support. * config/rs6000/rs6000.h (TARGET_PADDIS): New macro. (SIGNED_INTEGER_64BIT_P): Likewise. * config/rs6000/rs6000.md (add<mode>3 define_expand): Add paddis support. (*add<mode>3 define_insn): Likewise. (movdi_internal64): Likewise. (movdi splitter): New splitter for paddis + paddi/addi. * doc/md.texi (PowerPC constraints): Add eU and eV documentation. gcc/testsuite/ * gcc.target/powerpc/prefixed-addis.c: New test. Diff: --- gcc/config/rs6000/altivec.md | 14 ++ gcc/config/rs6000/constraints.md | 10 ++ gcc/config/rs6000/predicates.md | 79 +++++++++++- gcc/config/rs6000/rs6000-cpus.def | 15 ++- gcc/config/rs6000/rs6000-string.cc | 1 + gcc/config/rs6000/rs6000.cc | 23 +++- gcc/config/rs6000/rs6000.h | 15 +++ gcc/config/rs6000/rs6000.md | 143 ++++++++++++++++----- gcc/config/rs6000/vsx.md | 122 +++++++++++++++--- gcc/doc/md.texi | 6 + gcc/testsuite/gcc.target/powerpc/future-3.c | 22 ++++ gcc/testsuite/gcc.target/powerpc/lxvrl.c | 32 +++++ gcc/testsuite/gcc.target/powerpc/prefixed-addis.c | 24 ++++ .../gcc.target/powerpc/saturate-subtract-1.c | 39 ++++++ .../gcc.target/powerpc/saturate-subtract-2.c | 40 ++++++ .../gcc.target/powerpc/vector-rotate-left.c | 34 +++++ gcc/testsuite/lib/target-supports.exp | 13 ++ 17 files changed, 570 insertions(+), 62 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index dbe24c450e14..95433ce500a4 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2006,6 +2006,20 @@ } 
[(set_attr "type" "vecperm")]) +;; -mcpu=future adds a vector rotate left word variant. There is no vector +;; byte/half-word/double-word/quad-word rotate left. This insn occurs before +;; altivec_vrl<VI_char> and will match for -mcpu=future, while other cpus will +;; match the generic insn. +(define_insn "*xvrlw" + [(set (match_operand:V4SI 0 "register_operand" "=v,wa") + (rotate:V4SI (match_operand:V4SI 1 "register_operand" "v,wa") + (match_operand:V4SI 2 "register_operand" "v,wa")))] + "TARGET_XVRLW" + "@ + vrlw %0,%1,%2 + xvrlw %x0,%x1,%x2" + [(set_attr "type" "vecsimple")]) + (define_insn "altivec_vrl<VI_char>" [(set (match_operand:VI2 0 "register_operand" "=v") (rotate:VI2 (match_operand:VI2 1 "register_operand" "v") diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md index 0d1cde5bd4de..0169a7b85222 100644 --- a/gcc/config/rs6000/constraints.md +++ b/gcc/config/rs6000/constraints.md @@ -222,6 +222,16 @@ "An IEEE 128-bit constant that can be loaded into VSX registers." (match_operand 0 "easy_vector_constant_ieee128")) +(define_constraint "eU" + "@internal integer constant that can be loaded with paddis" + (and (match_code "const_int") + (match_operand 0 "paddis_operand"))) + +(define_constraint "eV" + "@A signed integer constant that paddis and paddi instructions generate." + (and (match_code "const_int") + (match_operand 0 "paddis_paddi_operand"))) + ;; Floating-point constraints. These two are defined so that insn ;; length attributes can be calculated exactly. 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 7f8d316648cc..73cc356e9834 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -383,6 +383,68 @@ return SIGNED_INTEGER_34BIT_P (INTVAL (op)); }) +;; Return 1 if op is a 64-bit constant that uses the paddis instruction +(define_predicate "paddis_operand" + (match_code "const_int") +{ + if (!TARGET_PADDIS) + return false; + + if (mode != VOIDmode && mode != DImode) + return false; + + HOST_WIDE_INT value = INTVAL (op); + + if (!SIGNED_INTEGER_64BIT_P (value)) + return false; + + /* If paddi alone can handle the number, don't return true. */ + if (SIGNED_INTEGER_34BIT_P (value)) + return false; + + /* If the bottom 32-bits are non-zero, paddis alone can't handle it. */ + if ((value & HOST_WIDE_INT_C(0xffffffff)) != 0) + return false; + + return true; +}) + +;; Return 1 if op is a 64-bit constant that can be created with a +;; combination of paddi and paddis. Don't generate paddi and paddis if +;; we can do it via addis and rldicl. +(define_predicate "paddis_paddi_operand" + (match_code "const_int") +{ + if (!TARGET_PADDIS) + return false; + + if (mode != VOIDmode && mode != DImode) + return false; + + HOST_WIDE_INT value = INTVAL (op); + + if (!SIGNED_INTEGER_64BIT_P (value)) + return false; + + /* Don't worry about negative values at the moment. */ + if (value < 0) + return false; + + /* If paddi alone can handle the number, don't return true. */ + if (SIGNED_INTEGER_34BIT_P (value)) + return false; + + /* If we can do the add or generate the constant via addis/rldicl, fail. */ + if (rs6000_is_valid_and_mask (op, mode)) + return false; + + /* Only return true if we need both paddi and paddis. */ + if ((value & HOST_WIDE_INT_C(0xffffffff)) == 0) + return false; + + return true; +}) + ;; Return 1 if op is a register that is not special. 
;; Disallow (SUBREG:SF (REG:SI)) and (SUBREG:SI (REG:SF)) on VSX systems where ;; you need to be careful in moving a SFmode to SImode and vice versa due to @@ -573,18 +635,22 @@ (ior (match_operand 0 "zero_constant") (match_operand 0 "gpc_reg_operand"))) -;; Return 1 if op is a constant integer valid for addition with addis, addi. +;; Return 1 if op is a constant integer valid for addition with addis, +;; addi, paddi, or paddis. (define_predicate "add_cint_operand" (and (match_code "const_int") - (match_test "((unsigned HOST_WIDE_INT) INTVAL (op) - + (mode == SImode ? 0x80000000 : 0x80008000)) - < (unsigned HOST_WIDE_INT) 0x100000000ll"))) + (ior (match_test "((unsigned HOST_WIDE_INT) INTVAL (op) + + (mode == SImode ? 0x80000000 : 0x80008000)) + < (unsigned HOST_WIDE_INT) 0x100000000ll") + (match_operand 0 "cint34_operand") + (match_operand 0 "paddis_operand")))) ;; Return 1 if op is a constant integer valid for addition ;; or non-special register. (define_predicate "reg_or_add_cint_operand" (if_then_else (match_code "const_int") - (match_operand 0 "add_cint_operand") + (ior (match_operand 0 "add_cint_operand") + (match_operand 0 "paddis_paddi_operand")) (match_operand 0 "gpc_reg_operand"))) ;; Return 1 if op is a constant integer valid for subtraction @@ -1127,7 +1193,8 @@ (if_then_else (match_code "const_int") (match_test "satisfies_constraint_I (op) || satisfies_constraint_L (op) - || satisfies_constraint_eI (op)") + || satisfies_constraint_eI (op) + || satisfies_constraint_eU (op)") (match_operand 0 "gpc_reg_operand"))) ;; Return 1 if the operand is either a non-special register, or 0, or -1. diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 38e6bc880b25..d668953e6a39 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -83,10 +83,16 @@ #define POWER11_MASKS_SERVER (ISA_3_1_MASKS_SERVER \ | OPTION_MASK_POWER11) -/* -mcpu=future flags. 
*/ -#define FUTURE_MASKS_SERVER (POWER11_MASKS_SERVER \ - | OPTION_MASK_DENSE_MATH \ - | OPTION_MASK_FUTURE) +/* -mcpu=future flags. + + During the development of the power10 support for GCC, using load/store + vector pair instructions for string operations was turned off by default, + because there was a use case that had really bad performance. Assume this + will be fixed in potential future machines. */ +#define FUTURE_MASKS_SERVER (POWER11_MASKS_SERVER \ + | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \ + | OPTION_MASK_DENSE_MATH \ + | OPTION_MASK_FUTURE) /* Flags that need to be turned off if -mno-vsx. */ #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ @@ -116,6 +122,7 @@ /* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */ #define POWERPC_MASKS (OPTION_MASK_ALTIVEC \ + | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \ | OPTION_MASK_CMPB \ | OPTION_MASK_CRYPTO \ | OPTION_MASK_DENSE_MATH \ diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc index 062ff1e2465e..0c14ba4cc3ea 100644 --- a/gcc/config/rs6000/rs6000-string.cc +++ b/gcc/config/rs6000/rs6000-string.cc @@ -2786,6 +2786,7 @@ expand_block_move (rtx operands[], bool might_overlap) if (TARGET_MMA && TARGET_BLOCK_OPS_UNALIGNED_VSX && TARGET_BLOCK_OPS_VECTOR_PAIR + && TARGET_POWERPC64 && bytes >= 32 && (align >= 256 || !STRICT_ALIGNMENT)) { diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 2da66de42196..21261839001e 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -6017,7 +6017,7 @@ rs6000_machine_from_flags (void) /* Disable the flags that should never influence the .machine selection. 
*/ flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL - | OPTION_MASK_ALTIVEC); + | OPTION_MASK_ALTIVEC | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR); if ((flags & (FUTURE_MASKS_SERVER & ~POWER11_MASKS_SERVER)) != 0) return "future"; @@ -6167,7 +6167,18 @@ num_insns_constant_gpr (HOST_WIDE_INT value) else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value)) return 1; - else if (TARGET_POWERPC64) + /* PADDIS support. */ + else if (TARGET_PADDIS) + { + rtx num = GEN_INT (value); + if (paddis_operand (num, VOIDmode)) + return 1; /* paddis alone. */ + + if (paddis_paddi_operand (num, VOIDmode)) + return 2; /* paddis + paddi/addi. */ + } + + if (TARGET_POWERPC64) { int num_insns = 0; rs6000_emit_set_long_const (nullptr, value, &num_insns); @@ -14277,6 +14288,14 @@ print_operand (FILE *file, rtx x, int code) fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4); return; + case 'B': + /* Upper 32-bits of a constant. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("Not a constant."); + + fprintf (file, "%" HOST_LONG_FORMAT "d", INTVAL (x) >> 32); + return; + case 'D': /* Like 'J' but get to the GT bit only. */ if (!REG_P (x) || !CR_REGNO_P (REGNO (x))) diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index eef25768b5af..cccb839b489e 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -571,6 +571,14 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 +/* Whether we have XVRLW support. */ +#define TARGET_XVRLW TARGET_FUTURE + +/* Whether we have PADDIS support. */ +#define TARGET_PADDIS (TARGET_FUTURE \ + && TARGET_PREFIXED \ + && TARGET_POWERPC64) + /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction by default. 
*/ @@ -2494,6 +2502,13 @@ typedef struct GTY(()) machine_function #define SIGNED_INTEGER_16BIT_P(VALUE) SIGNED_INTEGER_NBIT_P (VALUE, 16) #define SIGNED_INTEGER_34BIT_P(VALUE) SIGNED_INTEGER_NBIT_P (VALUE, 34) +#if HOST_BITS_PER_WIDE_INT > 64 +#define SIGNED_INTEGER_64BIT_P(VALUE) SIGNED_INTEGER_NBIT_P (VALUE, 64) + +#else +#define SIGNED_INTEGER_64BIT_P(VALUE) 1 +#endif + /* Like SIGNED_INTEGER_16BIT_P and SIGNED_INTEGER_34BIT_P, but with an extra argument that gives a length to validate a range of addresses, to allow for splitting insns into several insns, each of which has an offsettable diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 5b590bc9b0d9..0dfe71ae0da9 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -427,6 +427,7 @@ (and (eq_attr "isa" "future") (match_test "TARGET_FUTURE")) (const_int 1) + ] (const_int 0))) ;; If this instruction is microcoded on the CELL processor @@ -1796,14 +1797,18 @@ (match_operand:SDI 2 "reg_or_add_cint_operand")))] "" { + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + if (<MODE>mode == DImode && !TARGET_POWERPC64) { - rtx lo0 = gen_lowpart (SImode, operands[0]); - rtx lo1 = gen_lowpart (SImode, operands[1]); - rtx lo2 = gen_lowpart (SImode, operands[2]); - rtx hi0 = gen_highpart (SImode, operands[0]); - rtx hi1 = gen_highpart (SImode, operands[1]); - rtx hi2 = gen_highpart_mode (SImode, DImode, operands[2]); + rtx lo0 = gen_lowpart (SImode, op0); + rtx lo1 = gen_lowpart (SImode, op1); + rtx lo2 = gen_lowpart (SImode, op2); + rtx hi0 = gen_highpart (SImode, op0); + rtx hi1 = gen_highpart (SImode, op1); + rtx hi2 = gen_highpart_mode (SImode, DImode, op2); if (!reg_or_short_operand (lo2, SImode)) lo2 = force_reg (SImode, lo2); @@ -1815,24 +1820,40 @@ DONE; } - if (CONST_INT_P (operands[2]) && !add_operand (operands[2], <MODE>mode)) + if (CONST_INT_P (op2) && !add_operand (op2, <MODE>mode)) { - rtx tmp = ((!can_create_pseudo_p () - || rtx_equal_p 
(operands[0], operands[1])) - ? operands[0] : gen_reg_rtx (<MODE>mode)); + rtx tmp = ((!can_create_pseudo_p () || rtx_equal_p (op0, op1)) + ? op0 + : gen_reg_rtx (<MODE>mode)); /* Adding a constant to r0 is not a valid insn, so use a different strategy in that case. */ - if (reg_or_subregno (operands[1]) == 0 || reg_or_subregno (tmp) == 0) + if (reg_or_subregno (op1) == 0 || reg_or_subregno (tmp) == 0) { - if (operands[0] == operands[1]) + if (op0 == op1) FAIL; - rs6000_emit_move (operands[0], operands[2], <MODE>mode); - emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[0])); + rs6000_emit_move (op0, op2, <MODE>mode); + emit_insn (gen_add<mode>3 (op0, op1, op0)); + DONE; + } + + HOST_WIDE_INT val = INTVAL (op2); + + /* If we have paddis, split the add into paddis and either addi or + paddi. However, if we can generate addis and rldicl, do that + instead of doing paddis/paddi. Emit the paddis first, just + in case this is a memory operation and we could fold the offset + into the memory operation. */ + + if (TARGET_PADDIS && paddis_paddi_operand (op2, <MODE>mode)) + { + const HOST_WIDE_INT mask = HOST_WIDE_INT_C(0xffffffff); + + emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (val & ~mask))); + emit_insn (gen_add<mode>3 (op0, tmp, GEN_INT (val & mask))); DONE; } - HOST_WIDE_INT val = INTVAL (operands[2]); HOST_WIDE_INT low = sext_hwi (val, 16); HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode); @@ -1842,24 +1863,28 @@ /* The ordering here is important for the prolog expander. When space is allocated from the stack, adding 'low' first may produce a temporary deallocation (which would be bad). 
*/ - emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (rest))); - emit_insn (gen_add<mode>3 (operands[0], tmp, GEN_INT (low))); + emit_insn (gen_add<mode>3 (tmp, op1, GEN_INT (rest))); + emit_insn (gen_add<mode>3 (op0, tmp, GEN_INT (low))); DONE; } }) (define_insn "*add<mode>3" - [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r,r") - (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b,b") - (match_operand:GPR 2 "add_operand" "r,I,L,eI")))] + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r,r, r, r") + (plus:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,b,b, b, b") + (match_operand:GPR 2 "add_operand" "r,I,L,eI,eU")))] "" "@ add %0,%1,%2 addi %0,%1,%2 addis %0,%1,%v2 - addi %0,%1,%2" + addi %0,%1,%2 + paddis %0,%1,%B2" [(set_attr "type" "add") - (set_attr "isa" "*,*,*,p10")]) + (set_attr "isa" "*,*,*,p10,future") + (set_attr "length" "*,*,*,*,12") + (set_attr "prefixed" "*,*,*,*,yes") + (set_attr "maybe_prefixed" "*,*,*,*,no")]) (define_insn "*addsi3_high" [(set (match_operand:SI 0 "gpc_reg_operand" "=b") @@ -2401,6 +2426,20 @@ "" ) +;; Saturating subtract +(define_code_iterator gtu_geu [gtu geu]) + +(define_insn "*subfus<mode>3_<code>" + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (if_then_else:GPR (gtu_geu (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")) + (minus:GPR (match_dup 1) + (match_dup 2)) + (const_int 0)))] + "TARGET_FUTURE" + "sub<wd>us %0,%1,%2" + [(set_attr "type" "add")]) + (define_insn "@neg<mode>2" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] @@ -9868,7 +9907,7 @@ DONE; }) -;; GPR store GPR load GPR move +;; GPR store GPR load GPR move GPR paddis GPR paddis+paddi ;; GPR li GPR lis GPR pli GPR # ;; FPR store FPR load FPR move ;; AVX store AVX store AVX load AVX load VSX move @@ -9878,7 +9917,7 @@ ;; VSX->GPR GPR->VSX (define_insn "*movdi_internal64" [(set (match_operand:DI 0 "nonimmediate_operand" - "=YZ, r, r, + "=YZ, r, 
r, r, b, r, r, r, r, m, ^d, ^d, wY, Z, $v, $v, ^wa, @@ -9887,7 +9926,7 @@ r, *h, *h, ?r, ?wa") (match_operand:DI 1 "input_operand" - "r, YZ, r, + "r, YZ, r, eU, eV, I, L, eI, nF, ^d, m, ^d, ^v, $v, wY, Z, ^wa, @@ -9902,6 +9941,8 @@ std%U0%X0 %1,%0 ld%U1%X1 %0,%1 mr %0,%1 + paddis %0,0,%B1 + # li %0,%1 lis %0,%v1 li %0,%1 @@ -9927,7 +9968,7 @@ mfvsrd %0,%x1 mtvsrd %x0,%1" [(set_attr "type" - "store, load, *, + "store, load, *, *, *, *, *, *, *, fpstore, fpload, fpsimple, fpstore, fpstore, fpload, fpload, veclogical, @@ -9937,7 +9978,7 @@ mfvsr, mtvsr") (set_attr "size" "64") (set_attr "length" - "*, *, *, + "*, *, *, 12, 24, *, *, *, 20, *, *, *, *, *, *, *, *, @@ -9946,14 +9987,32 @@ *, *, *, *, *") (set_attr "isa" - "*, *, *, + "*, *, *, future, future, *, *, p10, *, *, *, *, p9v, p7v, p9v, p7v, *, p9v, p9v, p7v, *, *, p7v, p7v, *, *, *, - p8v, p8v")]) + p8v, p8v") + (set_attr "prefixed" + "*, *, *, yes, yes, + *, *, *, *, + *, *, *, + *, *, *, *, *, + *, *, *, *, *, + *, *, + *, *, *, + *, *") + (set_attr "maybe_prefixed" + "*, *, *, no, no, + *, *, *, *, + *, *, *, + *, *, *, *, *, + *, *, *, *, *, + *, *, + *, *, *, + *, *")]) ; Some DImode loads are best done as a load of -1 followed by a mask ; instruction. @@ -9971,6 +10030,32 @@ (match_dup 1)))] "") +;; Split a constant that can be generated by a paddis and paddi into 2 +;; instructions. We can't split setting r0 since that would generate: +;; paddis r0,0,upper +;; paddi r0,r0,lower +;; +;; which gives the wrong value. + +(define_split + [(set (match_operand:DI 0 "base_reg_operand") + (match_operand:DI 1 "paddis_paddi_operand"))] + "TARGET_PADDIS" + [(set (match_dup 2) + (match_dup 3)) + (set (match_dup 0) + (plus:DI (match_dup 2) + (match_dup 4)))] +{ + HOST_WIDE_INT value = INTVAL (operands[1]); + const HOST_WIDE_INT mask = HOST_WIDE_INT_C (0xffffffff); + operands[2] = (can_create_pseudo_p () + ? 
gen_reg_rtx (DImode) + : operands[0]); + operands[3] = GEN_INT (value & ~mask); + operands[4] = GEN_INT (value & mask); +}) + ;; Split a load of a large constant into the appropriate five-instruction ;; sequence. Handle anything in a constant number of insns. ;; When non-easy constants can go in the TOC, this should use diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 9863c476baca..05f066cf0c7a 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5743,20 +5743,32 @@ DONE; }) -;; Load VSX Vector with Length +;; Load VSX Vector with Length. If we have lxvrl, we don't have to do an +;; explicit shift left into a pseudo. (define_expand "lxvl" - [(set (match_dup 3) - (ashift:DI (match_operand:DI 2 "register_operand") - (const_int 56))) - (set (match_operand:V16QI 0 "vsx_register_operand") - (unspec:V16QI - [(match_operand:DI 1 "gpc_reg_operand") - (mem:V16QI (match_dup 1)) - (match_dup 3)] - UNSPEC_LXVL))] + [(use (match_operand:V16QI 0 "vsx_register_operand")) + (use (match_operand:DI 1 "gpc_reg_operand")) + (use (match_operand:DI 2 "gpc_reg_operand"))] "TARGET_P9_VECTOR && TARGET_64BIT" { - operands[3] = gen_reg_rtx (DImode); + rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56)); + rtx len; + + if (TARGET_FUTURE) + len = shift_len; + else + { + len = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (len, shift_len)); + } + + rtx dest = operands[0]; + rtx addr = operands[1]; + rtx mem = gen_rtx_MEM (V16QImode, addr); + rtvec rv = gen_rtvec (3, addr, mem, len); + rtx lxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_LXVL); + emit_insn (gen_rtx_SET (dest, lxvl)); + DONE; }) (define_insn "*lxvl" @@ -5780,6 +5792,34 @@ "lxvll %x0,%1,%2" [(set_attr "type" "vecload")]) +;; For lxvrl and lxvrll, use the combiner to eliminate the shift. The +;; define_expand for lxvl will already incorporate the shift in generating the +;; insn. The lxvll built-in function required the user to have already done +;; the shift. 
Defining lxvrll this way, will optimize cases where the user has +;; done the shift immediately before the built-in. +(define_insn "*lxvrl" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI + [(match_operand:DI 1 "gpc_reg_operand" "b") + (mem:V16QI (match_dup 1)) + (ashift:DI (match_operand:DI 2 "register_operand" "r") + (const_int 56))] + UNSPEC_LXVL))] + "TARGET_FUTURE && TARGET_64BIT" + "lxvrl %x0,%1,%2" + [(set_attr "type" "vecload")]) + +(define_insn "*lxvrll" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") + (mem:V16QI (match_dup 1)) + (ashift:DI (match_operand:DI 2 "register_operand" "r") + (const_int 56))] + UNSPEC_LXVLL))] + "TARGET_FUTURE" + "lxvrll %x0,%1,%2" + [(set_attr "type" "vecload")]) + ;; Expand for builtin xl_len_r (define_expand "xl_len_r" [(match_operand:V16QI 0 "vsx_register_operand") @@ -5811,18 +5851,29 @@ ;; Store VSX Vector with Length (define_expand "stxvl" - [(set (match_dup 3) - (ashift:DI (match_operand:DI 2 "register_operand") - (const_int 56))) - (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand")) - (unspec:V16QI - [(match_operand:V16QI 0 "vsx_register_operand") - (mem:V16QI (match_dup 1)) - (match_dup 3)] - UNSPEC_STXVL))] + [(use (match_operand:V16QI 0 "vsx_register_operand")) + (use (match_operand:DI 1 "gpc_reg_operand")) + (use (match_operand:DI 2 "gpc_reg_operand"))] "TARGET_P9_VECTOR && TARGET_64BIT" { - operands[3] = gen_reg_rtx (DImode); + rtx shift_len = gen_rtx_ASHIFT (DImode, operands[2], GEN_INT (56)); + rtx len; + + if (TARGET_FUTURE) + len = shift_len; + else + { + len = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (len, shift_len)); + } + + rtx src = operands[0]; + rtx addr = operands[1]; + rtx mem = gen_rtx_MEM (V16QImode, addr); + rtvec rv = gen_rtvec (3, src, mem, len); + rtx stxvl = gen_rtx_UNSPEC (V16QImode, rv, UNSPEC_STXVL); + emit_insn (gen_rtx_SET (mem, stxvl)); + DONE; }) ;; Define optab for vector 
access with length vectorization exploitation. @@ -5867,6 +5918,35 @@ "stxvl %x0,%1,%2" [(set_attr "type" "vecstore")]) +;; For stxvrl and stxvrll, use the combiner to eliminate the shift. The +;; define_expand for stxvl will already incorporate the shift in generating the +;; insn. The stxvll built-in function required the user to have already done +;; the shift. Defining stxvrll this way, will optimize cases where the user +;; has done the shift immediately before the built-in. + +(define_insn "*stxvrl" + [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) + (unspec:V16QI + [(match_operand:V16QI 0 "vsx_register_operand" "wa") + (mem:V16QI (match_dup 1)) + (ashift:DI (match_operand:DI 2 "register_operand" "r") + (const_int 56))] + UNSPEC_STXVL))] + "TARGET_FUTURE && TARGET_64BIT" + "stxvrl %x0,%1,%2" + [(set_attr "type" "vecstore")]) + +(define_insn "*stxvrll" + [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b")) + (unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa") + (mem:V16QI (match_dup 1)) + (ashift:DI (match_operand:DI 2 "register_operand" "r") + (const_int 56))] + UNSPEC_STXVLL))] + "TARGET_FUTURE" + "stxvrll %x0,%1,%2" + [(set_attr "type" "vecstore")]) + ;; Expand for builtin xst_len_r (define_expand "xst_len_r" [(match_operand:V16QI 0 "vsx_register_operand" "=wa") diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index f227353bd82c..b22d9092ea2d 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -3368,6 +3368,12 @@ loaded to a VSX register with one prefixed instruction. An IEEE 128-bit constant that can be loaded into a VSX register with the @code{lxvkq} instruction. +@item eU +A signed integer constant that can be used with the paddis instruction. + +@item eV +A signed integer constant that paddis and paddi instructions generate. 
+ @ifset INTERNALS @item G A floating point constant that can be loaded into a register with one diff --git a/gcc/testsuite/gcc.target/powerpc/future-3.c b/gcc/testsuite/gcc.target/powerpc/future-3.c new file mode 100644 index 000000000000..afa22228b96d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/future-3.c @@ -0,0 +1,22 @@ +/* 32-bit doesn't generate vector pair instructions. */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +/* Test to see that memcpy will use load/store vector pair with + -mcpu=future. */ + +#ifndef SIZE +#define SIZE 4 +#endif + +extern vector double to[SIZE], from[SIZE]; + +void +copy (void) +{ + __builtin_memcpy (to, from, sizeof (to)); + return; +} + +/* { dg-final { scan-assembler {\mlxvpx?\M} } } */ +/* { dg-final { scan-assembler {\mstxvpx?\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/lxvrl.c b/gcc/testsuite/gcc.target/powerpc/lxvrl.c new file mode 100644 index 000000000000..71854c50c911 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/lxvrl.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_future_ok } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +/* Test whether the lxvrl and stxvrl instructions are generated for + -mcpu=future on memory copy operations. */ + +#ifndef VSIZE +#define VSIZE 2 +#endif + +#ifndef LSIZE +#define LSIZE 5 +#endif + +struct foo { + vector unsigned char vc[VSIZE]; + unsigned char leftover[LSIZE]; +}; + +void memcpy_ptr (struct foo *p, struct foo *q) +{ + __builtin_memcpy ((void *) p, /* lxvrl and stxvrl. 
*/ + (void *) q, + (sizeof (vector unsigned char) * VSIZE) + LSIZE); +} + +/* { dg-final { scan-assembler {\mlxvrl\M} } } */ +/* { dg-final { scan-assembler {\mstxvrl\M} } } */ +/* { dg-final { scan-assembler-not {\mlxvl\M} } } */ +/* { dg-final { scan-assembler-not {\mstxvl\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c b/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c new file mode 100644 index 000000000000..d08e3675f94c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/prefixed-addis.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_future_ok } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +/* Test whether paddis, alone or combined with paddi, is generated when + adding large 64-bit constants. */ + +#include <stddef.h> + +size_t +prefix_addis_addi (size_t x) +{ + return x + 0x123456789ABCDEUL; /* paddis + paddi. */ +} + +size_t +prefix_addis (size_t x) +{ + return x + 0x12345600000000UL; /* paddis. */ +} + +/* { dg-final { scan-assembler-times {\mpaddis\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mpaddi\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/saturate-subtract-1.c b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-1.c new file mode 100644 index 000000000000..c32a70a5e898 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-1.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ +/* { dg-require-effective-target powerpc_future_ok } */ + +/* Check that saturating subtract (subfus) is generated. Check that all + combinations of >, >=, <, and <= are optimized. */ + +#ifndef TYPE +#define TYPE unsigned int +#endif + +void +saturated_subtract_gt (TYPE a, TYPE b, TYPE *p) +{ + *p = (a > b) ? a - b : 0; +} + +void +saturated_subtract_ge (TYPE a, TYPE b, TYPE *p) +{ + *p = (a >= b) ? 
a - b : 0; +} + +void +saturated_subtract_lt (TYPE a, TYPE b, TYPE *p) +{ + *p = (a < b) ? 0 : a - b; +} + +void +saturated_subtract_le (TYPE a, TYPE b, TYPE *p) +{ + *p = (a <= b) ? 0 : a - b; +} + +/* { dg-final { scan-assembler-times {\msubwus\M} 4 } } */ +/* { dg-final { scan-assembler-not {\mcmplw\M} } } */ +/* { dg-final { scan-assembler-not {\misel\M} } } */ +/* { dg-final { scan-assembler-not {\msubf\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/saturate-subtract-2.c b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-2.c new file mode 100644 index 000000000000..482d7384c172 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/saturate-subtract-2.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_future_ok } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +/* Check that saturating subtract (subfus) is generated. Check that all + combinations of >, >=, <, and <= are optimized. */ + +#ifndef TYPE +#define TYPE unsigned long long +#endif + +void +saturated_subtract_gt (TYPE a, TYPE b, TYPE *p) +{ + *p = (a > b) ? a - b : 0; +} + +void +saturated_subtract_ge (TYPE a, TYPE b, TYPE *p) +{ + *p = (a >= b) ? a - b : 0; +} + +void +saturated_subtract_lt (TYPE a, TYPE b, TYPE *p) +{ + *p = (a < b) ? 0 : a - b; +} + +void +saturated_subtract_le (TYPE a, TYPE b, TYPE *p) +{ + *p = (a <= b) ? 
0 : a - b; +} + +/* { dg-final { scan-assembler-times {\msubdus\M} 4 } } */ +/* { dg-final { scan-assembler-not {\mcmpld\M} } } */ +/* { dg-final { scan-assembler-not {\misel\M} } } */ +/* { dg-final { scan-assembler-not {\msubf\M} } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c new file mode 100644 index 000000000000..f9e87ad4bfcf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-rotate-left.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_future_ok } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +/* Test whether xvrlw (vector rotate left word using VSX registers instead of + Altivec registers) is generated. */ + +#include <altivec.h> + +typedef vector unsigned int v4si_t; + +v4si_t +rotl_v4si_scalar (v4si_t x, unsigned long n) +{ + __asm__ (" # %x0" : "+f" (x)); + return (x << n) | (x >> (32 - n)); /* xvrlw. */ +} + +v4si_t +rotr_v4si_scalar (v4si_t x, unsigned long n) +{ + __asm__ (" # %x0" : "+f" (x)); + return (x >> n) | (x << (32 - n)); /* xvrlw. */ +} + +v4si_t +rotl_v4si_vector (v4si_t x, v4si_t y) +{ + __asm__ (" # %x0" : "+f" (x)); /* xvrlw. 
*/ + return vec_rl (x, y); +} + +/* { dg-final { scan-assembler-times {\mxvrlw\M} 3 } } */ +/* { dg-final { scan-assembler-not {\mvrlw\M} } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index fab707f07fd9..b5e1acf39ae9 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8282,6 +8282,19 @@ proc check_htm_hw_available { } { } }] } + +# Return 1 if this is a PowerPC target supporting -mcpu=future + +proc check_effective_target_powerpc_future_ok { } { + return [check_no_compiler_messages powerpc_future_ok object { + unsigned long a, b, c; + int main (void) { + asm ("subdus %0,%1,%2" : "=r" (a) : "r" (b), "r" (c)); + return 0; + } + } "-mcpu=future"] +} + # Return 1 if this is a PowerPC target supporting -mcpu=cell. proc check_effective_target_powerpc_ppu_ok { } {
