Re: [PATCH, rs6000] power8 patch #1, infrastructure changes (revised patch)

Michael Meissner Mon, 20 May 2013 14:34:26 -0700

After submitting the patch, I realized I had submitted a previous version of
the patch, that had the wq constraint that was initially for the quad memory
operations, and also had the changes for ChangeLog.ibm, that I keep on the
branch.  However, the wq constraint was always equal to the r constraint, so I
have removed it, and used the 'r' constraint once again.


I have also done bootstraps and make check with the patches submitted, with no
regressions found.  Can I check in the revised patch?

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 199121)
+++ gcc/doc/invoke.texi (revision 199122)
@@ -860,7 +860,10 @@ See RS/6000 and PowerPC Options.
 -mno-recip-precision @gol
 -mveclibabi=@var{type} -mfriz -mno-friz @gol
 -mpointers-to-nested-functions -mno-pointers-to-nested-functions @gol
--msave-toc-indirect -mno-save-toc-indirect}
+-msave-toc-indirect -mno-save-toc-indirect @gol
+-mpower8-fusion -mno-power8-fusion -mpower8-vector -mno-power8-vector @gol
+-mcrypto -mno-crypto -mdirect-move -mno-direct-move @gol
+-mquad-memory -mno-quad-memory}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -17341,7 +17344,8 @@ following options:
 @gccoptlist{-maltivec  -mfprnd  -mhard-float  -mmfcrf  -mmultiple @gol
 -mpopcntb -mpopcntd  -mpowerpc64 @gol
 -mpowerpc-gpopt  -mpowerpc-gfxopt  -msingle-float -mdouble-float @gol
--msimple-fpu -mstring  -mmulhw  -mdlmzb  -mmfpgpr -mvsx}
+-msimple-fpu -mstring  -mmulhw  -mdlmzb  -mmfpgpr -mvsx @gol
+-mcrypto -mdirect-move -mpower8-fusion -mpower8-vector -mquad-memory}
 
 The particular options set for any particular CPU varies between
 compiler versions, depending on what setting seems to produce optimal
@@ -17459,6 +17463,47 @@ Generate code that uses (does not use) v
 instructions, and also enable the use of built-in functions that allow
 more direct access to the VSX instruction set.
 
+@item -mcrypto
+@itemx -mno-crypto
+@opindex mcrypto
+@opindex mno-crypto
+Enable (disable) the use of the built-in functions that allow direct
+access to the cryptographic instructions that were added in version
+2.07 of the PowerPC ISA.
+
+@item -mdirect-move
+@itemx -mno-direct-move
+@opindex mdirect-move
+@opindex mno-direct-move
+Generate code that uses (does not use) the instructions to move data
+between the general purpose registers and the vector/scalar (VSX)
+registers that were added in version 2.07 of the PowerPC ISA.
+
+@item -mpower8-fusion
+@itemx -mno-power8-fusion
+@opindex mpower8-fusion
+@opindex mno-power8-fusion
+Generate code that keeps (does not keep) some integer operations
+adjacent so that the instructions can be fused together on power8 and
+later processors.
+
+@item -mpower8-vector
+@itemx -mno-power8-vector
+@opindex mpower8-vector
+@opindex mno-power8-vector
+Generate code that uses (does not use) the vector and scalar
+instructions that were added in version 2.07 of the PowerPC ISA.  Also
+enable the use of built-in functions that allow more direct access to
+the vector instructions.
+
+@item -mquad-memory
+@itemx -mno-quad-memory
+@opindex mquad-memory
+@opindex mno-quad-memory
+Generate code that uses (does not use) the quad word memory
+instructions.  The @option{-mquad-memory} option requires use of
+64-bit mode.
+
 @item -mfloat-gprs=@var{yes/single/double/no}
 @itemx -mfloat-gprs
 @opindex mfloat-gprs
Index: gcc/doc/md.texi
===================================================================
--- gcc/doc/md.texi     (revision 199121)
+++ gcc/doc/md.texi     (revision 199122)
@@ -2055,7 +2055,7 @@ Any constant whose absolute value is no 
 
 @end table
 
-@item PowerPC and IBM RS6000---@file{config/rs6000/rs6000.h}
+@item PowerPC and IBM RS6000---@file{config/rs6000/constraints.md}
 @table @code
 @item b
 Address base register
@@ -2069,6 +2069,9 @@ Floating point register (containing 32-b
 @item v
 Altivec vector register
 
+@item wa
+Any VSX register
+
 @item wd
 VSX vector register to hold vector double data
 
@@ -2081,6 +2084,15 @@ If @option{-mmfpgpr} was used, a floatin
 @item wl
 If the LFIWAX instruction is enabled, a floating point register
 
+@item wm
+If direct moves are enabled, a VSX register.
+
+@item wn
+No register.
+
+@item wr
+General purpose register if 64-bit mode is used
+
 @item ws
 VSX vector register to hold scalar float data
 
@@ -2093,8 +2105,9 @@ If the STFIWX instruction is enabled, a 
 @item wz
 If the LFIWZX instruction is enabled, a floating point register
 
-@item wa
-Any VSX register
+@item wQ
+A memory address that will work with the @code{lq} and @code{stq}
+instructions.
 
 @item h
 @samp{MQ}, @samp{CTR}, or @samp{LINK} register
Index: gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc/config/rs6000/rs6000.opt        (revision 199121)
+++ gcc/config/rs6000/rs6000.opt        (revision 199122)
@@ -517,4 +517,28 @@ Control whether we save the TOC in the p
 
 mvsx-timode
 Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags)
-; Allow/disallow TImode in VSX registers
+Allow 128-bit integers in VSX registers
+
+mpower8-fusion
+Target Report Mask(P8_FUSION) Var(rs6000_isa_flags)
+Fuse certain integer operations together for better performance on power8
+
+mpower8-fusion-sign
+Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
+Allow sign extension in fusion operations
+
+mpower8-vector
+Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
+Use/do not use vector and scalar instructions added in ISA 2.07.
+
+mcrypto
+Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
+Use ISA 2.07 crypto instructions
+
+mdirect-move
+Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags)
+Use ISA 2.07 direct move between GPR & VSX register instructions
+
+mquad-memory
+Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
+Generate the quad word memory instructions (lq/stq/lqarx/stqcx).
Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c        (revision 199121)
+++ gcc/config/rs6000/rs6000-c.c        (revision 199122)
@@ -315,6 +315,8 @@ rs6000_target_modify_macros (bool define
     rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X");
   if ((flags & OPTION_MASK_POPCNTD) != 0)
     rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
+  if ((flags & OPTION_MASK_DIRECT_MOVE) != 0)
+    rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8");
   if ((flags & OPTION_MASK_SOFT_FLOAT) != 0)
     rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT");
   if ((flags & OPTION_MASK_RECIP_PRECISION) != 0)
@@ -331,6 +333,8 @@ rs6000_target_modify_macros (bool define
     }
   if ((flags & OPTION_MASK_VSX) != 0)
     rs6000_define_or_undefine_macro (define_p, "__VSX__");
+  if ((flags & OPTION_MASK_P8_VECTOR) != 0)
+    rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
 
   /* options from the builtin masks.  */
   if ((bu_mask & RS6000_BTM_SPE) != 0)
Index: gcc/config/rs6000/constraints.md
===================================================================
--- gcc/config/rs6000/constraints.md    (revision 199121)
+++ gcc/config/rs6000/constraints.md    (revision 199122)
@@ -79,12 +79,31 @@ (define_register_constraint "wg" "rs6000
 (define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]"
   "Floating point register if the LFIWAX instruction is enabled or NO_REGS.")
 
+(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]"
+  "VSX register if direct move instructions are enabled, or NO_REGS.")
+
+(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
+  "General purpose register if 64-bit instructions are enabled or NO_REGS.")
+
+(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]"
+  "Altivec register if -mpower8-vector is used or NO_REGS.")
+
 (define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
   "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
 
 (define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]"
   "Floating point register if the LFIWZX instruction is enabled or NO_REGS.")
 
+;; NO_REGS register constraint, used to merge mov{sd,sf}, since movsd can use
+;; direct move to move between the register sets, and movsf can't.
+;; There is a mode_attr that resolves to wm for SDmode and wn for SFmode
+(define_register_constraint "wn" "NO_REGS")
+
+;; Lq/stq validates the address for load/store quad
+(define_memory_constraint "wQ"
+  "Memory operand suitable for the load/store quad instructions"
+  (match_operand 0 "quad_memory_operand"))
+
 ;; Altivec style load/store that ignores the bottom bits of the address
 (define_memory_constraint "wZ"
   "Indexed or indirect memory operand, ignoring the bottom 4 bits"
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 199121)
+++ gcc/config/rs6000/rs6000.c  (revision 199122)
@@ -831,6 +831,25 @@ struct processor_costs power7_cost = {
   12,                  /* prefetch streams */
 };
 
+/* Instruction costs on POWER8 processors.  */
+static const
+struct processor_costs power8_cost = {
+  COSTS_N_INSNS (3),   /* mulsi */
+  COSTS_N_INSNS (3),   /* mulsi_const */
+  COSTS_N_INSNS (3),   /* mulsi_const9 */
+  COSTS_N_INSNS (3),   /* muldi */
+  COSTS_N_INSNS (19),  /* divsi */
+  COSTS_N_INSNS (35),  /* divdi */
+  COSTS_N_INSNS (3),   /* fp */
+  COSTS_N_INSNS (3),   /* dmul */
+  COSTS_N_INSNS (14),  /* sdiv */
+  COSTS_N_INSNS (17),  /* ddiv */
+  128,                 /* cache line size */
+  32,                  /* l1 cache */
+  256,                 /* l2 cache */
+  12,                  /* prefetch streams */
+};
+
 /* Instruction costs on POWER A2 processors.  */
 static const
 struct processor_costs ppca2_cost = {
@@ -1547,6 +1566,15 @@ rs6000_hard_regno_mode_ok (int regno, en
 {
   int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
 
+  /* PTImode can only go in GPRs.  Quad word memory operations require even/odd
+     register combinations, and use PTImode where we need to deal with quad
+     word memory operations.  Don't allow quad words in the argument or frame
+     pointer registers, just registers 0..31.  */
+  if (mode == PTImode)
+    return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+           && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+           && ((regno & 1) == 0));
+
   /* VSX registers that overlap the FPR registers are larger than for non-VSX
      implementations.  Don't allow an item to be split between a FP register
      and an Altivec register.  */
@@ -1678,6 +1706,16 @@ rs6000_debug_reg_print (int first_regno,
          comma = "";
        }
 
+      len += fprintf (stderr, "%sreg-class = %s", comma,
+                     reg_class_names[(int)rs6000_regno_regclass[r]]);
+      comma = ", ";
+
+      if (len > 70)
+       {
+         fprintf (stderr, ",\n\t");
+         comma = "";
+       }
+
       fprintf (stderr, "%sregno = %d\n", comma, r);
     }
 }
@@ -1710,6 +1748,7 @@ rs6000_debug_reg_global (void)
     "none",
     "altivec",
     "vsx",
+    "p8_vector",
     "paired",
     "spe",
     "other"
@@ -1802,8 +1841,11 @@ rs6000_debug_reg_global (void)
           "wf reg_class = %s\n"
           "wg reg_class = %s\n"
           "wl reg_class = %s\n"
+          "wm reg_class = %s\n"
+          "wr reg_class = %s\n"
           "ws reg_class = %s\n"
           "wt reg_class = %s\n"
+          "wv reg_class = %s\n"
           "wx reg_class = %s\n"
           "wz reg_class = %s\n"
           "\n",
@@ -1815,8 +1857,11 @@ rs6000_debug_reg_global (void)
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
+          reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
+          reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
+          reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
           reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
 
@@ -2050,6 +2095,10 @@ rs6000_debug_reg_global (void)
   if (targetm.lra_p ())
     fprintf (stderr, DEBUG_FMT_S, "lra", "true");
 
+  if (TARGET_P8_FUSION)
+    fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
+            (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
+
   fprintf (stderr, DEBUG_FMT_S, "plt-format",
           TARGET_SECURE_PLT ? "secure" : "bss");
   fprintf (stderr, DEBUG_FMT_S, "struct-return",
@@ -2240,6 +2289,15 @@ rs6000_init_hard_regno_mode_ok (bool glo
   if (TARGET_LFIWAX)
     rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
 
+  if (TARGET_DIRECT_MOVE)
+    rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
+
+  if (TARGET_POWERPC64)
+    rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
+
+  if (TARGET_P8_VECTOR)
+    rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
+
   if (TARGET_STFIWX)
     rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
 
@@ -2520,16 +2578,18 @@ darwin_rs6000_override_options (void)
 HOST_WIDE_INT
 rs6000_builtin_mask_calculate (void)
 {
-  return (((TARGET_ALTIVEC)                ? RS6000_BTM_ALTIVEC  : 0)
-         | ((TARGET_VSX)                   ? RS6000_BTM_VSX      : 0)
-         | ((TARGET_SPE)                   ? RS6000_BTM_SPE      : 0)
-         | ((TARGET_PAIRED_FLOAT)          ? RS6000_BTM_PAIRED   : 0)
-         | ((TARGET_FRE)                   ? RS6000_BTM_FRE      : 0)
-         | ((TARGET_FRES)                  ? RS6000_BTM_FRES     : 0)
-         | ((TARGET_FRSQRTE)               ? RS6000_BTM_FRSQRTE  : 0)
-         | ((TARGET_FRSQRTES)              ? RS6000_BTM_FRSQRTES : 0)
-         | ((TARGET_POPCNTD)               ? RS6000_BTM_POPCNTD  : 0)
-         | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL     : 0));
+  return (((TARGET_ALTIVEC)                ? RS6000_BTM_ALTIVEC   : 0)
+         | ((TARGET_VSX)                   ? RS6000_BTM_VSX       : 0)
+         | ((TARGET_SPE)                   ? RS6000_BTM_SPE       : 0)
+         | ((TARGET_PAIRED_FLOAT)          ? RS6000_BTM_PAIRED    : 0)
+         | ((TARGET_FRE)                   ? RS6000_BTM_FRE       : 0)
+         | ((TARGET_FRES)                  ? RS6000_BTM_FRES      : 0)
+         | ((TARGET_FRSQRTE)               ? RS6000_BTM_FRSQRTE   : 0)
+         | ((TARGET_FRSQRTES)              ? RS6000_BTM_FRSQRTES  : 0)
+         | ((TARGET_POPCNTD)               ? RS6000_BTM_POPCNTD   : 0)
+         | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL      : 0)
+         | ((TARGET_P8_VECTOR)             ? RS6000_BTM_P8_VECTOR : 0)
+         | ((TARGET_CRYPTO)                ? RS6000_BTM_CRYPTO    : 0));
 }
 
 /* Override command line options.  Mostly we process the processor type and
@@ -2803,7 +2863,9 @@ rs6000_option_override_internal (bool gl
 
   /* For the newer switches (vsx, dfp, etc.) set some of the older options,
      unless the user explicitly used the -mno-<option> to disable the code.  */
-  if (TARGET_VSX)
+  if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
+    rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
+  else if (TARGET_VSX)
     rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
   else if (TARGET_POPCNTD)
     rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
@@ -2818,6 +2880,34 @@ rs6000_option_override_internal (bool gl
   else if (TARGET_ALTIVEC)
     rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
 
+  if (TARGET_CRYPTO && !TARGET_ALTIVEC)
+    {
+      if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
+       error ("-mcrypto requires -maltivec");
+      rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
+    }
+
+  if (TARGET_DIRECT_MOVE && !TARGET_VSX)
+    {
+      if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
+       error ("-mdirect-move requires -mvsx");
+      rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
+    }
+
+  if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
+    {
+      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+       error ("-mpower8-vector requires -maltivec");
+      rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+    }
+
+  if (TARGET_P8_VECTOR && !TARGET_VSX)
+    {
+      if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+       error ("-mpower8-vector requires -mvsx");
+      rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+    }
+
   if (TARGET_VSX_TIMODE && !TARGET_VSX)
     {
       if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
@@ -3019,16 +3109,19 @@ rs6000_option_override_internal (bool gl
                        && rs6000_cpu != PROCESSOR_POWER5
                        && rs6000_cpu != PROCESSOR_POWER6
                        && rs6000_cpu != PROCESSOR_POWER7
+                       && rs6000_cpu != PROCESSOR_POWER8
                        && rs6000_cpu != PROCESSOR_PPCA2
                        && rs6000_cpu != PROCESSOR_CELL
                        && rs6000_cpu != PROCESSOR_PPC476);
   rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
                         || rs6000_cpu == PROCESSOR_POWER5
-                        || rs6000_cpu == PROCESSOR_POWER7);
+                        || rs6000_cpu == PROCESSOR_POWER7
+                        || rs6000_cpu == PROCESSOR_POWER8);
   rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
                                 || rs6000_cpu == PROCESSOR_POWER5
                                 || rs6000_cpu == PROCESSOR_POWER6
                                 || rs6000_cpu == PROCESSOR_POWER7
+                                || rs6000_cpu == PROCESSOR_POWER8
                                 || rs6000_cpu == PROCESSOR_PPCE500MC
                                 || rs6000_cpu == PROCESSOR_PPCE500MC64
                                 || rs6000_cpu == PROCESSOR_PPCE5500
@@ -3272,6 +3365,10 @@ rs6000_option_override_internal (bool gl
        rs6000_cost = &power7_cost;
        break;
 
+      case PROCESSOR_POWER8:
+       rs6000_cost = &power8_cost;
+       break;
+
       case PROCESSOR_PPCA2:
        rs6000_cost = &ppca2_cost;
        break;
@@ -3444,7 +3541,8 @@ rs6000_loop_align (rtx label)
       && (rs6000_cpu == PROCESSOR_POWER4
          || rs6000_cpu == PROCESSOR_POWER5
          || rs6000_cpu == PROCESSOR_POWER6
-         || rs6000_cpu == PROCESSOR_POWER7))
+         || rs6000_cpu == PROCESSOR_POWER7
+         || rs6000_cpu == PROCESSOR_POWER8))
     return 5;
   else
     return align_loops_log;
@@ -12891,8 +12989,23 @@ rs6000_common_init_builtins (void)
       else
        {
          enum insn_code icode = d->icode;
-          if (d->name == 0 || icode == CODE_FOR_nothing)
-           continue;
+         if (d->name == 0)
+           {
+             if (TARGET_DEBUG_BUILTIN)
+               fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
+                        (long unsigned)i);
+
+             continue;
+           }
+
+          if (icode == CODE_FOR_nothing)
+           {
+             if (TARGET_DEBUG_BUILTIN)
+               fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
+                        d->name);
+
+             continue;
+           }
 
          type = builtin_function_type (insn_data[icode].operand[0].mode,
                                        insn_data[icode].operand[1].mode,
@@ -12931,8 +13044,23 @@ rs6000_common_init_builtins (void)
       else
        {
          enum insn_code icode = d->icode;
-          if (d->name == 0 || icode == CODE_FOR_nothing)
-           continue;
+         if (d->name == 0)
+           {
+             if (TARGET_DEBUG_BUILTIN)
+               fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
+                        (long unsigned)i);
+
+             continue;
+           }
+
+          if (icode == CODE_FOR_nothing)
+           {
+             if (TARGET_DEBUG_BUILTIN)
+               fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
+                        d->name);
+
+             continue;
+           }
 
           mode0 = insn_data[icode].operand[0].mode;
           mode1 = insn_data[icode].operand[1].mode;
@@ -12993,8 +13121,23 @@ rs6000_common_init_builtins (void)
       else
         {
          enum insn_code icode = d->icode;
-          if (d->name == 0 || icode == CODE_FOR_nothing)
-           continue;
+         if (d->name == 0)
+           {
+             if (TARGET_DEBUG_BUILTIN)
+               fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
+                        (long unsigned)i);
+
+             continue;
+           }
+
+          if (icode == CODE_FOR_nothing)
+           {
+             if (TARGET_DEBUG_BUILTIN)
+               fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
+                        d->name);
+
+             continue;
+           }
 
           mode0 = insn_data[icode].operand[0].mode;
           mode1 = insn_data[icode].operand[1].mode;
@@ -22951,6 +23094,7 @@ rs6000_adjust_cost (rtx insn, rtx link, 
                  || rs6000_cpu_attr == CPU_POWER4
                  || rs6000_cpu_attr == CPU_POWER5
                 || rs6000_cpu_attr == CPU_POWER7
+                || rs6000_cpu_attr == CPU_POWER8
                  || rs6000_cpu_attr == CPU_CELL)
                 && recog_memoized (dep_insn)
                 && (INSN_CODE (dep_insn) >= 0))
@@ -23537,6 +23681,8 @@ rs6000_issue_rate (void)
   case CPU_POWER6:
   case CPU_POWER7:
     return 5;
+  case CPU_POWER8:
+    return 7;
   default:
     return 1;
   }
@@ -24130,6 +24276,7 @@ insn_must_be_first_in_group (rtx insn)
         }
       break;
     case PROCESSOR_POWER7:
+    case PROCESSOR_POWER8:     /* FIXME */
       type = get_attr_type (insn);
 
       switch (type)
@@ -24226,6 +24373,7 @@ insn_must_be_last_in_group (rtx insn)
     }
     break;
   case PROCESSOR_POWER7:
+  case PROCESSOR_POWER8:       /* FIXME */
     type = get_attr_type (insn);
 
     switch (type)
@@ -24332,7 +24480,8 @@ force_new_group (int sched_verbose, FILE
        can_issue_more--;
 
       /* Power6 and Power7 have special group ending nop. */
-      if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7)
+      if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
+         || rs6000_cpu_attr == CPU_POWER8)
        {
          nop = gen_group_ending_nop ();
          emit_insn_before (nop, next_insn);
@@ -26513,7 +26662,8 @@ rs6000_register_move_cost (enum machine_
       /* For those processors that have slow LR/CTR moves, make them more
          expensive than memory in order to bias spills to memory .*/
       else if ((rs6000_cpu == PROCESSOR_POWER6
-               || rs6000_cpu == PROCESSOR_POWER7)
+               || rs6000_cpu == PROCESSOR_POWER7
+               || rs6000_cpu == PROCESSOR_POWER8)
               && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
         ret = 6 * hard_regno_nregs[0][mode];
 
@@ -27742,6 +27892,8 @@ static struct rs6000_opt_mask const rs60
 {
   { "altivec",                 OPTION_MASK_ALTIVEC,            false, true  },
   { "cmpb",                    OPTION_MASK_CMPB,               false, true  },
+  { "crypto",                  OPTION_MASK_CRYPTO,             false, true  },
+  { "direct-move",             OPTION_MASK_DIRECT_MOVE,        false, true  },
   { "dlmzb",                   OPTION_MASK_DLMZB,              false, true  },
   { "fprnd",                   OPTION_MASK_FPRND,              false, true  },
   { "hard-dfp",                        OPTION_MASK_DFP,                false, true  },
@@ -27750,13 +27902,17 @@ static struct rs6000_opt_mask const rs60
   { "mfpgpr",                  OPTION_MASK_MFPGPR,             false, true  },
   { "mulhw",                   OPTION_MASK_MULHW,              false, true  },
   { "multiple",                        OPTION_MASK_MULTIPLE,           false, true  },
-  { "update",                  OPTION_MASK_NO_UPDATE,          true , true  },
   { "popcntb",                 OPTION_MASK_POPCNTB,            false, true  },
   { "popcntd",                 OPTION_MASK_POPCNTD,            false, true  },
+  { "power8-fusion",           OPTION_MASK_P8_FUSION,          false, true  },
+  { "power8-fusion-sign",      OPTION_MASK_P8_FUSION_SIGN,     false, true  },
+  { "power8-vector",           OPTION_MASK_P8_VECTOR,          false, true  },
   { "powerpc-gfxopt",          OPTION_MASK_PPC_GFXOPT,         false, true  },
   { "powerpc-gpopt",           OPTION_MASK_PPC_GPOPT,          false, true  },
+  { "quad-memory",             OPTION_MASK_QUAD_MEMORY,        false, true  },
   { "recip-precision",         OPTION_MASK_RECIP_PRECISION,    false, true  },
   { "string",                  OPTION_MASK_STRING,             false, true  },
+  { "update",                  OPTION_MASK_NO_UPDATE,          true , true  },
   { "vsx",                     OPTION_MASK_VSX,                false, true  },
   { "vsx-timode",              OPTION_MASK_VSX_TIMODE,         false, true  },
 #ifdef OPTION_MASK_64BIT
@@ -27798,6 +27954,8 @@ static struct rs6000_opt_mask const rs60
   { "frsqrtes",                 RS6000_BTM_FRSQRTES,   false, false },
   { "popcntd",          RS6000_BTM_POPCNTD,    false, false },
   { "cell",             RS6000_BTM_CELL,       false, false },
+  { "power8-vector",    RS6000_BTM_P8_VECTOR,  false, false },
+  { "crypto",           RS6000_BTM_CRYPTO,     false, false },
 };
 
 /* Option variables that we want to support inside attribute((target)) and
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h  (revision 199121)
+++ gcc/config/rs6000/rs6000.h  (revision 199122)
@@ -92,7 +92,7 @@
 #ifdef HAVE_AS_POWER8
 #define ASM_CPU_POWER8_SPEC "-mpower8"
 #else
-#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec"
+#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC
 #endif
 
 #ifdef HAVE_AS_DCI
@@ -164,6 +164,7 @@
 %{mcpu=e6500: -me6500} \
 %{maltivec: -maltivec} \
 %{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \
+%{mpower8-vector|mcrypto|mdirect-move: %{!mcpu*: %(asm_cpu_power8)}} \
 -many"
 
 #define CPP_DEFAULT_SPEC ""
@@ -277,6 +278,19 @@ extern const char *host_detect_local_cpu
 #define TARGET_POPCNTD 0
 #endif
 
+/* Define the ISA 2.07 flags as 0 if the target assembler does not support the
+   waitasecond instruction.  Allow -mpower8-fusion, since it does not add new
+   instructions.  */
+
+#ifndef HAVE_AS_POWER8
+#undef  TARGET_DIRECT_MOVE
+#undef  TARGET_CRYPTO
+#undef  TARGET_P8_VECTOR
+#define TARGET_DIRECT_MOVE 0
+#define TARGET_CRYPTO 0
+#define TARGET_P8_VECTOR 0
+#endif
+
 /* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync.  If
    not, generate the lwsync code as an integer constant.  */
 #ifdef HAVE_AS_LWSYNC
@@ -386,6 +400,7 @@ extern const char *host_detect_local_cpu
 #define TARGET_DEBUG_TARGET    (rs6000_debug & MASK_DEBUG_TARGET)
 #define TARGET_DEBUG_BUILTIN   (rs6000_debug & MASK_DEBUG_BUILTIN)
 
+/* Describe the vector unit used for arithmetic operations.  */
 extern enum rs6000_vector rs6000_vector_unit[];
 
 #define VECTOR_UNIT_NONE_P(MODE)                       \
@@ -394,12 +409,25 @@ extern enum rs6000_vector rs6000_vector_
 #define VECTOR_UNIT_VSX_P(MODE)                                \
   (rs6000_vector_unit[(MODE)] == VECTOR_VSX)
 
+#define VECTOR_UNIT_P8_VECTOR_P(MODE)                  \
+  (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR)
+
 #define VECTOR_UNIT_ALTIVEC_P(MODE)                    \
   (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC)
 
+#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE)           \
+  (IN_RANGE ((int)rs6000_vector_unit[(MODE)],          \
+            (int)VECTOR_VSX,                           \
+            (int)VECTOR_P8_VECTOR))
+
+/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either
+   altivec (VMX) or VSX vector instructions.  P8 vector support is upwards
+   compatible, so allow it as well, rather than changing all of the uses of the
+   macro.  */
 #define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE)             \
-  (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC        \
-   || rs6000_vector_unit[(MODE)] == VECTOR_VSX)
+  (IN_RANGE ((int)rs6000_vector_unit[(MODE)],          \
+            (int)VECTOR_ALTIVEC,                       \
+            (int)VECTOR_P8_VECTOR))
 
 /* Describe whether to use VSX loads or Altivec loads.  For now, just use the
    same unit as the vector unit we are using, but we may want to migrate to
@@ -412,12 +440,21 @@ extern enum rs6000_vector rs6000_vector_
 #define VECTOR_MEM_VSX_P(MODE)                         \
   (rs6000_vector_mem[(MODE)] == VECTOR_VSX)
 
+#define VECTOR_MEM_P8_VECTOR_P(MODE)                   \
+  (rs6000_vector_mem[(MODE)] == VECTOR_P8_VECTOR)
+
 #define VECTOR_MEM_ALTIVEC_P(MODE)                     \
   (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC)
 
+#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE)            \
+  (IN_RANGE ((int)rs6000_vector_mem[(MODE)],           \
+            (int)VECTOR_VSX,                           \
+            (int)VECTOR_P8_VECTOR))
+
 #define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE)              \
-  (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC         \
-   || rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+  (IN_RANGE ((int)rs6000_vector_mem[(MODE)],           \
+            (int)VECTOR_ALTIVEC,                       \
+            (int)VECTOR_P8_VECTOR))
 
 /* Return the alignment of a given vector type, which is set based on the
    vector unit use.  VSX for instance can load 32 or 64 bit aligned words
@@ -479,6 +516,15 @@ extern int rs6000_vector_align[];
 #define TARGET_FCTIDUZ TARGET_POPCNTD
 #define TARGET_FCTIWUZ TARGET_POPCNTD
 
+#define TARGET_XSCVDPSPN       (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+#define TARGET_XSCVSPDPN       (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+
+/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
+   in power7, so conditionalize them on p8 features.  TImode syncs need quad
+   memory support.  */
+#define TARGET_SYNC_HI_QI      (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE)
+#define TARGET_SYNC_TI         TARGET_QUAD_MEMORY
+
 /* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
    to allocate the SDmode stack slot to get the value into the proper location
    in the register.  */
@@ -489,10 +535,13 @@ extern int rs6000_vector_align[];
    OPTION_MASK_<xxx> back into MASK_<xxx>.  */
 #define MASK_ALTIVEC                   OPTION_MASK_ALTIVEC
 #define MASK_CMPB                      OPTION_MASK_CMPB
+#define MASK_CRYPTO                    OPTION_MASK_CRYPTO
 #define MASK_DFP                       OPTION_MASK_DFP
+#define MASK_DIRECT_MOVE               OPTION_MASK_DIRECT_MOVE
 #define MASK_DLMZB                     OPTION_MASK_DLMZB
 #define MASK_EABI                      OPTION_MASK_EABI
 #define MASK_FPRND                     OPTION_MASK_FPRND
+#define MASK_P8_FUSION                 OPTION_MASK_P8_FUSION
 #define MASK_HARD_FLOAT                        OPTION_MASK_HARD_FLOAT
 #define MASK_ISEL                      OPTION_MASK_ISEL
 #define MASK_MFCRF                     OPTION_MASK_MFCRF
@@ -500,6 +549,7 @@ extern int rs6000_vector_align[];
 #define MASK_MULHW                     OPTION_MASK_MULHW
 #define MASK_MULTIPLE                  OPTION_MASK_MULTIPLE
 #define MASK_NO_UPDATE                 OPTION_MASK_NO_UPDATE
+#define MASK_P8_VECTOR                 OPTION_MASK_P8_VECTOR
 #define MASK_POPCNTB                   OPTION_MASK_POPCNTB
 #define MASK_POPCNTD                   OPTION_MASK_POPCNTD
 #define MASK_PPC_GFXOPT                        OPTION_MASK_PPC_GFXOPT
@@ -1002,7 +1052,9 @@ extern unsigned rs6000_pointer_size;
 
 #define REG_ALLOC_ORDER                                                \
   {32,                                                         \
-   45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34,             \
+   /* move fr13 (ie 45) later, so if we need TFmode, it does */        \
+   /* not use fr14 which is a saved register.  */              \
+   44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45,             \
    33,                                                         \
    63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51,         \
    50, 49, 48, 47, 46,                                         \
@@ -1062,8 +1114,14 @@ extern unsigned rs6000_pointer_size;
 #define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
 
 /* Alternate name for any vector register supporting logical operations, no
-   matter which instruction set(s) are available.  */
-#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N)
+   matter which instruction set(s) are available.  Under VSX, we allow GPRs as
+   well as vector registers on 64-bit systems.  We don't allow 32-bit systems,
+   due to the number of registers involved, and the number of instructions to
+   load/store the values.  */
+#define VLOGICAL_REGNO_P(N)                                            \
+  (ALTIVEC_REGNO_P (N)                                                 \
+   || (TARGET_VSX && FP_REGNO_P (N))                                   \
+   || (TARGET_VSX && TARGET_POWERPC64 && INT_REGNO_P (N)))
 
 /* Return number of consecutive hard regs needed starting at reg REGNO
    to hold something of mode MODE.  */
@@ -1124,7 +1182,7 @@ extern unsigned rs6000_pointer_size;
    when one has mode MODE1 and one has mode MODE2.
    If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
    for any hard reg, then this must be 0 for correct output.  */
-#define MODES_TIEABLE_P(MODE1, MODE2) \
+#define MODES_TIEABLE_P(MODE1, MODE2)          \
   (SCALAR_FLOAT_MODE_P (MODE1)                 \
    ? SCALAR_FLOAT_MODE_P (MODE2)               \
    : SCALAR_FLOAT_MODE_P (MODE2)               \
@@ -1137,14 +1195,14 @@ extern unsigned rs6000_pointer_size;
    ? SPE_VECTOR_MODE (MODE2)                   \
    : SPE_VECTOR_MODE (MODE2)                   \
    ? SPE_VECTOR_MODE (MODE1)                   \
-   : ALTIVEC_VECTOR_MODE (MODE1)               \
-   ? ALTIVEC_VECTOR_MODE (MODE2)               \
-   : ALTIVEC_VECTOR_MODE (MODE2)               \
-   ? ALTIVEC_VECTOR_MODE (MODE1)               \
    : ALTIVEC_OR_VSX_VECTOR_MODE (MODE1)                \
    ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2)                \
    : ALTIVEC_OR_VSX_VECTOR_MODE (MODE2)                \
    ? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1)                \
+   : ALTIVEC_VECTOR_MODE (MODE1)               \
+   ? ALTIVEC_VECTOR_MODE (MODE2)               \
+   : ALTIVEC_VECTOR_MODE (MODE2)               \
+   ? ALTIVEC_VECTOR_MODE (MODE1)               \
    : 1)
 
 /* Post-reload, we can't use any new AltiVec registers, as we already
@@ -1337,8 +1395,11 @@ enum r6000_reg_class_enum {
   RS6000_CONSTRAINT_wg,                /* FPR register for -mmfpgpr */
   RS6000_CONSTRAINT_wf,                /* VSX register for V4SF */
   RS6000_CONSTRAINT_wl,                /* FPR register for LFIWAX */
+  RS6000_CONSTRAINT_wm,                /* VSX register for direct move */
+  RS6000_CONSTRAINT_wr,                /* GPR register if 64-bit  */
   RS6000_CONSTRAINT_ws,                /* VSX register for DF */
   RS6000_CONSTRAINT_wt,                /* VSX register for TImode */
+  RS6000_CONSTRAINT_wv,                /* Altivec register for power8 vector */
   RS6000_CONSTRAINT_wx,                /* FPR register for STFIWX */
   RS6000_CONSTRAINT_wz,                /* FPR register for LFIWZX */
   RS6000_CONSTRAINT_MAX
@@ -2365,6 +2426,8 @@ extern int frame_pointer_needed;
 #define RS6000_BTM_ALWAYS      0               /* Always enabled.  */
 #define RS6000_BTM_ALTIVEC     MASK_ALTIVEC    /* VMX/altivec vectors.  */
 #define RS6000_BTM_VSX         MASK_VSX        /* VSX (vector/scalar).  */
+#define RS6000_BTM_P8_VECTOR   MASK_P8_VECTOR  /* ISA 2.07 vector.  */
+#define RS6000_BTM_CRYPTO      MASK_CRYPTO     /* crypto funcs.  */
 #define RS6000_BTM_SPE         MASK_STRING     /* E500 */
 #define RS6000_BTM_PAIRED      MASK_MULHW      /* 750CL paired insns.  */
 #define RS6000_BTM_FRE         MASK_POPCNTB    /* FRE instruction.  */
@@ -2376,6 +2439,8 @@ extern int frame_pointer_needed;
 
 #define RS6000_BTM_COMMON      (RS6000_BTM_ALTIVEC                     \
                                 | RS6000_BTM_VSX                       \
+                                | RS6000_BTM_P8_VECTOR                 \
+                                | RS6000_BTM_CRYPTO                    \
                                 | RS6000_BTM_FRE                       \
                                 | RS6000_BTM_FRES                      \
                                 | RS6000_BTM_FRSQRTE                   \
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md     (revision 199121)
+++ gcc/config/rs6000/predicates.md     (revision 199122)
@@ -166,6 +166,11 @@ (define_predicate "const_2_to_3_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 2, 3)")))
 
+;; Match op = 0..15
+(define_predicate "const_0_to_15_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+
 ;; Return 1 if op is a register that is not special.
 (define_predicate "gpc_reg_operand"
   (match_operand 0 "register_operand")
@@ -182,9 +187,68 @@ (define_predicate "gpc_reg_operand"
   if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
     return 1;
 
+  if (TARGET_VSX && VSX_REGNO_P (REGNO (op)))
+    return 1;
+
   return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is a general purpose register.  Unlike gpc_reg_operand, don't
+;; allow floating point or vector registers.
+(define_predicate "int_reg_operand"
+  (match_operand 0 "register_operand")
+{
+  if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
+    return 0;
+
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  if (!REG_P (op))
+    return 0;
+
+  if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
+    return 1;
+
+  return INT_REGNO_P (REGNO (op));
+})
+
+;; Like int_reg_operand, but only return true for base registers
+(define_predicate "base_reg_operand"
+  (match_operand 0 "int_reg_operand")
+{
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  if (!REG_P (op))
+    return 0;
+
+  return (REGNO (op) != FIRST_GPR_REGNO);
+})
+
+;; Return 1 if op is a general purpose register that is an even register
+;; which is suitable for a load/store quad operation
+(define_predicate "quad_int_reg_operand"
+  (match_operand 0 "register_operand")
+{
+  HOST_WIDE_INT r;
+
+  if (!TARGET_QUAD_MEMORY)
+    return 0;
+
+  if (GET_CODE (op) == SUBREG)
+    op = SUBREG_REG (op);
+
+  if (!REG_P (op))
+    return 0;
+
+  r = REGNO (op);
+  if (r >= FIRST_PSEUDO_REGISTER)
+    return 1;
+
+  return (INT_REGNO_P (r) && ((r & 1) == 0));
+})
+
 ;; Return 1 if op is a register that is a condition register field.
 (define_predicate "cc_reg_operand"
   (match_operand 0 "register_operand")
@@ -302,6 +366,11 @@ (define_predicate "reg_or_logical_cint_o
                      & (~ (unsigned HOST_WIDE_INT) 0xffffffff)) == 0)")
     (match_operand 0 "gpc_reg_operand")))
 
+;; Like reg_or_logical_cint_operand, but allow vsx registers
+(define_predicate "vsx_reg_or_cint_operand"
+  (ior (match_operand 0 "vsx_register_operand")
+       (match_operand 0 "reg_or_logical_cint_operand")))
+
 ;; Return 1 if operand is a CONST_DOUBLE that can be set in a register
 ;; with no more than one instruction per word.
 (define_predicate "easy_fp_constant"
@@ -507,6 +576,54 @@ (define_predicate "offsettable_mem_opera
   (and (match_operand 0 "memory_operand")
        (match_test "offsettable_nonstrict_memref_p (op)")))
 
+;; Return 1 if the operand is suitable for load/store quad memory.
+(define_predicate "quad_memory_operand"
+  (match_code "mem")
+{
+  rtx addr, op0, op1;
+  int ret;
+
+  if (!TARGET_QUAD_MEMORY)
+    ret = 0;
+
+  else if (!memory_operand (op, mode))
+    ret = 0;
+
+  else if (GET_MODE_SIZE (GET_MODE (op)) != 16)
+    ret = 0;
+
+  else if (MEM_ALIGN (op) < 128)
+    ret = 0;
+
+  else
+    {
+      addr = XEXP (op, 0);
+      if (int_reg_operand (addr, Pmode))
+       ret = 1;
+
+      else if (GET_CODE (addr) != PLUS)
+       ret = 0;
+
+      else
+       {
+         op0 = XEXP (addr, 0);
+         op1 = XEXP (addr, 1);
+         ret = (int_reg_operand (op0, Pmode)
+                && GET_CODE (op1) == CONST_INT
+                && IN_RANGE (INTVAL (op1), -32768, 32767)
+                && (INTVAL (op1) & 15) == 0);
+       }
+    }
+
+  if (TARGET_DEBUG_ADDR)
+    {
+      fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false");
+      debug_rtx (op);
+    }
+
+  return ret;
+})
+
 ;; Return 1 if the operand is an indexed or indirect memory operand.
 (define_predicate "indexed_or_indirect_operand"
   (match_code "mem")
@@ -521,6 +638,19 @@ (define_predicate "indexed_or_indirect_o
   return indexed_or_indirect_address (op, mode);
 })
 
+;; Like indexed_or_indirect_operand, but also allow a GPR register if direct
+;; moves are supported.
+(define_predicate "reg_or_indexed_operand"
+  (match_code "mem,reg")
+{
+  if (MEM_P (op))
+    return indexed_or_indirect_operand (op, mode);
+  else if (TARGET_DIRECT_MOVE)
+    return register_operand (op, mode);
+  return
+    0;
+})
+
 ;; Return 1 if the operand is an indexed or indirect memory operand with an
 ;; AND -16 in it, used to recognize when we need to switch to Altivec loads
 ;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 199121)
+++ gcc/config/rs6000/rs6000.md (revision 199122)
@@ -168,7 +168,7 @@ (define_attr "length" ""
 ;; Processor type -- this attribute must exactly match the processor_type
 ;; enumeration in rs6000.h.
 
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan"
+(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8"
   (const (symbol_ref "rs6000_cpu_attr")))
 
 
Index: gcc/config/rs6000/rs6000-cpus.def
===================================================================
--- gcc/config/rs6000/rs6000-cpus.def   (revision 199121)
+++ gcc/config/rs6000/rs6000-cpus.def   (revision 199122)
@@ -28,7 +28,7 @@
      ALTIVEC, since in general it isn't a win on power6.  In ISA 2.04, fsel,
      fre, fsqrt, etc. were no longer documented as optional.  Group masks by
      server and embedded. */
-#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS                          \
+#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS                          \
                                 | OPTION_MASK_CMPB                     \
                                 | OPTION_MASK_RECIP_PRECISION          \
                                 | OPTION_MASK_PPC_GFXOPT               \
@@ -45,6 +45,14 @@
                                 | OPTION_MASK_VSX                      \
                                 | OPTION_MASK_VSX_TIMODE)
 
+/* For now, don't provide an embedded version of ISA 2.07.  */
+#define ISA_2_7_MASKS_SERVER   (ISA_2_6_MASKS_SERVER                   \
+                                | OPTION_MASK_P8_FUSION                \
+                                | OPTION_MASK_P8_VECTOR                \
+                                | OPTION_MASK_CRYPTO                   \
+                                | OPTION_MASK_DIRECT_MOVE              \
+                                | OPTION_MASK_QUAD_MEMORY)
+
 #define POWERPC_7400_MASK      (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
 
 /* Deal with ports that do not have -mstrict-align.  */
@@ -61,7 +69,9 @@
 /* Mask of all options to set the default isa flags based on -mcpu=<xxx>.  */
 #define POWERPC_MASKS          (OPTION_MASK_ALTIVEC                    \
                                 | OPTION_MASK_CMPB                     \
+                                | OPTION_MASK_CRYPTO                   \
                                 | OPTION_MASK_DFP                      \
+                                | OPTION_MASK_DIRECT_MOVE              \
                                 | OPTION_MASK_DLMZB                    \
                                 | OPTION_MASK_FPRND                    \
                                 | OPTION_MASK_ISEL                     \
@@ -69,11 +79,14 @@
                                 | OPTION_MASK_MFPGPR                   \
                                 | OPTION_MASK_MULHW                    \
                                 | OPTION_MASK_NO_UPDATE                \
+                                | OPTION_MASK_P8_FUSION                \
+                                | OPTION_MASK_P8_VECTOR                \
                                 | OPTION_MASK_POPCNTB                  \
                                 | OPTION_MASK_POPCNTD                  \
                                 | OPTION_MASK_POWERPC64                \
                                 | OPTION_MASK_PPC_GFXOPT               \
                                 | OPTION_MASK_PPC_GPOPT                \
+                                | OPTION_MASK_QUAD_MEMORY              \
                                 | OPTION_MASK_RECIP_PRECISION          \
                                 | OPTION_MASK_SOFT_FLOAT               \
                                 | OPTION_MASK_STRICT_ALIGN_OPTIONAL    \
@@ -168,10 +181,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, 
            POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
            | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
            | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE)
-RS6000_CPU ("power8", PROCESSOR_POWER7,   /* Don't add MASK_ISEL by default */
-           POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
-           | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
-           | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE)
+RS6000_CPU ("power8", PROCESSOR_POWER7, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER)
 RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0)
 RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64)
 RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64)
Index: gcc/config/rs6000/rs6000-opts.h
===================================================================
--- gcc/config/rs6000/rs6000-opts.h     (revision 199121)
+++ gcc/config/rs6000/rs6000-opts.h     (revision 199122)
@@ -59,7 +59,8 @@ enum processor_type
    PROCESSOR_POWER7,
    PROCESSOR_CELL,
    PROCESSOR_PPCA2,
-   PROCESSOR_TITAN
+   PROCESSOR_TITAN,
+   PROCESSOR_POWER8
 };
 
 /* FP processor type.  */
@@ -131,11 +132,14 @@ enum rs6000_cmodel {
   CMODEL_LARGE
 };
 
-/* Describe which vector unit to use for a given machine mode.  */
+/* Describe which vector unit to use for a given machine mode.  The
+   VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and
+   P8_VECTOR are contiguous.  */
 enum rs6000_vector {
   VECTOR_NONE,                 /* Type is not  a vector or not supported */
   VECTOR_ALTIVEC,              /* Use altivec for vector processing */
   VECTOR_VSX,                  /* Use VSX for vector processing */
+  VECTOR_P8_VECTOR,            /* Use ISA 2.07 VSX for vector processing */
   VECTOR_PAIRED,               /* Use paired floating point for vectors */
   VECTOR_SPE,                  /* Use SPE for vector processing */
   VECTOR_OTHER                 /* Some other vector unit */

Re: [PATCH, rs6000] power8 patch #1, infrastructure changes (revised patch)

Reply via email to