This patch moves the output instructions for the movdi patterns (both the
32-bit and 64-bit variants) into a separate rs6000_output_move_64bit
function.
As I'm starting to move more of the code to check the addr_masks instead of
writing long chains of "if mode == MODE1 || mode == MODE2" tests, I realized
that the multi-register types (complex values, long double using the IBM
double-double format, etc.) did not have the offset bits set correctly in
reg_addr.  I also prevented the Altivec loads/stores (which give you the
free AND with -16) from being generated for multi-register values.
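
To make the direction concrete, here is a minimal sketch (not part of the
patch) of the kind of test I want to end up with: instead of enumerating
modes, consult the precomputed addr_mask bits in reg_addr.  The helper name
mode_has_offset_addressing_p is purely illustrative; reg_addr, addr_mask,
and RELOAD_REG_OFFSET are the names already used in rs6000.c, and RC is one
of the RELOAD_REG_GPR/FPR/VMX indices into addr_mask:

/* Illustrative sketch only, not part of the patch.  */
static bool
mode_has_offset_addressing_p (machine_mode mode, int rc)
{
  /* Old style: enumerate the modes that allow reg+offset addressing.  */
  /* return mode == DImode || mode == DFmode || mode == DDmode;  */

  /* New style: ask the per-mode, per-register-class address mask.  */
  return (reg_addr[mode].addr_mask[rc] & RELOAD_REG_OFFSET) != 0;
}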
I added a function (rs6000_valid_move_p) that replaces the old "is
operand[0] a register or is operand[1] a register" tests.  Right now it
generates the same tests, but I may need to add additional conditions in the
future.
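
For reference, the condition change in the movdi patterns looks roughly like
the following when written out as C predicates (the wrapper function names
here are made up for illustration; both forms currently accept the same set
of moves):

/* Illustrative only; these wrappers do not exist in the patch.  */
static bool
movdi_condition_before (rtx *operands)
{
  return (gpc_reg_operand (operands[0], DImode)
	  || gpc_reg_operand (operands[1], DImode));
}

static bool
movdi_condition_after (rtx *operands)
{
  return rs6000_valid_move_p (operands[0], operands[1]);
}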
I have done a full bootstrap and make check on a little-endian power8
system with no regressions.
The next patch will change the MOVDF and MOVDD patterns to use
rs6000_output_move_64bit as well.
2018-03-15 Michael Meissner <[email protected]>
* config/rs6000/rs6000-protos.h (rs6000_output_move_64bit): Add
declaration.
(rs6000_valid_move_p): Likewise.
	* config/rs6000/rs6000-output.c (addr_is_xform_p): New helper
	function to return whether an address uses X-form (reg+reg)
	addressing.
(reg_is_spr_p): New helper function to determine if a register is
	an SPR.
(rs6000_output_move_64bit): New function to return the proper
instruction to do a 64-bit move.
	* config/rs6000/rs6000.c (rs6000_setup_reg_addr_masks): Rework
	the setting of offset addressing so that multi-register values
	have the proper offset bits set.  Do not enable the Altivec & -16
	addressing on multi-register moves.
(rs6000_valid_move_p): New function to validate moves.
	(reg_offset_addressing_ok_p): Add a check that the mode and
	register class support offsettable instructions.
	* config/rs6000/rs6000.md (movdi_internal32): Move instruction
	literals to rs6000_output_move_64bit.  Check move validity with
	rs6000_valid_move_p.
(movdi_internal64): Likewise.
--
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: [email protected], phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h (revision 258535)
+++ gcc/config/rs6000/rs6000-protos.h (working copy)
@@ -52,6 +52,7 @@ extern rtx rs6000_got_register (rtx);
extern rtx find_addr_reg (rtx);
extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
+extern const char *rs6000_output_move_64bit (rtx *);
extern const char *rs6000_output_move_128bit (rtx *);
extern bool rs6000_move_128bit_ok_p (rtx []);
extern bool rs6000_split_128bit_ok_p (rtx []);
@@ -89,6 +90,7 @@ extern bool rs6000_is_valid_2insn_and (r
extern void rs6000_emit_2insn_and (machine_mode, rtx *, bool, int);
extern int registers_ok_for_quad_peep (rtx, rtx);
extern int mems_ok_for_quad_peep (rtx, rtx);
+extern bool rs6000_valid_move_p (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
extern bool direct_move_p (rtx, rtx);
extern bool quad_address_p (rtx, machine_mode, bool);
Index: gcc/config/rs6000/rs6000-output.c
===================================================================
--- gcc/config/rs6000/rs6000-output.c (revision 258538)
+++ gcc/config/rs6000/rs6000-output.c (working copy)
@@ -47,6 +47,215 @@
#include "tm-constrs.h"
+/* Return whether an address is an x-form (reg or reg+reg) address. This is
+ used when we know the instruction is not a traditional GPR or FPR
+ load/store, so check to make sure auto increment is not present in the
+ address. */
+inline static bool
+addr_is_xform_p (rtx addr)
+{
+ gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
+
+ if (REG_P (addr) || SUBREG_P (addr))
+ return true;
+
+ if (GET_CODE (addr) != PLUS)
+ return false;
+
+ rtx op1 = XEXP (addr, 1);
+ return REG_P (op1) || SUBREG_P (op1);
+}
+
+/* Return whether a register is an SPR. */
+inline static bool
+reg_is_spr_p (rtx reg)
+{
+ if (!REG_P (reg))
+ return false;
+
+ enum reg_class rclass = REGNO_REG_CLASS (REGNO (reg));
+ return reg_class_to_reg_type[(int)rclass] == SPR_REG_TYPE;
+}
+
+
+/* Return a string to do a move operation of 64 bits of data. */
+
+const char *
+rs6000_output_move_64bit (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ machine_mode mode = GET_MODE (dest);
+ int dest_regno;
+ int src_regno;
+ bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
+ bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
+
+ if (REG_P (dest) || SUBREG_P (dest))
+ {
+ dest_regno = regno_or_subregno (dest);
+ dest_gpr_p = INT_REGNO_P (dest_regno);
+ dest_fp_p = FP_REGNO_P (dest_regno);
+ dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
+ dest_vsx_p = dest_fp_p | dest_vmx_p;
+ }
+ else
+ {
+ dest_regno = -1;
+ dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
+ }
+
+ if (REG_P (src) || SUBREG_P (src))
+ {
+ src_regno = regno_or_subregno (src);
+ src_gpr_p = INT_REGNO_P (src_regno);
+ src_fp_p = FP_REGNO_P (src_regno);
+ src_vmx_p = ALTIVEC_REGNO_P (src_regno);
+ src_vsx_p = src_fp_p | src_vmx_p;
+ }
+ else
+ {
+ src_regno = -1;
+ src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
+ }
+
+ /* Register moves. */
+ if (dest_regno >= 0 && src_regno >= 0)
+ {
+ /* Moves to GPRs. */
+ if (dest_gpr_p)
+ {
+ if (!TARGET_POWERPC64)
+ return "#";
+
+ else if (src_gpr_p)
+ return "mr %0,%1";
+
+ else if (TARGET_DIRECT_MOVE && src_vsx_p)
+ return "mfvsrd %0,%x1";
+
+ else if (TARGET_MFPGPR && src_fp_p)
+ return "mftgpr %0,%1";
+
+ else if (reg_is_spr_p (src))
+ return "mf%1 %0";
+ }
+
+ /* Moves to vector/floating point registers. */
+ else if (dest_vsx_p)
+ {
+ if (dest_fp_p && src_fp_p)
+ return "fmr %0,%1";
+
+ else if (TARGET_VSX && src_vsx_p)
+ return "xxlor %x0,%x1,%x1";
+
+ else if (TARGET_POWERPC64 && src_gpr_p)
+ {
+ if (TARGET_DIRECT_MOVE)
+ return "mtvsrd %x0,%1";
+
+ else if (TARGET_MFPGPR && dest_fp_p)
+ return "mffgpr %0,%1";
+ }
+ }
+
+ /* Moves to SPRs. */
+ else if (reg_is_spr_p (dest))
+ return "mt%0 %1";
+ }
+
+ /* Loads. */
+ else if (dest_regno >= 0 && MEM_P (src))
+ {
+ if (dest_gpr_p)
+ return TARGET_POWERPC64 ? "ld%U1%X1 %0,%1" : "#";
+
+ else if (dest_fp_p)
+ return "lfd%U1%X1 %0,%1";
+
+ else if (dest_vmx_p)
+ {
+ if (TARGET_VSX && addr_is_xform_p (XEXP (src, 0)))
+ return "lxsdx %x0,%y1";
+
+ else if (TARGET_P9_VECTOR)
+ return "lxsd %0,%1";
+ }
+ }
+
+ /* Stores. */
+ else if (src_regno >= 0 && MEM_P (dest))
+ {
+ if (src_gpr_p)
+ return TARGET_POWERPC64 ? "std%U0%X0 %1,%0" : "#";
+
+ else if (src_fp_p)
+ return "stfd%U0%X0 %1,%0";
+
+ else if (src_vmx_p)
+ {
+ if (TARGET_VSX && addr_is_xform_p (XEXP (dest, 0)))
+ return "stxsdx %x1,%y0";
+
+ else if (TARGET_P9_VECTOR)
+ return "stxsd %1,%0";
+ }
+ }
+
+ /* Constants. */
+ else if (dest_regno >= 0 && CONSTANT_P (src))
+ {
+ if (dest_gpr_p)
+ {
+ if (satisfies_constraint_I (src))
+ return "li %0,%1";
+
+ if (satisfies_constraint_L (src))
+ return "lis %0,%v1";
+
+ return "#";
+ }
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+      /* We prefer to generate XXSPLTIB/VSPLTISW rather than XXLXOR/XXLORC
+	 for 0/-1, because the latter can potentially cause a stall if
+ the previous use of the register did a long operation followed by
+ a store. This would cause this insn to wait for the previous
+ operation to finish, even though it doesn't use any of the bits in
+ the previous value. */
+ if (src == CONST0_RTX (mode))
+ {
+ /* Note 0.0 is not all zeros in IBM decimal format. */
+ gcc_assert (mode != DDmode);
+
+ if (TARGET_P9_VECTOR)
+ return "xxspltib %x0,0";
+ else if (dest_vmx_p)
+ return "vspltisw %0,0";
+ else
+ return "xxlxor %x0,%x0,%x0";
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_INT
+ && src == CONSTM1_RTX (mode))
+ {
+ if (TARGET_P9_VECTOR)
+ return "xxspltib %x0,255";
+ else if (dest_vmx_p)
+ return "vspltisw %0,-1";
+ else if (TARGET_P8_VECTOR)
+ return "xxlorc %x0,%x0,%x0";
+ /* XXX: We could generate xxlxor/xxlnor for power7 if
+ desired. */
+ }
+ }
+ }
+
+ fatal_insn ("Bad 64-bit move", gen_rtx_SET (dest, src));
+}
+
+
/* Return a string to do a move operation of 128 bits of data. */
const char *
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 258538)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -2957,49 +2957,52 @@ rs6000_setup_reg_addr_masks (void)
/* GPR and FPR registers can do REG+OFFSET addressing, except
possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
- for 64-bit scalars and 32-bit SFmode to altivec registers. */
- if ((addr_mask != 0) && !indexed_only_p
- && msize <= 8
- && (rc == RELOAD_REG_GPR
- || ((msize == 8 || m2 == SFmode)
- && (rc == RELOAD_REG_FPR
- || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
- addr_mask |= RELOAD_REG_OFFSET;
-
- /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
- instructions are enabled. The offset for 128-bit VSX registers is
- only 12-bits. While GPRs can handle the full offset range, VSX
- registers can only handle the restricted range. */
- else if ((addr_mask != 0) && !indexed_only_p
- && msize == 16 && TARGET_P9_VECTOR
- && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
- || (m2 == TImode && TARGET_VSX)))
- {
- addr_mask |= RELOAD_REG_OFFSET;
- if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
- addr_mask |= RELOAD_REG_QUAD_OFFSET;
- }
-
- /* LD and STD are DS-form instructions, which must have the bottom 2
- bits be 0. However, since DFmode is primarily used in the
- floating point/vector registers, don't restrict the offsets in ISA
- 2.xx. */
- if (rc == RELOAD_REG_GPR && msize == 8 && TARGET_POWERPC64
- && (addr_mask & RELOAD_REG_OFFSET) != 0
- && INTEGRAL_MODE_P (m2))
- addr_mask |= RELOAD_REG_DS_OFFSET;
-
- /* ISA 3.0 LXSD, LXSSP, STXSD, STXSSP altivec load/store instructions
- are DS-FORM. */
- else if (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR
- && (addr_mask & RELOAD_REG_OFFSET) != 0
- && (msize == 8 || m2 == SFmode))
- addr_mask |= RELOAD_REG_DS_OFFSET;
+ for 64-bit scalars and 32-bit SFmode to altivec registers.
+
+ 64-bit GPR offset memory references and Altivec offset memory
+	 references use DS-form offsets where the bottom 2 bits are 0.
+
+	 128-bit vector offset memory references use DQ-form offsets where
+ the bottom 4 bits are 0. */
+ if ((addr_mask != 0) && !indexed_only_p)
+ {
+ if (rc == RELOAD_REG_GPR)
+ {
+ /* LD/STD on 64-bit use DS-form addresses. */
+ addr_mask |= RELOAD_REG_OFFSET;
+ if (msize >= 8 && TARGET_POWERPC64)
+ addr_mask |= RELOAD_REG_DS_OFFSET;
+ }
+ else if (msize >= 8 || m == E_SFmode)
+ {
+ if (rc == RELOAD_REG_FPR)
+ {
+ /* LXV/STXV use DQ-form addresses. */
+ addr_mask |= RELOAD_REG_OFFSET;
+ if (msize == 16
+ && (addr_mask & RELOAD_REG_MULTIPLE) == 0
+ && TARGET_P9_VECTOR)
+ addr_mask |= RELOAD_REG_QUAD_OFFSET;
+ }
+ else if (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)
+ {
+ /* LXV/STXV use DQ-form addresses, LXSD/LXSSP/STXSD/STXSSP
+ use DS-form addresses. */
+ addr_mask |= RELOAD_REG_OFFSET;
+ if (msize == 16
+ && (addr_mask & RELOAD_REG_MULTIPLE) == 0)
+ addr_mask |= RELOAD_REG_QUAD_OFFSET;
+ else
+ addr_mask |= RELOAD_REG_DS_OFFSET;
+ }
+ }
+ }
/* VMX registers can do (REG & -16) and ((REG+REG) & -16)
addressing on 128-bit types. */
if (rc == RELOAD_REG_VMX && msize == 16
- && (addr_mask & RELOAD_REG_VALID) != 0)
+ && ((addr_mask & (RELOAD_REG_VALID
+ | RELOAD_REG_MULTIPLE)) == RELOAD_REG_VALID))
addr_mask |= RELOAD_REG_AND_M16;
reg_addr[m].addr_mask[rc] = addr_mask;
@@ -8007,6 +8010,26 @@ small_data_operand (rtx op ATTRIBUTE_UNU
#endif
}
+/* Return true if a move from SRC to DEST is valid. */
+
+bool
+rs6000_valid_move_p (rtx dest, rtx src)
+{
+ if (SUBREG_P (dest))
+ dest = SUBREG_REG (dest);
+
+ if (SUBREG_P (src))
+ src = SUBREG_REG (src);
+
+ if (REG_P (dest))
+ return true;
+
+ if (MEM_P (dest) && REG_P (src))
+ return true;
+
+ return false;
+}
+
/* Return true if either operand is a general purpose register. */
bool
@@ -8239,6 +8262,9 @@ mem_operand_ds_form (rtx op, machine_mod
static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
+ if (!mode_supports_d_form (mode))
+ return false;
+
switch (mode)
{
case E_V16QImode:
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 258531)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -8485,29 +8485,8 @@ (define_insn "*movdi_internal32"
Oj, wM, OjwM, Oj, wM, wS,
wB"))]
- "! TARGET_POWERPC64
- && (gpc_reg_operand (operands[0], DImode)
- || gpc_reg_operand (operands[1], DImode))"
- "@
- #
- #
- #
- stfd%U0%X0 %1,%0
- lfd%U1%X1 %0,%1
- fmr %0,%1
- #
- stxsd %1,%0
- stxsdx %x1,%y0
- lxsd %0,%1
- lxsdx %x0,%y1
- xxlor %x0,%x1,%x1
- xxspltib %x0,0
- xxspltib %x0,255
- vspltisw %0,%1
- xxlxor %x0,%x0,%x0
- xxlorc %x0,%x0,%x0
- #
- #"
+ "! TARGET_POWERPC64 && rs6000_valid_move_p (operands[0], operands[1])"
+ "* return rs6000_output_move_64bit (operands);"
[(set_attr "type"
"store, load, *, fpstore, fpload,
fpsimple,
*, fpstore, fpstore, fpload, fpload,
veclogical,
@@ -8562,38 +8541,8 @@ (define_insn "*movdi_internal64"
wM, wS, wB, *h, r, 0,
wg, r, wj, r"))]
- "TARGET_POWERPC64
- && (gpc_reg_operand (operands[0], DImode)
- || gpc_reg_operand (operands[1], DImode))"
- "@
- std%U0%X0 %1,%0
- ld%U1%X1 %0,%1
- mr %0,%1
- li %0,%1
- lis %0,%v1
- #
- stfd%U0%X0 %1,%0
- lfd%U1%X1 %0,%1
- fmr %0,%1
- stxsd %1,%0
- stxsdx %x1,%y0
- lxsd %0,%1
- lxsdx %x0,%y1
- xxlor %x0,%x1,%x1
- xxspltib %x0,0
- xxspltib %x0,255
- #
- xxlxor %x0,%x0,%x0
- xxlorc %x0,%x0,%x0
- #
- #
- mf%1 %0
- mt%0 %1
- nop
- mftgpr %0,%1
- mffgpr %0,%1
- mfvsrd %0,%x1
- mtvsrd %x0,%1"
+ "TARGET_POWERPC64 && rs6000_valid_move_p (operands[0], operands[1])"
+ "* return rs6000_output_move_64bit (operands);"
[(set_attr "type"
"store, load, *, *, *, *,
fpstore, fpload, fpsimple, fpstore, fpstore,
fpload,