Hello!
As the comment above inline_secondary_memory_needed in i386.c says:
--cut here--
The function can't work reliably when one of the CLASSES is a class
containing registers from multiple sets. We avoid this by never combining
different sets in a single alternative in the machine description.
Ensure that this constraint holds to avoid unexpected surprises.
--cut here--
The patch also enforces this constraint for mask registers and fixes
an oversight in *movsi_internal, where the (*k/*krm) alternative combined
mask and general registers in a single alternative.
2017-01-17 Uros Bizjak <[email protected]>
* config/i386/i386.h (MASK_CLASS_P): New define.
* config/i386/i386.c (inline_secondary_memory_needed): Ensure that
there are no registers from different register sets also when
mask registers are used. Update function comment.
* config/i386/i386.md (*movsi_internal): Split (*k/*krm) alternative
to (*k/*r) and (*k/*km) alternatives.
Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
Committed to mainline SVN.
Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 244540)
+++ config/i386/i386.c (working copy)
@@ -39868,19 +39868,19 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
return false;
}
-/* If we are copying between general and FP registers, we need a memory
- location. The same is true for SSE and MMX registers.
+/* If we are copying between registers from different register sets
+ (e.g. FP and integer), we may need a memory location.
- To optimize register_move_cost performance, allow inline variant.
-
- The macro can't work reliably when one of the CLASSES is class containing
- registers from multiple units (SSE, MMX, integer). We avoid this by never
- combining those units in single alternative in the machine description.
+ The function can't work reliably when one of the CLASSES is a class
+ containing registers from multiple sets. We avoid this by never combining
+ different sets in a single alternative in the machine description.
Ensure that this constraint holds to avoid unexpected surprises.
- When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
- enforce these sanity checks. */
+ When STRICT is false, we are being called from REGISTER_MOVE_COST,
+ so do not enforce these sanity checks.
+ To optimize register_move_cost performance, define inline variant. */
+
static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
machine_mode mode, int strict)
@@ -39887,12 +39887,15 @@ inline_secondary_memory_needed (enum reg_class cla
{
if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
return false;
+
if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
|| MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
|| MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
|| MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
|| MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
- || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
+ || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
+ || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
+ || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
{
gcc_assert (!strict || lra_in_progress);
return true;
@@ -39902,7 +39905,7 @@ inline_secondary_memory_needed (enum reg_class cla
return true;
/* Between mask and general, we have moves no larger than word size. */
- if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
+ if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
&& (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
return true;
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h (revision 244540)
+++ config/i386/i386.h (working copy)
@@ -1378,6 +1378,8 @@ enum reg_class
reg_class_subset_p ((CLASS), ALL_SSE_REGS)
#define MMX_CLASS_P(CLASS) \
((CLASS) == MMX_REGS)
+#define MASK_CLASS_P(CLASS) \
+ reg_class_subset_p ((CLASS), MASK_REGS)
#define MAYBE_INTEGER_CLASS_P(CLASS) \
reg_classes_intersect_p ((CLASS), GENERAL_REGS)
#define MAYBE_FLOAT_CLASS_P(CLASS) \
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 244540)
+++ config/i386/i386.md (working copy)
@@ -2324,9 +2324,9 @@
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm")
+ "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k,*k ,*rm")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))]
+ "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*r,*km,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2403,7 +2403,7 @@
(const_string "sselog1")
(eq_attr "alternative" "7,8,9,10,12")
(const_string "ssemov")
- (eq_attr "alternative" "13,14")
+ (eq_attr "alternative" "13,14,15")
(const_string "mskmov")
(and (match_operand 0 "register_operand")
(match_operand 1 "pic_32bit_operand"))