The problem of unexpected code generation is discussed on
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64110
The following patch introduces two new constraints, '^' and '$', which
are analogous to '?' and '!' but disfavor the given alternative only when
*the operand with the new constraint* needs a reload ('?' and '!' disfavor
the alternative if *any* operand needs a reload). I hope the new
constraints will be useful for other insns and targets.
The patch was successfully bootstrapped and tested on x86-64.
I just need approval for the changes in sse.md, stmt.c, and genoutput.c.
Thanks.
2015-01-14 Vladimir Makarov <vmaka...@redhat.com>
PR rtl-optimization/64110
* stmt.c (parse_output_constraint): Process '^' and '$'.
(parse_input_constraint): Ditto.
* lra-constraints.c (process_alt_operands): Process the new
constraints.
* ira-costs.c (record_reg_classes): Process the new constraint
'^'.
* genoutput.c (indep_constraints): Add '^' and '$'.
* config/i386/sse.md (*vec_dup<mode>): Use '$' instead of '!'.
* doc/md.texi: Add description of the new constraints.
2015-01-14 Vladimir Makarov <vmaka...@redhat.com>
PR rtl-optimization/64110
* gcc.target/i386/pr64110.c: Add scan-assembler.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 219262)
+++ config/i386/sse.md (working copy)
@@ -16713,7 +16713,7 @@
(define_insn "*vec_dup<mode>"
[(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,x")
(vec_duplicate:AVX2_VEC_DUP_MODE
- (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,!r")))]
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
"TARGET_AVX2"
"@
v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
Index: doc/md.texi
===================================================================
--- doc/md.texi (revision 219262)
+++ doc/md.texi (working copy)
@@ -1503,6 +1503,18 @@ in it.
Disparage severely the alternative that the @samp{!} appears in.
This alternative can still be used if it fits without reloading,
but if reloading is needed, some other alternative will be used.
+
+@cindex @samp{^} in constraint
+@cindex caret
+@item ^
+This constraint is analogous to @samp{?} but it disparages slightly
+the alternative only if the operand with the @samp{^} needs a reload.
+
+@cindex @samp{$} in constraint
+@cindex dollar sign
+@item $
+This constraint is analogous to @samp{!} but it disparages severely
+the alternative only if the operand with the @samp{$} needs a reload.
@end table
@ifset INTERNALS
Index: genoutput.c
===================================================================
--- genoutput.c (revision 219262)
+++ genoutput.c (working copy)
@@ -209,7 +209,7 @@ struct constraint_data
/* All machine-independent constraint characters (except digits) that
are handled outside the define*_constraint mechanism. */
-static const char indep_constraints[] = ",=+%*?!#&g";
+static const char indep_constraints[] = ",=+%*?!^$#&g";
static struct constraint_data *
constraints_by_letter_table[1 << CHAR_BIT];
Index: ira-costs.c
===================================================================
--- ira-costs.c (revision 219262)
+++ ira-costs.c (working copy)
@@ -762,6 +762,10 @@ record_reg_classes (int n_alts, int n_op
c = *++p;
break;
+ case '^':
+ alt_cost += 2;
+ break;
+
case '?':
alt_cost += 2;
break;
Index: lra-constraints.c
===================================================================
--- lra-constraints.c (revision 219262)
+++ lra-constraints.c (working copy)
@@ -1640,6 +1640,7 @@ process_alt_operands (int only_alternati
then REJECT is ignored, but otherwise it gets this much counted
against it in addition to the reloading needed. */
int reject;
+ int op_reject;
/* The number of elements in the following array. */
int early_clobbered_regs_num;
/* Numbers of operands which are early clobber registers. */
@@ -1789,6 +1790,7 @@ process_alt_operands (int only_alternati
track. */
lra_assert (*p != 0 && *p != ',');
+ op_reject = 0;
/* Scan this alternative's specs for this operand; set WIN
if the operand fits any letter in this alternative.
Otherwise, clear BADOP if this operand could fit some
@@ -1811,6 +1813,13 @@ process_alt_operands (int only_alternati
early_clobber_p = true;
break;
+ case '$':
+ op_reject += LRA_MAX_REJECT;
+ break;
+ case '^':
+ op_reject += LRA_LOSER_COST_FACTOR;
+ break;
+
case '#':
/* Ignore rest of this alternative. */
c = '\0';
@@ -2097,6 +2106,7 @@ process_alt_operands (int only_alternati
int const_to_mem = 0;
bool no_regs_p;
+ reject += op_reject;
/* Never do output reload of stack pointer. It makes
impossible to do elimination when SP is changed in
RTL. */
Index: stmt.c
===================================================================
--- stmt.c (revision 219262)
+++ stmt.c (working copy)
@@ -292,6 +292,7 @@ parse_output_constraint (const char **co
break;
case '?': case '!': case '*': case '&': case '#':
+ case '$': case '^':
case 'E': case 'F': case 'G': case 'H':
case 's': case 'i': case 'n':
case 'I': case 'J': case 'K': case 'L': case 'M':
@@ -382,6 +383,7 @@ parse_input_constraint (const char **con
case '<': case '>':
case '?': case '!': case '*': case '#':
+ case '$': case '^':
case 'E': case 'F': case 'G': case 'H':
case 's': case 'i': case 'n':
case 'I': case 'J': case 'K': case 'L': case 'M':
Index: testsuite/gcc.target/i386/pr64110.c
===================================================================
--- testsuite/gcc.target/i386/pr64110.c (revision 219262)
+++ testsuite/gcc.target/i386/pr64110.c (working copy)
@@ -1,5 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O3 -march=core-avx2" } */
+/* { dg-final { scan-assembler "vmovd\[\\t \]" } } */
int foo (void);
int a;