On 02/20/2014 12:39 PM, Jakub Jelinek wrote:
> +  if (!d->testing_p)
> +    {
> +      dremap.target = gen_reg_rtx (dremap.vmode);
> +      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
> +    }
...
> +  if (d->testing_p)
> +    d_copy.target = gen_lowpart (V4DFmode, d->target);
> +  else
> +    d_copy.target = gen_reg_rtx (V4DFmode);
I'm not keen on these changes, because they could potentially affect how
the insn matching happens.  I'm not 100% sure it actually matters, but I
think there's a simple way around it: use the same gen_raw_REG kind of
thing that we already do to begin with.

What about this?  Note that I've also slightly adjusted a few of the
testing_p breaks, to take into account that both arms of the if will
always succeed.

r~
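[Editorial sketch, not part of the message or the patch: a minimal
illustration of the distinction being discussed, using the existing
GCC-internal helpers (gen_reg_rtx, gen_raw_REG, LAST_VIRTUAL_REGISTER).
The function name below is made up purely for illustration.]

/* Sketch only: the two ways of getting a scratch operand while
   testing_p is set.  */

static void
sketch_scratch_operands (void)
{
  /* gen_reg_rtx allocates a genuine pseudo: it advances the current
     function's register counter and records the new REG, even though a
     dry run only wants to ask "would this insn pattern match?".  */
  rtx via_pseudo = gen_reg_rtx (V4DImode);

  /* gen_raw_REG merely builds a REG rtx with a register number of our
     choosing; numbers above LAST_VIRTUAL_REGISTER are the same kind of
     dummy operand ix86_vectorize_vec_perm_const_ok already hands to the
     dry run, and nothing is recorded in the function's register
     tables.  */
  rtx via_dummy = gen_raw_REG (V4DImode, LAST_VIRTUAL_REGISTER + 1);

  (void) via_pseudo;
  (void) via_dummy;
}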
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cd14e52..d827bb3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -39499,6 +39499,7 @@ struct expand_vec_perm_d
   rtx target, op0, op1;
   unsigned char perm[MAX_VECT_LEN];
   enum machine_mode vmode;
+  unsigned test_regno;
   unsigned char nelt;
   bool one_operand_p;
   bool testing_p;
@@ -42419,6 +42420,17 @@ init_vselect_insn (void)
   end_sequence ();
 }
 
+/* Create a new pseudo, or for testing, a dummy register.  */
+
+static rtx
+gen_vec_perm_reg (struct expand_vec_perm_d *d, enum machine_mode mode)
+{
+  if (d->testing_p)
+    return gen_raw_REG (mode, ++d->test_regno);
+  else
+    return gen_reg_rtx (mode);
+}
+
 /* Construct (set target (vec_select op0 (parallel perm)))
    and return true if that's a valid instruction in the
    active ISA.  */
@@ -42811,9 +42823,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
         {
           for (i = 0; i < 4; i++)
             perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
-          if (d->testing_p)
-            return true;
-          target = gen_reg_rtx (V4DImode);
+          target = gen_vec_perm_reg (d, V4DImode);
           if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
                               perm, 4, false))
             {
@@ -43411,7 +43421,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
       else
         dfinal.perm[i] = e;
     }
-  dremap.target = gen_reg_rtx (dremap.vmode);
+  dremap.target = gen_vec_perm_reg (d, dremap.vmode);
   dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
   dfinal.op1 = dfinal.op0;
   dfinal.one_operand_p = true;
@@ -43845,6 +43855,9 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
     return false;
   gcc_assert (!d->one_operand_p);
 
+  if (d->testing_p)
+    return true;
+
   nelt = d->nelt;
   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
 
@@ -44053,6 +44066,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
   switch (d->vmode)
     {
     case V4DFmode:
+      if (d->testing_p)
+        break;
       t1 = gen_reg_rtx (V4DFmode);
       t2 = gen_reg_rtx (V4DFmode);
 
@@ -44072,6 +44087,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       {
         int mask = odd ? 0xdd : 0x88;
 
+        if (d->testing_p)
+          break;
         t1 = gen_reg_rtx (V8SFmode);
         t2 = gen_reg_rtx (V8SFmode);
         t3 = gen_reg_rtx (V8SFmode);
@@ -44109,6 +44126,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       gcc_unreachable ();
 
     case V8HImode:
+      if (d->testing_p)
+        break;
       if (TARGET_SSSE3)
         return expand_vec_perm_pshufb2 (d);
       else
@@ -44130,6 +44149,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       break;
 
     case V16QImode:
+      if (d->testing_p)
+        break;
       if (TARGET_SSSE3)
         return expand_vec_perm_pshufb2 (d);
       else
@@ -44160,7 +44181,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
         {
           struct expand_vec_perm_d d_copy = *d;
           d_copy.vmode = V4DFmode;
-          d_copy.target = gen_reg_rtx (V4DFmode);
+          d_copy.target = gen_vec_perm_reg (d, V4DFmode);
           d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
           d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
           if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44173,6 +44194,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
           return false;
         }
 
+      if (d->testing_p)
+        break;
+
       t1 = gen_reg_rtx (V4DImode);
       t2 = gen_reg_rtx (V4DImode);
 
@@ -44193,7 +44217,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
         {
           struct expand_vec_perm_d d_copy = *d;
           d_copy.vmode = V8SFmode;
-          d_copy.target = gen_reg_rtx (V8SFmode);
+          d_copy.target = gen_vec_perm_reg (d, V8SFmode);
           d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
           d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
           if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44206,6 +44230,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
           return false;
         }
 
+      if (d->testing_p)
+        break;
+
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
       t3 = gen_reg_rtx (V4DImode);
@@ -44298,6 +44325,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
     case V16QImode:
       /* These can be implemented via interleave.  We save one insn by
          stopping once we have promoted to V4SImode and then use pshufd.  */
+      if (d->testing_p)
+        return true;
       do
         {
           rtx dest;
@@ -44655,6 +44684,7 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
   d.vmode = vmode;
   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = true;
+  d.test_regno = LAST_VIRTUAL_REGISTER;
 
   /* Given sufficient ISA support we can just return true here
      for selected vector modes.  */
@@ -44699,10 +44729,10 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 
   /* Otherwise we have to go through the motions and see if we can
      figure out how to generate the requested permutation.  */
-  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
-  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+  d.target = gen_vec_perm_reg (&d, d.vmode);
+  d.op1 = d.op0 = gen_vec_perm_reg (&d, d.vmode);
   if (!d.one_operand_p)
-    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+    d.op1 = gen_vec_perm_reg (&d, d.vmode);
 
   start_sequence ();
   ret = ix86_expand_vec_perm_const_1 (&d);
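[Editorial sketch, not part of the patch: a condensed view of the
dry-run setup after the change.  The gen_vec_perm_reg helper and the
test_regno field come from the diff above; the wrapper function name
here is hypothetical.  Seeded with LAST_VIRTUAL_REGISTER, the counter
hands back the same dummy register numbers the old hard-coded
gen_raw_REG calls used.]

/* Sketch: how ix86_vectorize_vec_perm_const_ok sets up its operands
   for the testing_p run with the helper in place.  */

static void
sketch_dry_run_setup (enum machine_mode vmode, bool one_operand_p)
{
  struct expand_vec_perm_d d;

  d.vmode = vmode;
  d.one_operand_p = one_operand_p;
  d.testing_p = true;
  d.test_regno = LAST_VIRTUAL_REGISTER;

  d.target = gen_vec_perm_reg (&d, d.vmode);       /* LAST_VIRTUAL_REGISTER + 1 */
  d.op1 = d.op0 = gen_vec_perm_reg (&d, d.vmode);  /* LAST_VIRTUAL_REGISTER + 2 */
  if (!d.one_operand_p)
    d.op1 = gen_vec_perm_reg (&d, d.vmode);        /* LAST_VIRTUAL_REGISTER + 3 */
}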