On 02/20/2014 12:39 PM, Jakub Jelinek wrote:
> +  if (!d->testing_p)
> +    {
> +      dremap.target = gen_reg_rtx (dremap.vmode);
> +      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
> +    }
...
> +       if (d->testing_p)
> +         d_copy.target = gen_lowpart (V4DFmode, d->target);
> +       else
> +         d_copy.target = gen_reg_rtx (V4DFmode);

I'm not keen on these changes, because they could potentially affect how the
insn matching happens.  I'm not 100% sure it actually matters, but I think
there's a simple way around it: use the same kind of gen_raw_REG dummy
registers that we already create to begin with.

What about this?  Note that I've also slightly adjusted a few of the breaks, to
take into account that both arms of the if will always succeed.


r~
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cd14e52..d827bb3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -39499,6 +39499,7 @@ struct expand_vec_perm_d
   rtx target, op0, op1;
   unsigned char perm[MAX_VECT_LEN];
   enum machine_mode vmode;
+  unsigned test_regno;
   unsigned char nelt;
   bool one_operand_p;
   bool testing_p;
@@ -42419,6 +42420,17 @@ init_vselect_insn (void)
   end_sequence ();
 }
 
+/* Create a new pseudo of MODE, or for testing, a dummy raw register.  */
+
+static rtx
+gen_vec_perm_reg (struct expand_vec_perm_d *d, enum machine_mode mode)
+{
+  if (d->testing_p)
+    return gen_raw_REG (mode, ++d->test_regno);
+  else
+    return gen_reg_rtx (mode);
+}
+
 /* Construct (set target (vec_select op0 (parallel perm))) and
    return true if that's a valid instruction in the active ISA.  */
 
@@ -42811,9 +42823,7 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
                {
                  for (i = 0; i < 4; i++)
                    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
-                 if (d->testing_p)
-                   return true;
-                 target = gen_reg_rtx (V4DImode);
+                 target = gen_vec_perm_reg (d, V4DImode);
                  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
                                      perm, 4, false))
                    {
@@ -43411,7 +43421,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
       else
        dfinal.perm[i] = e;
     }
-  dremap.target = gen_reg_rtx (dremap.vmode);
+  dremap.target = gen_vec_perm_reg (d, dremap.vmode);
   dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
   dfinal.op1 = dfinal.op0;
   dfinal.one_operand_p = true;
@@ -43845,6 +43855,9 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
     return false;
   gcc_assert (!d->one_operand_p);
 
+  if (d->testing_p)
+    return true;
+
   nelt = d->nelt;
   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
 
@@ -44053,6 +44066,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
   switch (d->vmode)
     {
     case V4DFmode:
+      if (d->testing_p)
+       break;
       t1 = gen_reg_rtx (V4DFmode);
       t2 = gen_reg_rtx (V4DFmode);
 
@@ -44072,6 +44087,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       {
        int mask = odd ? 0xdd : 0x88;
 
+       if (d->testing_p)
+         break;
        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);
@@ -44109,6 +44126,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       gcc_unreachable ();
 
     case V8HImode:
+      if (d->testing_p)
+       break;
       if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
       else
@@ -44130,6 +44149,8 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
       break;
 
     case V16QImode:
+      if (d->testing_p)
+       break;
       if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
       else
@@ -44160,7 +44181,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
-         d_copy.target = gen_reg_rtx (V4DFmode);
+         d_copy.target = gen_vec_perm_reg (d, V4DFmode);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44173,6 +44194,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
          return false;
        }
 
+      if (d->testing_p)
+       break;
+
       t1 = gen_reg_rtx (V4DImode);
       t2 = gen_reg_rtx (V4DImode);
 
@@ -44193,7 +44217,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
-         d_copy.target = gen_reg_rtx (V8SFmode);
+         d_copy.target = gen_vec_perm_reg (d, V8SFmode);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44206,6 +44230,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
          return false;
        }
 
+      if (d->testing_p)
+       break;
+
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
       t3 = gen_reg_rtx (V4DImode);
@@ -44298,6 +44325,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
     case V16QImode:
       /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
+      if (d->testing_p)
+       return true;
       do
        {
          rtx dest;
@@ -44655,6 +44684,7 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
   d.vmode = vmode;
   d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
   d.testing_p = true;
+  d.test_regno = LAST_VIRTUAL_REGISTER;
 
   /* Given sufficient ISA support we can just return true here
      for selected vector modes.  */
@@ -44699,10 +44729,10 @@ ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 
   /* Otherwise we have to go through the motions and see if we can
      figure out how to generate the requested permutation.  */
-  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
-  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
+  d.target = gen_vec_perm_reg (&d, d.vmode);
+  d.op1 = d.op0 = gen_vec_perm_reg (&d, d.vmode);
   if (!d.one_operand_p)
-    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
+    d.op1 = gen_vec_perm_reg (&d, d.vmode);
 
   start_sequence ();
   ret = ix86_expand_vec_perm_const_1 (&d);

Reply via email to