Hi!

As discussed in the PR, calling gen_reg_rtx while init_emit is not active
is highly undesirable.  The following patch makes sure that
for d->testing_p we never call gen_reg_rtx (i.e. from within
ix86_vectorize_vec_perm_const_ok) and never try to emit insns.

Bootstrapped/regtested on x86_64-linux and i686-linux (together with Uros'
patch to assert that gen_reg_rtx is not called when init_emit is not
active) with RTL checking, further tested with
GCC_TEST_RUN_EXPENSIVE=1 make -j16 -k check 
RUNTESTFLAGS='--target_board=unix\{-msse2,-msse3,-mssse3,-msse4,-mavx,-mavx2,-mavx512f\}
 dg-torture.exp=*vshuf*'
(on AVX HW, so the -mavx2 and -mavx512f tests failed execution, as expected,
but at least didn't fail compilation, with the exception of
gcc.dg/torture/vshuf-v8sf.c, which ICEs with -mavx2 -DEXPENSIVE both
with and without this patch - I will look at it eventually).

Ok for trunk (and is the attached patch, which Uros has tested, ok for the
4.8 branch)?

2014-02-20  Jakub Jelinek  <ja...@redhat.com>

        PR target/57896
        * config/i386/i386.c (expand_vec_perm_interleave2): Don't call
        gen_reg_rtx if d->testing_p.
        (expand_vec_perm_pshufb2, expand_vec_perm_broadcast_1): Return early
        if d->testing_p and we will certainly return true.
        (expand_vec_perm_even_odd_1): Likewise.  Don't call gen_reg_rtx
        if d->testing_p.

--- gcc/config/i386/i386.c.jj   2014-02-19 19:11:03.600211257 +0100
+++ gcc/config/i386/i386.c      2014-02-20 12:57:30.857266155 +0100
@@ -43411,8 +43411,11 @@ expand_vec_perm_interleave2 (struct expa
       else
        dfinal.perm[i] = e;
     }
-  dremap.target = gen_reg_rtx (dremap.vmode);
-  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
+  if (!d->testing_p)
+    {
+      dremap.target = gen_reg_rtx (dremap.vmode);
+      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
+    }
   dfinal.op1 = dfinal.op0;
   dfinal.one_operand_p = true;
 
@@ -43845,6 +43848,9 @@ expand_vec_perm_pshufb2 (struct expand_v
     return false;
   gcc_assert (!d->one_operand_p);
 
+  if (d->testing_p)
+    return true;
+
   nelt = d->nelt;
   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
 
@@ -44053,6 +44059,8 @@ expand_vec_perm_even_odd_1 (struct expan
   switch (d->vmode)
     {
     case V4DFmode:
+      if (d->testing_p)
+       break;
       t1 = gen_reg_rtx (V4DFmode);
       t2 = gen_reg_rtx (V4DFmode);
 
@@ -44072,6 +44080,8 @@ expand_vec_perm_even_odd_1 (struct expan
       {
        int mask = odd ? 0xdd : 0x88;
 
+       if (d->testing_p)
+         break;
        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);
@@ -44113,6 +44123,8 @@ expand_vec_perm_even_odd_1 (struct expan
        return expand_vec_perm_pshufb2 (d);
       else
        {
+         if (d->testing_p)
+           break;
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
@@ -44134,6 +44146,8 @@ expand_vec_perm_even_odd_1 (struct expan
        return expand_vec_perm_pshufb2 (d);
       else
        {
+         if (d->testing_p)
+           break;
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
@@ -44160,7 +44174,10 @@ expand_vec_perm_even_odd_1 (struct expan
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V4DFmode;
-         d_copy.target = gen_reg_rtx (V4DFmode);
+         if (d->testing_p)
+           d_copy.target = gen_lowpart (V4DFmode, d->target);
+         else
+           d_copy.target = gen_reg_rtx (V4DFmode);
          d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
          d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44173,6 +44190,9 @@ expand_vec_perm_even_odd_1 (struct expan
          return false;
        }
 
+      if (d->testing_p)
+       break;
+
       t1 = gen_reg_rtx (V4DImode);
       t2 = gen_reg_rtx (V4DImode);
 
@@ -44193,7 +44213,10 @@ expand_vec_perm_even_odd_1 (struct expan
        {
          struct expand_vec_perm_d d_copy = *d;
          d_copy.vmode = V8SFmode;
-         d_copy.target = gen_reg_rtx (V8SFmode);
+         if (d->testing_p)
+           d_copy.target = gen_lowpart (V8SFmode, d->target);
+         else
+           d_copy.target = gen_reg_rtx (V8SFmode);
          d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
          d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
          if (expand_vec_perm_even_odd_1 (&d_copy, odd))
@@ -44206,6 +44229,9 @@ expand_vec_perm_even_odd_1 (struct expan
          return false;
        }
 
+      if (d->testing_p)
+       break;
+
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
       t3 = gen_reg_rtx (V4DImode);
@@ -44298,6 +44324,8 @@ expand_vec_perm_broadcast_1 (struct expa
     case V16QImode:
       /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
+      if (d->testing_p)
+       return true;
       do
        {
          rtx dest;

        Jakub
2014-02-20  Jakub Jelinek  <ja...@redhat.com>

        PR target/57896
        * config/i386/i386.c (expand_vec_perm_interleave2): Don't call
        gen_reg_rtx if d->testing_p.
        (expand_vec_perm_pshufb2, expand_vec_perm_even_odd_1,
        expand_vec_perm_broadcast_1): Return early if d->testing_p and
        we will certainly return true.

--- gcc/config/i386/i386.c.jj   2014-02-10 15:12:58.000000000 +0100
+++ gcc/config/i386/i386.c      2014-02-20 12:28:37.320252911 +0100
@@ -39414,7 +39414,9 @@ expand_vec_perm_interleave2 (struct expa
       else
        dfinal.perm[i] = e;
     }
-  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
+
+  if (!d->testing_p)
+    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
   dfinal.op1 = dfinal.op0;
   dfinal.one_operand_p = true;
   dremap.target = dfinal.op0;
@@ -39849,6 +39851,9 @@ expand_vec_perm_pshufb2 (struct expand_v
     return false;
   gcc_assert (!d->one_operand_p);
 
+  if (d->testing_p)
+    return true;
+
   nelt = d->nelt;
   eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
 
@@ -40048,6 +40053,8 @@ expand_vec_perm_even_odd_1 (struct expan
   switch (d->vmode)
     {
     case V4DFmode:
+      if (d->testing_p)
+       break;
       t1 = gen_reg_rtx (V4DFmode);
       t2 = gen_reg_rtx (V4DFmode);
 
@@ -40067,6 +40074,8 @@ expand_vec_perm_even_odd_1 (struct expan
       {
        int mask = odd ? 0xdd : 0x88;
 
+       if (d->testing_p)
+         break;
        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);
@@ -40108,6 +40117,8 @@ expand_vec_perm_even_odd_1 (struct expan
        return expand_vec_perm_pshufb2 (d);
       else
        {
+         if (d->testing_p)
+           break;
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
@@ -40129,6 +40140,8 @@ expand_vec_perm_even_odd_1 (struct expan
        return expand_vec_perm_pshufb2 (d);
       else
        {
+         if (d->testing_p)
+           break;
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
@@ -40161,6 +40174,9 @@ expand_vec_perm_even_odd_1 (struct expan
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }
 
+      if (d->testing_p)
+       break;
+
       t1 = gen_reg_rtx (V4DImode);
       t2 = gen_reg_rtx (V4DImode);
 
@@ -40187,6 +40203,9 @@ expand_vec_perm_even_odd_1 (struct expan
          return expand_vec_perm_even_odd_1 (&d_copy, odd);
        }
 
+      if (d->testing_p)
+       break;
+
       t1 = gen_reg_rtx (V8SImode);
       t2 = gen_reg_rtx (V8SImode);
 
@@ -40279,6 +40298,8 @@ expand_vec_perm_broadcast_1 (struct expa
     case V16QImode:
       /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
+      if (d->testing_p)
+       return true;
       do
        {
          rtx dest;

Reply via email to