Hi!

vec_interleave_lowv4sf only supports =x, x, x alternative, not =v, v, v
(which should be supportable for AVX512VL only anyway, but probably
stage1 material), without AVX512VL and with ext sse reg input operand
we have to either due to interleaving or broadcast in the destination,
or disable the splitter.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-03-21  Jakub Jelinek  <ja...@redhat.com>

        PR target/70300
        * config/i386/i386.md (cvtsd2ss splitter): Unpack in destination
        instead of source if operands[1] is xmm16 and above and
        !TARGET_AVX512VL.  Use avx512f_vec_dupv16sf_1 instead of
        vec_interleave_lowv4sf if we need to unpack xmm16 and above.

        * gcc.target/i386/pr70300.c: New test.

--- gcc/config/i386/i386.md.jj  2016-03-17 12:53:25.000000000 +0100
+++ gcc/config/i386/i386.md     2016-03-21 11:50:54.539001151 +0100
@@ -4229,17 +4229,28 @@ (define_split
     {
       /* If it is unsafe to overwrite upper half of source, we need
         to move to destination and unpack there.  */
-      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
-          || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
-         && true_regnum (operands[0]) != true_regnum (operands[1]))
+      if (((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+           || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+          && true_regnum (operands[0]) != true_regnum (operands[1]))
+         || (EXT_REX_SSE_REG_P (operands[1])
+             && !TARGET_AVX512VL))
        {
          rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
          emit_move_insn (tmp, operands[1]);
        }
       else
        operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-      emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
-                                            operands[3]));
+      /* FIXME: vec_interleave_lowv4sf for AVX512VL should allow
+        =v, v, then vbroadcastss will be only needed for AVX512F without
+        AVX512VL.  */
+      if (!EXT_REX_SSE_REGNO_P (true_regnum (operands[3])))
+       emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
+                                              operands[3]));
+      else
+       {
+         rtx tmp = simplify_gen_subreg (V16SFmode, operands[3], V4SFmode, 0);
+         emit_insn (gen_avx512f_vec_dupv16sf_1 (tmp, tmp));
+       }
     }
   else
     emit_insn (gen_vec_setv4sf_0 (operands[3],
--- gcc/testsuite/gcc.target/i386/pr70300.c.jj  2016-03-21 11:56:16.455580855 
+0100
+++ gcc/testsuite/gcc.target/i386/pr70300.c     2016-03-21 11:55:55.000000000 
+0100
@@ -0,0 +1,25 @@
+/* PR target/70300 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=amdfam10 -mavx512f" } */
+
+typedef _Complex A __attribute__ ((mode (SC)));
+typedef _Complex B __attribute__ ((mode (DC)));
+typedef _Complex C __attribute__ ((mode (TC)));
+
+C
+foo (A a, B b, C c, A d, B e, C f)
+{
+  b -= a;
+  d += a;
+  a += f;
+  return a + b + d + e;
+}
+
+__attribute__((target ("avx512vl"))) C
+bar (A a, B b, C c, A d, B e, C f)
+{
+  b -= a;
+  d += a;
+  a += f;
+  return a + b + d + e;
+}

        Jakub

Reply via email to