https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66560

            Bug ID: 66560
           Summary: Fails to generate ADDSUBPS
           Product: gcc
           Version: 6.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: ubizjak at gmail dot com
  Target Milestone: ---

The second testcase (bar_v4sf) fails to generate ADDSUBPS with -O2 -mavx.

--cut here--
/* 16-byte GNU vector of float (V4SF in the RTL dumps of this report).  */
typedef float v4sf __attribute__((vector_size(16)));
/* 16-byte GNU vector of int; used here only as the __builtin_shuffle mask.  */
typedef int v4si __attribute__((vector_size(16)));
/* Testcase 1: difference is the first shuffle input, sum is the second.
   In a two-input __builtin_shuffle, mask indices 0..3 pick from the first
   vector and 4..7 from the second, so { 0, 5, 2, 7 } yields
   { x-y, x+y, x-y, x+y }: subtract in even lanes, add in odd lanes.
   Per the report, combine matches this form.  */
v4sf foo_v4sf (v4sf x, v4sf y)
{
  v4sf tem0 = x - y;
  v4sf tem1 = x + y;
  return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
}

/* Testcase 2: same lane-wise result as foo_v4sf, but with the shuffle
   inputs swapped (sum first, difference second) and the mask adjusted to
   { 4, 1, 6, 3 }.  Indices 4 and 6 pick lanes 0 and 2 of tem1 (x-y);
   indices 1 and 3 pick lanes 1 and 3 of tem0 (x+y).
   This is the form reported as failing to generate ADDSUBPS
   with -O2 -mavx.  */
v4sf bar_v4sf (v4sf x, v4sf y)
{
  v4sf tem0 = x + y;
  v4sf tem1 = x - y;
  return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
}
--cut here--

gcc -O2 -mavx generates the following for bar_v4sf:

        vaddps  %xmm1, %xmm0, %xmm2
        vsubps  %xmm1, %xmm0, %xmm0
        vblendps        $5, %xmm0, %xmm2, %xmm0
        ret

The combine pass says:

foo_v4sf:

Successfully matched this instruction:
(set (reg:V4SF 93 [ D.2156 ])
    (vec_merge:V4SF (plus:V4SF (reg/v:V4SF 91 [ x ])
            (reg:V4SF 22 xmm1 [ y ]))
        (minus:V4SF (reg/v:V4SF 91 [ x ])
            (reg:V4SF 22 xmm1 [ y ]))
        (const_int 10 [0xa])))

bar_v4sf:

Failed to match this instruction:
(set (reg:V4SF 93 [ D.2159 ])
    (vec_merge:V4SF (minus:V4SF (reg/v:V4SF 91 [ x ])
            (reg/v:V4SF 92 [ y ]))
        (plus:V4SF (reg/v:V4SF 91 [ x ])
            (reg/v:V4SF 92 [ y ]))
        (const_int 5 [0x5])))

Please note that the similar V4DF testcases below always generate the expected ADDSUBPD:

--cut here--
/* 32-byte GNU vector of double (V4DF in the RTL dumps of this report).  */
typedef double v4df __attribute__((vector_size(32)));
/* 32-byte GNU vector of long long; used here only as the shuffle mask.  */
typedef long long v4di __attribute__((vector_size(32)));
/* Double-precision counterpart of foo_v4sf: difference is the first
   shuffle input, sum is the second.  Mask { 0, 5, 2, 7 } yields
   { x-y, x+y, x-y, x+y }.  Per the report, this generates the expected
   ADDSUBPD.  */
v4df foo_v4df (v4df x, v4df y)
{
  v4df tem0 = x - y;
  v4df tem1 = x + y;
  return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
}

/* Double-precision counterpart of bar_v4sf: sum is the first shuffle
   input, difference is the second, with mask { 4, 1, 6, 3 } selecting
   x-y for lanes 0 and 2 and x+y for lanes 1 and 3.  Unlike the V4SF
   variant, the report states this form does generate the expected
   ADDSUBPD.  */
v4df bar_v4df (v4df x, v4df y)
{
  v4df tem0 = x + y;
  v4df tem1 = x - y;
  return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
}
--cut here--

foo_v4df:

Successfully matched this instruction:
(set (reg:V4DF 93 [ D.2162 ])
    (vec_select:V4DF (vec_concat:V8DF (minus:V4DF (reg/v:V4DF 91 [ x ])
                (reg/v:V4DF 92 [ y ]))
            (plus:V4DF (reg/v:V4DF 91 [ x ])
                (reg/v:V4DF 92 [ y ])))
        (parallel [
                (const_int 0 [0])
                (const_int 5 [0x5])
                (const_int 2 [0x2])
                (const_int 7 [0x7])
            ])))

bar_v4df:

Successfully matched this instruction:
(set (reg:V4DF 93 [ D.2165 ])
    (vec_select:V4DF (vec_concat:V8DF (minus:V4DF (reg/v:V4DF 91 [ x ])
                (reg/v:V4DF 92 [ y ]))
            (plus:V4DF (reg/v:V4DF 91 [ x ])
                (reg/v:V4DF 92 [ y ])))
        (parallel [
                (const_int 0 [0])
                (const_int 5 [0x5])
                (const_int 2 [0x2])
                (const_int 7 [0x7])
            ])))

Reply via email to