That is covered by a separate part of the patch.
Bootstrap and make check passed (2 new passes for core-avx2).
Is it ok?
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d6155cf..68ee65a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -81,6 +81,7 @@
;; For AVX2 support
UNSPEC_VPERMVAR
UNSPEC_VPERMTI
+ UNSPEC_VPALIGNRDI
UNSPEC_GATHER
UNSPEC_VSIBADDR
@@ -14167,6 +14168,19 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn "avx2_palignrv4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (unspec:V4DI
+ [(match_operand:V4DI 1 "register_operand" "x")
+ (match_operand:V4DI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPALIGNRDI))]
+ "TARGET_AVX2"
+ "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "avx2_vec_dupv4df"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_duplicate:V4DF
@@ -14658,6 +14672,49 @@
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")])
+(define_insn_and_split "avx2_rotate<mode>_perm"
+ [(set (match_operand:V_256 0 "register_operand" "=&x")
+ (vec_select:V_256
+ (match_operand:V_256 1 "register_operand" "x")
+ (match_parallel 2 "palignr_operand"
+ [(match_operand 3 "const_int_operand" "n")])))]
+ "TARGET_AVX2"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ enum machine_mode imode = GET_MODE_INNER (<MODE>mode);
+ int shift = INTVAL (operands[3]) * GET_MODE_SIZE (imode);
+ rtx op0 = gen_rtx_REG (V4DImode, REGNO (operands[0]));
+ rtx op1 = gen_rtx_REG (V4DImode, REGNO (operands[1]));
+
+ if (shift == 0)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ {
+ emit_insn (gen_avx2_permv2ti (op0,
+ op1,
+ op1,
+ GEN_INT (33)));
+ if (shift < 16)
+ emit_insn (gen_avx2_palignrv4di (op0,
+ op0,
+ op1,
+ GEN_INT (shift)));
+ else if (shift > 16)
+ emit_insn (gen_avx2_palignrv4di (op0,
+ op1,
+ op0,
+ GEN_INT (shift - 16)));
+ }
+ DONE;
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")])
+
+
(define_expand "avx_vinsertf128<mode>"
[(match_operand:V_256 0 "register_operand")
(match_operand:V_256 1 "register_operand")
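
For reference, the splitter above builds a full 32-byte rotate from a
cross-lane swap followed by a per-lane byte alignment. Below is a
minimal intrinsics sketch of the same sequence (illustration only, not
part of the patch; the shift is hard-coded to 4 because vpalignr takes
an immediate):

#include <immintrin.h>

/* Rotate the 32 bytes of SRC right by 4 bytes, mirroring what the
   avx2_rotate<mode>_perm splitter emits for shift < 16.  */
static inline __m256i
rotate32_right_by4 (__m256i src)
{
  /* vperm2i128 with immediate 33 (0x21) swaps the two 128-bit lanes:
     swapped = { src.hi, src.lo }.  */
  __m256i swapped = _mm256_permute2x128_si256 (src, src, 33);
  /* vpalignr concatenates corresponding 128-bit lanes of
     (swapped, src) and shifts each pair right by 4 bytes, which
     yields the whole-vector rotate.  For shift > 16 the splitter
     swaps the two source operands and uses shift - 16 instead.  */
  return _mm256_alignr_epi8 (swapped, src, 4);
}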
The test case covering this is "gcc.target/i386/pr52252-atom.c".
It will pass for "-march=core-avx2" once the patch is committed.
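
As a rough illustration (not the actual testcase), the kind of
permutation the new pattern catches is a constant whole-vector rotate,
e.g. under -O2 -mavx2:

typedef unsigned char v32qi __attribute__ ((vector_size (32)));

/* A constant rotate of all 32 bytes; with the patch this can be
   expanded as vperm2i128 + vpalignr instead of a longer sequence.  */
v32qi
rotate_by_5 (v32qi x)
{
  return __builtin_shuffle (x, (v32qi) { 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20,
                                         21, 22, 23, 24, 25, 26, 27, 28,
                                         29, 30, 31, 0, 1, 2, 3, 4 });
}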
On Thu, Aug 14, 2014 at 6:55 PM, H.J. Lu <[email protected]> wrote:
> On Thu, Aug 14, 2014 at 1:08 AM, Evgeny Stupachenko <[email protected]>
> wrote:
>> Ping.
>>
>> On Thu, Jul 10, 2014 at 7:29 PM, Evgeny Stupachenko <[email protected]>
>> wrote:
>>> On Mon, Jul 7, 2014 at 6:40 PM, Richard Henderson <[email protected]> wrote:
>>>> On 07/03/2014 02:53 AM, Evgeny Stupachenko wrote:
>>>>> -expand_vec_perm_palignr (struct expand_vec_perm_d *d)
>>>>> +expand_vec_perm_palignr (struct expand_vec_perm_d *d, int insn_num)
>>>>
>>>> insn_num might as well be "bool avx2", since it's only ever set to two
>>>> values.
>>>
>>> Agreed. However:
>>> after the alignment, the one-operand permutation could be just a
>>> move and take 2 instructions for AVX2 as well;
>>> for AVX2 there could be other cases where the scheme takes 4 or 5
>>> instructions;
>>> so we can leave insn_num as is for a potential AVX512 extension.
>>>
>>>>
>>>>> - /* Even with AVX, palignr only operates on 128-bit vectors. */
>>>>> - if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
>>>>> + /* SSSE3 is required to apply PALIGNR on 16-byte operands. */
>>>>> + if (GET_MODE_SIZE (d->vmode) == 16)
>>>>> + {
>>>>> + if (!TARGET_SSSE3)
>>>>> + return false;
>>>>> + }
>>>>> + /* AVX2 is required to apply PALIGNR on 32-byte operands. */
>>>>> + else if (GET_MODE_SIZE (d->vmode) == 32)
>>>>> + {
>>>>> + if (!TARGET_AVX2)
>>>>> + return false;
>>>>> + }
>>>>> + /* Other sizes are not supported. */
>>>>> + else
>>>>> return false;
>>>>
>>>> And you'd better check it up here because...
>>>>
>>>
>>> Correct. The following should resolve the issue:
>>> /* For AVX2 we need more than 2 instructions when the alignment
>>> by itself does not produce the desired permutation. */
>>> if (TARGET_AVX2 && insn_num <= 2)
>>> return false;
>>>
>>>>> + /* For SSSE3 we need 1 instruction for palignr plus 1 for the
>>>>> + one-operand permutation. */
>>>>> + if (insn_num == 2)
>>>>> + {
>>>>> + ok = expand_vec_perm_1 (&dcopy);
>>>>> + gcc_assert (ok);
>>>>> + }
>>>>> + /* For AVX2 we need 2 instructions for the shift (vpalignr and
>>>>> + vperm) plus 4 instructions for the one-operand permutation. */
>>>>> + else if (insn_num == 6)
>>>>> + {
>>>>> + ok = expand_vec_perm_vpshufb2_vpermq (&dcopy);
>>>>> + gcc_assert (ok);
>>>>> + }
>>>>> + else
>>>>> + ok = false;
>>>>> return ok;
>>>>
>>>> ... down here you'll simply ICE from the gcc_assert.
>>>
>
> Can you modify your patch to fix
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62128
>
> with a testcase?
>
>
> --
> H.J.