That is covered by a separate part of the patch.
Bootstrap and make check passed (2 new passes for core-avx2).
Is it ok?
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index d6155cf..68ee65a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -81,6 +81,7 @@
;; For AVX2 support
UNSPEC_VPERMVAR
UNSPEC_VPERMTI
+ UNSPEC_VPALIGNRDI
UNSPEC_GATHER
UNSPEC_VSIBADDR
@@ -14167,6 +14168,19 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+(define_insn "avx2_palignrv4di"
+ [(set (match_operand:V4DI 0 "register_operand" "=x")
+ (unspec:V4DI
+ [(match_operand:V4DI 1 "register_operand" "x")
+ (match_operand:V4DI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_VPALIGNRDI))]
+ "TARGET_AVX2"
+ "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "OI")])
+
(define_insn "avx2_vec_dupv4df"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_duplicate:V4DF
@@ -14658,6 +14672,49 @@
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")])
+(define_insn_and_split "avx2_rotate<mode>_perm"
+ [(set (match_operand:V_256 0 "register_operand" "=&x")
+ (vec_select:V_256
+ (match_operand:V_256 1 "register_operand" "x")
+ (match_parallel 2 "palignr_operand"
+ [(match_operand 3 "const_int_operand" "n")])))]
+ "TARGET_AVX2"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ enum machine_mode imode = GET_MODE_INNER (<MODE>mode);
+ int shift = INTVAL (operands[3]) * GET_MODE_SIZE (imode);
+ rtx op0 = gen_rtx_REG (V4DImode, REGNO (operands[0]));
+ rtx op1 = gen_rtx_REG (V4DImode, REGNO (operands[1]));
+
+ if (shift == 0)
+ emit_move_insn (operands[0], operands[1]);
+ else
+ {
+ emit_insn (gen_avx2_permv2ti (op0,
+ op1,
+ op1,
+ GEN_INT (33)));
+ if (shift < 16)
+ emit_insn (gen_avx2_palignrv4di (op0,
+ op0,
+ op1,
+ GEN_INT (shift)));
+ else if (shift > 16)
+ emit_insn (gen_avx2_palignrv4di (op0,
+ op1,
+ op0,
+ GEN_INT (shift - 16)));
+ }
+ DONE;
+}
+ [(set_attr "type" "sseishft")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "vex")])
+
+
(define_expand "avx_vinsertf128<mode>"
[(match_operand:V_256 0 "register_operand")
(match_operand:V_256 1 "register_operand")
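
For reference, the splitter above builds a full 32-byte rotate from a
cross-lane swap followed by a per-lane byte alignment. Below is a
minimal intrinsics sketch of the same sequence (illustration only, not
part of the patch; the shift is hard-coded to 4 because vpalignr takes
an immediate):

#include <immintrin.h>

/* Rotate the 32 bytes of SRC right by 4 bytes, mirroring what the
   avx2_rotate<mode>_perm splitter emits for shift < 16.  */
static inline __m256i
rotate32_right_by4 (__m256i src)
{
  /* vperm2i128 with immediate 33 (0x21) swaps the two 128-bit lanes:
     swapped = { src.hi, src.lo }.  */
  __m256i swapped = _mm256_permute2x128_si256 (src, src, 33);
  /* vpalignr concatenates corresponding 128-bit lanes of
     (swapped, src) and shifts each pair right by 4 bytes, which
     yields the whole-vector rotate.  For shift > 16 the splitter
     swaps the two source operands and uses shift - 16 instead.  */
  return _mm256_alignr_epi8 (swapped, src, 4);
}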
The test case covering this is "gcc.target/i386/pr52252-atom.c".
It will pass for "-march=core-avx2" once the patch is committed.
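
As a rough illustration (not the actual testcase), the kind of
permutation the new pattern catches is a constant whole-vector rotate,
e.g. under -O2 -mavx2:

typedef unsigned char v32qi __attribute__ ((vector_size (32)));

/* A constant rotate of all 32 bytes; with the patch this can be
   expanded as vperm2i128 + vpalignr instead of a longer sequence.  */
v32qi
rotate_by_5 (v32qi x)
{
  return __builtin_shuffle (x, (v32qi) { 5, 6, 7, 8, 9, 10, 11, 12,
                                         13, 14, 15, 16, 17, 18, 19, 20,
                                         21, 22, 23, 24, 25, 26, 27, 28,
                                         29, 30, 31, 0, 1, 2, 3, 4 });
}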
On Thu, Aug 14, 2014 at 6:55 PM, H.J. Lu <[email protected]> wrote:
> On Thu, Aug 14, 2014 at 1:08 AM, Evgeny Stupachenko <[email protected]>
> wrote:
>> Ping.
>>
>> On Thu, Jul 10, 2014 at 7:29 PM, Evgeny Stupachenko <[email protected]>
>> wrote:
>>> On Mon, Jul 7, 2014 at 6:40 PM, Richard Henderson <[email protected]> wrote:
>>>> On 07/03/2014 02:53 AM, Evgeny Stupachenko wrote:
>>>>> -expand_vec_perm_palignr (struct expand_vec_perm_d *d)
>>>>> +expand_vec_perm_palignr (struct expand_vec_perm_d *d, int insn_num)
>>>>
>>>> insn_num might as well be "bool avx2", since it's only ever set to two
>>>> values.
>>>
>>> Agreed. However:
>>> after the alignment, the one-operand permutation could be just a
>>> move and take 2 instructions for AVX2 as well;
>>> for AVX2 there could be other cases where the scheme takes 4 or 5
>>> instructions;
>>> so we can leave insn_num as is for a potential AVX512 extension.
>>>
>>>>
>>>>> - /* Even with AVX, palignr only operates on 128-bit vectors. */
>>>>> - if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
>>>>> + /* SSSE3 is required to apply PALIGNR on 16-byte operands. */
>>>>> + if (GET_MODE_SIZE (d->vmode) == 16)
>>>>> + {
>>>>> + if (!TARGET_SSSE3)
>>>>> + return false;
>>>>> + }
>>>>> + /* AVX2 is required to apply PALIGNR on 32-byte operands. */
>>>>> + else if (GET_MODE_SIZE (d->vmode) == 32)
>>>>> + {
>>>>> + if (!TARGET_AVX2)
>>>>> + return false;
>>>>> + }
>>>>> + /* Other sizes are not supported. */
>>>>> + else
>>>>> return false;
>>>>
>>>> And you'd better check it up here because...
>>>>
>>>
>>> Correct. The following should resolve the issue:
>>> /* For AVX2 we need more than 2 instructions when the alignment
>>> by itself does not produce the desired permutation. */
>>> if (TARGET_AVX2 && insn_num <= 2)
>>> return false;
>>>
>>>>> + /* For SSSE3 we need 1 instruction for palignr plus 1 for the
>>>>> + one-operand permutation. */
>>>>> + if (insn_num == 2)
>>>>> + {
>>>>> + ok = expand_vec_perm_1 (&dcopy);
>>>>> + gcc_assert (ok);
>>>>> + }
>>>>> + /* For AVX2 we need 2 instructions for the shift (vpalignr and
>>>>> + vperm) plus 4 instructions for the one-operand permutation. */
>>>>> + else if (insn_num == 6)
>>>>> + {
>>>>> + ok = expand_vec_perm_vpshufb2_vpermq (&dcopy);
>>>>> + gcc_assert (ok);
>>>>> + }
>>>>> + else
>>>>> + ok = false;
>>>>> return ok;
>>>>
>>>> ... down here you'll simply ICE from the gcc_assert.
>>>
>
> Can you modify your patch to fix
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62128
>
> with a testcase?
>
>
> --
> H.J.