On Sat, Feb 09, 2019 at 10:56:38AM +0100, Jakub Jelinek wrote:
> On Sat, Feb 09, 2019 at 10:50:43AM +0100, Uros Bizjak wrote:
> > > Also need this patch since we no longer set MODE_XI for
> > > AVX512VL.
> >
> > No. Please figure out correct condition to set mode attribute to XImode
> > instead.
>
> If it is AVX512VL, isn't MODE_OI or MODE_TI correct in those cases though?
> While the instructions need EVEX encoding if they have [xy]mm{16,...31}
> operands, they operate just on 256 or 128 bits.
That said, mov{oi,ti}_internal is severely broken for avx512f without
avx512vl even after this patch.
I think the following patch, incremental to H.J.'s patch, should fix that.
It is pretty much a copy of what sse.md (*mov<mode>_internal) pattern does,
just specialized to the particular instructions (i.e. that it is integral,
not floating, and always 32-byte or always 16-byte). sse.md has:
/* There is no evex-encoded vmov* for sizes smaller than 64-bytes
in avx512f, so we need to use workarounds, to access sse registers
16-31, which are evex-only. In avx512vl we don't need workarounds. */
if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
&& (EXT_REX_SSE_REG_P (operands[0])
|| EXT_REX_SSE_REG_P (operands[1])))
{
if (memory_operand (operands[0], <MODE>mode))
{
if (<MODE_SIZE> == 32)
return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
else if (<MODE_SIZE> == 16)
return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
else
gcc_unreachable ();
}
else if (memory_operand (operands[1], <MODE>mode))
{
if (<MODE_SIZE> == 32)
return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
else if (<MODE_SIZE> == 16)
return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
else
gcc_unreachable ();
}
else
/* Reg -> reg move is always aligned. Just use wider move. */
switch (get_attr_mode (insn))
{
case MODE_V8SF:
case MODE_V4SF:
return "vmovaps\t{%g1, %g0|%g0, %g1}";
case MODE_V4DF:
case MODE_V2DF:
return "vmovapd\t{%g1, %g0|%g0, %g1}";
case MODE_OI:
case MODE_TI:
return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
default:
gcc_unreachable ();
}
}
before it tries to handle the normal cases. Ok for trunk if it passes
bootstrap/regtest?
2019-02-09 Jakub Jelinek <[email protected]>
PR target/89229
* config/i386/i386.md (*movoi_internal_avx, *movti_internal): Handle
MODE_XI properly.
--- gcc/config/i386/i386.md.jj 2019-02-09 11:18:53.995450055 +0100
+++ gcc/config/i386/i386.md 2019-02-09 11:26:04.364342306 +0100
@@ -1905,6 +1905,18 @@ (define_insn "*movoi_internal_avx"
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
+ /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
+ in avx512f, so we need to use workarounds to access sse registers
+ 16-31, which are evex-only. In avx512vl we don't need workarounds. */
+ if (get_attr_mode (insn) == MODE_XI)
+ {
+ if (memory_operand (operands[0], OImode))
+ return "vextracti64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+ else if (memory_operand (operands[1], OImode))
+ return "vbroadcasti64x4\t{%1, %g0|%g0, %1}";
+ else
+ return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+ }
if (misaligned_operand (operands[0], OImode)
|| misaligned_operand (operands[1], OImode))
{
@@ -1968,6 +1980,18 @@ (define_insn "*movti_internal"
return standard_sse_constant_opcode (insn, operands);
case TYPE_SSEMOV:
+ /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
+ in avx512f, so we need to use workarounds to access sse registers
+ 16-31, which are evex-only. In avx512vl we don't need workarounds. */
+ if (get_attr_mode (insn) == MODE_XI)
+ {
+ if (memory_operand (operands[0], TImode))
+ return "vextracti32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+ else if (memory_operand (operands[1], TImode))
+ return "vbroadcasti32x4\t{%1, %g0|%g0, %1}";
+ else
+ return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+ }
/* TDmode values are passed as TImode on the stack. Moving them
to stack may result in unaligned memory access. */
if (misaligned_operand (operands[0], TImode)
Jakub