On Sun, Feb 10, 2019 at 11:25 PM Uros Bizjak <[email protected]> wrote:
>
> On Mon, Feb 11, 2019 at 2:04 AM H.J. Lu <[email protected]> wrote:
> >
> > On Sun, Feb 10, 2019 at 1:49 PM Uros Bizjak <[email protected]> wrote:
> > >
> > > On Sun, Feb 10, 2019 at 10:45 PM Uros Bizjak <[email protected]> wrote:
> > >
> > > > > > > + [(const_int 0)]
> > > > > > > +{
> > > > > > > + /* Emulate MMX vec_dupv2si with SSE vec_dupv4si. */
> > > > > > > + rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > > + rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > > + emit_insn (insn);
> > > > > > > + DONE;
> > > > > >
> > > > > > Please write this simple RTX explicitly in the place of (const_int
> > > > > > 0) above.
> > > > >
> > > > > rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > >
> > > > > is easy. How do I write
> > > > >
> > > > > rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > >
> > > > > in place of (const_int 0)?
> > > >
> > > > [(set (match_dup 2)
> > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > >
> > > > with
> > > >
> > > > "operands[2] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > >
> > > > or even better:
> > > >
> > > > "operands[2] = gen_lowpart (V4SImode, operands[0]);"
> > > >
> > > > in the preparation statement.
> > >
> > > Even shorter is
> > >
> > > "operands[0] = gen_lowpart (V4SImode, operands[0]);"
> > >
> > > and use (match_dup 0) instead of (match_dup 2) in the RTX.
> > >
> > > There are plenty of examples throughout sse.md.
> > >
> >
> > This works:
> >
> > (define_insn_and_split "*vec_dupv2si"
> > [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
> > (vec_duplicate:V2SI
> > (match_operand:SI 1 "register_operand" "0,0,Yv")))]
> > "TARGET_MMX || TARGET_MMX_WITH_SSE"
> > "@
> > punpckldq\t%0, %0
> > #
> > #"
> > "TARGET_MMX_WITH_SSE && reload_completed"
> > [(set (match_dup 0)
> > (vec_duplicate:V4SI (match_dup 1)))]
> > "operands[0] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> > (set_attr "type" "mmxcvt,ssemov,ssemov")
> > (set_attr "mode" "DI,TI,TI")])
>
> If it works, then gen_lowpart is preferred due to extra checks.
> However, it would result in a paradoxical subreg, so I wonder if these
> extra checks allow this transformation.
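gen_lowpart doesn't work: the split runs after reload, and gen_lowpart
ends up calling gen_reg_rtx, which asserts because no new pseudo can be
created at that point.  Test case and backtrace: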
#include <mmintrin.h>
__m64
foo (int i)
{
__v2si x = { i, i };
return (__m64) x;
}
(gdb) f 1
#1 0x0000000000ba7cca in gen_reg_rtx (mode=E_V2SImode)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c:1155
1155 gcc_assert (can_create_pseudo_p ());
(gdb) bt
#0 fancy_abort (
file=0x22180e0 "/export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c",
line=1155,
function=0x22193a8 <gen_reg_rtx(machine_mode)::__FUNCTION__> "gen_reg_rtx")
at /export/gnu/import/git/gitlab/x86-gcc/gcc/diagnostic.c:1607
#1 0x0000000000ba7cca in gen_reg_rtx (mode=E_V2SImode)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c:1155
#2 0x0000000000bd3044 in copy_to_reg (x=0x7fffea99b528)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/explow.c:594
#3 0x00000000010c7c0a in gen_lowpart_general (mode=E_V4SImode,
x=0x7fffea99b528)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/rtlhooks.c:56
...
#1 0x0000000000ba7cca in gen_reg_rtx (mode=E_V2SImode)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c:1155
1155 gcc_assert (can_create_pseudo_p ());
(gdb)
--
H.J.