On Tue, May 12, 2020 at 10:07 PM H.J. Lu <[email protected]> wrote:
>
> Update STV pass to properly count cost of XMM register push. In 32-bit
> mode, to convert XMM register push in DImode, we do an XMM store in
> DImode, followed by 2 memory pushes in SImode, instead of 2 integer
> register pushes in SImode. To convert XM register push in SImode, we
> do an XMM register to integer register move in SImode, followed an
> integer register push in SImode, instead of an integer register push in
> SImode. In 64-bit mode, we do an XMM register to integer register move
> in SImode or DImode, followed an integer register push in SImode or
> DImode, instead of an integer register push SImode or DImode.
>
> Tested on Linux/x86 and Linux/x86-64.
I think it is better to implement XMM register pushes, and split them
after reload to a sequence of:
(set (reg:P SP_REG) (plus:P SP_REG) (const_int -8)))
(set (match_dup 0) (match_dup 1))
This is definitelly better that tripsthrough memory to stack.
There are plenty of examples of fake pushes in i386.md, just grep for
"%%% Kill this when call"
Uros.
> OK for master?
>
> Thanks.
>
> H.J.
> --
> gcc/
> PR target/95021
> * config/i386/i386-features.c
> (general_scalar_chain::general_scalar_chain): Initialize
> n_sse_push.
> (general_scalar_chain::mark_dual_mode_def): Add a df_ref
> argument for reference. Increment n_sse_push for XMM register
> push.
> (timode_scalar_chain::mark_dual_mode_def): Add a dummy df_ref
> argument.
> (scalar_chain::analyze_register_chain): Pass chain->ref
> to mark_dual_mode_def.
> (general_scalar_chain::compute_convert_gain): Count cost of
> XMM register push.
> * config/i386/i386-features.h (scalar_chain::mark_dual_mode_def):
> Add a df_ref argument.
> (general_scalar_chain): Add n_sse_push.
> (general_scalar_chain::mark_dual_mode_def): Add a df_ref
> argument.
> (timode_scalar_chain::mark_dual_mode_def): Add a df_ref
> argument.
>
> gcc/testsuite/
>
> PR target/95021
> * gcc.target/i386/pr95021-1.c: New test.
> * gcc.target/i386/pr95021-2.c: Likewise.
> ---
> gcc/config/i386/i386-features.c | 33 ++++++++++++++++++++---
> gcc/config/i386/i386-features.h | 7 ++---
> gcc/testsuite/gcc.target/i386/pr95021-1.c | 25 +++++++++++++++++
> gcc/testsuite/gcc.target/i386/pr95021-2.c | 25 +++++++++++++++++
> 4 files changed, 83 insertions(+), 7 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-1.c
> create mode 100644 gcc/testsuite/gcc.target/i386/pr95021-2.c
>
> diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
> index 78fb373db6e..c85ab41350c 100644
> --- a/gcc/config/i386/i386-features.c
> +++ b/gcc/config/i386/i386-features.c
> @@ -326,6 +326,7 @@ general_scalar_chain::general_scalar_chain (enum
> machine_mode smode_,
> insns_conv = BITMAP_ALLOC (NULL);
> n_sse_to_integer = 0;
> n_integer_to_sse = 0;
> + n_sse_push = 0;
> }
>
> general_scalar_chain::~general_scalar_chain ()
> @@ -337,7 +338,7 @@ general_scalar_chain::~general_scalar_chain ()
> conversion. */
>
> void
> -general_scalar_chain::mark_dual_mode_def (df_ref def)
> +general_scalar_chain::mark_dual_mode_def (df_ref def, df_ref ref)
> {
> gcc_assert (DF_REF_REG_DEF_P (def));
>
> @@ -356,6 +357,12 @@ general_scalar_chain::mark_dual_mode_def (df_ref def)
> if (!reg_new)
> return;
> n_sse_to_integer++;
> + rtx_insn *insn = DF_REF_INSN (ref);
> + rtx set = single_set (insn);
> + /* Count XMM register push. */
Count XMM register pushes.
> + if (set
> + && push_operand (SET_DEST (set), GET_MODE (SET_DEST (set))))
> + n_sse_push++;
> }
>
> if (dump_file)
> @@ -367,7 +374,7 @@ general_scalar_chain::mark_dual_mode_def (df_ref def)
> /* For TImode conversion, it is unused. */
>
> void
> -timode_scalar_chain::mark_dual_mode_def (df_ref)
> +timode_scalar_chain::mark_dual_mode_def (df_ref, df_ref)
> {
> gcc_unreachable ();
> }
> @@ -408,14 +415,14 @@ scalar_chain::analyze_register_chain (bitmap
> candidates, df_ref ref)
> if (dump_file)
> fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
> DF_REF_REGNO (chain->ref), uid);
> - mark_dual_mode_def (chain->ref);
> + mark_dual_mode_def (chain->ref, chain->ref);
> }
> else
> {
> if (dump_file)
> fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
> DF_REF_REGNO (chain->ref), uid);
> - mark_dual_mode_def (ref);
> + mark_dual_mode_def (ref, chain->ref);
> }
> }
> }
> @@ -627,6 +634,24 @@ general_scalar_chain::compute_convert_gain ()
> are at the moment. */
> cost += n_integer_to_sse * ix86_cost->sse_to_integer;
>
> + /* In 32-bit mode, to convert XMM register push in DImode, we do
> + an XMM store in DImode, followed by 2 memory pushes in SImode,
In 32-bit mode, DImode XMM register push is converted to a DImode XMM
store, followed by 2 SImode pushes from memory ...
> + instead of 2 integer register pushes in SImode. To convert XM
> + register push in SImode, we do an XMM register to integer register
> + move in SImode, followed an integer register push in SImode,
> + instead of an integer register push in SImode. In 64-bit mode,
... instead of 2 SImode pushes from integer registers. SImode XMM
register push is converted to a move to an integer register, followed
by a SImode push from an integer register.
> + we do an XMM register to integer register move in SImode or DImode,
> + followed an integer register push in SImode or DImode, instead of
> + an integer register push SImode or DImode. */
... SImode or DImode XMM register push is converted to a move to an
integer register, followed by a SImode push from memory instead of a
SImode or DImode integer register push.
> + if (n_sse_push)
> + {
> + if (TARGET_64BIT || m == 1)
> + cost += n_sse_push * ix86_cost->sse_to_integer;
> + else
> + cost += n_sse_push * (ix86_cost->sse_store[sse_cost_idx]
> + + m * ix86_cost->int_load[2]);
> + }
> +
> if (dump_file)
> fprintf (dump_file, " Registers conversion cost: %d\n", cost);
>
> diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
> index ee6b10f12c1..3358015dc89 100644
> --- a/gcc/config/i386/i386-features.h
> +++ b/gcc/config/i386/i386-features.h
> @@ -159,7 +159,7 @@ class scalar_chain
> private:
> void add_insn (bitmap candidates, unsigned insn_uid);
> void analyze_register_chain (bitmap candidates, df_ref ref);
> - virtual void mark_dual_mode_def (df_ref def) = 0;
> + virtual void mark_dual_mode_def (df_ref def, df_ref ref) = 0;
> virtual void convert_insn (rtx_insn *insn) = 0;
> virtual void convert_registers () = 0;
> };
> @@ -175,7 +175,8 @@ class general_scalar_chain : public scalar_chain
> bitmap insns_conv;
> unsigned n_sse_to_integer;
> unsigned n_integer_to_sse;
> - void mark_dual_mode_def (df_ref def);
> + unsigned n_sse_push;
> + void mark_dual_mode_def (df_ref def, df_ref ref);
> void convert_insn (rtx_insn *insn);
> void convert_op (rtx *op, rtx_insn *insn);
> void convert_reg (rtx_insn *insn, rtx dst, rtx src);
> @@ -193,7 +194,7 @@ class timode_scalar_chain : public scalar_chain
> int compute_convert_gain () { return 1; }
>
> private:
> - void mark_dual_mode_def (df_ref def);
> + void mark_dual_mode_def (df_ref def, df_ref ref);
> void fix_debug_reg_uses (rtx reg);
> void convert_insn (rtx_insn *insn);
> /* We don't convert registers to difference size. */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95021-1.c
> b/gcc/testsuite/gcc.target/i386/pr95021-1.c
> new file mode 100644
> index 00000000000..b47ba332e81
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95021-1.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -msse2 -mstv -mregparm=0 -W" } */
> +/* { dg-final { scan-assembler-not "xmm" } } */
> +
> +typedef long long a;
> +struct __jmp_buf_tag {
> +};
> +typedef struct __jmp_buf_tag sigjmp_buf[1];
> +sigjmp_buf jmp_buf;
> +int __sigsetjmp (sigjmp_buf, int);
> +typedef struct {
> + a target;
> +} b;
> +extern a *target_p;
> +b *c;
> +extern void foo (a);
> +void
> +d(void)
> +{
> + if (__sigsetjmp(jmp_buf, 1)) {
> + a target = *target_p;
> + c->target = target;
> + foo(target);
> + }
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr95021-2.c
> b/gcc/testsuite/gcc.target/i386/pr95021-2.c
> new file mode 100644
> index 00000000000..b213b5db072
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95021-2.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile { target ia32 } } */
> +/* { dg-options "-O2 -msse2 -mstv -mregparm=3 -W" } */
> +/* { dg-final { scan-assembler "movq\[ \t\]+\[^\n\]*, %xmm" } } */
> +
> +typedef long long a;
> +struct __jmp_buf_tag {
> +};
> +typedef struct __jmp_buf_tag sigjmp_buf[1];
> +sigjmp_buf jmp_buf;
> +int __sigsetjmp (sigjmp_buf, int);
> +typedef struct {
> + a target;
> +} b;
> +extern a *target_p;
> +b *c;
> +extern void foo (a);
> +void
> +d(void)
> +{
> + if (__sigsetjmp(jmp_buf, 1)) {
> + a target = *target_p;
> + c->target = target;
> + foo(target);
> + }
> +}
> --
> 2.26.2
>