On Thu, Feb 17, 2022 at 9:47 PM Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > The x86 backend piggy-backs on mode-switching for insertion of > vzeroupper. A recent improvement there was implemented in a way > to walk possibly the whole basic-block for all DF reg def definitions > in its mode_needed hook which is called for each instruction in > a basic-block during mode-switching local analysis. > > The following mostly reverts this improvement. It needs to be > re-done in a way more consistent with a local dataflow which > probably means making targets aware of the state of the local > dataflow analysis. > > This improves compile-time of some 538.imagick_r TU from > 362s to 16s with -Ofast -mavx2 -fprofile-generate. > > Bootstrapped and tested on x86_64-unknown-linux-gnu, OK? LGTM, have talked to H.J, he also agrees. > > Thanks, > Richard. > > 2022-02-17 Richard Biener <rguent...@suse.de> > > PR target/104581 > * config/i386/i386.cc (ix86_avx_u128_mode_source): Remove. > (ix86_avx_u128_mode_needed): Return AVX_U128_DIRTY instead > of calling ix86_avx_u128_mode_source which would eventually > have returned AVX_U128_ANY in some very special case. > > * gcc.target/i386/pr101456-1.c: XFAIL. > --- > gcc/config/i386/i386.cc | 78 +--------------------- > gcc/testsuite/gcc.target/i386/pr101456-1.c | 3 +- > 2 files changed, 5 insertions(+), 76 deletions(-) > > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index cf246e74e57..e4b42fbba6f 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -14377,80 +14377,12 @@ ix86_check_avx_upper_register (const_rtx exp) > > static void > ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) > - { > - if (ix86_check_avx_upper_register (dest)) > +{ > + if (ix86_check_avx_upper_register (dest)) > { > bool *used = (bool *) data; > *used = true; > } > - } > - > -/* For YMM/ZMM store or YMM/ZMM extract. Return mode for the source > - operand of SRC DEFs in the same basic block before INSN. */ > - > -static int > -ix86_avx_u128_mode_source (rtx_insn *insn, const_rtx src) > -{ > - basic_block bb = BLOCK_FOR_INSN (insn); > - rtx_insn *end = BB_END (bb); > - > - /* Return AVX_U128_DIRTY if there is no DEF in the same basic > - block. */ > - int status = AVX_U128_DIRTY; > - > - for (df_ref def = DF_REG_DEF_CHAIN (REGNO (src)); > - def; def = DF_REF_NEXT_REG (def)) > - if (DF_REF_BB (def) == bb) > - { > - /* Ignore DEF from different basic blocks. */ > - rtx_insn *def_insn = DF_REF_INSN (def); > - > - /* Check if DEF_INSN is before INSN. */ > - rtx_insn *next; > - for (next = NEXT_INSN (def_insn); > - next != nullptr && next != end && next != insn; > - next = NEXT_INSN (next)) > - ; > - > - /* Skip if DEF_INSN isn't before INSN. */ > - if (next != insn) > - continue; > - > - /* Return AVX_U128_DIRTY if the source operand of DEF_INSN > - isn't constant zero. */ > - > - if (CALL_P (def_insn)) > - { > - bool avx_upper_reg_found = false; > - note_stores (def_insn, > - ix86_check_avx_upper_stores, > - &avx_upper_reg_found); > - > - /* Return AVX_U128_DIRTY if call returns AVX. */ > - if (avx_upper_reg_found) > - return AVX_U128_DIRTY; > - > - continue; > - } > - > - rtx set = single_set (def_insn); > - if (!set) > - return AVX_U128_DIRTY; > - > - rtx dest = SET_DEST (set); > - > - /* Skip if DEF_INSN is not an AVX load. Return AVX_U128_DIRTY > - if the source operand isn't constant zero. */ > - if (ix86_check_avx_upper_register (dest) > - && standard_sse_constant_p (SET_SRC (set), > - GET_MODE (dest)) != 1) > - return AVX_U128_DIRTY; > - > - /* We get here only if all AVX loads are from constant zero. */ > - status = AVX_U128_ANY; > - } > - > - return status; > } > > /* Return needed mode for entity in optimize_mode_switching pass. */ > @@ -14520,11 +14452,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) > { > FOR_EACH_SUBRTX (iter, array, src, NONCONST) > if (ix86_check_avx_upper_register (*iter)) > - { > - int status = ix86_avx_u128_mode_source (insn, *iter); > - if (status == AVX_U128_DIRTY) > - return status; > - } > + return AVX_U128_DIRTY; > } > > /* This isn't YMM/ZMM load/store. */ > diff --git a/gcc/testsuite/gcc.target/i386/pr101456-1.c > b/gcc/testsuite/gcc.target/i386/pr101456-1.c > index 803fc6e0207..7fb3a3f055c 100644 > --- a/gcc/testsuite/gcc.target/i386/pr101456-1.c > +++ b/gcc/testsuite/gcc.target/i386/pr101456-1.c > @@ -30,4 +30,5 @@ foo3 (void) > bar (); > } > > -/* { dg-final { scan-assembler-not "vzeroupper" } } */ > +/* See PR104581 for the XFAIL reason. */ > +/* { dg-final { scan-assembler-not "vzeroupper" { xfail *-*-* } } } */ > -- > 2.34.1
-- BR, Hongtao