On Tue, May 21, 2024 at 7:13 AM liuhongt <hongtao....@intel.com> wrote: > > For CONST_VECTOR_DUPLICATE_P in constant_pool, it is just a broadcast or one of the > variants in ix86_vector_duplicate_simode_const. > Adjust the cost to COSTS_N_INSNS (2) + speed, which should be a little > bit larger than a broadcast. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? > > gcc/ChangeLog: > PR target/114428 > * config/i386/i386.cc (ix86_rtx_costs): Adjust cost for > CONST_VECTOR_DUPLICATE_P in constant_pool. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr114428.c: New test.
LGTM. Thanks, Uros. > --- > gcc/config/i386/i386-expand.cc | 2 +- > gcc/config/i386/i386-protos.h | 1 + > gcc/config/i386/i386.cc | 13 +++++++++++++ > gcc/testsuite/gcc.target/i386/pr114428.c | 18 ++++++++++++++++++ > 4 files changed, 33 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr114428.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index 4e16aedc5c1..d96c365e144 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -588,7 +588,7 @@ ix86_expand_move (machine_mode mode, rtx operands[]) > > /* OP is a memref of CONST_VECTOR, return scalar constant mem > if CONST_VECTOR is a vec_duplicate, else return NULL. */ > -static rtx > +rtx > ix86_broadcast_from_constant (machine_mode mode, rtx op) > { > int nunits = GET_MODE_NUNITS (mode); > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index dbc861fb1ea..90712769200 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -107,6 +107,7 @@ extern void ix86_expand_clear (rtx); > extern void ix86_expand_move (machine_mode, rtx[]); > extern void ix86_expand_vector_move (machine_mode, rtx[]); > extern void ix86_expand_vector_move_misalign (machine_mode, rtx[]); > +extern rtx ix86_broadcast_from_constant (machine_mode, rtx); > extern rtx ix86_fixup_binary_operands (enum rtx_code, machine_mode, > rtx[], bool = false); > extern void ix86_fixup_binary_operands_no_copy (enum rtx_code, machine_mode, > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc > index b4838b7939e..fdd9343e47a 100644 > --- a/gcc/config/i386/i386.cc > +++ b/gcc/config/i386/i386.cc > @@ -22197,6 +22197,19 @@ ix86_rtx_costs (rtx x, machine_mode mode, int > outer_code_i, int opno, > return true; > > case MEM: > + /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast. > + or variants in ix86_vector_duplicate_simode_const. 
*/ > + > + if (GET_MODE_SIZE (mode) >= 16 > + && VECTOR_MODE_P (mode) > + && SYMBOL_REF_P (XEXP (x, 0)) > + && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)) > + && ix86_broadcast_from_constant (mode, x)) > + { > + *total = COSTS_N_INSNS (2) + speed; > + return true; > + } > + > /* An insn that accesses memory is slightly more expensive > than one that does not. */ > if (speed) > diff --git a/gcc/testsuite/gcc.target/i386/pr114428.c > b/gcc/testsuite/gcc.target/i386/pr114428.c > new file mode 100644 > index 00000000000..bbbc5a080f6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr114428.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=x86-64-v3 -mno-avx512f -O2" } */ > +/* { dg-final { scan-assembler-not "vpsra[dw]" } } */ > + > +void > +foo2 (char* __restrict a, short* b) > +{ > + for (int i = 0; i != 32; i++) > + a[i] = b[i] >> (short)8; > +} > + > +void > +foo3 (char* __restrict a, short* b) > +{ > + for (int i = 0; i != 16; i++) > + a[i] = b[i] >> (short)8; > +} > + > -- > 2.31.1 >