On Wed, 29 Jan 2025, Christoph Müllner wrote: > The avoid-store-forwarding pass is disabled by default and therefore > at risk of bit-rotting. This patch addresses this by enabling > the pass at O2 or higher. > > The assembly patterns in `bitfield-bitint-abi-align16.c` and > `bitfield-bitint-abi-align8.c` have been updated to account for > the ASF transformations. > > This was bootstrapped on x86-64 and AArch64 and showed no > regressions in the test suite (--enable-checking=yes,extra and > all languages).
OK for GCC 16 stage1. Richard. > gcc/ChangeLog: > > * doc/invoke.texi: Document asf as an O2 enabled option. > * opts.cc: Enable asf at O2. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/bitfield-bitint-abi-align16.c: > Modify testcases to account for the asf transformations. > * gcc.target/aarch64/bitfield-bitint-abi-align8.c: Likewise. > * gcc.target/aarch64/avoid-store-forwarding-6.c: New test. > > Co-developed-by: Konstantinos Eleftheriou <konstantinos.elefther...@vrull.eu> > Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> > --- > gcc/doc/invoke.texi | 3 +- > gcc/opts.cc | 1 + > .../aarch64/avoid-store-forwarding-6.c | 29 +++++++++++++++++++ > .../aarch64/bitfield-bitint-abi-align16.c | 28 ++++++++++-------- > .../aarch64/bitfield-bitint-abi-align8.c | 28 ++++++++++-------- > 5 files changed, 64 insertions(+), 25 deletions(-) > create mode 100644 > gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index dddde54a287..52d0489ab24 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -12716,6 +12716,7 @@ also turns on the following optimization flags: > @c Please keep the following list alphabetized! > @gccoptlist{-falign-functions -falign-jumps > -falign-labels -falign-loops > +-favoid-store-forwarding > -fcaller-saves > -fcode-hoisting > -fcrossjumping > @@ -12876,7 +12877,7 @@ Many CPUs will stall for many cycles when a load > partially depends on previous > smaller stores. This pass tries to detect such cases and avoid the penalty > by > changing the order of the load and store and then fixing up the loaded value. > > -Disabled by default. > +Enabled by default at @option{-O2} and higher. 
> > @opindex ffp-contract > @item -ffp-contract=@var{style} > diff --git a/gcc/opts.cc b/gcc/opts.cc > index 23900c7b1c0..9914d20ad47 100644 > --- a/gcc/opts.cc > +++ b/gcc/opts.cc > @@ -627,6 +627,7 @@ static const struct default_options > default_options_table[] = > { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 }, > > /* -O2 and -Os optimizations. */ > + { OPT_LEVELS_2_PLUS, OPT_favoid_store_forwarding, NULL, 1 }, > { OPT_LEVELS_2_PLUS, OPT_fcaller_saves, NULL, 1 }, > { OPT_LEVELS_2_PLUS, OPT_fcode_hoisting, NULL, 1 }, > { OPT_LEVELS_2_PLUS, OPT_fcrossjumping, NULL, 1 }, > diff --git a/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c > b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c > new file mode 100644 > index 00000000000..320fa5e101e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/avoid-store-forwarding-6.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > + > +/* Same as avoid-store-forwarding-1.c but without -favoid-store-forwarding. 
> */ > + > +typedef union { > + char arr_8[8]; > + long long_value; > +} DataUnion; > + > +long ssll_1 (DataUnion *data, char x) > +{ > + data->arr_8[0] = x; > + return data->long_value; > +} > + > +long ssll_2 (DataUnion *data, char x) > +{ > + data->arr_8[1] = x; > + return data->long_value; > +} > + > +long ssll_3 (DataUnion *data, char x) > +{ > + data->arr_8[7] = x; > + return data->long_value; > +} > + > +/* { dg-final { scan-assembler-times {ldr\tx[0-9]+, > \[x[0-9]+\]\n\tstrb\tw[0-9]+, \[x[0-9]+(, \d+)?\]\n\tbfi\tx[0-9]+, x[0-9]+, > \d+, \d+} 3 } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c > b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c > index c29a230a771..b4501d81c45 100644 > --- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c > +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align16.c > @@ -91,10 +91,11 @@ > ** mov (w[0-9]+), 0 > ** bfi \3, w\2, 0, 1 > ** and x3, x\2, 9223372036854775807 > -** mov x2, 0 > +** mov (x[0-9]+), 0 > +** bfi \4, (x[0-9]+), 0, 8 > ** str xzr, \[sp\] > ** strb \3, \[sp\] > -** ldr x1, \[sp\] > +** mov \5, 0 > ** add sp, sp, 16 > ** b fp > */ > @@ -183,19 +184,21 @@ > ** sxtw (x[0-9]+), w1 > ** mov x0, \2 > ** and x7, \2, 9223372036854775807 > +** mov (x[0-9]+), 0 > ** mov (w[0-9]+), 0 > -** bfi \3, w\1, 0, 1 > +** bfi \4, w\1, 0, 1 > +** mov (x[0-9]+), \3 > +** bfi \5, (x[0-9]+), 0, 8 > +** stp x7, \5, \[sp\] > ** strb wzr, \[sp, 16\] > ** mov x6, x7 > ** mov x5, x7 > ** mov x4, x7 > -** mov x3, x7 > -** mov x2, x7 > -** str xzr, \[sp, 48\] > -** strb \3, \[sp, 48\] > -** ldr (x[0-9]+), \[sp, 48\] > -** stp x7, \4, \[sp\] > -** mov x1, x7 > +** mov \5, x7 > +** str \3, \[sp, 48\] > +** strb \4, \[sp, 48\] > +** mov \3, x7 > +** mov \6, x7 > ** bl fp_stack > ** sbfx x0, x0, 0, 63 > **... 
> @@ -343,10 +346,11 @@ > ** mov w0, w1 > ** mov (w[0-9]+), 0 > ** bfi \2, w\1, 0, 1 > -** mov x2, 0 > +** mov (x[0-9]+), 0 > +** bfi \3, (x[0-9]+), 0, 8 > ** str xzr, \[sp\] > ** strb \2, \[sp\] > -** ldr x1, \[sp\] > +** mov \4, 0 > **... > ** b fp_stdarg > */ > diff --git a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c > b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c > index 13ffbf416ca..a9ac917d3a6 100644 > --- a/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c > +++ b/gcc/testsuite/gcc.target/aarch64/bitfield-bitint-abi-align8.c > @@ -91,10 +91,11 @@ > ** mov (w[0-9]+), 0 > ** bfi \3, w\2, 0, 1 > ** and x3, x\2, 9223372036854775807 > -** mov x2, 0 > +** mov (x[0-9]+), 0 > +** bfi \4, (x[0-9]+), 0, 8 > ** str xzr, \[sp\] > ** strb \3, \[sp\] > -** ldr x1, \[sp\] > +** mov \5, 0 > ** add sp, sp, 16 > ** b fp > */ > @@ -183,19 +184,21 @@ > ** sxtw (x[0-9]+), w1 > ** mov x0, \2 > ** and x7, \2, 9223372036854775807 > +** mov (x[0-9]+), 0 > ** mov (w[0-9]+), 0 > -** bfi \3, w\1, 0, 1 > +** bfi \4, w\1, 0, 1 > +** mov (x[0-9]+), \3 > +** bfi \5, (x[0-9]+), 0, 8 > +** stp x7, \5, \[sp\] > ** strb wzr, \[sp, 16\] > ** mov x6, x7 > ** mov x5, x7 > ** mov x4, x7 > -** mov x3, x7 > -** mov x2, x7 > -** str xzr, \[sp, 48\] > -** strb \3, \[sp, 48\] > -** ldr (x[0-9]+), \[sp, 48\] > -** stp x7, \4, \[sp\] > -** mov x1, x7 > +** mov \5, x7 > +** str \3, \[sp, 48\] > +** strb \4, \[sp, 48\] > +** mov \3, x7 > +** mov \6, x7 > ** bl fp_stack > ** sbfx x0, x0, 0, 63 > **... > @@ -345,10 +348,11 @@ > ** mov w0, w1 > ** mov (w[0-9]+), 0 > ** bfi \2, w\1, 0, 1 > -** mov x2, 0 > +** mov (x[0-9]+), 0 > +** bfi \3, (x[0-9]+), 0, 8 > ** str xzr, \[sp\] > ** strb \2, \[sp\] > -** ldr x1, \[sp\] > +** mov \4, 0 > **... > ** b fp_stdarg > */ > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)