On Sun, May 25, 2025 at 7:02 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > On Sun, May 25, 2025 at 8:12 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Sun, May 25, 2025 at 7:47 AM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > > > commit ef26c151c14a87177d46fd3d725e7f82e040e89f > > > Author: Roger Sayle <ro...@nextmovesoftware.com> > > > Date: Thu Dec 23 12:33:07 2021 +0000 > > > > > > x86: PR target/103773: Fix wrong-code with -Oz from pop to memory. > > > > > > transformed "mov $0,mem" to the shorter and "$0,mem" for -Oz. But > > > > > > (define_insn "*mov<mode>_and" > > > [(set (match_operand:SWI248 0 "memory_operand" "=m") > > > (match_operand:SWI248 1 "const0_operand")) > > > (clobber (reg:CC FLAGS_REG))] > > > "reload_completed" > > > "and{<imodesuffix>}\t{%1, %0|%0, %1}" > > > [(set_attr "type" "alu1") > > > (set_attr "mode" "<MODE>") > > > (set_attr "length_immediate" "1")]) > > > > > > isn't guarded for -Oz. As a result, "and $0,mem" is generated without > > > -Oz. Enable *mov<mode>_and only for -Oz. > > > > > > gcc/ > > > > > > PR target/120427 > > > * config/i386/i386.md (*mov<mode>_and): Enable only for -Oz. > > > > > > gcc/testsuite/ > > > > > > PR target/120427 > > > * gcc.target/i386/pr120427.c: New test. > > > > > > OK for master? > > > > > > > "mov $-1,mem" has the same issue. Here is the updated patch to also > > enable "or $-1,mem" only for -Oz. > > > > OK for master? > > It doesn't work since "*mov<mode>_or" was extended from load. Here is > the v2 patch: > > 1. Add "*mov<mode>_or_store" for "or $-1,mem". > 2. Rename "*mov<mode>_or" to "*mov<mode>_or_load", replacing > nonimmediate_operand with register_operand. > 3. Enable "*mov<mode>_and" and "*mov<mode>_or_store" only for -Oz. > > Tested on x86-64.
Here is the v3 patch. Change "*mov<mode>_or" to define_insn_and_split and split it to "mov $-1,mem" if not -Oz. Don't transform "mov $-1,reg" to "push $-1; pop reg" for -Oz since it should be transformed to "or $-1,reg". Tested on x86-64. OK for master? Thanks. -- H.J.
From 01f897854a20d4b3144587eed0e3933223bbefba Mon Sep 17 00:00:00 2001 From: "H.J. Lu" <hjl.to...@gmail.com> Date: Sun, 25 May 2025 07:40:29 +0800 Subject: [PATCH v3] x86: Enable *mov<mode>_(and|or) only for -Oz commit ef26c151c14a87177d46fd3d725e7f82e040e89f Author: Roger Sayle <ro...@nextmovesoftware.com> Date: Thu Dec 23 12:33:07 2021 +0000 x86: PR target/103773: Fix wrong-code with -Oz from pop to memory. added "*mov<mode>_and" and extended "*mov<mode>_or" to transform "mov $0,mem" to the shorter "and $0,mem" and "mov $-1,mem" to the shorter "or $-1,mem" for -Oz. But the new pattern: (define_insn "*mov<mode>_and" [(set (match_operand:SWI248 0 "memory_operand" "=m") (match_operand:SWI248 1 "const0_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "and{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) and the extended pattern: (define_insn "*mov<mode>_or" [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "or{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) aren't guarded for -Oz. As a result, "and $0,mem" and "or $-1,mem" are generated without -Oz. Enable *mov<mode>_and" only for -Oz. Change "*mov<mode>_or" to define_insn_and_split and split it to "mov $-1,mem" if not -Oz. Don't transform "mov $-1,reg" to "push $-1; pop reg" for -Oz since it should be transformed to "or $-1,reg". gcc/ PR target/120427 * config/i386/i386.md (*mov<mode>_and): Enable only for -Oz. (*mov<mode>_or): Changed to define_insn_and_split. Split it to "mov $-1,mem" if not -Oz. (peephole2): Don't transform "mov $-1,reg" to "push $-1; pop reg" for -Oz since it will be transformed to "or $-1,reg". gcc/testsuite/ PR target/120427 * gcc.target/i386/cold-attribute-4.c: Compile with -Oz. * gcc.target/i386/pr120427-1.c: New test. * gcc.target/i386/pr120427-2.c: Likewise. * gcc.target/i386/pr120427-3.c: Likewise. * gcc.target/i386/pr120427-4.c: Likewise. Signed-off-by: H.J. Lu <hjl.to...@gmail.com> --- gcc/config/i386/i386.md | 11 ++++- .../gcc.target/i386/cold-attribute-4.c | 2 +- gcc/testsuite/gcc.target/i386/pr120427-1.c | 28 ++++++++++++ gcc/testsuite/gcc.target/i386/pr120427-2.c | 28 ++++++++++++ gcc/testsuite/gcc.target/i386/pr120427-3.c | 45 +++++++++++++++++++ gcc/testsuite/gcc.target/i386/pr120427-4.c | 6 +++ 6 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-2.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-3.c create mode 100644 gcc/testsuite/gcc.target/i386/pr120427-4.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b7a18d583da..266d404362d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2442,18 +2442,24 @@ (define_insn "*mov<mode>_and" [(set (match_operand:SWI248 0 "memory_operand" "=m") (match_operand:SWI248 1 "const0_operand")) (clobber (reg:CC FLAGS_REG))] - "reload_completed" + "reload_completed + && optimize_insn_for_size_p () && optimize_size > 1" "and{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) -(define_insn "*mov<mode>_or" +;; Generate shorter "or $-1,mem" for -Oz. Split it to "mov $-1,mem" +;; otherwise. +(define_insn_and_split "*mov<mode>_or" [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm") (match_operand:SWI248 1 "constm1_operand")) (clobber (reg:CC FLAGS_REG))] "reload_completed" "or{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& !(optimize_insn_for_size_p () && optimize_size > 1)" + [(set (match_dup 0) (match_dup 1))] + "" [(set_attr "type" "alu1") (set_attr "mode" "<MODE>") (set_attr "length_immediate" "1")]) @@ -2958,6 +2964,7 @@ (define_peephole2 (match_operand:SWI248 1 "const_int_operand"))] "optimize_insn_for_size_p () && optimize_size > 1 && operands[1] != const0_rtx + && operands[1] != constm1_rtx && IN_RANGE (INTVAL (operands[1]), -128, 127) && !ix86_red_zone_used && REGNO (operands[0]) != SP_REG" diff --git a/gcc/testsuite/gcc.target/i386/cold-attribute-4.c b/gcc/testsuite/gcc.target/i386/cold-attribute-4.c index 37a41e954da..e0808c53905 100644 --- a/gcc/testsuite/gcc.target/i386/cold-attribute-4.c +++ b/gcc/testsuite/gcc.target/i386/cold-attribute-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2" } */ +/* { dg-options "-Oz" } */ #include <string.h> int diff --git a/gcc/testsuite/gcc.target/i386/pr120427-1.c b/gcc/testsuite/gcc.target/i386/pr120427-1.c new file mode 100644 index 00000000000..7f1690e49b4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-1.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=sapphirerapids" } */ +/* { dg-final { scan-assembler-not "and\[lq\]?\[\\t \]+\\\$0, \[0-9\]*\\(" } } */ + +struct __pthread_mutex_s +{ + int __lock; + unsigned int __count; + int __owner; + unsigned int __nusers; + int __kind; + short __spins; + short __elision; + void *p[2]; +}; +typedef union +{ + struct __pthread_mutex_s __data; + char __size[40]; + long int __align; +} pthread_mutex_t; +typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t; +void +foo (__rtld_lock_recursive_t *lock, int i) +{ + lock[i] = (__rtld_lock_recursive_t) {{ { 0, 0, 0, 0, 1, + 0, 0, { ((void *)0) , ((void *)0) } } }}; +} diff --git a/gcc/testsuite/gcc.target/i386/pr120427-2.c b/gcc/testsuite/gcc.target/i386/pr120427-2.c new file mode 100644 index 00000000000..a380c128ccb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mtune=sapphirerapids" } */ +/* { dg-final { scan-assembler-not "or\[lq\]?\[\\t \]+\\\$-1, \[0-9\]*\\(" } } */ + +struct __pthread_mutex_s +{ + int __lock; + unsigned int __count; + int __owner; + unsigned int __nusers; + int __kind; + short __spins; + short __elision; + void *p[2]; +}; +typedef union +{ + struct __pthread_mutex_s __data; + char __size[40]; + long int __align; +} pthread_mutex_t; +typedef struct { pthread_mutex_t mutex; } __rtld_lock_recursive_t; +void +foo (__rtld_lock_recursive_t *lock, int i) +{ + lock[i] = (__rtld_lock_recursive_t) {{ { -1, -1, -1, -1, 1, + -1, -1, { ((void *)-1) , ((void *)-1) } } }}; +} diff --git a/gcc/testsuite/gcc.target/i386/pr120427-3.c b/gcc/testsuite/gcc.target/i386/pr120427-3.c new file mode 100644 index 00000000000..951cb1f5ddb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-3.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef int SItype __attribute__ ((mode (SI))); +typedef unsigned int USItype __attribute__ ((mode (SI))); +typedef unsigned int UDItype __attribute__ ((mode (DI))); +typedef UDItype __attribute__ ((__may_alias__)) bar_t; + +static inline __attribute__((__always_inline__)) SItype +bar (const bar_t **p, SItype prec) +{ + bar_t mslimb = 0; + SItype i = 20; + SItype n = ((USItype) prec) % 4; + if (n) + { + prec -= n; + if (prec == 0) + return 1; + mslimb = (*p)[i]; + } + while (mslimb == 0) + { + prec -= 4; + if (prec == 0) + return 1; + --i; + mslimb = (*p)[i]; + } + return prec; +} +UDItype +foo (const bar_t *i, SItype iprec) +{ + iprec = bar (&i, iprec); + USItype aiprec = iprec < 0 ? -iprec : iprec; + bar_t msb = *i; + UDItype mantissa = 0; + if (aiprec % 4) + msb &= ((bar_t) 1 << aiprec) - 1; + if (aiprec >= 54) + mantissa = (UDItype) msb << 32; + + return (mantissa ^ (UDItype) 0x20000000000000); +} diff --git a/gcc/testsuite/gcc.target/i386/pr120427-4.c b/gcc/testsuite/gcc.target/i386/pr120427-4.c new file mode 100644 index 00000000000..2b453b787ec --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-4.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include "cold-attribute-4.c" + +/* { dg-final { scan-assembler "movl" } } */ -- 2.49.0