Hi, Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609993.html BR, Kewen > >>>> on 2023/1/16 17:08, Kewen.Lin via Gcc-patches wrote: >>>>> Hi, >>>>> >>>>> As Honza pointed out in [1], the current uses of function >>>>> optimize_function_for_speed_p in rs6000_option_override_internal >>>>> are too early, since the query results from the functions >>>>> optimize_function_for_{speed,size}_p could be changed later due >>>>> to profile feedback and some function attributes handlings etc. >>>>> >>>>> This patch is to move optimize_function_for_speed_p to all the >>>>> use places of the corresponding flags, which follows the existing >>>>> practices. Maybe we can cache it somewhere at an appropriate >>>>> timing, but that's another thing. >>>>> >>>>> Comparing with v1[2], this version added one test case for >>>>> SAVE_TOC_INDIRECT as Segher questioned and suggested, and it >>>>> also considered the possibility of explicit option (see test >>>>> cases pr108184-2.c and pr108184-4.c). I believe that excepting >>>>> for the intentional change on optimize_function_for_{speed, >>>>> size}_p, there is no other function change. >>>>> >>>>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2022-November/607527.html >>>>> [2] https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609379.html >>>>> >>>>> Bootstrapped and regtested on powerpc64-linux-gnu P8, >>>>> powerpc64le-linux-gnu P{9,10} and powerpc-ibm-aix. >>>>> >>>>> Is it ok for trunk? >>>>> >>>>> BR, >>>>> Kewen >>>>> ----- >>>>> gcc/ChangeLog: >>>>> >>>>> * config/rs6000/rs6000.cc (rs6000_option_override_internal): Remove >>>>> all optimize_function_for_speed_p uses. >>>>> (fusion_gpr_load_p): Call optimize_function_for_speed_p along >>>>> with TARGET_P8_FUSION_SIGN. >>>>> (expand_fusion_gpr_load): Likewise. >>>>> (rs6000_call_aix): Call optimize_function_for_speed_p along with >>>>> TARGET_SAVE_TOC_INDIRECT. >>>>> * config/rs6000/predicates.md (fusion_gpr_mem_load): Call >>>>> optimize_function_for_speed_p along with TARGET_P8_FUSION_SIGN. >>>>> >>>>> gcc/testsuite/ChangeLog: >>>>> >>>>> * gcc.target/powerpc/pr108184-1.c: New test. >>>>> * gcc.target/powerpc/pr108184-2.c: New test. >>>>> * gcc.target/powerpc/pr108184-3.c: New test. >>>>> * gcc.target/powerpc/pr108184-4.c: New test. >>>>> --- >>>>> gcc/config/rs6000/predicates.md | 5 +++- >>>>> gcc/config/rs6000/rs6000.cc | 19 +++++++++----- >>>>> gcc/testsuite/gcc.target/powerpc/pr108184-1.c | 16 ++++++++++++ >>>>> gcc/testsuite/gcc.target/powerpc/pr108184-2.c | 15 +++++++++++ >>>>> gcc/testsuite/gcc.target/powerpc/pr108184-3.c | 25 +++++++++++++++++++ >>>>> gcc/testsuite/gcc.target/powerpc/pr108184-4.c | 24 ++++++++++++++++++ >>>>> 6 files changed, 97 insertions(+), 7 deletions(-) >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108184-1.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108184-2.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108184-3.c >>>>> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108184-4.c >>>>> >>>>> diff --git a/gcc/config/rs6000/predicates.md >>>>> b/gcc/config/rs6000/predicates.md >>>>> index a1764018545..9f84468db84 100644 >>>>> --- a/gcc/config/rs6000/predicates.md >>>>> +++ b/gcc/config/rs6000/predicates.md >>>>> @@ -1878,7 +1878,10 @@ (define_predicate "fusion_gpr_mem_load" >>>>> >>>>> /* Handle sign/zero extend. */ >>>>> if (GET_CODE (op) == ZERO_EXTEND >>>>> - || (TARGET_P8_FUSION_SIGN && GET_CODE (op) == SIGN_EXTEND)) >>>>> + || (TARGET_P8_FUSION_SIGN >>>>> + && GET_CODE (op) == SIGN_EXTEND >>>>> + && (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN >>>>> + || optimize_function_for_speed_p (cfun)))) >>>>> { >>>>> op = XEXP (op, 0); >>>>> mode = GET_MODE (op); >>>>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >>>>> index 6ac3adcec6b..f47d21980a9 100644 >>>>> --- a/gcc/config/rs6000/rs6000.cc >>>>> +++ b/gcc/config/rs6000/rs6000.cc >>>>> @@ -3997,8 +3997,7 @@ rs6000_option_override_internal (bool global_init_p) >>>>> /* If we can shrink-wrap the TOC register save separately, then use >>>>> -msave-toc-indirect unless explicitly disabled. */ >>>>> if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0 >>>>> - && flag_shrink_wrap_separate >>>>> - && optimize_function_for_speed_p (cfun)) >>>>> + && flag_shrink_wrap_separate) >>>>> rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT; >>>>> >>>>> /* Enable power8 fusion if we are tuning for power8, even if we aren't >>>>> @@ -4032,7 +4031,6 @@ rs6000_option_override_internal (bool global_init_p) >>>>> zero extending load, and an explicit sign extension. */ >>>>> if (TARGET_P8_FUSION >>>>> && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) >>>>> - && optimize_function_for_speed_p (cfun) >>>>> && optimize >= 3) >>>>> rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; >>>>> >>>>> @@ -25690,7 +25688,10 @@ rs6000_call_aix (rtx value, rtx func_desc, rtx >>>>> tlsarg, rtx cookie) >>>>> >>>>> /* Can we optimize saving the TOC in the prologue or >>>>> do we need to do it at every call? */ >>>>> - if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca) >>>>> + if (TARGET_SAVE_TOC_INDIRECT >>>>> + && !cfun->calls_alloca >>>>> + && (rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT >>>>> + || optimize_function_for_speed_p (cfun))) >>>>> cfun->machine->save_toc_in_prologue = true; >>>>> else >>>>> { >>>>> @@ -27557,7 +27558,10 @@ fusion_gpr_load_p (rtx addis_reg, /* >>>>> register set via addis. */ >>>>> >>>>> /* Allow sign/zero extension. */ >>>>> if (GET_CODE (mem) == ZERO_EXTEND >>>>> - || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)) >>>>> + || (GET_CODE (mem) == SIGN_EXTEND >>>>> + && TARGET_P8_FUSION_SIGN >>>>> + && (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN >>>>> + || optimize_function_for_speed_p (cfun)))) >>>>> mem = XEXP (mem, 0); >>>>> >>>>> if (!MEM_P (mem)) >>>>> @@ -27621,7 +27625,10 @@ expand_fusion_gpr_load (rtx *operands) >>>>> enum rtx_code extend = UNKNOWN; >>>>> >>>>> if (GET_CODE (orig_mem) == ZERO_EXTEND >>>>> - || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND)) >>>>> + || (TARGET_P8_FUSION_SIGN >>>>> + && GET_CODE (orig_mem) == SIGN_EXTEND >>>>> + && (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN >>>>> + || optimize_function_for_speed_p (cfun)))) >>>>> { >>>>> extend = GET_CODE (orig_mem); >>>>> orig_mem = XEXP (orig_mem, 0); >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108184-1.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr108184-1.c >>>>> new file mode 100644 >>>>> index 00000000000..eae3bb9b7c4 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108184-1.c >>>>> @@ -0,0 +1,16 @@ >>>>> +/* Only possible to fuse sign extended loads with the addis when >>>>> + optimize >= 3 and Power8 fusion takes effects. -mno-prefixed >>>>> + ensures test point isn't broken (due to prefixed plha). */ >>>>> +/* { dg-options "-mdejagnu-tune=power8 -O3 -mno-prefixed" } */ >>>>> + >>>>> +/* Verify it doesn't optimize this function for speed as it's cold, >>>>> + by checking it doesn't try to fuse sign extended loads with addis, >>>>> + which results in a zero extended load and a sign extension. */ >>>>> + >>>>> +__attribute__ ((cold)) int >>>>> +fusion_short (short *p) >>>>> +{ >>>>> + return p[0x12345]; >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-assembler-not {\mextsh\M} } } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108184-2.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr108184-2.c >>>>> new file mode 100644 >>>>> index 00000000000..9f703f7fc6b >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108184-2.c >>>>> @@ -0,0 +1,15 @@ >>>>> +/* -mno-prefixed ensures test point isn't broken (due to prefixed plha). >>>>> */ >>>>> +/* { dg-options "-mdejagnu-tune=power8 -O2 -mpower8-fusion-sign >>>>> -mno-prefixed" } */ >>>>> + >>>>> +/* Verify the explicit option -mpower8-fusion-sign can still fuse >>>>> + sign extended loads with addis, which results in a zero extended >>>>> + load and a sign extension. It means the cold attribute which >>>>> + indicates to optimize for size doesn't affect. */ >>>>> + >>>>> +__attribute__ ((cold)) int >>>>> +fusion_short (short *p) >>>>> +{ >>>>> + return p[0x12345]; >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-assembler-times {\mextsh\M} 1 } } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108184-3.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr108184-3.c >>>>> new file mode 100644 >>>>> index 00000000000..ceaa96e4421 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108184-3.c >>>>> @@ -0,0 +1,25 @@ >>>>> +/* { dg-do compile { target { powerpc*-*-aix* powerpc*-*-linux* } } } */ >>>>> +/* { dg-require-effective-target fpic } */ >>>>> +/* { dg-options "-fpic -mno-pcrel -O2" } */ >>>>> + >>>>> +/* Verify it doesn't imply -msave-toc-indirect, so >>>>> + it doesn't take effect and we have two separated >>>>> + toc savings for these two long calls. */ >>>>> + >>>>> +void foo (void) __attribute__((__longcall__)); >>>>> +int baz (void) __attribute__((__longcall__)); >>>>> + >>>>> +__attribute__ ((cold)) int >>>>> +bar (void) >>>>> +{ >>>>> + foo (); >>>>> + return baz () + 1; >>>>> +} >>>>> + >>>>> +/* Linux LE */ >>>>> +/* { dg-final { scan-assembler-times {\mstd 2,24\(1\)} 2 { target >>>>> powerpc_elfv2 } } } */ >>>>> +/* Linux BE 64 bit or AIX 64 bit */ >>>>> +/* { dg-final { scan-assembler-times {\mstd 2,40\(1\)} 2 { target { lp64 >>>>> && { ! powerpc_elfv2 } } } } } */ >>>>> +/* AIX 32 bit */ >>>>> +/* { dg-final { scan-assembler-times {\mstw 2,20\(1\)} 2 { target { >>>>> ilp32 && powerpc*-*-aix* } } } } */ >>>>> + >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108184-4.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr108184-4.c >>>>> new file mode 100644 >>>>> index 00000000000..2a70a603047 >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108184-4.c >>>>> @@ -0,0 +1,24 @@ >>>>> +/* { dg-do compile { target { powerpc*-*-aix* powerpc*-*-linux* } } } */ >>>>> +/* { dg-require-effective-target fpic } */ >>>>> +/* { dg-options "-fpic -mno-pcrel -O2 -msave-toc-indirect" } */ >>>>> + >>>>> +/* Verify the explicit -msave-toc-indirect always >>>>> + takes effect, so there is only one toc saving. */ >>>>> + >>>>> +void foo (void) __attribute__((__longcall__)); >>>>> +int baz (void) __attribute__((__longcall__)); >>>>> + >>>>> +__attribute__ ((cold)) int >>>>> +bar (void) >>>>> +{ >>>>> + foo (); >>>>> + return baz () + 1; >>>>> +} >>>>> + >>>>> +/* Linux LE */ >>>>> +/* { dg-final { scan-assembler-times {\mstd 2,24\(1\)} 1 { target >>>>> powerpc_elfv2 } } } */ >>>>> +/* Linux BE 64 bit or AIX 64 bit */ >>>>> +/* { dg-final { scan-assembler-times {\mstd 2,40\(1\)} 1 { target { lp64 >>>>> && { ! powerpc_elfv2 } } } } } */ >>>>> +/* AIX 32 bit */ >>>>> +/* { dg-final { scan-assembler-times {\mstw 2,20\(1\)} 1 { target { >>>>> ilp32 && powerpc*-*-aix* } } } } */ >>>>> + >>>>> -- >>>>> 2.37.0