[pushed] testsuite, X86, Darwin: Skip tests with incompatible output.
Tested on x86_64-darwin, pushed to trunk, thanks Iain --- 8< --- Darwin platforms do not currently emit .cfi_xxx instructions so that these tests do not work there. gcc/testsuite/ChangeLog: * gcc.target/i386/apx-interrupt-1.c: Skip for Darwin. * gcc.target/i386/apx-push2pop2-1.c: Likewise. * gcc.target/i386/apx-push2pop2_force_drap-1.c: Likewise. Signed-off-by: Iain Sandoe --- gcc/testsuite/gcc.target/i386/apx-interrupt-1.c| 1 + gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c| 1 + gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c | 1 + 3 files changed, 3 insertions(+) diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c index dc1fc3fe373..5f732d3e316 100644 --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c @@ -1,5 +1,6 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-mapxf -m64 -O2 -mgeneral-regs-only -mno-cld -mno-push-args -maccumulate-outgoing-args" } */ +/* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ extern void foo (void *) __attribute__ ((interrupt)); extern int bar (int); diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c index c7968d674e5..089941d3726 100644 --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2-1.c @@ -1,5 +1,6 @@ /* { dg-do compile { target { ! ia32 } } } */ /* { dg-options "-O2 -mapxf" } */ +/* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ extern int bar (int); diff --git a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c index 38787990288..656ca91391a 100644 --- a/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-push2pop2_force_drap-1.c @@ -1,5 +1,6 @@ /* { dg-do compile { target { ! 
ia32 } } } */ /* { dg-options "-O2 -mapxf -mforce-drap" } */ +/* { dg-skip-if "does not emit .cfi_xxx" "*-*-darwin*" } */ #include "apx-push2pop2-1.c" -- 2.39.2 (Apple Git-143)
[pushed] testsuite, X86, Darwin: Skip a test for mcmodel=large.
Tested on x86_64-darwin, pushed to trunk, thanks Iain --- 8< --- The large model is not implemented so far for Darwin (and the codegen will be different when it is). gcc/testsuite/ChangeLog: * gcc.target/i386/large-data.c: Skip for Darwin. Signed-off-by: Iain Sandoe --- gcc/testsuite/gcc.target/i386/large-data.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.target/i386/large-data.c b/gcc/testsuite/gcc.target/i386/large-data.c index bdd4acd30b8..164b38bdf67 100644 --- a/gcc/testsuite/gcc.target/i386/large-data.c +++ b/gcc/testsuite/gcc.target/i386/large-data.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target lp64 } */ /* { dg-options "-O2 -mcmodel=large -mlarge-data-threshold=4" } */ +/* { dg-skip-if "PR90698" "*-*-darwin*" } */ /* { dg-final { scan-assembler {\.lbss} } } */ /* { dg-final { scan-assembler {\.bss} } } */ /* { dg-final { scan-assembler {\.ldata} } } */ -- 2.39.2 (Apple Git-143)
[PATCH] testsuite, x86: Handle a broken assembler.
Tested on x86_64-darwin and x86_64-linux, OK for trunk? thanks Iain --- 8< --- Earlier assembler support for complex fp16 on x86_64 Darwin is broken. This adds an additional test to the existing target-supports that fails for the broken assemblers but works for the newer, fixed, ones. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Test an asm line that fails on broken Darwin assembler versions. Signed-off-by: Iain Sandoe --- gcc/testsuite/lib/target-supports.exp | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index f0b692a2e19..61ab063afbe 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -10062,6 +10062,7 @@ proc check_effective_target_avx512fp16 { } { void foo (void) { asm volatile ("vmovw %edi, %xmm0"); + asm volatile ("vfcmulcph %xmm1, %xmm2, %xmm3{%k1}"); } } "-O2 -mavx512fp16" ] } -- 2.39.2 (Apple Git-143)
[ARC PATCH] Improved ARC rtx_costs/insn_cost for SHIFTs and ROTATEs.
This patch overhauls the ARC backend's insn_cost target hook, and makes some related improvements to rtx_costs, BRANCH_COST, etc. The primary goal is to allow the backend to indicate that shifts and rotates are slow (discouraged) when the CPU doesn't have a barrel shifter. I should also acknowledge Richard Sandiford for inspiring the use of set_cost in this rewrite of arc_insn_cost; this implementation borrows heavily for the target hooks for AArch64 and ARM. The motivating example is derived from PR rtl-optimization/110717. struct S { int a : 5; }; unsigned int foo (struct S *p) { return p->a; } With a barrel shifter, GCC -O2 generates the reasonable: foo:ldb_s r0,[r0] asl_s r0,r0,27 j_s.d [blink] asr_s r0,r0,27 What's interesting is that during combine, the middle-end actually has two shifts by three bits, and a sign-extension from QI to SI. Trying 8, 9 -> 11: 8: r158:SI=r157:QI#0<<0x3 REG_DEAD r157:QI 9: r159:SI=sign_extend(r158:SI#0) REG_DEAD r158:SI 11: r155:SI=r159:SI>>0x3 REG_DEAD r159:SI Whilst it's reasonable to simplify this to two shifts by 27 bits when the CPU has a barrel shifter, it's actually a significant pessimization when these shifts are implemented by loops. This combination can be prevented if the backend provides accurate-ish estimates for insn_cost. Previously, without a barrel shifter, GCC -O2 -mcpu=em generates: foo:ldb_s r0,[r0] mov lp_count,27 lp 2f add r0,r0,r0 nop 2: # end single insn loop mov lp_count,27 lp 2f asr r0,r0 nop 2: # end single insn loop j_s [blink] which contains two loops and requires about ~113 cycles to execute. With this patch to rtx_cost/insn_cost, GCC -O2 -mcpu=em generates: foo:ldb_s r0,[r0] mov_s r2,0;3 add3r0,r2,r0 sexb_s r0,r0 asr_s r0,r0 asr_s r0,r0 j_s.d [blink] asr_s r0,r0 which requires only ~6 cycles, for the shorter shifts by 3 and sign extension. Tested with a cross-compiler to arc-linux hosted on x86_64, with no new (compile-only) regressions from make -k check. 
Ok for mainline if this passes Claudiu's nightly testing? 2023-10-29 Roger Sayle gcc/ChangeLog * config/arc/arc.cc (arc_rtx_costs): Improve cost estimates. Provide reasonable values for SHIFTS and ROTATES by constant bit counts depending upon TARGET_BARREL_SHIFTER. (arc_insn_cost): Use insn attributes if the instruction is recognized. Avoid calling get_attr_length for type "multi", i.e. define_insn_and_split patterns without explicit type. Fall-back to set_rtx_cost for single_set and pattern_cost otherwise. * config/arc/arc.h (COSTS_N_BYTES): Define helper macro. (BRANCH_COST): Improve/correct definition. (LOGICAL_OP_NON_SHORT_CIRCUIT): Preserve previous behavior. Thanks again, Roger -- diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 353ac69..ae83e5e 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -5492,7 +5492,7 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code, case CONST: case LABEL_REF: case SYMBOL_REF: - *total = speed ? COSTS_N_INSNS (1) : COSTS_N_INSNS (4); + *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4); return true; case CONST_DOUBLE: @@ -5516,26 +5516,32 @@ arc_rtx_costs (rtx x, machine_mode mode, int outer_code, case ASHIFT: case ASHIFTRT: case LSHIFTRT: +case ROTATE: +case ROTATERT: + if (mode == DImode) + return false; if (TARGET_BARREL_SHIFTER) { - if (CONSTANT_P (XEXP (x, 0))) + *total = COSTS_N_INSNS (1); + if (CONSTANT_P (XEXP (x, 1))) { - *total += rtx_cost (XEXP (x, 1), mode, (enum rtx_code) code, + *total += rtx_cost (XEXP (x, 0), mode, (enum rtx_code) code, 0, speed); return true; } - *total = COSTS_N_INSNS (1); } else if (GET_CODE (XEXP (x, 1)) != CONST_INT) - *total = COSTS_N_INSNS (16); + *total = speed ? COSTS_N_INSNS (16) : COSTS_N_INSNS (4); else { - *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1))); - /* ??? want_to_gcse_p can throw negative shift counts at us, -and then panics when it gets a negative cost as result. -Seen for gcc.c-torture/compile/20020710-1.c -Os . 
*/ - if (*total < 0) - *total = 0; + int n = INTVAL (XEXP (x, 1)) & 31; + if (n < 4) + *total = COSTS_N_INSNS (n); + else + *total = speed ? COSTS_N_INSNS (n + 2) : COSTS_N_INSNS (4); + *total += rtx_cost (XEXP (x, 0), mode, (enum rtx_code) code, + 0, speed); + return true;
[PATCH V15 4/4] ree: Improve ree pass using defined abi interfaces
Hello Vineet, Jeff and Bernhard: This version 15 of the patch uses abi interfaces to remove zero and sign extension elimination. Bootstrapped and regtested on powerpc-linux-gnu. In this version (version 15) of the patch following review comments are incorporated. a) Removal of hard code zero_extend and sign_extend in abi interfaces. b) Source and destination with different registers are considered. c) Further enhancements. d) Added sign extension elimination using abi interfaces. d) Addressed remaining review comments from Vineet. e) Addressed review comments from Bernhard. f) Fix aarch64 regressions failure. Please let me know if there is anything missing in this patch. Ok for trunk? Thanks & Regards Ajit ree: Improve ree pass using defined abi interfaces For rs6000 target we see zero and sign extend with missing definitions. Improved to eliminate such zero and sign extension using defined ABI interfaces. 2023-10-29 Ajit Kumar Agarwal gcc/ChangeLog: * ree.cc (combine_reaching_defs): Eliminate zero_extend and sign_extend using defined abi interfaces. (add_removable_extension): Use of defined abi interfaces for no reaching defs. (abi_extension_candidate_return_reg_p): New function. (abi_extension_candidate_p): New function. (abi_extension_candidate_argno_p): New function. (abi_handle_regs): New function. (abi_target_promote_function_mode): New function. gcc/testsuite/ChangeLog: * g++.target/powerpc/zext-elim-3.C --- changes since v6: - Added missing abi interfaces. - Rearranging and restructuring the code. - Removal of hard coded zero extend and sign extend in abi interfaces. - Relaxed different registers with source and destination in abi interfaces. - Using CSE in abi interfaces. - Fix aarch64 regressions. - Add Sign extension removal in abi interfaces. - Modified comments as per coding convention. - Modified code as per coding convention. 
- Fix bug bootstrapping RISCV failures --- gcc/ree.cc| 136 +- .../g++.target/powerpc/zext-elim-3.C | 13 ++ 2 files changed, 143 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C diff --git a/gcc/ree.cc b/gcc/ree.cc index fc04249fa84..6af82093eaf 100644 --- a/gcc/ree.cc +++ b/gcc/ree.cc @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg) if (REGNO (DF_REF_REG (def)) == REGNO (reg)) break; - gcc_assert (def != NULL); + if (def == NULL) +return NULL; ref_chain = DF_REF_CHAIN (def); @@ -750,6 +751,109 @@ get_extended_src_reg (rtx src) return src; } +/* Return TRUE if target mode is equal to source mode, false otherwise. */ + +static bool +abi_target_promote_function_mode (machine_mode mode) +{ + int unsignedp; + machine_mode tgt_mode += targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp, + NULL_TREE, 1); + + return tgt_mode == mode; +} + +/* Return TRUE if regno is a return register. */ + +static inline bool +abi_extension_candidate_return_reg_p (int regno) +{ + if (targetm.calls.function_value_regno_p (regno)) +return true; + + return false; +} + +/* Return TRUE if + reg source operand is argument register and not return register, + mode of source and destination operand are different, + if not promoted REGNO of source and destination operand are the same. */ +static bool +abi_extension_candidate_p (rtx_insn *insn) +{ + rtx set = single_set (insn); + machine_mode dst_mode = GET_MODE (SET_DEST (set)); + rtx orig_src = XEXP (SET_SRC (set), 0); + + if (FUNCTION_ARG_REGNO_P (REGNO (orig_src)) + && !abi_extension_candidate_return_reg_p (REGNO (orig_src)) + && dst_mode != GET_MODE (orig_src)) + { + if (!abi_target_promote_function_mode (GET_MODE (orig_src)) + && REGNO (SET_DEST (set)) != REGNO (orig_src)) +return false; + + return true; + } + return false; +} + +/* Return TRUE if regno is an argument register. 
*/ + +static inline bool +abi_extension_candidate_argno_p (int regno) +{ + return FUNCTION_ARG_REGNO_P (regno); +} + +/* Return TRUE if the candidate insn doesn't have defs and have + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class. */ + +static bool +abi_handle_regs (rtx_insn *insn) +{ + if (side_effects_p (PATTERN (insn))) +return false; + + struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn))); + + if (!uses) +return false; + + for (df_link *use = uses; use; use = use->next) +{ + if (!use->ref) + return false; + + if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref))) + return false; + + rtx_insn *use_insn = DF_REF_INSN (use->ref); + + if (GET_CODE (PATTERN (use_insn)) == SET) + { + rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn))); + + if (GET_RTX_CLASS (code)
Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
On 28/10/23 3:55 pm, Ajit Agarwal wrote: > > > On 27/10/23 10:46 pm, Bernhard Reutner-Fischer wrote: >> On Wed, 25 Oct 2023 16:41:07 +0530 >> Ajit Agarwal wrote: >> >>> On 25/10/23 2:19 am, Vineet Gupta wrote: On 10/24/23 13:36, rep.dot@gmail.com wrote: > As said, I don't see why the below was not cleaned up before the V1 > submission. > Iff it breaks when manually CSEing, I'm curious why? >>> The function below looks identical in v12 of the patch. >>> Why didn't you use common subexpressions? >>> ba >> Using CSE here breaks aarch64 regressions hence I have reverted it back >> not to use CSE, > Just for my own education, can you please paste your patch perusing > common subexpressions and an assembly diff of the failing versus working > aarch64 testcase, along how you configured that failing (cross-?)compiler > and the command-line of a typical testcase that broke when manually > CSEing the function below? I was meaning to ask this before, but what exactly is the CSE issue, manually or whatever. >> >> If nothing else it would hopefully improve the readability. >> >>> Here is the abi interface where I CSE'D and got a mail from automated >>> regressions run that aarch64 >>> test fails. >> >> We already concluded that this failure was obviously a hiccup on the >> testers, no problem. > > Thanks. >> >>> +static inline bool >>> +abi_extension_candidate_return_reg_p (int regno) >>> +{ >>> + return targetm.calls.function_value_regno_p (regno); >>> +} >> >> But i was referring to abi_extension_candidate_p :) >> >> your v13 looks like this: >> >> +static bool >> +abi_extension_candidate_p (rtx_insn *insn) >> +{ >> + rtx set = single_set (insn); >> + machine_mode dst_mode = GET_MODE (SET_DEST (set)); >> + rtx orig_src = XEXP (SET_SRC (set), 0); >> + >> + if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src)) >> + || abi_extension_candidate_return_reg_p (REGNO (orig_src))) >> +return false; >> + >> + /* Return FALSE if mode of destination and source is same. 
*/ >> + if (dst_mode == GET_MODE (orig_src)) >> +return false; >> + >> + machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0)); >> + bool promote_p = abi_target_promote_function_mode (mode); >> + >> + /* Return FALSE if promote is false and REGNO of source and destination >> + is different. */ >> + if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src)) >> +return false; >> + >> + return true; >> +} >> >> and i suppose it would be easier to read if phrased something like >> >> static bool >> abi_extension_candidate_p (rtx_insn *insn) >> { >> rtx set = single_set (insn); >> rtx orig_src = XEXP (SET_SRC (set), 0); >> unsigned int src_regno = REGNO (orig_src); >> >> /* Not a function argument reg or is a function values return reg. */ >> if (!FUNCTION_ARG_REGNO_P (src_regno) >> || abi_extension_candidate_return_reg_p (src_regno)) >> return false; >> >> rtx dst = SET_DST (set); >> machine_mode src_mode = GET_MODE (orig_src); >> >> /* Return FALSE if mode of destination and source is the same. */ >> if (GET_MODE (dst) == src_mode) >> return false; >> >> /* Return FALSE if the FIX THE COMMENT and REGNO of source and destination >> is different. */ >> if (!abi_target_promote_function_mode_p (src_mode) >> && REGNO (dst) != src_regno) >> return false; >> >> return true; >> } >> >> so no, that's not exactly better. >> >> Maybe just do what the function comment says (i did not check the "not >> promoted" part, but you get the idea): >> >> ^L >> >> /* Return TRUE if >>reg source operand is argument register and not return register, >>mode of source and destination operand are different, >>if not promoted REGNO of source and destination operand are the same. 
*/ >> static bool >> abi_extension_candidate_p (rtx_insn *insn) >> { >> rtx set = single_set (insn); >> rtx orig_src = XEXP (SET_SRC (set), 0); >> >> if (FUNCTION_ARG_REGNO_P (REGNO (orig_src)) >> && !abi_extension_candidate_return_reg_p (REGNO (orig_src)) >> && GET_MODE (SET_DST (set)) != GET_MODE (orig_src) >> && abi_target_promote_function_mode_p (GET_MODE (orig_src)) >> && REGNO (SET_DST (set)) == REGNO (orig_src)) >> return true; >> >> return false; >> } >> >> I think this is much easier to actually read (and that's why good >> function comments are important). In the end it's not important and >> just personal preference. >> Either way, I did not check the plausibility of the logic therein. >> >>> > > Addressed in V15 of the patch. The above rearranging code breaks the logic and I have modified as follows. +/* Return TRUE if + reg source operand is argument register and not return register, + mode of source and destination operand are different, + if not promoted REGNO of source and destination operand are the same. */ +static bool +abi_extension_candidat
Re: [PATCH v9 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces
On 28/10/23 3:56 pm, Ajit Agarwal wrote: > > > On 28/10/23 4:09 am, Vineet Gupta wrote: >> >> >> On 10/27/23 10:16, Bernhard Reutner-Fischer wrote: >>> On Wed, 25 Oct 2023 16:41:07 +0530 >>> Ajit Agarwal wrote: >>> On 25/10/23 2:19 am, Vineet Gupta wrote: > On 10/24/23 13:36, rep.dot@gmail.com wrote: >> As said, I don't see why the below was not cleaned up before the V1 >> submission. >> Iff it breaks when manually CSEing, I'm curious why? The function below looks identical in v12 of the patch. Why didn't you use common subexpressions? ba >>> Using CSE here breaks aarch64 regressions hence I have reverted it back >>> not to use CSE, >> Just for my own education, can you please paste your patch perusing >> common subexpressions and an assembly diff of the failing versus working >> aarch64 testcase, along how you configured that failing >> (cross-?)compiler and the command-line of a typical testcase that broke >> when manually CSEing the function below? > I was meaning to ask this before, but what exactly is the CSE issue, > manually or whatever. >>> If nothing else it would hopefully improve the readability. >>> > Here is the abi interface where I CSE'D and got a mail from automated regressions run that aarch64 test fails. >>> We already concluded that this failure was obviously a hiccup on the >>> testers, no problem. >>> +static inline bool +abi_extension_candidate_return_reg_p (int regno) +{ + return targetm.calls.function_value_regno_p (regno); +} >>> But i was referring to abi_extension_candidate_p :) >>> >>> your v13 looks like this: >>> >>> +static bool >>> +abi_extension_candidate_p (rtx_insn *insn) >>> +{ >>> + rtx set = single_set (insn); >>> + machine_mode dst_mode = GET_MODE (SET_DEST (set)); >>> + rtx orig_src = XEXP (SET_SRC (set), 0); >>> + >>> + if (!FUNCTION_ARG_REGNO_P (REGNO (orig_src)) >>> + || abi_extension_candidate_return_reg_p (REGNO (orig_src))) >>> + return false; >>> + >>> + /* Return FALSE if mode of destination and source is same. 
*/ >>> + if (dst_mode == GET_MODE (orig_src)) >>> + return false; >>> + >>> + machine_mode mode = GET_MODE (XEXP (SET_SRC (set), 0)); >>> + bool promote_p = abi_target_promote_function_mode (mode); >>> + >>> + /* Return FALSE if promote is false and REGNO of source and destination >>> + is different. */ >>> + if (!promote_p && REGNO (SET_DEST (set)) != REGNO (orig_src)) >>> + return false; >>> + >>> + return true; >>> +} >>> >>> and i suppose it would be easier to read if phrased something like >>> >>> static bool >>> abi_extension_candidate_p (rtx_insn *insn) >>> { >>> rtx set = single_set (insn); >>> rtx orig_src = XEXP (SET_SRC (set), 0); >>> unsigned int src_regno = REGNO (orig_src); >>> >>> /* Not a function argument reg or is a function values return reg. */ >>> if (!FUNCTION_ARG_REGNO_P (src_regno) >>> || abi_extension_candidate_return_reg_p (src_regno)) >>> return false; >>> >>> rtx dst = SET_DST (set); >>> machine_mode src_mode = GET_MODE (orig_src); >>> >>> /* Return FALSE if mode of destination and source is the same. */ >>> if (GET_MODE (dst) == src_mode) >>> return false; >>> >>> /* Return FALSE if the FIX THE COMMENT and REGNO of source and >>> destination >>> is different. */ >>> if (!abi_target_promote_function_mode_p (src_mode) >>> && REGNO (dst) != src_regno) >>> return false; >>> >>> return true; >>> } >>> >>> so no, that's not exactly better. >>> >>> Maybe just do what the function comment says (i did not check the "not >>> promoted" part, but you get the idea): >>> >>> ^L >>> >>> /* Return TRUE if >>> reg source operand is argument register and not return register, >>> mode of source and destination operand are different, >>> if not promoted REGNO of source and destination operand are the same. 
>>> */ >>> static bool >>> abi_extension_candidate_p (rtx_insn *insn) >>> { >>> rtx set = single_set (insn); >>> rtx orig_src = XEXP (SET_SRC (set), 0); >>> >>> if (FUNCTION_ARG_REGNO_P (REGNO (orig_src)) >>> && !abi_extension_candidate_return_reg_p (REGNO (orig_src)) >>> && GET_MODE (SET_DST (set)) != GET_MODE (orig_src) >>> && abi_target_promote_function_mode_p (GET_MODE (orig_src)) >>> && REGNO (SET_DST (set)) == REGNO (orig_src)) >>> return true; >>> >>> return false; >>> } >> >> This may have been my doing as I asked to split out the logic as some of the >> conditions merit more commentary. >> e.g. why does the mode need to be same >> But granted this is the usual coding style in gcc and the extra comments >> could still be added before the big if >> > > Addressed in V15 of the patch, The above rearranging code breaks the logic. I have implemented as follows. +/* Return TRUE if + reg source operand is argument
[PATCH] Fix PR ada/111909 On Darwin, determine filesystem case sensitivity at runtime
This change affects only Ada. In gcc/ada/adaint.c(__gnat_get_file_names_case_sensitive), the assumption for __APPLE__ is that file names are case-insensitive unless __arm__ or __arm64__ are defined, in which case file names are declared case-sensitive. The associated comment is "By default, we suppose filesystems aren't case sensitive on Windows and Darwin (but they are on arm-darwin)." This means that on aarch64-apple-darwin, file names are declared case-sensitive, which is not normally the case (but users can set up case-sensitive volumes). It's understood that GCC does not currently support iOS/tvOS/watchOS, so we assume macOS. Bootstrapped on x86_64-apple-darwin with languages c,c++,ada and regression tested (check-gnat). Also, tested with the example from PR ada/81114, extracted into 4 volumes (APFS, APFS-case-sensitive, HFS, HFS-case-sensitive); the example code built successfully on the case-sensitive volumes. Setting GNAT_FILE_NAME_CASE_SENSITIVE successfully overrode the choices made by the new code. gcc/ada/Changelog: 2023-10-29 Simon Wright PR ada/111909 * gcc/ada/adaint.c (__gnat_get_file_names_case_sensitive): Remove the checks for __arm__, __arm64__. Split out the check for __APPLE__; remove the checks for __arm__, __arm64__, and use getattrlist(2) to determine whether the current working directory is on a case-sensitive filesystem. Signed-off-by: Simon Wright --- gcc/ada/adaint.c | 46 ++ 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/gcc/ada/adaint.c b/gcc/ada/adaint.c index 2a193efc002..43d166824b0 100644 --- a/gcc/ada/adaint.c +++ b/gcc/ada/adaint.c @@ -85,6 +85,7 @@ #if defined (__APPLE__) #include +#include #endif #if defined (__hpux__) @@ -613,11 +614,48 @@ __gnat_get_file_names_case_sensitive (void) else { /* By default, we suppose filesystems aren't case sensitive on -Windows and Darwin (but they are on arm-darwin). 
*/ -#if defined (WINNT) || defined (__DJGPP__) \ - || (defined (__APPLE__) && !(defined (__arm__) || defined (__arm64__))) +Windows or DOS. */ +#if defined (WINNT) || defined (__DJGPP__) file_names_case_sensitive_cache = 0; -#else +#elif defined (__APPLE__) + /* Determine whether the current volume is case-sensitive. */ + { + /* Formulate a query for the volume capabilities. */ + struct attrlist attrList + = {ATTR_BIT_MAP_COUNT, +0, /* reserved. */ +0, /* commonattr. */ +ATTR_VOL_INFO | ATTR_VOL_CAPABILITIES, /* volattr. */ +0, /* dirattr. */ +0, /* fileattr. */ +0/* forkattr. */ + }; + + /* A buffer to contain just the volume capabilities. */ + struct returnBuf { + u_int32_t length; + vol_capabilities_attr_t caps; + } __attribute__ ((aligned (4), packed)) retBuf; + + /* Default to case-insensitive. */ + file_names_case_sensitive_cache = 0; + + /* Query the current working directory. */ + if (getattrlist (".", +&attrList, +&retBuf, +sizeof (retBuf), +0) == 0) + /* The call succeeded. */ + if ((retBuf.caps.valid[VOL_CAPABILITIES_FORMAT] + & VOL_CAP_FMT_CASE_SENSITIVE)) + /* The volume could be case-sensitive. */ + if (retBuf.caps.capabilities[VOL_CAPABILITIES_FORMAT] + & VOL_CAP_FMT_CASE_SENSITIVE) + /* The volume is case-sensitive. */ + file_names_case_sensitive_cache = 1; + } +#else /* Neither Windows nor Apple. */ file_names_case_sensitive_cache = 1; #endif } -- 2.39.3 (Apple Git-145)
RE: [Ready to commit V3] RISC-V: Add AVL propagation PASS for RVV auto-vectorization
Should be fixed by the below PATCH, feel free to ping me if any issues. https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634616.html Pan -Original Message- From: Andreas Schwab Sent: Saturday, October 28, 2023 4:16 PM To: 钟居哲 Cc: patrick ; gcc-patches ; kito.cheng ; rdapp.gcc Subject: Re: [Ready to commit V3] RISC-V: Add AVL propagation PASS for RVV auto-vectorization ../../gcc/config/riscv/riscv-avlprop.cc: In member function 'virtual unsigned int pass_avlprop::execute(function*)': ../../gcc/config/riscv/riscv-avlprop.cc:346:23: error: loop variable 'candidate' creates a copy from type 'const std::pair' [-Werror=range-loop-construct] 346 | for (const auto candidate : m_candidates) | ^ ../../gcc/config/riscv/riscv-avlprop.cc:346:23: note: use reference type to prevent copying 346 | for (const auto candidate : m_candidates) | ^ | & -- Andreas Schwab, sch...@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different."
Re: [PATCH] tree-optimization/109334: Improve computation for access attribute
On 2023-10-28 16:29, Martin Uecker wrote: Isn't this testcase h() in builtin-dynamic-object-size-20.c? If you're referring to testcase i(), then maybe "where the size is given by a non-trivial function of a function parameter, e.g. fn (size_t n, char buf[dummy(n)])." h() is supported. For i() we would need something as __builtin_access__with_size to record the result of dummy(). But the comment refers to the simpler case: fn (size_t n, char (*buf)[n]) [[gnu::access(read_write, 2, 1)]] This doesn't work because buf[n] does not have constant size, but it could be made to work more easily because the size is directly given by a function argument. Ah, so it would have been nice to have this more detailed explanation in the comment for clarity :) Thanks, Sid
[Patch, fortran] PR104555 - ICE in gfc_compare_derived_types, at fortran/interface.cc:628 since r10-2912-g70570ec192745095
Bizarrely, since the fix for pr101625, the testcase compiles and runs correctly with s/select type (y => x)/select type (y => (x))/ ! The fix is straightforward and appears to be one of those wrinkles arising from the use of associate variables as a selector. The fault is reasonable since the expression is a reference to the _data field, which is of derived type. However, being a select type selector, the selector must be a class with that declared type. Regtests fine. OK for mainline? Paul Fortran: Fix a problem with SELECT TYPE selectors [PR104555]. 2023-10-29 Paul Thomas gcc/fortran PR fortran/104555 * resolve.cc (resolve_select_type): If the selector expression has no class component references and the expression is a derived type, copy the typespec of the symbol to that of the expression. gcc/testsuite/ PR fortran/104555 * gfortran.dg/pr104555.f90: New test. ! { dg-do compile } ! ! Test the fix for PR104555 in which the select type statement caused an ! ICE because the selector expression was type(t) rather than class(t). ! ! Contributed by Gerhard Steinmetz ! program p type t character(:), allocatable :: a end type call s(t("abcd")) call s([t("efgh")]) contains subroutine s(x) class(t) :: x(..) select rank (x) rank (0) print *, "|", x%a, "|" select type (y => x) type is (t) print *, "|", y%a, "|" end select rank (1) print *, "|", x(1)%a, "|" select type (y => x) type is (t) print *, "|", y(1)%a, "|" end select end select end end diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc index 9f4dc072645..b394f7fc79c 100644 --- a/gcc/fortran/resolve.cc +++ b/gcc/fortran/resolve.cc @@ -9578,6 +9578,12 @@ resolve_select_type (gfc_code *code, gfc_namespace *old_ns) { if (code->expr1->symtree->n.sym->attr.untyped) code->expr1->symtree->n.sym->ts = code->expr2->ts; + /* Sometimes the selector expression is given the typespec of the + '_data' field, which is logical enough but inappropriate here. 
*/ + if (code->expr2->ts.type == BT_DERIVED + && code->expr2->symtree + && code->expr2->symtree->n.sym->ts.type == BT_CLASS) + code->expr2->ts = code->expr2->symtree->n.sym->ts; selector_type = CLASS_DATA (code->expr2) ? CLASS_DATA (code->expr2)->ts.u.derived : code->expr2->ts.u.derived; }
Re: [PING][PATCH] Include safe-ctype.h after C++ standard headers, to avoid over-poisoning
Dimitry Andric writes: > Ping. It would be nice to get this QoL fix in. > Yes please - we've been using this in Gentoo since around when it was first posted. No complaints. I cannot approve but it looks good to me. > -Dimitry > >> On 28 Sep 2023, at 18:37, Dimitry Andric wrote: >> >> Ref: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111632 >> >> When building gcc's C++ sources against recent libc++, the poisoning of >> the ctype macros due to including safe-ctype.h before including C++ >> standard headers such as , , etc, causes many compilation >> errors, similar to: >> >> In file included from /home/dim/src/gcc/master/gcc/gensupport.cc:23: >> In file included from /home/dim/src/gcc/master/gcc/system.h:233: >> In file included from /usr/include/c++/v1/vector:321: >> In file included from >> /usr/include/c++/v1/__format/formatter_bool.h:20: >> In file included from >> /usr/include/c++/v1/__format/formatter_integral.h:32: >> In file included from /usr/include/c++/v1/locale:202: >> /usr/include/c++/v1/__locale:546:5: error: '__abi_tag__' attribute >> only applies to structs, variables, functions, and namespaces >> 546 | _LIBCPP_INLINE_VISIBILITY >> | ^ >> /usr/include/c++/v1/__config:813:37: note: expanded from macro >> '_LIBCPP_INLINE_VISIBILITY' >> 813 | # define _LIBCPP_INLINE_VISIBILITY _LIBCPP_HIDE_FROM_ABI >> | ^ >> /usr/include/c++/v1/__config:792:26: note: expanded from macro >> '_LIBCPP_HIDE_FROM_ABI' >> 792 | >> __attribute__((__abi_tag__(_LIBCPP_TOSTRING( >> _LIBCPP_VERSIONED_IDENTIFIER >> | ^ >> In file included from /home/dim/src/gcc/master/gcc/gensupport.cc:23: >> In file included from /home/dim/src/gcc/master/gcc/system.h:233: >> In file included from /usr/include/c++/v1/vector:321: >> In file included from >> /usr/include/c++/v1/__format/formatter_bool.h:20: >> In file included from >> /usr/include/c++/v1/__format/formatter_integral.h:32: >> In file included from /usr/include/c++/v1/locale:202: >> /usr/include/c++/v1/__locale:547:37: error: expected 
';' at end of >> declaration list >> 547 | char_type toupper(char_type __c) const >> | ^ >> /usr/include/c++/v1/__locale:553:48: error: too many arguments >> provided to function-like macro invocation >> 553 | const char_type* toupper(char_type* __low, const >> char_type* __high) const >> |^ >> /home/dim/src/gcc/master/gcc/../include/safe-ctype.h:146:9: note: >> macro 'toupper' defined here >> 146 | #define toupper(c) do_not_use_toupper_with_safe_ctype >> | ^ >> >> This is because libc++ uses different transitive includes than >> libstdc++, and some of those transitive includes pull in various ctype >> declarations (typically via ). >> >> There was already a special case for including before >> safe-ctype.h, so move the rest of the C++ standard header includes to >> the same location, to fix the problem. >> >> Signed-off-by: Dimitry Andric >> --- >> gcc/system.h | 39 ++- >> 1 file changed, 18 insertions(+), 21 deletions(-) >> >> diff --git a/gcc/system.h b/gcc/system.h >> index e924152ad4c..7a516b11438 100644 >> --- a/gcc/system.h >> +++ b/gcc/system.h >> @@ -194,27 +194,8 @@ extern int fprintf_unlocked (FILE *, const char *, ...); >> #undef fread_unlocked >> #undef fwrite_unlocked >> >> -/* Include before "safe-ctype.h" to avoid GCC poisoning >> - the ctype macros through safe-ctype.h */ >> - >> -#ifdef __cplusplus >> -#ifdef INCLUDE_STRING >> -# include >> -#endif >> -#endif >> - >> -/* There are an extraordinary number of issues with . >> - The last straw is that it varies with the locale. Use libiberty's >> - replacement instead. 
*/ >> -#include "safe-ctype.h" >> - >> -#include >> - >> -#include >> - >> -#if !defined (errno) && defined (HAVE_DECL_ERRNO) && !HAVE_DECL_ERRNO >> -extern int errno; >> -#endif >> +/* Include C++ standard headers before "safe-ctype.h" to avoid GCC >> + poisoning the ctype macros through safe-ctype.h */ >> >> #ifdef __cplusplus >> #if defined (INCLUDE_ALGORITHM) || !defined (HAVE_SWAP_IN_UTILITY) >> @@ -229,6 +210,9 @@ extern int errno; >> #ifdef INCLUDE_SET >> # include >> #endif >> +#ifdef INCLUDE_STRING >> +# include >> +#endif >> #ifdef INCLUDE_VECTOR >> # include >> #endif >> @@ -245,6 +229,19 @@ extern int errno; >> # include >> #endif >> >> +/* There are an extraordinary number of issues with . >> + The last straw is that it varies with the locale. Use libiberty's >> + replacement instead. */ >> +#include "safe-ctype.h" >> + >> +#include >> + >> +#include >> + >> +#if !defined (errno) && defined (HAVE_DECL_ERRNO) && !HAVE_DECL_ERRNO >> +extern int errno; >> +#endif >> + >> /* Some of glibc's string inlines cause warnings. Plus we'd rather >> rely on (and therefore test) GCC's string builtins. */ >> #defi
[committed] d: Merge upstream dmd, druntime e48bc0987d, phobos 2458e8f82.
Hi, This patch merges the D front-end and runtime library with upstream dmd e48bc0987d, and standard library with phobos 2458e8f82. Synchronizing with the v2.106.0-beta.1 release. D front-end changes: - Import dmd v2.106.0-beta.1. D runtime changes: - Import druntime v2.106.0-beta.1. Phobos changes: - Import phobos v2.106.0-beta.1. Bootstrapped and regression tested on x86_64-linux-gnu/-m32, and committed to mainline. Regards, Iain. --- gcc/d/ChangeLog: * dmd/MERGE: Merge upstream dmd e48bc0987d. * expr.cc (ExprVisitor::visit (NewExp *)): Update for new front-end interface. * runtime.def (NEWARRAYT): Remove. (NEWARRAYIT): Remove. libphobos/ChangeLog: * libdruntime/MERGE: Merge upstream druntime e48bc0987d. * src/MERGE: Merge upstream phobos 2458e8f82. --- gcc/d/dmd/MERGE | 2 +- gcc/d/dmd/VERSION | 2 +- gcc/d/dmd/aggregate.d | 8 +- gcc/d/dmd/aggregate.h | 8 - gcc/d/dmd/aliasthis.h | 2 +- gcc/d/dmd/attrib.h| 1 - gcc/d/dmd/canthrow.d | 2 +- gcc/d/dmd/cond.d | 2 +- gcc/d/dmd/cond.h | 2 - gcc/d/dmd/cparse.d| 17 +- gcc/d/dmd/dcast.d | 2 +- gcc/d/dmd/dclass.d| 8 +- gcc/d/dmd/declaration.d | 1 + gcc/d/dmd/declaration.h | 12 - gcc/d/dmd/denum.d | 2 +- gcc/d/dmd/dimport.d | 2 +- gcc/d/dmd/dinterpret.d| 3 + gcc/d/dmd/dmodule.d | 2 +- gcc/d/dmd/dscope.d| 2 +- gcc/d/dmd/dstruct.d | 2 +- gcc/d/dmd/dsymbol.d | 7 +- gcc/d/dmd/dsymbolsem.d| 15 +- gcc/d/dmd/dtemplate.d | 8 +- gcc/d/dmd/expression.d| 90 ++- gcc/d/dmd/expression.h| 88 +-- gcc/d/dmd/expressionsem.d | 53 gcc/d/dmd/func.d | 20 +- gcc/d/dmd/globals.h | 6 +- gcc/d/dmd/hdrgen.d| 38 ++- gcc/d/dmd/id.d| 2 + gcc/d/dmd/import.h| 1 - gcc/d/dmd/init.h | 1 - gcc/d/dmd/location.d | 2 +- gcc/d/dmd/module.h| 1 - gcc/d/dmd/mtype.d | 16 +- gcc/d/dmd/mtype.h | 12 - gcc/d/dmd/objc.h | 2 - gcc/d/dmd/scope.h | 2 - gcc/d/dmd/sideeffect.d| 4 +- gcc/d/dmd/statement.d | 6 +- gcc/d/dmd/statement.h | 4 +- gcc/d/dmd/template.h | 5 - gcc/d/dmd/tokens.d| 2 +- gcc/d/dmd/tokens.h| 3 - gcc/d/expr.cc | 20 +- gcc/d/runtime.def | 9 +- 
.../gdc.test/fail_compilation/ice10727a.d | 2 + .../gdc.test/fail_compilation/ice10727b.d | 2 + libphobos/libdruntime/MERGE | 2 +- .../core/internal/array/construction.d| 167 + .../libdruntime/core/internal/array/utils.d | 236 ++ libphobos/libdruntime/core/lifetime.d | 13 +- .../libdruntime/core/sys/freebsd/ifaddrs.d| 41 +++ .../libdruntime/core/sys/freebsd/net/if_dl.d | 42 .../libdruntime/core/sys/freebsd/sys/socket.d | 131 ++ .../libdruntime/core/sys/freebsd/sys/types.d | 58 + .../libdruntime/core/sys/posix/sys/types.d| 4 +- libphobos/libdruntime/object.d| 2 + libphobos/libdruntime/rt/lifetime.d | 26 +- libphobos/src/MERGE | 2 +- libphobos/src/std/parallelism.d | 2 +- libphobos/src/std/range/primitives.d | 10 +- libphobos/src/std/traits.d| 57 + 63 files changed, 962 insertions(+), 334 deletions(-) create mode 100644 libphobos/libdruntime/core/sys/freebsd/ifaddrs.d create mode 100644 libphobos/libdruntime/core/sys/freebsd/net/if_dl.d create mode 100644 libphobos/libdruntime/core/sys/freebsd/sys/socket.d create mode 100644 libphobos/libdruntime/core/sys/freebsd/sys/types.d diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE index bfadeaa0c68..2a0baf09a4b 100644 --- a/gcc/d/dmd/MERGE +++ b/gcc/d/dmd/MERGE @@ -1,4 +1,4 @@ -f4be7f6f7bae75f1613b862940cdd533b5ae99b2 +e48bc0987dfec35bc76a3015ee3e85906ce86dfd The first li
[PATCH 0/3] start of moving value replacement from phiopt to match
This set of 3 patches copies what is being done in value replacement and puts it into match-and-simplify form. I will be rewriting value_replacement in phiopt to use match and simplify directly in the next few months, but I thought getting these into match form earlier on could help improve code generation independently of that move. Note this does not add the absorbing_element_p optimizations yet; I filed PR 112271 to record that move. Andrew Pinski (3): MATCH: first of the value replacement moving from phiopt MATCH: Move jump_function_from_stmt support to match.pd MATCH: Add some more value_replacement simplifications to match gcc/match.pd | 53 +++ .../analyzer/inlining-3-multiline.c | 5 +- .../c-c++-common/analyzer/inlining-3.c| 3 ++ gcc/testsuite/gcc.dg/tree-ssa/cond-1.c| 17 ++ .../gcc.dg/tree-ssa/phi-opt-value-1.c | 17 ++ .../gcc.dg/tree-ssa/phi-opt-value-1a.c| 19 +++ .../gcc.dg/tree-ssa/phi-opt-value-2.c | 19 +++ .../gcc.dg/tree-ssa/phi-opt-value-3.c | 22 .../gcc.dg/tree-ssa/phi-opt-value-4.c | 36 + gcc/testsuite/gcc.dg/tree-ssa/vrp03.c | 2 +- 10 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cond-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c -- 2.39.3
[PATCH 2/3] MATCH: Move jump_function_from_stmt support to match.pd
This moves the value_replacement support for jump_function_from_stmt to a match pattern. This allows us to optimize things earlier in phiopt1 rather than waiting until phiopt2, which means phiopt1 needs to be disabled for the vrp03.c testcase. Bootstrapped and tested on x86_64-linux-gnu. gcc/ChangeLog: * match.pd (PTR == 0 ? 0 : &PTR->field): New pattern. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/vrp03.c: Disable phiopt1. * c-c++-common/analyzer/inlining-3-multiline.c: Likewise. * c-c++-common/analyzer/inlining-3.c: Likewise. * gcc.dg/tree-ssa/phi-opt-value-3.c: New testcase. --- gcc/match.pd | 21 ++ .../analyzer/inlining-3-multiline.c | 5 - .../c-c++-common/analyzer/inlining-3.c| 3 +++ .../gcc.dg/tree-ssa/phi-opt-value-3.c | 22 +++ gcc/testsuite/gcc.dg/tree-ssa/vrp03.c | 2 +- 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c diff --git a/gcc/match.pd b/gcc/match.pd index 22899c51a2f..9bc945ccada 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4159,6 +4159,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond (eq @0 integer_zerop) @1 (op@2 @1 @0)) @2)) +/* PTR == 0 ? 0 : &PTR->field -> PTR if field offset was 0. */ +(simplify + (cond (eq @0 integer_zerop) integer_zerop ADDR_EXPR@1) + (with { + poly_int64 offset; + tree res = NULL_TREE; + tree tem = @1; + if (TREE_CODE (tem) == SSA_NAME) + if (gassign *def = dyn_cast (SSA_NAME_DEF_STMT (tem))) + if (gimple_assign_rhs_code (def) == ADDR_EXPR) + tem = gimple_assign_rhs1 (def); + + if (TREE_CODE (tem) == ADDR_EXPR) + res = get_addr_base_and_unit_offset (TREE_OPERAND (tem, 0), &offset); + } + (if (res + && TREE_CODE (res) == MEM_REF + && known_eq (mem_ref_offset (res) + offset, 0) + && operand_equal_p (TREE_OPERAND (res, 0), @0)) + (convert @0 + /* Simplifications of shift and rotates. 
*/ (for rotate (lrotate rrotate) diff --git a/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c b/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c index fbd20e949b6..9741b91abee 100644 --- a/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c +++ b/gcc/testsuite/c-c++-common/analyzer/inlining-3-multiline.c @@ -3,6 +3,9 @@ /* { dg-additional-options "-O2 -fdiagnostics-show-path-depths" } */ /* { dg-additional-options "-fdiagnostics-path-format=inline-events -fdiagnostics-show-caret" } */ +/* Disable phi-opt1 because get_input_file_name gets optimized to just + `return inpf;`. */ +/* { dg-additional-options "-fdisable-tree-phiopt1" } */ #include "../../gcc.dg/analyzer/analyzer-decls.h" typedef __SIZE_TYPE__ size_t; @@ -96,4 +99,4 @@ test (const input_file *inpf) | (4) ...to here | (5) argument 1 ('') NULL where non-null expected | - { dg-end-multiline-output "" { target c++ } } */ \ No newline at end of file + { dg-end-multiline-output "" { target c++ } } */ diff --git a/gcc/testsuite/c-c++-common/analyzer/inlining-3.c b/gcc/testsuite/c-c++-common/analyzer/inlining-3.c index 0345585bed2..2b2b4858d45 100644 --- a/gcc/testsuite/c-c++-common/analyzer/inlining-3.c +++ b/gcc/testsuite/c-c++-common/analyzer/inlining-3.c @@ -2,6 +2,9 @@ after early inlining. */ /* { dg-additional-options "-O2 -fdiagnostics-show-path-depths" } */ +/* Disable phi-opt1 because get_input_file_name gets optimized to just + `return inpf;`. 
*/ +/* { dg-additional-options "-fdisable-tree-phiopt1" } */ #include "../../gcc.dg/analyzer/analyzer-decls.h" typedef __SIZE_TYPE__ size_t; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c new file mode 100644 index 000..ad55bd288b9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-3.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-optimized" } */ +struct a +{ +int b[1]; +}; + +int call1(int *a); + +int f(struct a *b) +{ + int *c = b->b; + int t = call1(c); + int *d; + if (b) d = b->b; else d = 0; + int t1 = call1(d); + return t+t1; +} + +/* There should be no if statement and 2 calls to call1. */ +/* { dg-final { scan-tree-dump-not "if " "optimized" } } */ +/* { dg-final { scan-tree-dump-times "call1 " 2 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c index 4cbaca41332..1adbf33cad3 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp03.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fno-thread-jumps" } */ +/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 -fno-thread-jumps -fdisable-tree-phiopt1" } */ struct A { -- 2.39.3
[PATCH 3/3] MATCH: Add some more value_replacement simplifications to match
This moves a few more value_replacement simplifications to match. /* a == 1 ? b : a * b -> a * b */ /* a == 1 ? b : b / a -> b / a */ /* a == -1 ? b : a & b -> a & b */ Also adds a testcase to show that we can catch these where value_replacement would not (but other passes would). Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * match.pd (`a == 1 ? b : a OP b`): New pattern. (`a == -1 ? b : a & b`): New pattern. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/phi-opt-value-4.c: New test. --- gcc/match.pd | 18 ++ .../gcc.dg/tree-ssa/phi-opt-value-4.c | 36 +++ 2 files changed, 54 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c diff --git a/gcc/match.pd b/gcc/match.pd index 9bc945ccada..6efa97cc6ae 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4159,6 +4159,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cond (eq @0 integer_zerop) @1 (op@2 @1 @0)) @2)) +/* a == 1 ? b : b / a -> b / a */ +(for op (trunc_div ceil_div floor_div round_div exact_div) + (simplify + (cond (eq @0 integer_onep) @1 (op@2 @1 @0)) + @2)) + +/* a == 1 ? b : a * b -> a * b */ +(for op (mult) + (simplify + (cond (eq @0 integer_onep) @1 (op:c@2 @1 @0)) + @2)) + +/* a == -1 ? b : a & b -> a & b */ +(for op (bit_and) + (simplify + (cond (eq @0 integer_all_onesp) @1 (op:c@2 @1 @0)) + @2)) + /* PTR == 0 ? 0 : &PTR->field -> PTR if field offset was 0. 
*/ (simplify (cond (eq @0 integer_zerop) integer_zerop ADDR_EXPR@1) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c new file mode 100644 index 000..380082cb463 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-4.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-fre3 -fdump-tree-phiopt1 -fdump-tree-optimized" } */ + +[[gnu::const]] +int constcall(int); + +int fdiv(int a, int b) +{ + int c = b/a; + int t = constcall(c); + int d; + if (a == 1) d = b; else d = c; + return constcall(d) + t; +} +int fmult(int a, int b) +{ + int c = b*a; + int t = constcall(c); + int d; + if (a == 1) d = b; else d = c; + return constcall(d) + t; +} +int fand(int a, int b) +{ + int c = b&a; + int t = constcall(c); + int d; + if (a == -1) d = b; else d = c; + return constcall(d) + t; +} + +/* Should be able to optimize away the if statements in phiopt1. */ +/* { dg-final { scan-tree-dump-not "if " "phiopt1" } } */ +/* fre3 should be optimize each function to just `return constcall(a OP b) * 2;`. */ +/* { dg-final { scan-tree-dump-times "constcall " 3 "fre3" } } */ +/* { dg-final { scan-tree-dump-times "constcall " 3 "optimized" } } */ -- 2.39.3
[PATCH 1/3] MATCH: first of the value replacement moving from phiopt
This moves a few simple patterns that are done in value replacement in phiopt over to match.pd. Just the simple ones which might show up in other code. This allows some optimizations to happen even without depending on sinking from happening and in some cases where phiopt is not invoked (cond-1.c is an example there). Changes since v1: * v2: Add an extra testcase to showcase improvements at -O1. Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * match.pd: (`a == 0 ? b : b + a`, `a == 0 ? b : b - a`): New patterns. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/cond-1.c: New test. * gcc.dg/tree-ssa/phi-opt-value-1.c: New test. * gcc.dg/tree-ssa/phi-opt-value-1a.c: New test. * gcc.dg/tree-ssa/phi-opt-value-2.c: New test. --- gcc/match.pd | 14 ++ gcc/testsuite/gcc.dg/tree-ssa/cond-1.c| 17 + .../gcc.dg/tree-ssa/phi-opt-value-1.c | 17 + .../gcc.dg/tree-ssa/phi-opt-value-1a.c| 19 +++ .../gcc.dg/tree-ssa/phi-opt-value-2.c | 19 +++ 5 files changed, 86 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cond-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c diff --git a/gcc/match.pd b/gcc/match.pd index 7d651a6582d..22899c51a2f 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -4145,6 +4145,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && (INTEGRAL_TYPE_P (TREE_TYPE (@0 (op (mult (convert:type @0) @2) @1 +/* ?: Value replacement. */ +/* a == 0 ? b : b + a -> b + a */ +(for op (plus bit_ior bit_xor) + (simplify + (cond (eq @0 integer_zerop) @1 (op:c@2 @1 @0)) + @2)) +/* a == 0 ? b : b - a -> b - a */ +/* a == 0 ? b : b ptr+ a -> b ptr+ a */ +/* a == 0 ? b : b shift/rotate a -> b shift/rotate a */ +(for op (lrotate rrotate lshift rshift minus pointer_plus) + (simplify + (cond (eq @0 integer_zerop) @1 (op@2 @1 @0)) + @2)) + /* Simplifications of shift and rotates. 
*/ (for rotate (lrotate rrotate) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cond-1.c b/gcc/testsuite/gcc.dg/tree-ssa/cond-1.c new file mode 100644 index 000..478a818b206 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cond-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized-raw" } */ + +int sub(int a, int b, int c, int d) { + int e = (a == 0); + int f = !e; + c = b; + d = b - a ; + return ((-e & c) | (-f & d)); +} + +/* In the end we end up with `(a == 0) ? (b - a) : b` + which then can be optimized to just `(b - a)`. */ + +/* { dg-final { scan-tree-dump-not "cond_expr," "optimized" } } */ +/* { dg-final { scan-tree-dump-not "eq_expr," "optimized" } } */ +/* { dg-final { scan-tree-dump-times "minus_expr," 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c new file mode 100644 index 000..a90de8926c6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* Phi-OPT should be able to optimize this without sinking being invoked. 
*/ +/* { dg-options "-O -fdump-tree-phiopt2 -fdump-tree-optimized -fno-tree-sink" } */ + +char *f(char *a, __SIZE_TYPE__ b) { + char *d = a + b; + if (b == 0) return a; + return d; +} +int sub(int a, int b, int c) { + int d = a - b; + if (b == 0) return a; + return d; +} + +/* { dg-final { scan-tree-dump-not "goto" "phiopt2" } } */ +/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c new file mode 100644 index 000..b884f94ddd2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-1a.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +[[gnu::const]] +int constcall(int); + +int f(int a, int b) +{ + int c = b+a; + int t = constcall(c); + int d; + if (a == 0) d= b; else d= c; + return constcall(d) + t; +} + +/* There should be no if statement and 2 calls to call1. */ +/* { dg-final { scan-tree-dump-not "if " "optimized" } } */ +/* { dg-final { scan-tree-dump-times "constcall " 1 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c new file mode 100644 index 000..809ccfe1479 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-value-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* Phi-OPT should be able to optimize this without sinking being invoked. */ +/* { dg-options "-O -fdump-tree-phiopt2 -fdump-tree-optimized -fno-tree-sink" } */ + +int f(int a, int b, int c) { + int d = a + b; + if (c > 5) return c; + if (a == 0) return
Re: [Patch, fortran] PR104555 - ICE in gfc_compare_derived_types, at fortran/interface.cc:628 since r10-2912-g70570ec192745095
Hi Paul, code->expr1->symtree->n.sym->ts = code->expr2->ts; + /* Sometimes the selector expression is given the typespec of the +'_data' field, which is logical enough but inappropraite here. */ s/inappropraite/inappropriate/ + if (code->expr2->ts.type == BT_DERIVED otherwise it LGTM. Thanks for the patch! Harald On 10/29/23 13:29, Paul Richard Thomas wrote: Bizarrely, since the fix for pr101625, the testcase compiles and runs correctly with s/select type (y => x)/select type (y => (x))/ ! The fix is straightforward and appears to be one of those wrinkles arising from the use of associate variables as a selector. The fault is reasonable since the expression is a reference to the _data field, which is of derived type. However, being a select type selector, the selector must be a class with that declared type. Regtests fine. OK for mainline? Paul Fortran: Fix a problem with SELECT TYPE selectors [PR104555]. 2023-10-29 Paul Thomas gcc/fortran PR fortran/104555 * resolve.cc (resolve_select_type): If the selector expression has no class component references and the expression is a derived type, copy the typespec of the symbol to that of the expression. gcc/testsuite/ PR fortran/104555 * gfortran.dg/pr104555.f90: New test.
[committed] d: Fix ICE: verify_gimple_failed (conversion of register to a different size in 'view_convert_expr') [PR110712]
Hi, This patch fixes an ICE caused by the way the D front-end generates its codegen around va_list types. Static arrays in D are passed around by value, rather than decaying to a pointer. On x86_64 __builtin_va_list is an exception to this rule, but semantically it's still treated as a static array. This makes certain assignment operations fail due to a mismatch in types. As all examples in the test program are rejected by C/C++ front-ends, these are now errors in D too to be consistent. Bootstrapped and regression tested on x86-64-linux-gnu/-m32, committed to mainline and backported to releases/gcc-12 and releases/gcc-13. Regards, Iain. --- PR d/110712 gcc/d/ChangeLog: * d-codegen.cc (d_build_call): Update call to convert_for_argument. * d-convert.cc (is_valist_parameter_type): New function. (check_valist_conversion): New function. (convert_for_assignment): Update signature. Add check whether assigning va_list is permissible. (convert_for_argument): Likewise. * d-tree.h (convert_for_assignment): Update signature. (convert_for_argument): Likewise. * expr.cc (ExprVisitor::visit (AssignExp *)): Update call to convert_for_assignment. gcc/testsuite/ChangeLog: * gdc.dg/pr110712.d: New test. --- gcc/d/d-codegen.cc | 6 +- gcc/d/d-convert.cc | 127 ++-- gcc/d/d-tree.h | 4 +- gcc/d/expr.cc | 12 +-- gcc/testsuite/gdc.dg/pr110712.d | 23 ++ 5 files changed, 139 insertions(+), 33 deletions(-) create mode 100644 gcc/testsuite/gdc.dg/pr110712.d diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc index 270cb5e2be6..5c53cf78577 100644 --- a/gcc/d/d-codegen.cc +++ b/gcc/d/d-codegen.cc @@ -2245,14 +2245,16 @@ d_build_call (TypeFunction *tf, tree callable, tree object, for (size_t i = 0; i < arguments->length; ++i) { Expression *arg = (*arguments)[i]; - tree targ = build_expr (arg); + tree targ; if (i - varargs < nparams && i >= varargs) { /* Actual arguments for declared formal arguments. 
*/ Parameter *parg = tf->parameterList[i - varargs]; - targ = convert_for_argument (targ, parg); + targ = convert_for_argument (arg, parg); } + else + targ = build_expr (arg); /* Don't pass empty aggregates by value. */ if (empty_aggregate_p (TREE_TYPE (targ)) && !TREE_ADDRESSABLE (targ) diff --git a/gcc/d/d-convert.cc b/gcc/d/d-convert.cc index 71d7a41374e..4c5375cba9a 100644 --- a/gcc/d/d-convert.cc +++ b/gcc/d/d-convert.cc @@ -694,16 +694,86 @@ convert_for_rvalue (tree expr, Type *etype, Type *totype) return result ? result : convert_expr (expr, etype, totype); } +/* Helper for convert_for_assigment and convert_for_argument. + Returns true if EXPR is a va_list static array parameter. */ + +static bool +is_valist_parameter_type (Expression *expr) +{ + Declaration *decl = NULL; + + if (VarExp *ve = expr->isVarExp ()) +decl = ve->var; + else if (SymOffExp *se = expr->isSymOffExp ()) +decl = se->var; + + if (decl != NULL && decl->isParameter () && valist_array_p (decl->type)) +return true; + + return false; +} + +/* Helper for convert_for_assigment and convert_for_argument. + Report erroneous uses of assigning or passing a va_list parameter. */ + +static void +check_valist_conversion (Expression *expr, Type *totype, bool in_assignment) +{ + /* Parameter symbol and its converted type. */ + Declaration *decl = NULL; + /* Type of parameter when evaluated in the expression. */ + Type *type = NULL; + + if (VarExp *ve = expr->isVarExp ()) +{ + decl = ve->var; + type = ve->var->type->nextOf ()->pointerTo (); +} + else if (SymOffExp *se = expr->isSymOffExp ()) +{ + decl = se->var; + type = se->var->type->nextOf ()->pointerTo ()->pointerTo (); +} + + /* Should not be called unless is_valist_parameter_type also matched. */ + gcc_assert (decl != NULL && decl->isParameter () + && valist_array_p (decl->type)); + + /* OK if conversion between types is allowed. 
*/ + if (type->implicitConvTo (totype) != MATCH::nomatch) +return; + + if (in_assignment) +{ + error_at (make_location_t (expr->loc), "cannot convert parameter %qs " + "from type %qs to type %qs in assignment", + expr->toChars(), type->toChars (), totype->toChars ()); +} + else +{ + error_at (make_location_t (expr->loc), "cannot convert parameter %qs " + "from type %qs to type %qs in argument passing", + expr->toChars(), type->toChars (), totype->toChars ()); +} + + inform (make_location_t (decl->loc), "parameters of type % " + "{aka %qs} are decayed to pointer types, and require % " + "to be converted back into a static array type
[committed][_GLIBCXX_INLINE_VERSION] Fix contract violation
This fixes handle_contract_violation under versioned namespace mode. Tested under Linux x64 and confirmed to also fix Darwin build. libstdc++: [_GLIBCXX_INLINE_VERSION] Provide handle_contract_violation symbol libstdc++-v3/ChangeLog: * src/experimental/contract.cc [_GLIBCXX_INLINE_VERSION](handle_contract_violation): Provide symbol without version namespace decoration for gcc. François diff --git a/libstdc++-v3/src/experimental/contract.cc b/libstdc++-v3/src/experimental/contract.cc index 504a6c041f1..d550b49c4eb 100644 --- a/libstdc++-v3/src/experimental/contract.cc +++ b/libstdc++-v3/src/experimental/contract.cc @@ -67,3 +67,11 @@ handle_contract_violation (const std::experimental::contract_violation &violatio std::cerr << std::endl; #endif } + +#if _GLIBCXX_INLINE_VERSION +// Provide symbol without version namespace decoration for gcc. +extern "C" __attribute__ ((weak)) void +_Z25handle_contract_violationRKNSt12experimental18contract_violationE +(const std::experimental::contract_violation &violation) +{ handle_contract_violation(violation); } +#endif
[committed][_GLIBCXX_INLINE_VERSION] Add emul TLS symbol exports
libstdc++: [_GLIBCXX_INLINE_VERSION] Add emul TLS symbols libstdc++-v3/ChangeLog: * config/abi/pre/gnu-versioned-namespace.ver: Add missing emul TLS symbols. François diff --git a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver index 9fab8bead15..3140a9628d8 100644 --- a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver +++ b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver @@ -78,6 +78,8 @@ GLIBCXX_8.0 { # thread/mutex/condition_variable/future __once_proxy; +__emutls_v._ZSt3__811__once_call; +__emutls_v._ZSt3__815__once_callable; # std::__convert_to_v _ZNSt3__814__convert_to_v*;
Re: [PATCH v2 1/2] riscv: thead: Add support for the XTheadMemIdx ISA extension
On 10/20/23 03:53, Christoph Muellner wrote: From: Christoph Müllner The XTheadMemIdx ISA extension provides additional load and store instructions with new addressing modes. The following memory access types are supported: * load: b,bu,h,hu,w,wu,d * store: b,h,w,d The following addressing modes are supported: * immediate offset with PRE_MODIFY or POST_MODIFY (22 instructions): l.ia, l.ib, s.ia, s.ib * register offset with additional immediate offset (11 instructions): lr, sr * zero-extended register offset with additional immediate offset (11 instructions): lur, sur The RISC-V base ISA does not support index registers, so the changes are kept separate from the RISC-V standard support as much as possible. To combine the shift/multiply instructions into the memory access instructions, this patch comes with a few insn_and_split optimizations that allow the combiner to do this task. Handling the different cases of extensions results in a couple of INSNs that look redundant on first view, but they are just the equivalent of what we already have for Zbb as well. The only difference is that we have many more load instructions. We already have a constraint with the name 'th_f_fmv', therefore, the new constraints follow this pattern and have the same length as required ('th_m_mia', 'th_m_mib', 'th_m_mir', 'th_m_miu'). The added tests ensure that this feature won't regress without notice. Testing: GCC regression test suite, GCC bootstrap build, and SPEC CPU 2017 intrate (base&peak) on C920. Signed-off-by: Christoph Müllner gcc/ChangeLog: * config/riscv/constraints.md (th_m_mia): New constraint. (th_m_mib): Likewise. (th_m_mir): Likewise. (th_m_miu): Likewise. * config/riscv/riscv-protos.h (enum riscv_address_type): Add new address types ADDRESS_REG_REG, ADDRESS_REG_UREG, and ADDRESS_REG_WB and their documentation. (struct riscv_address_info): Add new field 'shift' and document the field usage for the new address types. (riscv_valid_base_register_p): New prototype. 
(th_memidx_legitimate_modify_p): Likewise. (th_memidx_legitimate_index_p): Likewise. (th_classify_address): Likewise. (th_output_move): Likewise. (th_print_operand_address): Likewise. * config/riscv/riscv.cc (riscv_index_reg_class): Return GR_REGS for XTheadMemIdx. (riscv_regno_ok_for_index_p): Add support for XTheadMemIdx. (riscv_classify_address): Call th_classify_address() on top. (riscv_output_move): Call th_output_move() on top. (riscv_print_operand_address): Call th_print_operand_address() on top. * config/riscv/riscv.h (HAVE_POST_MODIFY_DISP): New macro. (HAVE_PRE_MODIFY_DISP): Likewise. * config/riscv/riscv.md (zero_extendqi2): Disable for XTheadMemIdx. (*zero_extendqi2_internal): Convert to expand, create INSN with same name and disable it for XTheadMemIdx. (extendsidi2): Likewise. (*extendsidi2_internal): Disable for XTheadMemIdx. * config/riscv/thead.cc (valid_signed_immediate): New helper function. (th_memidx_classify_address_modify): New function. (th_memidx_legitimate_modify_p): Likewise. (th_memidx_output_modify): Likewise. (is_memidx_mode): Likewise. (th_memidx_classify_address_index): Likewise. (th_memidx_legitimate_index_p): Likewise. (th_memidx_output_index): Likewise. (th_classify_address): Likewise. (th_output_move): Likewise. (th_print_operand_address): Likewise. * config/riscv/thead.md (*th_memidx_operand): New splitter. (*th_memidx_zero_extendqi2): New INSN. (*th_memidx_extendsidi2): Likewise. (*th_memidx_zero_extendsidi2): Likewise. (*th_memidx_zero_extendhi2): Likewise. (*th_memidx_extend2): Likewise. (*th_memidx_bb_zero_extendsidi2): Likewise. (*th_memidx_bb_zero_extendhi2): Likewise. (*th_memidx_bb_extendhi2): Likewise. (*th_memidx_bb_extendqi2): Likewise. (TH_M_ANYI): New mode iterator. (TH_M_NOEXTI): Likewise. (*th_memidx_I_a): New combiner optimization. (*th_memidx_I_b): Likewise. (*th_memidx_I_c): Likewise. (*th_memidx_US_a): Likewise. (*th_memidx_US_b): Likewise. (*th_memidx_US_c): Likewise. (*th_memidx_UZ_a): Likewise. 
(*th_memidx_UZ_b): Likewise. (*th_memidx_UZ_c): Likewise. gcc/testsuite/ChangeLog: * gcc.target/riscv/xtheadmemidx-helpers.h: New test. * gcc.target/riscv/xtheadmemidx-index-update.c: New test. * gcc.target/riscv/xtheadmemidx-index-xtheadbb-update.c: New test. * gcc.target/riscv/xtheadmemidx-index-xtheadbb.c: New test. * gcc.target/riscv/xtheadmemidx-index.c: New test. * gcc.target/riscv/xtheadmemidx-modify-xtheadbb.c: New test.
Re: [PATCH v2 2/2] riscv: thead: Add support for the XTheadFMemIdx ISA extension
On 10/20/23 03:53, Christoph Muellner wrote: From: Christoph Müllner The XTheadFMemIdx ISA extension provides additional load and store instructions for floating-point registers with new addressing modes. The following memory accesses types are supported: * load/store: [w,d] (single-precision FP, double-precision FP) The following addressing modes are supported: * register offset with additional immediate offset (4 instructions): flr, fsr * zero-extended register offset with additional immediate offset (4 instructions): flur, fsur These addressing modes are also part of the similar XTheadMemIdx ISA extension support, whose code is reused and extended to support floating-point registers. One challenge that this patch needs to solve are GP registers in FP-mode (e.g. "(reg:DF a2)"), which cannot be handled by the XTheadFMemIdx instructions. Such registers are the result of independent optimizations, which can happen after register allocation. This patch uses a simple but efficient method to address this: add a dependency for XTheadMemIdx to XTheadFMemIdx optimizations. This allows to use the instructions from XTheadMemIdx in case of such registers. Or alternately define secondary reloads so that you can get a scratch register to reload the address into a GPR. Your call on whether or not to try to implement that. I guess it largely depends on how likely it is you'll have one extension defined, but not the other. The added tests ensure that this feature won't regress without notice. Testing: GCC regression test suite and SPEC CPU 2017 intrate (base&peak). Signed-off-by: Christoph Müllner gcc/ChangeLog: * config/riscv/riscv.cc (riscv_index_reg_class): Return GR_REGS for XTheadFMemIdx. (riscv_regno_ok_for_index_p): Add support for XTheadFMemIdx. * config/riscv/riscv.h (HARDFP_REG_P): New macro. * config/riscv/thead.cc (is_fmemidx_mode): New function. (th_memidx_classify_address_index): Add support for XTheadFMemIdx. (th_fmemidx_output_index): New function. 
(th_output_move): Add support for XTheadFMemIdx. * config/riscv/thead.md (TH_M_ANYF): New mode iterator. (TH_M_NOEXTF): Likewise. (*th_fmemidx_movsf_hardfloat): New INSN. (*th_fmemidx_movdf_hardfloat_rv64): Likewise. (*th_fmemidx_I_a): Likewise. (*th_fmemidx_I_c): Likewise. (*th_fmemidx_US_a): Likewise. (*th_fmemidx_US_c): Likewise. (*th_fmemidx_UZ_a): Likewise. (*th_fmemidx_UZ_c): Likewise. gcc/testsuite/ChangeLog: * gcc.target/riscv/xtheadfmemidx-index-update.c: New test. * gcc.target/riscv/xtheadfmemidx-index-xtheadbb-update.c: New test. * gcc.target/riscv/xtheadfmemidx-index-xtheadbb.c: New test. * gcc.target/riscv/xtheadfmemidx-index.c: New test. * gcc.target/riscv/xtheadfmemidx-uindex-update.c: New test. * gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb-update.c: New test. * gcc.target/riscv/xtheadfmemidx-uindex-xtheadbb.c: New test. * gcc.target/riscv/xtheadfmemidx-uindex.c: New test. --- Same note as with the prior patch WRT wrapping assembly instructions when using scan-assembler. diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index eb162abcb92..1e9813b4f39 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -372,6 +372,8 @@ ASM_MISA_SPEC ((unsigned int) ((int) (REGNO) - GP_REG_FIRST) < GP_REG_NUM) #define FP_REG_P(REGNO) \ ((unsigned int) ((int) (REGNO) - FP_REG_FIRST) < FP_REG_NUM) +#define HARDFP_REG_P(REGNO) \ + ((REGNO) >= FP_REG_FIRST && (REGNO) <= FP_REG_LAST) #define V_REG_P(REGNO) \ ((unsigned int) ((int) (REGNO) - V_REG_FIRST) < V_REG_NUM) #define VL_REG_P(REGNO) ((REGNO) == VL_REGNUM) @@ -755,6 +768,40 @@ th_memidx_output_index (rtx x, machine_mode mode, bool load) return buf; } +/* Provide a buffer for a th.flX/th.fluX/th.fsX/th.fsuX instruction + for the given MODE. If LOAD is true, a load instruction will be + provided (otherwise, a store instruction). If X is not suitable + return NULL. 
*/ + +static const char * +th_fmemidx_output_index (rtx x, machine_mode mode, bool load) +{ + struct riscv_address_info info; + static char buf[128] = {0}; Same comment WRT static buffers as in the previous patch. OK for the trunk after fixing the testcases and potentially adjusting the static buffer. No need to get another review round, post for the archiver and commit. jeff
Re: [PATCH v4 1/1] gcc: config: microblaze: fix cpu version check
On 10/26/23 13:37, Neal Frager wrote: The MICROBLAZE_VERSION_COMPARE was incorrectly using strcasecmp instead of strverscmp to check the mcpu version against feature options. By simply changing the define to use strverscmp, the new version 10.0 is treated correctly as a higher version than previous versions. Signed-off-by: Neal Frager --- V1->V2: - No need to create a new microblaze specific version check routine as strverscmp is the correct solution. V2->V3: - Changed mcpu define for microblaze isa testsuite examples. V3->V4: - Added ChangeLog --- gcc/ChangeLog | 4 gcc/config/microblaze/microblaze.cc| 2 +- gcc/testsuite/gcc.target/microblaze/isa/bshift.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/div.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp4.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcvt.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/float.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fsqrt.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/mul-bshift-pcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mul-bshift.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mul.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mulh-bshift-pcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mulh.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/nofcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/nofloat.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/pcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/vanilla.c | 2 +- gcc/testsuite/gcc.target/microblaze/microblaze.exp | 2 +- 21 files changed, 24 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d50cd42a7d4..d5fee35bda4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2023-10-26 Neal Frager + + * config/microblaze/microblaze.cc: Fix mcpu version check. gcc/testsuite/ChangeLog? 
+ 2023-10-25 Iain Sandoe * config/darwin.cc (darwin_override_options): Handle fPIE. diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc index c9f6c4198cf..60ad55120d2 100644 --- a/gcc/config/microblaze/microblaze.cc +++ b/gcc/config/microblaze/microblaze.cc @@ -56,7 +56,7 @@ /* This file should be included last. */ #include "target-def.h" -#define MICROBLAZE_VERSION_COMPARE(VA,VB) strcasecmp (VA, VB) +#define MICROBLAZE_VERSION_COMPARE(VA,VB) strverscmp (VA, VB) /* Classifies an address. diff --git a/gcc/testsuite/gcc.target/microblaze/isa/bshift.c b/gcc/testsuite/gcc.target/microblaze/isa/bshift.c index 64cf1e2e59e..664586bff9f 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/bshift.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/bshift.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mxl-barrel-shift" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mxl-barrel-shift" } */ volatile int m1, m2, m3; volatile unsigned int u1, u2, u3; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/div.c b/gcc/testsuite/gcc.target/microblaze/isa/div.c index 25ee42ce5c8..783e7c0f684 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/div.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/div.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mno-xl-soft-div" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mno-xl-soft-div" } */ volatile int m1, m2, m3; volatile long l1, l2; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c b/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c index 4041a241391..b6202e168d6 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mhard-float" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mhard-float" } */ volatile float f1, f2, f3; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c b/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c index 3902b839db9..4386c6e6cc3 100644 --- 
a/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mhard-float" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mhard-float" } */ volatile float f1, f2, f3; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c b/gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c index 8555974dda5..b414e48fe1b 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mhard-float" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mhard-float" } */ vo
Re: Enable top-level recursive 'autoreconf'
> From: Thomas Schwinge > Date: Thu, 19 Oct 2023 12:42:26 +0200 > It's just GCC and Binutils/GDB, or are the top-level files also shared > with additional projects? Not sure if that counts as "shared", but I regularly drop in* newlib to build simulator targets (*-elf, *-newabi). That's git://sourceware.org/git/newlib-cygwin.git but I extract tarballs for the merging. "Drop in" is actually the other way round, starting with a newlib tarball, so gcc files overwrite newlib ones. (FWIW, I never drop in binutils like that; they're better built separately. "Better" for your long-term sanity.) brgds, H-P
[PATCH v2] RISC-V: elide unnecessary sign extend when expanding cmp_and_jump
RV64 compare and branch instructions only support 64-bit operands. At Expand time, the backend conservatively zero/sign extends its operands even if not needed, such as incoming 32-bit function args which ABI/ISA guarantee to be sign-extended already. And subsequently REE fails to eliminate them as "missing definition(s)" or "multiple definition(s)" since function args don't have explicit definition. So during expand riscv_extend_comparands (), if an operand is a subreg-promoted SI with inner DI, which is representative of a function arg, just peel away the subreg to expose the DI, eliding the sign extension. As Jeff noted this routine is also used in if-conversion so also helps there. Note there are currently patches floating around to improve REE and also a new pass to eliminate unnecessary extensions, but it is still beneficial to not generate those extra extensions in the first place. It is obviously less work for post-reload passes such as REE, but even for earlier passes, such as combine, having to deal with one less thing and ensuing fewer combinations is a win too. Way too many existing tests used to observe this issue. e.g. gcc.c-torture/compile/20190827-1.c -O2 -march=rv64gc It eliminates the SEXT.W Tested with rv64gc with no regressions, I'm relying on Patrick's pre-commit CI to do the full testing. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_sign_extend_if_not_subreg_prom): New. * (riscv_extend_comparands): Call New function on operands. 
Signed-off-by: Vineet Gupta --- Changes since v1: - Elide sign extension for 32-bit operarnds only - Apply elison for both arguments --- gcc/config/riscv/riscv.cc | 23 +-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f2dcb0db6fbd..3af834f92977 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3678,6 +3678,24 @@ riscv_zero_if_equal (rtx cmp0, rtx cmp1) cmp0, cmp1, 0, 0, OPTAB_DIRECT); } +/* Helper function for riscv_extend_comparands to Sign-extend the OP. + However if the OP is SI subreg promoted with an inner DI, such as + (subreg/s/v:SI (reg/v:DI 150 [ xx ]) 0) + just peel off the SUBREG to get DI, avoiding extraneous extension. */ + +static void +riscv_sign_extend_if_not_subreg_prom (rtx *op) +{ + if (GET_MODE(*op) == SImode + && GET_CODE (*op) == SUBREG + && SUBREG_PROMOTED_VAR_P (*op) + && GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant () +== GET_MODE_SIZE (word_mode)) +*op = XEXP (*op, 0); + else +*op = gen_rtx_SIGN_EXTEND (word_mode, *op); +} + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ static void @@ -3707,9 +3725,10 @@ riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1) } else { - *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0); + riscv_sign_extend_if_not_subreg_prom(op0); + if (*op1 != const0_rtx) - *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1); + riscv_sign_extend_if_not_subreg_prom(op1); } } } -- 2.34.1
Re: [ARC PATCH] Convert (signed<<31)>>31 to -(signed&1) without barrel shifter.
On 10/28/23 10:47, Roger Sayle wrote: This patch optimizes PR middle-end/101955 for the ARC backend. On ARC CPUs with a barrel shifter, using two shifts is (probably) optimal as: asl_s r0,r0,31 asr_s r0,r0,31 but without a barrel shifter, GCC -O2 -mcpu=em currently generates: and r2,r0,1 ror r2,r2 add.f 0,r2,r2 sbc r0,r0,r0 with this patch, we now generate the smaller, faster and non-flags clobbering: bmsk_s r0,r0,0 neg_s r0,r0 Tested with a cross-compiler to arc-linux hosted on x86_64, with no new (compile-only) regressions from make -k check. Ok for mainline if this passes Claudiu's nightly testing? 2023-10-28 Roger Sayle gcc/ChangeLog PR middle-end/101955 * config/arc/arc.md (*extvsi_1_0): New define_insn_and_split to convert sign extract of the least significant bit into an AND $1 then a NEG when !TARGET_BARREL_SHIFTER. gcc/testsuite/ChangeLog PR middle-end/101955 * gcc.target/arc/pr101955.c: New test case. Good catch. Looking to do something very similar on the H8 based on your work here. On the H8 we can use bld to load a bit from an 8 bit register into the C flag. Then we use subtract with carry to get an 8 bit 0/-1 which we can then sign extend to 16 or 32 bits. That covers bit positions 0..15 of an SImode input. For bits 16..31 we can move the high half into the low half, then use the bld sequence. For bit zero the and+neg is the same number of clocks and size as the bld-based sequence. But it'll simulate faster, so it's special cased. Jeff
Re: [PATCH, expand] Checking available optabs for scalar modes in by pieces operations
Committed as r14-5001. Thanks Gui Haochen 在 2023/10/27 17:29, Richard Sandiford 写道: > HAO CHEN GUI writes: >> Hi, >> This patch checks available optabs for scalar modes used in by >> pieces operations. It fixes the regression cases caused by previous >> patch. Now both scalar and vector modes are examined by the same >> approach. >> >> Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no >> regressions. Is this OK for trunk? >> >> Thanks >> Gui Haochen >> >> >> ChangeLog >> Expand: Checking available optabs for scalar modes in by pieces operations >> >> The former patch (f08ca5903c7) examines the scalar modes by target >> hook scalar_mode_supported_p. It causes some i386 regression cases >> as XImode and OImode are not enabled in i386 target function. This >> patch examines the scalar mode by checking if the corresponding optabs >> are available for the mode. >> >> gcc/ >> PR target/111449 >> * expr.cc (qi_vector_mode_supported_p): Rename to... >> (by_pieces_mode_supported_p): ...this, and extends it to do >> the checking for both scalar and vector mode. >> (widest_fixed_size_mode_for_size): Call >> by_pieces_mode_supported_p to examine the mode. >> (op_by_pieces_d::smallest_fixed_size_mode_for_size): Likewise. > > OK, thanks. > > Richard > >> patch.diff >> diff --git a/gcc/expr.cc b/gcc/expr.cc >> index 7aac575eff8..2af9fcbed18 100644 >> --- a/gcc/expr.cc >> +++ b/gcc/expr.cc >> @@ -1000,18 +1000,21 @@ can_use_qi_vectors (by_pieces_operation op) >> /* Return true if optabs exists for the mode and certain by pieces >> operations. 
*/ >> static bool >> -qi_vector_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) >> +by_pieces_mode_supported_p (fixed_size_mode mode, by_pieces_operation op) >> { >> + if (optab_handler (mov_optab, mode) == CODE_FOR_nothing) >> +return false; >> + >>if ((op == SET_BY_PIECES || op == CLEAR_BY_PIECES) >> - && optab_handler (vec_duplicate_optab, mode) != CODE_FOR_nothing) >> -return true; >> + && VECTOR_MODE_P (mode) >> + && optab_handler (vec_duplicate_optab, mode) == CODE_FOR_nothing) >> +return false; >> >>if (op == COMPARE_BY_PIECES >> - && optab_handler (mov_optab, mode) != CODE_FOR_nothing >> - && can_compare_p (EQ, mode, ccp_jump)) >> -return true; >> + && !can_compare_p (EQ, mode, ccp_jump)) >> +return false; >> >> - return false; >> + return true; >> } >> >> /* Return the widest mode that can be used to perform part of an >> @@ -1035,7 +1038,7 @@ widest_fixed_size_mode_for_size (unsigned int size, >> by_pieces_operation op) >>{ >> if (GET_MODE_SIZE (candidate) >= size) >>break; >> -if (qi_vector_mode_supported_p (candidate, op)) >> +if (by_pieces_mode_supported_p (candidate, op)) >>result = candidate; >>} >> >> @@ -1049,7 +1052,7 @@ widest_fixed_size_mode_for_size (unsigned int size, >> by_pieces_operation op) >> { >>mode = tmode.require (); >>if (GET_MODE_SIZE (mode) < size >> - && targetm.scalar_mode_supported_p (mode)) >> + && by_pieces_mode_supported_p (mode, op)) >>result = mode; >> } >> >> @@ -1454,7 +1457,7 @@ op_by_pieces_d::smallest_fixed_size_mode_for_size >> (unsigned int size) >>break; >> >> if (GET_MODE_SIZE (candidate) >= size >> -&& qi_vector_mode_supported_p (candidate, m_op)) >> +&& by_pieces_mode_supported_p (candidate, m_op)) >>return candidate; >>} >> }
Re: [PATCH v2] RISC-V: elide unnecessary sign extend when expanding cmp_and_jump
On 10/29/23 19:04, Vineet Gupta wrote: RV64 compare and branch instructions only support 64-bit operands. At Expand time, the backend conservatively zero/sign extends its operands even if not needed, such as incoming 32-bit function args which ABI/ISA guarantee to be sign-extended already. And subsequently REE fails to eliminate them as "missing defintion(s)" or "multiple definition(s) since function args don't have explicit definition. So during expand riscv_extend_comparands (), if an operand is a subreg-promoted SI with inner DI, which is representative of a function arg, just peel away the subreg to expose the DI, eliding the sign extension. As Jeff noted this routine is also used in if-conversion so also helps there. Note there's currently patches floating around to improve REE and also a new pass to eliminate unneccesary extensions, but it is still beneficial to not generate those extra extensions in first place. It is obviously less work for post-reload passes such as REE, but even for earlier passes, such as combine, having to deal with one less thing and ensuing fewer combinations is a win too. Way too many existing tests used to observe this issue. e.g. gcc.c-torture/compile/20190827-1.c -O2 -march=rv64gc It elimiates the SEXT.W Tested with rv64gc with no regressions, I'm relying on PAtrick's pre-commit CI to do the full testing. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_sign_extend_if_not_subreg_prom): New. * (riscv_extend_comparands): Call New function on operands. 
Signed-off-by: Vineet Gupta --- Changes since v1: - Elide sign extension for 32-bit operarnds only - Apply elison for both arguments --- gcc/config/riscv/riscv.cc | 23 +-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f2dcb0db6fbd..3af834f92977 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3678,6 +3678,24 @@ riscv_zero_if_equal (rtx cmp0, rtx cmp1) cmp0, cmp1, 0, 0, OPTAB_DIRECT); } +/* Helper function for riscv_extend_comparands to Sign-extend the OP. + However if the OP is SI subreg promoted with an inner DI, such as + (subreg/s/v:SI (reg/v:DI 150 [ xx ]) 0) + just peel off the SUBREG to get DI, avoiding extraneous extension. */ + +static void +riscv_sign_extend_if_not_subreg_prom (rtx *op) +{ + if (GET_MODE(*op) == SImode Weird, this is flagged in pre-commit CI, but contrib scripts think this is ok contrib/gcc-changelog/git_check_commit.py Checking 3d9823e2fb1c1f99bb875bffd999ab8dafd53a50: OK + && GET_CODE (*op) == SUBREG + && SUBREG_PROMOTED_VAR_P (*op) + && GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant () +== GET_MODE_SIZE (word_mode)) +*op = XEXP (*op, 0); + else +*op = gen_rtx_SIGN_EXTEND (word_mode, *op); +} + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ static void @@ -3707,9 +3725,10 @@ riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1) } else { - *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0); + riscv_sign_extend_if_not_subreg_prom(op0); + if (*op1 != const0_rtx) - *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1); + riscv_sign_extend_if_not_subreg_prom(op1); } } }
[PATCH v3] RISC-V: elide unnecessary sign extend when expanding cmp_and_jump
RV64 compare and branch instructions only support 64-bit operands. At Expand time, the backend conservatively zero/sign extends its operands even if not needed, such as incoming 32-bit function args which ABI/ISA guarantee to be sign-extended already. And subsequently REE fails to eliminate them as "missing definition(s)" or "multiple definition(s)" since function args don't have explicit definition. So during expand riscv_extend_comparands (), if an operand is a subreg-promoted SI with inner DI, which is representative of a function arg, just peel away the subreg to expose the DI, eliding the sign extension. As Jeff noted this routine is also used in if-conversion so also helps there. Note there are currently patches floating around to improve REE and also a new pass to eliminate unnecessary extensions, but it is still beneficial to not generate those extra extensions in the first place. It is obviously less work for post-reload passes such as REE, but even for earlier passes, such as combine, having to deal with one less thing and ensuing fewer combinations is a win too. Way too many existing tests used to observe this issue. e.g. gcc.c-torture/compile/20190827-1.c -O2 -march=rv64gc It eliminates the SEXT.W Tested with rv64gc with no regressions, I'm relying on Patrick's pre-commit CI to do the full testing. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_sign_extend_if_not_subreg_prom): New. * (riscv_extend_comparands): Call New function on operands. 
Signed-off-by: Vineet Gupta --- Changes since v2: - Fix linting issues flagged by pre-commit CI Changes since v1: - Elide sign extension for 32-bit operarnds only - Apply elison for both arguments --- gcc/config/riscv/riscv.cc | 23 +-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index ca9a2ca81d53..269beb3b159b 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3678,6 +3678,24 @@ riscv_zero_if_equal (rtx cmp0, rtx cmp1) cmp0, cmp1, 0, 0, OPTAB_DIRECT); } +/* Helper function for riscv_extend_comparands to Sign-extend the OP. + However if the OP is SI subreg promoted with an inner DI, such as + (subreg/s/v:SI (reg/v:DI) 0) + just peel off the SUBREG to get DI, avoiding extraneous extension. */ + +static void +riscv_sign_extend_if_not_subreg_prom (rtx *op) +{ + if (GET_MODE (*op) == SImode + && GET_CODE (*op) == SUBREG + && SUBREG_PROMOTED_VAR_P (*op) + && GET_MODE_SIZE (GET_MODE (XEXP (*op, 0))).to_constant () +== GET_MODE_SIZE (word_mode)) +*op = XEXP (*op, 0); + else +*op = gen_rtx_SIGN_EXTEND (word_mode, *op); +} + /* Sign- or zero-extend OP0 and OP1 for integer comparisons. */ static void @@ -3707,9 +3725,10 @@ riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1) } else { - *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0); + riscv_sign_extend_if_not_subreg_prom (op0); + if (*op1 != const0_rtx) - *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1); + riscv_sign_extend_if_not_subreg_prom (op1); } } } -- 2.34.1
RE: [x86 PATCH] PR target/110551: Fix reg allocation for widening multiplications.
Hi Roger, It seems that your patch caused some regression on x86_64: https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078390.html https://gcc.gnu.org/pipermail/gcc-regression/2023-October/078391.html Could you help verify that? A simple reproducer under build folder will be: make check RUNTESTFLAGS="conformance.exp=std/time/year_month_day/io.cc --target_board='unix{-m64\ -march=cascadelake,-m32\ -march=cascadelake,-m32,-m64}'" Thx, Haochen > -Original Message- > From: Roger Sayle > Sent: Wednesday, October 18, 2023 10:30 PM > To: gcc-patches@gcc.gnu.org > Cc: 'Uros Bizjak' ; tobias.bur...@siemens.com > Subject: RE: [x86 PATCH] PR target/110551: Fix reg allocation for widening > multiplications. > > > Many thanks to Tobias Burnus for pointing out the mistake/typo in the PR > number. > This fix is for PR 110551, not PR 110511. I'll update the ChangeLog and > filename > of the new testcase, if approved. > > Sorry for any inconvenience/confusion. > Cheers, > Roger > -- > > > -Original Message- > > From: Roger Sayle > > Sent: 17 October 2023 20:06 > > To: 'gcc-patches@gcc.gnu.org' > > Cc: 'Uros Bizjak' > > Subject: [x86 PATCH] PR target/110511: Fix reg allocation for widening > > multiplications. > > > > > > This patch contains clean-ups of the widening multiplication patterns in > i386.md, > > and provides variants of the existing highpart multiplication > > peephole2 transformations (that tidy up register allocation after reload), > and > > thereby fixes PR target/110511, which is a superfluous move instruction. > > > > For the new test case, compiled on x86_64 with -O2. 
> > > > Before: > > mulx64: movabsq $-7046029254386353131, %rcx > > movq%rcx, %rax > > mulq%rdi > > xorq%rdx, %rax > > ret > > > > After: > > mulx64: movabsq $-7046029254386353131, %rax > > mulq%rdi > > xorq%rdx, %rax > > ret > > > > The clean-ups are (i) that operand 1 is consistently made register_operand > and > > operand 2 becomes nonimmediate_operand, so that predicates match the > > constraints, (ii) the representation of the BMI2 mulx instruction is > updated to use > > the new umul_highpart RTX, and (iii) because operands > > 0 and 1 have different modes in widening multiplications, "a" is a more > > appropriate constraint than "0" (which avoids spills/reloads containing > SUBREGs). > > The new peephole2 transformations are based upon those at around line > 9951 > of > > i386.md, that begins with the comment ;; Highpart multiplication > peephole2s to > > tweak register allocation. > > ;; mov imm,%rdx; mov %rdi,%rax; imulq %rdx -> mov imm,%rax; imulq %rdi > > > > > > This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and > > make -k check, both with and without --target_board=unix{-m32} with no > new > > failures. Ok for mainline? > > > > > > 2023-10-17 Roger Sayle > > > > gcc/ChangeLog > > PR target/110511 > > * config/i386/i386.md (mul3): Make operands 1 and > > 2 take "regiser_operand" and "nonimmediate_operand" respectively. > > (mulqihi3): Likewise. > > (*bmi2_umul3_1): Operand 2 needs to be > register_operand > > matching the %d constraint. Use umul_highpart RTX to represent > > the highpart multiplication. > > (*umul3_1): Operand 2 should use regiser_operand > > predicate, and "a" rather than "0" as operands 0 and 2 have > > different modes. > > (define_split): For mul to mulx conversion, use the new > > umul_highpart RTX representation. > > (*mul3_1): Operand 1 should be register_operand > > and the constraint %a as operands 0 and 1 have different modes. 
> > (*mulqihi3_1): Operand 1 should be register_operand matching > > the constraint %0. > > (define_peephole2): Providing widening multiplication variants > > of the peephole2s that tweak highpart multiplication register > > allocation. > > > > gcc/testsuite/ChangeLog > > PR target/110511 > > * gcc.target/i386/pr110511.c: New test case. > > > > > > Thanks in advance, > > Roger >
[PATCH v5 1/1] gcc: config: microblaze: fix cpu version check
The MICROBLAZE_VERSION_COMPARE was incorrectly using strcasecmp instead of strverscmp to check the mcpu version against feature options. By simply changing the define to use strverscmp, the new version 10.0 is treated correctly as a higher version than previous versions. Signed-off-by: Neal Frager --- V1->V2: - No need to create a new microblaze specific version check routine as strverscmp is the correct solution. V2->V3: - Changed mcpu define for microblaze isa testsuite examples. V3->V4: - Added ChangeLog V4->V5: - Added testsuite ChangeLog --- gcc/ChangeLog | 4 gcc/config/microblaze/microblaze.cc| 2 +- gcc/testsuite/ChangeLog| 4 gcc/testsuite/gcc.target/microblaze/isa/bshift.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/div.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcmp4.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fcvt.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/float.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/fsqrt.c| 2 +- gcc/testsuite/gcc.target/microblaze/isa/mul-bshift-pcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mul-bshift.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mul.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mulh-bshift-pcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/mulh.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/nofcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/nofloat.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/pcmp.c | 2 +- gcc/testsuite/gcc.target/microblaze/isa/vanilla.c | 2 +- gcc/testsuite/gcc.target/microblaze/microblaze.exp | 2 +- 22 files changed, 28 insertions(+), 20 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4964796c6a6..7f63f39d4cd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,7 @@ +2023-10-30 Neal Frager + + * config/microblaze/microblaze.cc: Fix mcpu version check. 
+ 2023-10-29 Martin Uecker PR tree-optimization/109334 diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc index c9f6c4198cf..60ad55120d2 100644 --- a/gcc/config/microblaze/microblaze.cc +++ b/gcc/config/microblaze/microblaze.cc @@ -56,7 +56,7 @@ /* This file should be included last. */ #include "target-def.h" -#define MICROBLAZE_VERSION_COMPARE(VA,VB) strcasecmp (VA, VB) +#define MICROBLAZE_VERSION_COMPARE(VA,VB) strverscmp (VA, VB) /* Classifies an address. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5c18129b4ac..1d7abcf2584 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2023-10-30 Neal Frager + + * gcc.target/microblaze: Bump tests to mcpu=v10.0. + 2023-10-29 Iain Buclaw PR d/110712 diff --git a/gcc/testsuite/gcc.target/microblaze/isa/bshift.c b/gcc/testsuite/gcc.target/microblaze/isa/bshift.c index 64cf1e2e59e..664586bff9f 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/bshift.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/bshift.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mxl-barrel-shift" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mxl-barrel-shift" } */ volatile int m1, m2, m3; volatile unsigned int u1, u2, u3; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/div.c b/gcc/testsuite/gcc.target/microblaze/isa/div.c index 25ee42ce5c8..783e7c0f684 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/div.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/div.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mno-xl-soft-div" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mno-xl-soft-div" } */ volatile int m1, m2, m3; volatile long l1, l2; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c b/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c index 4041a241391..b6202e168d6 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/fcmp1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mhard-float" } 
*/ +/* { dg-options "-O3 -mcpu=v10.0 -mhard-float" } */ volatile float f1, f2, f3; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c b/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c index 3902b839db9..4386c6e6cc3 100644 --- a/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c +++ b/gcc/testsuite/gcc.target/microblaze/isa/fcmp2.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -mcpu=v6.00.a -mhard-float" } */ +/* { dg-options "-O3 -mcpu=v10.0 -mhard-float" } */ volatile float f1, f2, f3; diff --git a/gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c b/gcc/testsuite/gcc.target/microblaze/isa/fcmp3.c
[PATCH] rs6000: Consider inline asm as safe if no assembler complains [PR111828]
Hi, As discussed in PR111828, rs6000_update_ipa_fn_target_info is very conservative: currently, for any non-empty inline asm, without any parsing, it assumes the inline asm could contain HTM insns. This means that for a function attributed with power8 that has inline asm, even if it has no HTM insns, we don't allow a function attributed with power10 to inline it. Peter pointed out that an inline asm parser can be a slippery slope, and noticed that the current GNU assembler still allows HTM insns even with the power10 machine type, so he suggested that we can aggressively drop the handling of inline asm; this patch follows that suggestion. Considering that there are a few assembler alternatives and that an assembler can change its behavior (complaining about HTM insns at power10 and later cpus sounds reasonable from a certain point of view), this patch also checks whether the assembler complains about HTM insns at power10. For the case where a caller attributed with power10 calls a callee attributed with power8 that has inline asm with an HTM insn, without inlining the compilation at least succeeds, but if the assembler complains about HTM insns at power10, the compilation would fail after inlining. The two associated test cases pass both without and with this patch (whether or not the effective target takes effect). Bootstrapped and regtested on x86_64-redhat-linux, powerpc64-linux-gnu P8/P9 and powerpc64le-linux-gnu P9/P10. I'm going to push this in a week if there are no objections. BR, Kewen - PR target/111828 gcc/ChangeLog: * config.in: Regenerate. * config/rs6000/rs6000.cc (rs6000_update_ipa_fn_target_info): Guard inline asm handling under !HAVE_AS_POWER10_HTM. * configure: Regenerate. * configure.ac: Detect assembler support for HTM insns at power10. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_powerpc_as_p10_htm): New proc. * g++.target/powerpc/pr111828-1.C: New test. * g++.target/powerpc/pr111828-2.C: New test. 
--- gcc/config.in | 6 +++ gcc/config/rs6000/rs6000.cc | 5 +- gcc/configure | 43 +++ gcc/configure.ac | 17 ++ gcc/testsuite/g++.target/powerpc/pr111828-1.C | 49 + gcc/testsuite/g++.target/powerpc/pr111828-2.C | 52 +++ gcc/testsuite/lib/target-supports.exp | 40 ++ 7 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.target/powerpc/pr111828-1.C create mode 100644 gcc/testsuite/g++.target/powerpc/pr111828-2.C diff --git a/gcc/config.in b/gcc/config.in index d04718ad128..c9681351389 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -682,6 +682,12 @@ #endif +/* Define if your assembler supports htm insns on power10. */ +#ifndef USED_FOR_TARGET +#undef HAVE_AS_POWER10_HTM +#endif + + /* Define if your assembler supports .ref */ #ifndef USED_FOR_TARGET #undef HAVE_AS_REF diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index cc24dd5301e..6d084069014 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -25682,6 +25682,7 @@ rs6000_need_ipa_fn_target_info (const_tree decl, static bool rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt) { +#ifndef HAVE_AS_POWER10_HTM /* Assume inline asm can use any instruction features. */ if (gimple_code (stmt) == GIMPLE_ASM) { @@ -25693,7 +25694,9 @@ rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt) info |= RS6000_FN_TARGET_INFO_HTM; return false; } - else if (gimple_code (stmt) == GIMPLE_CALL) +#endif + + if (gimple_code (stmt) == GIMPLE_CALL) { tree fndecl = gimple_call_fndecl (stmt); if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)) diff --git a/gcc/configure b/gcc/configure index c43bde8174b..afad4462dd3 100755 --- a/gcc/configure +++ b/gcc/configure @@ -28218,6 +28218,49 @@ if test $gcc_cv_as_powerpc_mfcrf = yes; then $as_echo "#define HAVE_AS_MFCRF 1" >>confdefs.h +fi + + +case $target in + *-*-aix*) conftest_s=' .machine "pwr10" + .csect .text[PR] + tend. 0';; + *-*-darwin*) conftest_s='.text + tend. 
0';; + *) conftest_s=' .machine power10 + .text + tend. 0';; +esac + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for htm support on Power10" >&5 +$as_echo_n "checking assembler for htm support on Power10... " >&6; } +if ${gcc_cv_as_power10_htm+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_power10_htm=no + if test x$gcc_cv_as != x; then +$as_echo "$conftest_s" > conftest.s +if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status