[gcc r15-3697] RISC-V: Fix signed SAT_ADD test case for int64_t
https://gcc.gnu.org/g:1d1687513400c1b805bf5924f732c21dbf166ae8 commit r15-3697-g1d1687513400c1b805bf5924f732c21dbf166ae8 Author: Pan Li Date: Fri Sep 13 09:16:48 2024 +0800 RISC-V: Fix signed SAT_ADD test case for int64_t The int8_t test for signed SAT_ADD is sat_s_add-1.c, the sat_s_add-4.c should be for int64_t. Thus, update sat_s_add-4.c for int64_t type. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_s_add-4.c: Update test for int64_t instead of int8_t. Signed-off-by: Pan Li Diff: --- gcc/testsuite/gcc.target/riscv/sat_s_add-4.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c index f85675c1a053..12c9540eaeca 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c +++ b/gcc/testsuite/gcc.target/riscv/sat_s_add-4.c @@ -5,26 +5,25 @@ #include "sat_arith.h" /* -** sat_s_add_int8_t_fmt_1: +** sat_s_add_int64_t_fmt_1: ** add\s+[atx][0-9]+,\s*a0,\s*a1 ** xor\s+[atx][0-9]+,\s*a0,\s*a1 ** xor\s+[atx][0-9]+,\s*a0,\s*[atx][0-9]+ -** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 -** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*7 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 ** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ -** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 ** srai\s+[atx][0-9]+,\s*[atx][0-9]+,\s*63 -** xori\s+[atx][0-9]+,\s*[atx][0-9]+,\s*127 +** li\s+[atx][0-9]+,\s*-1 +** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*1 +** xor\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ ** neg\s+[atx][0-9]+,\s*[atx][0-9]+ ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ ** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-1 ** and\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+ ** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+ -** slliw\s+a0,\s*a0,\s*24 -** sraiw\s+a0,\s*a0,\s*24 ** ret */ -DEF_SAT_S_ADD_FMT_1(int8_t, uint8_t, INT8_MIN, INT8_MAX) +DEF_SAT_S_ADD_FMT_1(int64_t, uint64_t, INT64_MIN, INT64_MAX) /* { dg-final { 
scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
[gcc r15-3694] c++: alias of decltype(lambda) is opaque [PR116714, PR107390]
https://gcc.gnu.org/g:82c2acd0bc4411524a8248fcdce219927d921a71 commit r15-3694-g82c2acd0bc4411524a8248fcdce219927d921a71 Author: Patrick Palka Date: Wed Sep 18 13:50:43 2024 -0400 c++: alias of decltype(lambda) is opaque [PR116714, PR107390] Here for using type = decltype([]{}); static_assert(is_same_v); we strip the alias ahead of time during template argument coercion which effectively transforms the template-id into is_same_v which is wrong because later substitution into the template-id will produce two new lambdas with distinct types and cause is_same_v to return false. This demonstrates that such aliases should be considered opaque (a notion that we recently introduced in r15-2331-g523836716137d0). (An alternative solution might be to consider memoizing lambda-expr substitution rather than always producing a new lambda, but this is much simpler.) PR c++/116714 PR c++/107390 gcc/cp/ChangeLog: * pt.cc (dependent_opaque_alias_p): Also return true for a decltype(lambda) alias. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/lambda-uneval18.C: New test. Reviewed-by: Jason Merrill Diff: --- gcc/cp/pt.cc | 11 ++-- gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C | 39 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 769e7999dac1..e826206be164 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -6759,8 +6759,15 @@ dependent_opaque_alias_p (const_tree t) { return (TYPE_P (t) && typedef_variant_p (t) - && any_dependent_type_attributes_p (DECL_ATTRIBUTES - (TYPE_NAME (t; + && (any_dependent_type_attributes_p (DECL_ATTRIBUTES + (TYPE_NAME (t))) + /* Treat a dependent decltype(lambda) alias as opaque so that we +don't prematurely strip it when used as a template argument. +Otherwise substitution into each occurrence of the (stripped) +alias would incorrectly yield a distinct lambda type. 
*/ + || (TREE_CODE (t) == DECLTYPE_TYPE + && TREE_CODE (DECLTYPE_TYPE_EXPR (t)) == LAMBDA_EXPR + && !typedef_variant_p (DECL_ORIGINAL_TYPE (TYPE_NAME (t)); } /* Return the number of innermost template parameters in TMPL. */ diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C new file mode 100644 index ..b7d864c62453 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/lambda-uneval18.C @@ -0,0 +1,39 @@ +// PR c++/116714 +// PR c++/107390 +// { dg-do compile { target c++20 } } + +template +inline constexpr bool is_same_v = __is_same(T, U); + +template +struct is_same { static constexpr bool value = false; }; + +template +struct is_same { static constexpr bool value = true; }; + +template +void f() { + using type = decltype([]{}); + static_assert(is_same_v); + static_assert(is_same::value); +}; + +template +void g() { + using ty1 = decltype([]{}); + using ty2 = ty1; + static_assert(is_same_v); + static_assert(is_same::value); +}; + +template +void h() { + using ty1 = decltype([]{}); + using ty2 = decltype([]{}); + static_assert(!is_same_v); + static_assert(!is_same::value); +}; + +template void f(); +template void g(); +template void h();
[gcc r15-3695] [PATCH] configure: fix typos
https://gcc.gnu.org/g:cc62b2c3da118f08f71d2ae9c08bafb55b35767a commit r15-3695-gcc62b2c3da118f08f71d2ae9c08bafb55b35767a Author: Andrew Kreimer Date: Wed Sep 18 11:50:58 2024 -0600 [PATCH] configure: fix typos / * configure.ac: Fix typos. * configure: Rebuilt. Diff: --- configure| 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index e9583f2ba0c7..6a77d454fd53 100755 --- a/configure +++ b/configure @@ -9086,7 +9086,7 @@ if test -d ${srcdir}/gcc; then lang_requires_boot_languages= # set srcdir during sourcing lang_frag to the gcc dir. # Sadly overriding srcdir on the . line doesn't work in plain sh as it -# polutes this shell +# pollutes this shell saved_srcdir=${srcdir} srcdir=${srcdir}/gcc . ${lang_frag} srcdir=${saved_srcdir} diff --git a/configure.ac b/configure.ac index f61dbe64a942..2567757e74df 100644 --- a/configure.ac +++ b/configure.ac @@ -2136,7 +2136,7 @@ if test -d ${srcdir}/gcc; then lang_requires_boot_languages= # set srcdir during sourcing lang_frag to the gcc dir. # Sadly overriding srcdir on the . line doesn't work in plain sh as it -# polutes this shell +# pollutes this shell saved_srcdir=${srcdir} srcdir=${srcdir}/gcc . ${lang_frag} srcdir=${saved_srcdir}
[gcc r15-3699] testsuite/gcc.dg/pr84877.c: Add machinery to stabilize stack alignment
https://gcc.gnu.org/g:b1ea710b1bcdda233f96538c5404228d2b244e01 commit r15-3699-gb1ea710b1bcdda233f96538c5404228d2b244e01 Author: Hans-Peter Nilsson Date: Thu Sep 5 17:02:23 2024 +0200 testsuite/gcc.dg/pr84877.c: Add machinery to stabilize stack aligmnent This test awkwardly "blinks"; xfails and xpasses apparently randomly for cris-elf using the "gdb simulator". On inspection, I see that the stack address depends on the number of environment variables, deliberately passed to the simulator, each adding the size of a pointer. This test is IMHO important enough not to be just skipped just because it blinks (fixing the actual problem is a different task). I guess a random non-16 stack-alignment could happen for other targets as well, so let's try and add a generic machinery to "stabilize" the test as failing, by allocating a dynamic amount to make sure it's misaligned. The most target-dependent item here is an offset between the incoming stack-pointer value (within main in the added framework) and outgoing (within "xmain" as called from main when setting up the p0 parameter). I know there are other wonderful stack shapes, but such targets would fall under the "complicated situations"-label and are no worse off than before. * gcc.dg/pr84877.c: Try to make the test result consistent by misaligning the stack. Diff: --- gcc/testsuite/gcc.dg/pr84877.c | 26 ++ 1 file changed, 26 insertions(+) diff --git a/gcc/testsuite/gcc.dg/pr84877.c b/gcc/testsuite/gcc.dg/pr84877.c index e82991f42dd4..2f2e29578df9 100644 --- a/gcc/testsuite/gcc.dg/pr84877.c +++ b/gcc/testsuite/gcc.dg/pr84877.c @@ -3,6 +3,32 @@ #include +#ifdef __CRIS__ +#define OUTGOING_SP_OFFSET (-sizeof (void *)) +/* Suggestion: append #elif defined() after this comment, + either defining OUTGOING_SP_OFFSET to whatever the pertinent amount is at -O2, + if that makes your target consistently fail this test, or define + DO_NOT_TAMPER for more complicated situations. 
Either way, compile with + -DDO_NOT_TAMPER to avoid any meddling. */ +#endif + +#if defined (OUTGOING_SP_OFFSET) && !defined (DO_NOT_TAMPER) +extern int xmain () __attribute__ ((__noipa__)); +int main () +{ + uintptr_t misalignment + = (OUTGOING_SP_OFFSET + + (15 & (uintptr_t) __builtin_stack_address ())); + /* Allocate a minimal amount if the stack was accidentally aligned. */ + void *q = __builtin_alloca (misalignment == 0); + xmain (); + /* Fake use to avoid the "allocation" being optimized out. */ + asm volatile ("" : : "rm" (q)); + return 0; +} +#define main xmain +#endif + struct U { int M0; int M1;
[gcc r15-3690] contrib: Set check-params-in-docs.py to skip tables of values of a param
https://gcc.gnu.org/g:4b7e6d5faa137f18a36d8c6323a8640e61ee48f1 commit r15-3690-g4b7e6d5faa137f18a36d8c6323a8640e61ee48f1 Author: Filip Kastl Date: Wed Sep 18 16:38:30 2024 +0200 contrib: Set check-params-in-docs.py to skip tables of values of a param Currently check-params-in-docs.py reports extra params being listed in invoke.texi. However, those aren't actual params but items in a table of possible values of the aarch64-autove-preference param. This patch changes check-params-in-docs.py to ignore similar tables. contrib/ChangeLog: * check-params-in-docs.py: Skip tables of values of a param. Remove code that skips items beginning with a number. Signed-off-by: Filip Kastl Diff: --- contrib/check-params-in-docs.py | 13 +++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/contrib/check-params-in-docs.py b/contrib/check-params-in-docs.py index ccdb8d721696..102f0e64e989 100755 --- a/contrib/check-params-in-docs.py +++ b/contrib/check-params-in-docs.py @@ -66,14 +66,23 @@ texi = takewhile(lambda x: '@node Instrumentation Options' not in x, texi) texi = list(texi)[1:] texi_params = [] +skip = False for line in texi: +# Skip @table @samp sections of manual where values of a param are usually +# listed +if skip: +if line.startswith('@end table'): +skip = False +continue +elif line.startswith('@table @samp'): +skip = True +continue + for token in ('@item ', '@itemx '): if line.startswith(token): texi_params.append(line[len(token):]) break -# Skip digits -texi_params = [x for x in texi_params if not x[0].isdigit()] # Skip target-specific params texi_params = [x for x in texi_params if not target_specific(x)]
[gcc r15-3691] [PATCH v3] RISC-V: Fixed incorrect semantic description in DF to DI pattern in the Zfa extension on rv32.
https://gcc.gnu.org/g:85fcf740342e308da4776a45a4cd726987725a6a commit r15-3691-g85fcf740342e308da4776a45a4cd726987725a6a Author: Jin Ma Date: Wed Sep 18 08:56:23 2024 -0600 [PATCH v3] RISC-V: Fixed incorrect semantic description in DF to DI pattern in the Zfa extension on rv32. gcc/ChangeLog: * config/riscv/riscv.md: Change "truncate" to unspec for the Zfa extension on rv32. gcc/testsuite/ChangeLog: * gcc.target/riscv/zfa-fmovh-fmovp-bug.c: New test. Diff: --- gcc/config/riscv/riscv.md| 16 +--- gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c | 9 + 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index fd1cbebc435b..0410d990ec58 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -56,6 +56,8 @@ UNSPEC_FLT_QUIET UNSPEC_FLE_QUIET UNSPEC_COPYSIGN + UNSPEC_FMV_X_W + UNSPEC_FMVH_X_D UNSPEC_RINT UNSPEC_ROUND UNSPEC_FLOOR @@ -2626,8 +2628,9 @@ (define_insn "movsidf2_low_rv32" [(set (match_operand:SI 0 "register_operand" "= r") - (truncate:SI - (match_operand:DF 1 "register_operand" "zmvf")))] + (unspec:SI + [(match_operand:DF 1 "register_operand" "zmvf")] + UNSPEC_FMV_X_W))] "TARGET_HARD_FLOAT && !TARGET_64BIT && TARGET_ZFA" "fmv.x.w\t%0,%1" [(set_attr "move_type" "fmove") @@ -2636,11 +2639,10 @@ (define_insn "movsidf2_high_rv32" - [(set (match_operand:SI 0 "register_operand""= r") - (truncate:SI -(lshiftrt:DF -(match_operand:DF 1 "register_operand" "zmvf") -(const_int 32] + [(set (match_operand:SI 0 "register_operand" "= r") + (unspec:SI + [(match_operand:DF 1 "register_operand" "zmvf")] + UNSPEC_FMVH_X_D))] "TARGET_HARD_FLOAT && !TARGET_64BIT && TARGET_ZFA" "fmvh.x.d\t%0,%1" [(set_attr "move_type" "fmove") diff --git a/gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c b/gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c new file mode 100644 index ..e00047b09e3a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp-bug.c @@ -0,0 +1,9 @@ +/* Test that we do not 
have ice when compile */ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zfa -mabi=ilp32d -O2 -g" } */ + +unsigned int +foo (double a) { + unsigned int tt = *(unsigned long long *)&a & 0xffff; + return tt; +}
[gcc r15-3680] tree-optimization/116585 - SSA corruption with split_constant_offset
https://gcc.gnu.org/g:1d0cb3b5fca69b81e69cfdb4aea0eebc1ac04750 commit r15-3680-g1d0cb3b5fca69b81e69cfdb4aea0eebc1ac04750 Author: Richard Biener Date: Wed Sep 18 09:52:55 2024 +0200 tree-optimization/116585 - SSA corruption with split_constant_offset split_constant_offset when looking through SSA defs can end up picking SSA leafs that are subject to abnormal coalescing. This can lead to downstream consumers to insert code based on the result (like from dataref analysis) in places that violate constraints for abnormal coalescing. It's best to not expand defs whose operands are subject to abnormal coalescing - and not either do something when a subexpression has operands like that already. PR tree-optimization/116585 * tree-data-ref.cc (split_constant_offset_1): When either operand is subject to abnormal coalescing do no further processing. * gcc.dg/torture/pr116585.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/torture/pr116585.c | 32 gcc/tree-data-ref.cc| 11 --- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr116585.c b/gcc/testsuite/gcc.dg/torture/pr116585.c new file mode 100644 index ..108c481e1043 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116585.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ + +char *s1, *s2; +extern int* my_alloc (int); +extern int _setjmp (); +extern void bar(); +void foo(int s1len, int s2len) +{ + int e; + e = _setjmp (); +{ + int l, i; + int *md = my_alloc(((sizeof(int)) * (s1len + 1) * (s2len))); + s1len++; + for (; s1len; l) + for (; s2len; l) + for (; s1len; i) + { + int j = 1; + for (; j < s2len; j++) + { + int cost; + if (s1[1] == s2[1]) + cost = 0; + else + cost = 1; + md[j * s1len ] = ((cost)); + } + } + bar(); +} +} diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc index 48798f458b80..26e6d9a56572 100644 --- a/gcc/tree-data-ref.cc +++ b/gcc/tree-data-ref.cc @@ -766,6 +766,14 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, if (INTEGRAL_TYPE_P 
(type) && TYPE_OVERFLOW_TRAPS (type)) return false; + if (TREE_CODE (op0) == SSA_NAME + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0)) +return false; + if (op1 + && TREE_CODE (op1) == SSA_NAME + && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1)) +return false; + switch (code) { case INTEGER_CST: @@ -861,9 +869,6 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, case SSA_NAME: { - if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0)) - return false; - gimple *def_stmt = SSA_NAME_DEF_STMT (op0); enum tree_code subcode;
[gcc r15-3687] [PATCH] RISC-V: Fix th.extu operands exceeding range on rv32.
https://gcc.gnu.org/g:ec34a4481b63bb5028b2a8c61322a7a3d362b27c commit r15-3687-gec34a4481b63bb5028b2a8c61322a7a3d362b27c Author: Xianmiao Qu Date: Wed Sep 18 07:28:44 2024 -0600 [PATCH] RISC-V: Fix th.extu operands exceeding range on rv32. The Combine Pass may generate zero_extract instructions that are out of range. Drawing from other architectures like AArch64, we should impose restrictions on the "*th_extu4" pattern. gcc/ * config/riscv/thead.md (*th_extu4): Fix th.extu operands exceeding range on rv32. gcc/testsuite/ * gcc.target/riscv/xtheadbb-extu-4.c: New. Diff: --- gcc/config/riscv/thead.md| 4 +++- gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c | 17 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md index 2a3af76b55c2..7a76cc8cf4a9 100644 --- a/gcc/config/riscv/thead.md +++ b/gcc/config/riscv/thead.md @@ -85,7 +85,9 @@ (zero_extract:GPR (match_operand:GPR 1 "register_operand" "r") (match_operand 2 "const_int_operand") (match_operand 3 "const_int_operand")))] - "TARGET_XTHEADBB" + "TARGET_XTHEADBB + && (UINTVAL (operands[2]) + UINTVAL (operands[3]) + <= GET_MODE_BITSIZE (mode))" { operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) - 1); return "th.extu\t%0,%1,%2,%3"; diff --git a/gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c b/gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c new file mode 100644 index ..41d3fc1f5b40 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/xtheadbb-extu-4.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { rv32 } } } */ +/* { dg-options "-march=rv32gc_xtheadbb" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Os" "-Og" "-Oz" } } */ + +struct c { + int f : 25; +} d; + +int b; +extern unsigned int e[]; + +void g() +{ + d.f = e[2] >> (b << ~4194303 + 4194332) - 58096371; +} + +/* { dg-final { scan-assembler-not {th.extu\t[ax][0-9]+,[ax][0-9]+,37,13} } } */ \ No newline at end of file
[gcc r15-3688] [PATCH 1/2] RISC-V: Fix the outer_code when calculating the cost of SET expression.
https://gcc.gnu.org/g:ad5bfc2b7044ba962396de0dabcad1cd54234689 commit r15-3688-gad5bfc2b7044ba962396de0dabcad1cd54234689 Author: Xianmiao Qu Date: Wed Sep 18 07:35:12 2024 -0600 [PATCH 1/2] RISC-V: Fix the outer_code when calculating the cost of SET expression. I think it is a typo. When calculating the 'SET_SRC (x)' cost, outer_code should be set to SET. gcc/ * config/riscv/riscv.cc (riscv_rtx_costs): Fix the outer_code when calculating the cost of SET expression. Diff: --- gcc/config/riscv/riscv.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index fbf2da71e10d..7be3939a7f93 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -3773,7 +3773,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN *total = COSTS_N_INSNS (1); return true; } - riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed); + riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed); return true; }
[gcc r15-3689] Fail vectorization when not using SLP and --param vect-force-slp == 1
https://gcc.gnu.org/g:de1389e24e8dc98b65bc8d40976172214ac4ecc0 commit r15-3689-gde1389e24e8dc98b65bc8d40976172214ac4ecc0 Author: Richard Biener Date: Sun Sep 8 11:21:19 2024 +0200 Fail vectorization when not using SLP and --param vect-force-slp == 1 The following adds --param vect-force-slp to enable the transition to full SLP. Full SLP is enforced during stmt analysis where it detects failed SLP discovery and at loop analysis time where it avoids analyzing a loop with SLP disabled. Failure to SLP results in vectorization to fail. * params.opt (vect-force-slp): New param, default 0. * doc/invoke.texi (--param vect-force-slp): Document. * tree-vect-loop.cc (vect_analyze_loop_2): When analyzing without SLP but --param vect-force-slp is 1 fail. * tree-vect-stmts.cc (vect_analyze_stmt): Fail vectorization for non-SLP stmts when --param vect-force-slp is 1. Diff: --- gcc/doc/invoke.texi| 3 +++ gcc/params.opt | 4 gcc/tree-vect-loop.cc | 6 ++ gcc/tree-vect-stmts.cc | 6 ++ 4 files changed, 19 insertions(+) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 511b8c8d3111..b91fb9c9cca6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -16955,6 +16955,9 @@ this parameter. The default value of this parameter is 50. @item vect-induction-float Enable loop vectorization of floating point inductions. +@item vect-force-slp +Force the use of SLP when vectorizing, fail if not possible. + @item vrp-block-limit Maximum number of basic blocks before VRP switches to a lower memory algorithm. diff --git a/gcc/params.opt b/gcc/params.opt index c17ba17b91b0..949b47544980 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -1198,6 +1198,10 @@ The maximum factor which the loop vectorizer applies to the cost of statements i Common Joined UInteger Var(param_vect_induction_float) Init(1) IntegerRange(0, 1) Param Optimization Enable loop vectorization of floating point inductions. 
+-param=vect-force-slp= +Common Joined UInteger Var(param_vect_force_slp) Init(0) IntegerRange(0, 1) Param Optimization +Force the use of SLP when vectorizing, fail if not possible. + -param=vrp-block-limit= Common Joined UInteger Var(param_vrp_block_limit) Init(15) Optimization Param Maximum number of basic blocks before VRP switches to a fast model with less memory requirements. diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 62c7f90779fa..d42694d19747 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2891,6 +2891,12 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, /* This is the point where we can re-start analysis with SLP forced off. */ start_over: + /* When we arrive here with SLP disabled and we are supposed + to use SLP for everything fail vectorization. */ + if (!slp && param_vect_force_slp) +return opt_result::failure_at (vect_location, + "may need non-SLP handling\n"); + /* Apply the suggested unrolling factor, this was determined by the backend during finish_cost the first time we ran the analyzis for this vector mode. */ diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index b1353c91fce1..495f45e40e63 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -13353,6 +13353,12 @@ vect_analyze_stmt (vec_info *vinfo, return opt_result::success (); } + /* When we arrive here with a non-SLP statement and we are supposed + to use SLP for everything fail vectorization. */ + if (!node && param_vect_force_slp) +return opt_result::failure_at (stmt_info->stmt, + "needs non-SLP handling\n"); + ok = true; if (!bb_vinfo && (STMT_VINFO_RELEVANT_P (stmt_info)
[gcc r15-3692] hppa: Add peephole2 optimizations for REG+D loads and stores
https://gcc.gnu.org/g:4b03750f8cda0a8745b10639a8ac7df71aced0cc commit r15-3692-g4b03750f8cda0a8745b10639a8ac7df71aced0cc Author: John David Anglin Date: Wed Sep 18 11:02:32 2024 -0400 hppa: Add peephole2 optimizations for REG+D loads and stores The PA 1.x architecture only supports long displacements in integer loads and stores. Floating-point loads and stores only support short displacements. As a result, we have to wait until reload is complete before generating insns with long displacements. The PA 2.0 architecture supports long displacements in both integer and floating-point loads and stores. The peephole2 optimizations added in this change are only enabled when 14-bit long displacements aren't supported for floating-point loads and stores. 2024-09-18 John David Anglin gcc/ChangeLog: * config/pa/pa.h (GENERAL_REGNO_P): Define. * config/pa/pa.md: Add SImode and SFmode peephole2 patterns to generate loads and stores with long displacements. Diff: --- gcc/config/pa/pa.h | 3 ++ gcc/config/pa/pa.md | 100 2 files changed, 103 insertions(+) diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 7e45c358895b..6fcc2fa2ac76 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -480,6 +480,9 @@ extern rtx hppa_pic_save_rtx (void); #define INDEX_REG_CLASS GENERAL_REGS #define BASE_REG_CLASS GENERAL_REGS +/* True if register is a general register. */ +#define GENERAL_REGNO_P(N) ((N) >= 1 && (N) <= 31) + #define FP_REG_CLASS_P(CLASS) \ ((CLASS) == FP_REGS || (CLASS) == FPUPPER_REGS) diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 1e781efb66b0..f0520bb2c353 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -2280,6 +2280,58 @@ (set_attr "pa_combine_type" "addmove") (set_attr "length" "4")]) +; Rewrite RTL using a REG+D store. This will allow the insn that +; computes the address to be deleted if the register it sets is dead. 
+(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") +(match_operand:SI 2 "const_int_operand" ""))) + (set (mem:SI (match_dup 0)) + (match_operand:SI 3 "register_operand" ""))] + "!TARGET_64BIT + && !INT14_OK_STRICT + && GENERAL_REGNO_P (REGNO (operands[0])) + && GENERAL_REGNO_P (REGNO (operands[3])) + && REGNO (operands[0]) != REGNO (operands[3]) + && base14_operand (operands[2], E_SImode)" + [(set (mem:SI (plus:SI (match_dup 1) (match_dup 2))) (match_dup 3)) + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))] + "") + +; Rewrite RTL using a REG+D load. This will allow the insn that +; computes the address to be deleted if the register it sets is dead. +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") +(match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 0)))] + "!TARGET_64BIT + && !INT14_OK_STRICT + && GENERAL_REGNO_P (REGNO (operands[0])) + && GENERAL_REGNO_P (REGNO (operands[3])) + && REGNO (operands[0]) != REGNO (operands[3]) + && REGNO (operands[1]) != REGNO (operands[3]) + && base14_operand (operands[2], E_SImode)" + [(set (match_dup 3) (mem:SI (plus:SI (match_dup 1) (match_dup 2 + (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))] + "") + +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") +(match_operand:SI 2 "const_int_operand" ""))) + (set (match_operand:SI 3 "register_operand" "") + (mem:SI (match_dup 0)))] + "!TARGET_64BIT + && !INT14_OK_STRICT + && GENERAL_REGNO_P (REGNO (operands[0])) + && GENERAL_REGNO_P (REGNO (operands[3])) + && REGNO (operands[0]) == REGNO (operands[3]) + && base14_operand (operands[2], E_SImode)" + [(set (match_dup 3) (mem:SI (plus:SI (match_dup 1) (match_dup 2] + "") + ; Rewrite RTL using an indexed store. 
This will allow the insn that ; computes the address to be deleted if the register it sets is dead. (define_peephole2 @@ -4507,6 +4559,54 @@ (set_attr "pa_combine_type" "addmove") (set_attr "length" "4")]) +(define_peephole2 + [(set (match_operand:SI 0 "register_operand" "") + (plus:SI (match_operand:SI 1 "register_operand" "") +(match_operand:SI 2 "const_int_operand" ""))) + (set (mem:SF (match_dup 0)) + (match_operand:SF 3 "register_operand" ""))] + "!TARGET_64BIT + && !INT14_OK_STRICT + && GENERAL_REGNO_P (REGNO (operands[0])) + && GENERAL_REGNO_P (REGNO (operands[3])) + && REGNO (operands[0]) != REGNO (operands[3]) + && base14_operand (operands[2], E_SImode)" + [(set (mem:SF (plus:SI (match_dup 1) (match_dup
[gcc r15-3693] jit: Ensure ssize_t is defined
https://gcc.gnu.org/g:fe1ed68000d5e9d41ed48ef1202fd21c8b8c9ff8 commit r15-3693-gfe1ed68000d5e9d41ed48ef1202fd21c8b8c9ff8 Author: Francois-Xavier Coudert Date: Sat May 11 17:08:05 2024 +0200 jit: Ensure ssize_t is defined On some targets it seems that ssize_t is not defined by any of the headers transitively included by <stdio.h>. This leads to a bootstrap fail when jit is enabled. gcc/jit/ChangeLog: * libgccjit.h: Include <sys/types.h> Diff: --- gcc/jit/libgccjit.h | 5 + 1 file changed, 5 insertions(+) diff --git a/gcc/jit/libgccjit.h b/gcc/jit/libgccjit.h index 1d5be27374ec..03bfc0f58a53 100644 --- a/gcc/jit/libgccjit.h +++ b/gcc/jit/libgccjit.h @@ -21,6 +21,11 @@ along with GCC; see the file COPYING3. If not see #define LIBGCCJIT_H #include <stdio.h> +#ifdef __has_include +#if __has_include (<sys/types.h>) +#include <sys/types.h> +#endif +#endif #ifdef __cplusplus extern "C" {
[gcc r14-10679] tree-optimization/116258 - fix i386 testcase
https://gcc.gnu.org/g:198b13e248e7adfea2d28c4e66ac9f5006b8f825 commit r14-10679-g198b13e248e7adfea2d28c4e66ac9f5006b8f825 Author: Richard Biener Date: Thu Aug 8 09:35:42 2024 +0200 tree-optimization/116258 - fix i386 testcase With -march=cascadelake we use vpermilps instead of shufps. PR tree-optimization/116258 * gcc.target/i386/pr116258.c: Also allow vpermilps. (cherry picked from commit 5aa4cd913e48cfce3ca0ab58cf6f80f55dbb0f58) Diff: --- gcc/testsuite/gcc.target/i386/pr116258.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr116258.c b/gcc/testsuite/gcc.target/i386/pr116258.c index bd7d3a97b2c8..cb67e4085c5d 100644 --- a/gcc/testsuite/gcc.target/i386/pr116258.c +++ b/gcc/testsuite/gcc.target/i386/pr116258.c @@ -10,5 +10,5 @@ return (x + h(t)); } -/* { dg-final { scan-assembler-times "shufps" 1 } } */ +/* { dg-final { scan-assembler-times "shufps|permilps" 1 } } */ /* { dg-final { scan-assembler-not "unpck" } } */
[gcc r14-10680] aarch64/testsuite: Add testcases for recently fixed PRs
https://gcc.gnu.org/g:090926ba817bee6de7ee210efeea5d43d5335868 commit r14-10680-g090926ba817bee6de7ee210efeea5d43d5335868 Author: Andrew Pinski Date: Wed Aug 7 09:36:38 2024 -0700 aarch64/testsuite: Add testcases for recently fixed PRs The commit for PR 116258, added a x86_64 specific testcase, I thought it would be a good idea to add an aarch64 testcase too. And since it also fixed VLA vectors too so add a SVE testcase. Pushed as obvious after a test for aarch64-linux-gnu. PR middle-end/116258 PR middle-end/116259 gcc/testsuite/ChangeLog: * gcc.target/aarch64/pr116258.c: New test. * gcc.target/aarch64/sve/pr116259-1.c: New test. Signed-off-by: Andrew Pinski (cherry picked from commit 2c6174402ea315ecf618cfcba741e8cb18bc5282) Diff: --- gcc/testsuite/gcc.target/aarch64/pr116258.c | 17 + gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c | 12 2 files changed, 29 insertions(+) diff --git a/gcc/testsuite/gcc.target/aarch64/pr116258.c b/gcc/testsuite/gcc.target/aarch64/pr116258.c new file mode 100644 index ..e727ad4b72a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr116258.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#pragma GCC target "+nosve" + +#define vect16 __attribute__((vector_size(16))) +#define h(a) __builtin_assoc_barrier((a)) + + vect16 float f( vect16 float x, vect16 float vconstants0) +{ + vect16 float t = (x * (vconstants0[0])); + return (x + h(t)); +} + +/* { dg-final { scan-assembler-times "\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-not "dup\t" } } */ +/* { dg-final { scan-assembler-not "ins\t" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c new file mode 100644 index ..bb2eed4728c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr116259-1.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* PR middle-end/116259 */ + +#include + +/* PAREN_EXPR lowering for VLA vectors was ICEing. 
+ It should not be lowered in a similar way as moves + are not lowered. */ +svfloat64_t f(svfloat64_t x) +{ + return __builtin_assoc_barrier(x); +}
[gcc r14-10677] middle-end/115641 - invalid address construction
https://gcc.gnu.org/g:98dc0471d5409701ae700cd7aba8716fdc500401 commit r14-10677-g98dc0471d5409701ae700cd7aba8716fdc500401 Author: Richard Biener Date: Thu Jul 18 13:35:33 2024 +0200 middle-end/115641 - invalid address construction fold_truth_andor_1 via make_bit_field_ref builds an address of a CALL_EXPR which isn't valid GENERIC and later causes an ICE. The following simply avoids the folding for f ().a != 1 || f ().b != 2 as it is a premature optimization anyway. The alternative would have been to build a TARGET_EXPR around the call. To get this far f () has to be const as otherwise the two calls are not semantically equivalent for the optimization. PR middle-end/115641 * fold-const.cc (decode_field_reference): If the inner reference isn't something we can take the address of, fail. * gcc.dg/torture/pr115641.c: New testcase. (cherry picked from commit 3670c70c561656a19f6bff36dd229f18120af127) Diff: --- gcc/fold-const.cc | 3 +++ gcc/testsuite/gcc.dg/torture/pr115641.c | 29 + 2 files changed, 32 insertions(+) diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index f496b3436df4..644a39456890 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -4992,6 +4992,9 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize, || *pbitsize < 0 || offset != 0 || TREE_CODE (inner) == PLACEHOLDER_EXPR + /* We eventually want to build a larger reference and need to take +the address of this. */ + || (!REFERENCE_CLASS_P (inner) && !DECL_P (inner)) /* Reject out-of-bound accesses (PR79731). */ || (! 
AGGREGATE_TYPE_P (TREE_TYPE (inner)) && compare_tree_int (TYPE_SIZE (TREE_TYPE (inner)), diff --git a/gcc/testsuite/gcc.dg/torture/pr115641.c b/gcc/testsuite/gcc.dg/torture/pr115641.c new file mode 100644 index ..65fb09ca64fc --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr115641.c @@ -0,0 +1,29 @@ +/* { dg-do run } */ + +typedef struct { + char hours, day, month; + short year; +} T; + +T g (void) +{ + T now; + now.hours = 1; + now.day = 2; + now.month = 3; + now.year = 4; + return now; +} + +__attribute__((const)) T f (void) +{ + T virk = g (); + return virk; +} + +int main () +{ + if (f ().hours != 1 || f ().day != 2 || f ().month != 3 || f ().year != 4) +__builtin_abort (); + return 0; +}
[gcc r14-10678] tree-optimization/116258 - do not lower PAREN_EXPR of vectors
https://gcc.gnu.org/g:05db1bea8c1d61d8d9cdb8ede5e305766869d136 commit r14-10678-g05db1bea8c1d61d8d9cdb8ede5e305766869d136 Author: Richard Biener Date: Wed Aug 7 13:54:53 2024 +0200 tree-optimization/116258 - do not lower PAREN_EXPR of vectors The following avoids lowering of PAREN_EXPR of vectors as unsupported to scalars. Instead PAREN_EXPR is like a plain move or a VIEW_CONVERT. PR tree-optimization/116258 * tree-vect-generic.cc (expand_vector_operations_1): Do not lower PAREN_EXPR. * gcc.target/i386/pr116258.c: New testcase. (cherry picked from commit 5b97d1a2102dca57918947d7e40a6ca68871) Diff: --- gcc/testsuite/gcc.target/i386/pr116258.c | 14 ++ gcc/tree-vect-generic.cc | 9 +++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr116258.c b/gcc/testsuite/gcc.target/i386/pr116258.c new file mode 100644 index ..bd7d3a97b2c8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116258.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ + +#define vect16 __attribute__((vector_size(16))) +#define h(a) __builtin_assoc_barrier((a)) + + vect16 float f( vect16 float x, vect16 float vconstants0) +{ + vect16 float t = (x * (vconstants0[0])); + return (x + h(t)); +} + +/* { dg-final { scan-assembler-times "shufps" 1 } } */ +/* { dg-final { scan-assembler-not "unpck" } } */ diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index ab640096ca26..86d273923bb5 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -2190,10 +2190,15 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi, } } + /* Plain moves do not need lowering. */ + if (code == SSA_NAME + || code == VIEW_CONVERT_EXPR + || code == PAREN_EXPR) +return; + if (CONVERT_EXPR_CODE_P (code) || code == FLOAT_EXPR - || code == FIX_TRUNC_EXPR - || code == VIEW_CONVERT_EXPR) + || code == FIX_TRUNC_EXPR) return; /* The signedness is determined from input argument. */
[gcc r14-10683] fold: Fix `a * 1j` if a has side effects [PR116454]
https://gcc.gnu.org/g:12c00048d9f3598e57b98ec7723f7356bd255d04 commit r14-10683-g12c00048d9f3598e57b98ec7723f7356bd255d04 Author: Andrew Pinski Date: Wed Aug 21 17:41:38 2024 -0700 fold: Fix `a * 1j` if a has side effects [PR116454] The problem here was a missing save_expr around arg0 since it is used twice, once in REALPART_EXPR and once in IMAGPART_EXPR. This adds the save_expr and reformats the code slightly so it is a little easier to understand. It excludes the case when arg0 is a COMPLEX_EXPR since in that case we'll end up with the distinct real and imaginary parts. This is important to retain early optimization in some testcases. Bootstrapped and tested on x86_64-linux-gnu with no regressions. PR middle-end/116454 gcc/ChangeLog: * fold-const.cc (fold_binary_loc): Fix `a * +-1i` by wrapping arg0 with save_expr when it is not COMPLEX_EXPR. gcc/testsuite/ChangeLog: * gcc.dg/torture/pr116454-1.c: New test. * gcc.dg/torture/pr116454-2.c: New test. Signed-off-by: Andrew Pinski Co-Authored-By: Richard Biener (cherry picked from commit b07f8a301158e53717b8688cc8ea430b6f02574c) Diff: --- gcc/fold-const.cc | 32 +-- gcc/testsuite/gcc.dg/torture/pr116454-1.c | 16 gcc/testsuite/gcc.dg/torture/pr116454-2.c | 12 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 644a39456890..869f6363560d 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -12081,17 +12081,29 @@ fold_binary_loc (location_t loc, enum tree_code code, tree type, { tree rtype = TREE_TYPE (TREE_TYPE (arg0)); if (real_onep (TREE_IMAGPART (arg1))) - return - fold_build2_loc (loc, COMPLEX_EXPR, type, - negate_expr (fold_build1_loc (loc, IMAGPART_EXPR, -rtype, arg0)), - fold_build1_loc (loc, REALPART_EXPR, rtype, arg0)); + { + if (TREE_CODE (arg0) != COMPLEX_EXPR) + arg0 = save_expr (arg0); + tree iarg0 = fold_build1_loc (loc, IMAGPART_EXPR, + rtype, arg0); + tree rarg0 = fold_build1_loc (loc, REALPART_EXPR, + rtype, arg0); + return fold_build2_loc 
(loc, COMPLEX_EXPR, type, + negate_expr (iarg0), + rarg0); + } else if (real_minus_onep (TREE_IMAGPART (arg1))) - return - fold_build2_loc (loc, COMPLEX_EXPR, type, - fold_build1_loc (loc, IMAGPART_EXPR, rtype, arg0), - negate_expr (fold_build1_loc (loc, REALPART_EXPR, -rtype, arg0))); + { + if (TREE_CODE (arg0) != COMPLEX_EXPR) + arg0 = save_expr (arg0); + tree iarg0 = fold_build1_loc (loc, IMAGPART_EXPR, + rtype, arg0); + tree rarg0 = fold_build1_loc (loc, REALPART_EXPR, + rtype, arg0); + return fold_build2_loc (loc, COMPLEX_EXPR, type, + iarg0, + negate_expr (rarg0)); + } } /* Optimize z * conj(z) for floating point complex numbers. diff --git a/gcc/testsuite/gcc.dg/torture/pr116454-1.c b/gcc/testsuite/gcc.dg/torture/pr116454-1.c new file mode 100644 index ..6210dcce4a42 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116454-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ffast-math" } */ + +static int t = 0; +_Complex float f() +{ +t++; +return 0; +} +int main() { + t = 0; + /* Would cause f() to be incorrectly invoked twice. */ + f() * 1j; + if (t != 1) + __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.dg/torture/pr116454-2.c b/gcc/testsuite/gcc.dg/torture/pr116454-2.c new file mode 100644 index ..a1e1604e6169 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116454-2.c @@ -0,0 +1,12 @@ +/* { dg-do run } */ +/* { dg-additional-options "-ffast-math" } */ +_Complex float arr[2]; + +int main() { + _Complex float *ptr; + ptr = arr; + *++ptr * 1j; + /* ptr should only increment once, not twice. */ + if (ptr != arr + 1) +__builtin_abort (); +}
[gcc r14-10681] tree-optimization/116274 - overzealous SLP vectorization
https://gcc.gnu.org/g:d5d4f3bae5a9478dc2189e53da933175a6d7b197 commit r14-10681-gd5d4f3bae5a9478dc2189e53da933175a6d7b197 Author: Richard Biener Date: Thu Aug 8 11:36:43 2024 +0200 tree-optimization/116274 - overzealous SLP vectorization The following tries to address that the vectorizer fails to have precise knowledge of argument and return calling conventions and views some accesses as loads and stores that are not. This is mainly important when doing basic-block vectorization as otherwise loop indexing would force such arguments to memory. On x86 the reduction in the number of apparent loads and stores often dominates cost analysis so the following tries to mitigate this aggressively by adjusting only the scalar load and store cost, reducing them to the cost of a simple scalar statement, but not touching the vector access cost which would be much harder to estimate. Thereby we err on the side of not performing basic-block vectorization. PR tree-optimization/116274 * tree-vect-slp.cc (vect_bb_slp_scalar_cost): Cost scalar loads and stores as simple scalar stmts when they access a non-global, not address-taken variable that doesn't have BLKmode assigned. * gcc.target/i386/pr116274-2.c: New testcase. 
(cherry picked from commit b8ea13ebf1211714503fd72f25c04376483bfa53) Diff: --- gcc/testsuite/gcc.target/i386/pr116274-2.c | 9 + gcc/tree-vect-slp.cc | 12 +++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/i386/pr116274-2.c b/gcc/testsuite/gcc.target/i386/pr116274-2.c new file mode 100644 index ..d5811344b935 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116274-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-slp2-optimized" } */ + +struct a { long x,y; }; +long test(struct a a) { return a.x+a.y; } + +/* { dg-final { scan-tree-dump-not "basic block part vectorized" "slp2" } } */ +/* { dg-final { scan-assembler-times "addl|leaq" 1 } } */ +/* { dg-final { scan-assembler-not "padd" } } */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 0795605ec527..d0635b7a146c 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -7102,7 +7102,17 @@ next_lane: vect_cost_for_stmt kind; if (STMT_VINFO_DATA_REF (orig_stmt_info)) { - if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info))) + data_reference_p dr = STMT_VINFO_DATA_REF (orig_stmt_info); + tree base = get_base_address (DR_REF (dr)); + /* When the scalar access is to a non-global not address-taken +decl that is not BLKmode assume we can access it with a single +non-load/store instruction. */ + if (DECL_P (base) + && !is_global_var (base) + && !TREE_ADDRESSABLE (base) + && DECL_MODE (base) != BLKmode) + kind = scalar_stmt; + else if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info))) kind = scalar_load; else kind = scalar_store;
[gcc r14-10682] tree-optimization/116380 - bogus SSA update with loop distribution
https://gcc.gnu.org/g:8f8a7e1f1904e389e736c0265b4f515f3ce13659 commit r14-10682-g8f8a7e1f1904e389e736c0265b4f515f3ce13659 Author: Richard Biener Date: Wed Aug 21 13:56:40 2024 +0200 tree-optimization/116380 - bogus SSA update with loop distribution When updating LC PHIs after copying loops we have to handle defs defined outside of the loop appropriately (by not setting them to NULL ...). This mimics how we handle this in the SSA updating code of the vectorizer. PR tree-optimization/116380 * tree-loop-distribution.cc (copy_loop_before): Handle out-of-loop defs appropriately. * gcc.dg/torture/pr116380.c: New testcase. (cherry picked from commit af0d2d95a5f767d92bd64f959679fb4612247b0b) Diff: --- gcc/testsuite/gcc.dg/torture/pr116380.c | 16 gcc/tree-loop-distribution.cc | 3 +++ 2 files changed, 19 insertions(+) diff --git a/gcc/testsuite/gcc.dg/torture/pr116380.c b/gcc/testsuite/gcc.dg/torture/pr116380.c new file mode 100644 index ..5ffd99459d26 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116380.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-fno-tree-scev-cprop" } */ + +int a[3], d[3], c; +int f(int e, int b) +{ + for (; e < 3; e++) +{ + a[0] = 0; + if (b) + c = b; + d[e] = 0; + a[e] = 0; +} + return e; +} diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index c5a05ee151df..cb804ba48ffe 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -980,6 +980,9 @@ copy_loop_before (class loop *loop, bool redirect_lc_phi_defs) if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME) { tree new_def = get_current_def (USE_FROM_PTR (use_p)); + if (!new_def) + /* Something defined outside of the loop. */ + continue; SET_USE (use_p, new_def); } }
[gcc r14-10684] tree-optimization/116610 - wrong SLP induction bias for mask peeling
https://gcc.gnu.org/g:cacc976a71027e7da8e3438b60da76ecdf990d38 commit r14-10684-gcacc976a71027e7da8e3438b60da76ecdf990d38 Author: Richard Biener Date: Thu Sep 5 11:18:57 2024 +0200 tree-optimization/116610 - wrong SLP induction bias for mask peeling The following fixes a mistake when applying the bias for peeling via masking to the initial value of SLP inductions. This resolves gcc.target/aarch64/sve/peel_ind_1.c (a scan-assembler only unfortunately) when forcing single-lane SLP for it. PR tree-optimization/116610 * tree-vect-loop.cc (vectorizable_induction): Use MINUS_EXPR to apply a mask peeling adjustment. (cherry picked from commit 6a1a856ba78589f7f5285b00ecd40ba2bbeef8b0) Diff: --- gcc/tree-vect-loop.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index acc6b75fb170..dcd61292caf1 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -10228,7 +10228,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, vec_steps.safe_push (vec_step); tree step_mul = gimple_build_vector (&init_stmts, &mul_elts); if (peel_mul) - step_mul = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, + step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype, step_mul, peel_mul); if (!init_node) vec_init = gimple_build_vector (&init_stmts, &init_elts);
[gcc r14-10685] middle-end/115426 - wrong gimplification of "rm" asm output operand
https://gcc.gnu.org/g:5b264a4b95b8f27c3c73892892d5c2030d3c8ea7 commit r14-10685-g5b264a4b95b8f27c3c73892892d5c2030d3c8ea7 Author: Richard Biener Date: Tue Jun 11 13:11:08 2024 +0200 middle-end/115426 - wrong gimplification of "rm" asm output operand When the operand is gimplified to an extract of a register or a register we have to disallow memory as we otherwise fail to gimplify it properly. Instead of __asm__("" : "=rm" __imag <r>); we want __asm__("" : "=rm" D.2772); _1 = REALPART_EXPR <r>; r = COMPLEX_EXPR <_1, D.2772>; otherwise SSA rewrite will fail and generate wrong code with 'r' left bare in the asm output. PR middle-end/115426 * gimplify.cc (gimplify_asm_expr): Handle "rm" output constraint gimplified to a register (operation). * gcc.dg/pr115426.c: New testcase. (cherry picked from commit a4bbdec2be1c9f8fb49276b8a54ee86024ceac17) Diff: --- gcc/gimplify.cc | 8 gcc/testsuite/gcc.dg/pr115426.c | 14 ++ 2 files changed, 22 insertions(+) diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 5753eb90ff5d..401b663591f6 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -7035,6 +7035,14 @@ gimplify_asm_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p) ret = tret; } + /* If the gimplified operand is a register we do not allow memory. */ + if (allows_reg + && allows_mem + && (is_gimple_reg (TREE_VALUE (link)) + || (handled_component_p (TREE_VALUE (link)) + && is_gimple_reg (TREE_OPERAND (TREE_VALUE (link), 0))))) + allows_mem = 0; + /* If the constraint does not allow memory make sure we gimplify it to a register if it is not already but its base is. This happens for complex and vector components. 
*/ diff --git a/gcc/testsuite/gcc.dg/pr115426.c b/gcc/testsuite/gcc.dg/pr115426.c new file mode 100644 index ..02bfc3f21fa7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr115426.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-std=gnu11" } */ + +_Complex int fcs (_Complex int r) +{ + __asm__("" : "=rm" (__imag__ r)); + return r; +} + +_Complex int fcs2 (_Complex int r) +{ + __asm__("" : "=m" (__imag__ r)); + return r; +}
[gcc r14-10686] Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned.
https://gcc.gnu.org/g:a308afb455d648d54430355cb07345a338501348 commit r14-10686-ga308afb455d648d54430355cb07345a338501348 Author: Joern Rennecke Date: Wed Aug 7 02:48:45 2024 +0100 Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned. gcc/testsuite/ * g++.dg/vect/pr115278.cc: Make cast's type agree with assignment destination WRITE. (cherry picked from commit b844775283a620b8826adf734ecfc97d820c3611) Diff: --- gcc/testsuite/g++.dg/vect/pr115278.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/vect/pr115278.cc b/gcc/testsuite/g++.dg/vect/pr115278.cc index 331075fb2781..df521e43a97c 100644 --- a/gcc/testsuite/g++.dg/vect/pr115278.cc +++ b/gcc/testsuite/g++.dg/vect/pr115278.cc @@ -21,7 +21,7 @@ union BitfieldStructUnion { BitfieldStructUnion(uint32_t value_low, uint32_t value_high) : value_low(value_low), value_high(value_high) {} }; -volatile uint32_t *WRITE = (volatile unsigned*)0x42; +volatile uint32_t *WRITE = (volatile uint32_t *)0x42; void buggy() { for (int i = 0; i < runs; i++) {
[gcc r13-9044] doc: Add more alias option and reorder Intel CPU -march documentation
https://gcc.gnu.org/g:de1d625849a7760da5d5a3a08601d8ac890c6100 commit r13-9044-gde1d625849a7760da5d5a3a08601d8ac890c6100 Author: Haochen Jiang Date: Wed Sep 18 11:20:15 2024 +0800 doc: Add more alias option and reorder Intel CPU -march documentation This patch is backported from GCC15 with some tweaks. Since r15-3539, there are requests coming in to add other alias option documentation. This patch will add all of them, including corei7, corei7-avx, core-avx-i, core-avx2, atom, slm and emeraldrapids. Also in the patch, I reordered that part of documentation, currently all the CPUs/products are just all over the place. I regrouped them by date-to-now products (since the very first CPU to latest Panther Lake), P-core (since the clients become hybrid cores, starting from Sapphire Rapids) and E-core (since Bonnell to latest Clearwater Forest). In GCC14 and earlier GCC, Xeon Phi CPUs are still there, I put them after E-core CPUs. And in the patch, I refined the product names in documentation. gcc/ChangeLog: * doc/invoke.texi: Add corei7, corei7-avx, core-avx-i, core-avx2, atom, slm and emeraldrapids. Reorder the -march documentation by splitting them into date-to-now products, P-core, E-core and Xeon Phi. Refine the product names in documentation. Diff: --- gcc/doc/invoke.texi | 203 +++- 1 file changed, 105 insertions(+), 98 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0f665ed6779a..28a3d0ae291b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -32372,6 +32372,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, CX16, SAHF and FXSR instruction set support. @item nehalem +@itemx corei7 Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support. @@ -32380,17 +32381,20 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support. 
@item sandybridge +@itemx corei7-avx Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set support. @item ivybridge +@itemx core-avx-i Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND and F16C instruction set support. @item haswell -Intel Haswell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +@itemx core-avx2 +Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support. @@ -32406,65 +32410,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support. -@item bonnell -Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 -instruction set support. - -@item silvermont -Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND -instruction set support. - -@item goldmont -Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction -set support. - -@item goldmont-plus -Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, -SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, -RDPID and SGX instruction set support. 
- -@item tremont -Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, -SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set -support. - -@item sierraforest -Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, -XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set -support. - -@item grandridge -Intel Grand Ridge CPU with 64-bit
[gcc(refs/users/aoliva/heads/testme)] fold truth-and only in ifcombine
https://gcc.gnu.org/g:15a55a94711d51d95fb6b5ba763903d75e85324e commit 15a55a94711d51d95fb6b5ba763903d75e85324e Author: Alexandre Oliva Date: Tue Sep 17 20:15:35 2024 -0300 fold truth-and only in ifcombine Diff: --- gcc/gimple-fold.cc| 2 ++ gcc/tree-ssa-ifcombine.cc | 24 +--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 85a0ec028030..5b7d83edbea9 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -8738,12 +8738,14 @@ maybe_fold_and_comparisons (tree type, op2b, outer_cond_bb)) return t; +#if 0 if (tree t = fold_truth_andor_maybe_separate (UNKNOWN_LOCATION, TRUTH_ANDIF_EXPR, type, code2, op2a, op2b, code1, op1a, op1b, NULL)) return t; +#endif return NULL_TREE; } diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 79a4bdd363b9..61480e5fa894 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -399,6 +399,14 @@ update_profile_after_ifcombine (basic_block inner_cond_bb, outer2->probability = profile_probability::never (); } +/* FIXME: move to a header file. */ +extern tree +fold_truth_andor_maybe_separate (location_t loc, +enum tree_code code, tree truth_type, +enum tree_code lcode, tree ll_arg, tree lr_arg, +enum tree_code rcode, tree rl_arg, tree rr_arg, +tree *separatep); + /* If-convert on a and pattern with a common else block. The inner if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. 
inner_inv, outer_inv and result_inv indicate whether the conditions @@ -576,7 +584,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, else if (TREE_CODE_CLASS (gimple_cond_code (inner_cond)) == tcc_comparison && TREE_CODE_CLASS (gimple_cond_code (outer_cond)) == tcc_comparison) { - tree t; + tree t, ts = NULL_TREE; enum tree_code inner_cond_code = gimple_cond_code (inner_cond); enum tree_code outer_cond_code = gimple_cond_code (outer_cond); @@ -599,7 +607,17 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, outer_cond_code, gimple_cond_lhs (outer_cond), gimple_cond_rhs (outer_cond), - gimple_bb (outer_cond + gimple_bb (outer_cond))) + && !(t = ts = (fold_truth_andor_maybe_separate +(UNKNOWN_LOCATION, TRUTH_ANDIF_EXPR, + boolean_type_node, + outer_cond_code, + gimple_cond_lhs (outer_cond), + gimple_cond_rhs (outer_cond), + inner_cond_code, + gimple_cond_lhs (inner_cond), + gimple_cond_rhs (inner_cond), + NULL { { tree t1, t2; @@ -636,7 +654,7 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, NULL, true, GSI_SAME_STMT); } /* ??? Fold should avoid this. */ - else if (!is_gimple_condexpr_for_cond (t)) + else if (ts && !is_gimple_condexpr_for_cond (t)) goto gimplify_after_fold; if (result_inv) t = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (t), t);
[gcc r15-3683] match.pd: Check trunc_mod vector optab before folding.
https://gcc.gnu.org/g:6f3b6a451771cd54c98768e7db3c5d58aab2b6aa commit r15-3683-g6f3b6a451771cd54c98768e7db3c5d58aab2b6aa Author: Jennifer Schmitz Date: Thu Sep 5 08:10:02 2024 -0700 match.pd: Check trunc_mod vector optab before folding. In the pattern X - (X / Y) * Y to X % Y, this patch guards the simplification for vector types by a check for: 1) Support of the mod optab for vectors OR 2) Application before vector lowering for non-VL vectors. This is to prevent reverting vectorization of modulo to div/mult/sub if the target does not support vector mod optab. The patch was bootstrapped and tested with no regression on aarch64-linux-gnu and x86_64-linux-gnu. OK for mainline? Signed-off-by: Jennifer Schmitz gcc/ PR tree-optimization/116569 * match.pd: Guard simplification to trunc_mod with check for mod optab support. gcc/testsuite/ PR tree-optimization/116569 * gcc.dg/torture/pr116569.c: New test. Diff: --- gcc/match.pd| 7 ++- gcc/testsuite/gcc.dg/torture/pr116569.c | 18 ++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 5566c0e4c41c..4aa610e22708 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -894,7 +894,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* X - (X / Y) * Y is the same as X % Y. */ (simplify (minus (convert1? @0) (convert2? 
(mult:c (trunc_div @@0 @@1) @1))) - (if (INTEGRAL_TYPE_P (type) || VECTOR_INTEGER_TYPE_P (type)) + (if (INTEGRAL_TYPE_P (type) + || (VECTOR_INTEGER_TYPE_P (type) + && ((optimize_vectors_before_lowering_p () + && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) + || target_supports_op_p (type, TRUNC_MOD_EXPR, + optab_vector (convert (trunc_mod @0 @1 /* x * (1 + y / x) - y -> x - y % x */ diff --git a/gcc/testsuite/gcc.dg/torture/pr116569.c b/gcc/testsuite/gcc.dg/torture/pr116569.c new file mode 100644 index ..b74c749721bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116569.c @@ -0,0 +1,18 @@ +/* { dg-additional-options "-mcpu=neoverse-v2" { target aarch64*-*-* } } */ +int a; +short b, c, e; +long d, f; +long g (long h) +{ + if (h) +return h; + return d; +} +void i (int h[][0][0][0]) +{ + for (short j; j; j += 3) +{ + a = g(h[1][2] ? 0 : h[1][1][1][1]); + b = e ?: f % c; +} +}
[gcc r15-3685] c++: -Wdangling-reference diagnostic
https://gcc.gnu.org/g:5c8f9f4d4cebabf85e68c5bdbe2d4ee6646edc7c commit r15-3685-g5c8f9f4d4cebabf85e68c5bdbe2d4ee6646edc7c Author: Jason Merrill Date: Mon Sep 16 13:29:05 2024 +0200 c++: -Wdangling-reference diagnostic The -Wdangling-reference diagnostic talks about the full-expression, but prints one call, while the full-expression in a declaration is the entire initialization. It seems more useful to point out the temporary that the compiler thinks we might be getting a dangling reference to. gcc/cp/ChangeLog: * call.cc (do_warn_dangling_reference): Return temporary instead of the call it's passed to. (maybe_warn_dangling_reference): Adjust diagnostic. gcc/testsuite/ChangeLog: * g++.dg/warn/Wdangling-reference1.C: Adjust diagnostic. Diff: --- gcc/cp/call.cc | 23 +++ gcc/testsuite/g++.dg/warn/Wdangling-reference1.C | 2 +- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 1ecf3aac7051..3f753e2d2f98 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -14253,19 +14253,18 @@ reference_like_class_p (tree ctype) return false; } -/* Helper for maybe_warn_dangling_reference to find a problematic CALL_EXPR - that initializes the LHS (and at least one of its arguments represents - a temporary, as outlined in maybe_warn_dangling_reference), or NULL_TREE +/* Helper for maybe_warn_dangling_reference to find a problematic temporary + in EXPR (as outlined in maybe_warn_dangling_reference), or NULL_TREE if none found. For instance: - const S& s = S().self(); // S::self (&TARGET_EXPR <...>) - const int& r = (42, f(1)); // f(1) - const int& t = b ? f(1) : f(2); // f(1) - const int& u = b ? f(1) : f(g); // f(1) - const int& v = b ? f(g) : f(2); // f(2) + const S& s = S().self(); // S() + const int& r = (42, f(1)); // temporary for passing 1 to f + const int& t = b ? f(1) : f(2); // temporary for 1 + const int& u = b ? f(1) : f(g); // temporary for 1 + const int& v = b ? f(g) : f(2); // temporary for 2 const int& w = b ? 
f(g) : f(g); // NULL_TREE const int& y = (f(1), 42); // NULL_TREE - const int& z = f(f(1)); // f(f(1)) + const int& z = f(f(1)); // temporary for 1 EXPR is the initializer. If ARG_P is true, we're processing an argument to a function; the point is to distinguish between, for example, @@ -14365,7 +14364,7 @@ do_warn_dangling_reference (tree expr, bool arg_p) && !reference_related_p (TREE_TYPE (rettype), TREE_TYPE (arg))) continue; - return expr; + return arg; } /* Don't warn about member functions like: std::any a(...); @@ -14438,8 +14437,8 @@ maybe_warn_dangling_reference (const_tree decl, tree init) auto_diagnostic_group d; if (warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wdangling_reference, "possibly dangling reference to a temporary")) - inform (EXPR_LOCATION (call), "the temporary was destroyed at " - "the end of the full expression %qE", call); + inform (EXPR_LOCATION (call), "%qT temporary created here", + TREE_TYPE (call)); } } diff --git a/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C b/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C index a184317dd5c3..5e60a4158367 100644 --- a/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C +++ b/gcc/testsuite/g++.dg/warn/Wdangling-reference1.C @@ -117,7 +117,7 @@ const B& b10 = lox (H{}); // { dg-warning "dangling reference" } struct S { const int &r; // { dg-warning "dangling reference" } - S() : r(f(10)) { } // { dg-message "destroyed" } + S() : r(f(10)) { } // { dg-message "created" } }; // From cppreference.
[gcc r15-3684] c++: -Wdangling-reference and empty class [PR115361]
https://gcc.gnu.org/g:8733d5d3873977d6ca82d71b28728650f988e9c8 commit r15-3684-g8733d5d3873977d6ca82d71b28728650f988e9c8 Author: Jason Merrill Date: Sun Sep 15 13:50:04 2024 +0200 c++: -Wdangling-reference and empty class [PR115361] We can't have a dangling reference to an empty class unless it's specifically to that class or one of its bases. This was giving a false positive on the _ExtractKey pattern in libstdc++ hashtable.h. This also adjusts the order of arguments to reference_related_p, which is relevant for empty classes (unlike scalars). Several of the classes in the testsuite needed to gain data members to continue to warn. PR c++/115361 gcc/cp/ChangeLog: * call.cc (do_warn_dangling_reference): Check is_empty_class. gcc/testsuite/ChangeLog: * g++.dg/ext/attr-no-dangling6.C * g++.dg/ext/attr-no-dangling7.C * g++.dg/ext/attr-no-dangling8.C * g++.dg/ext/attr-no-dangling9.C * g++.dg/warn/Wdangling-reference1.C * g++.dg/warn/Wdangling-reference2.C * g++.dg/warn/Wdangling-reference3.C: Make classes non-empty. * g++.dg/warn/Wdangling-reference23.C: New test. Diff: --- gcc/cp/call.cc| 12 +++- gcc/testsuite/g++.dg/ext/attr-no-dangling6.C | 6 +++--- gcc/testsuite/g++.dg/ext/attr-no-dangling7.C | 6 +++--- gcc/testsuite/g++.dg/ext/attr-no-dangling8.C | 2 ++ gcc/testsuite/g++.dg/ext/attr-no-dangling9.C | 1 + gcc/testsuite/g++.dg/warn/Wdangling-reference1.C | 1 + gcc/testsuite/g++.dg/warn/Wdangling-reference2.C | 2 +- gcc/testsuite/g++.dg/warn/Wdangling-reference23.C | 14 ++ gcc/testsuite/g++.dg/warn/Wdangling-reference3.C | 1 + 9 files changed, 33 insertions(+), 12 deletions(-) diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 664088eed9c7..1ecf3aac7051 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -14356,12 +14356,14 @@ do_warn_dangling_reference (tree expr, bool arg_p) if ((arg = do_warn_dangling_reference (arg, /*arg_p=*/true))) { /* If we know the temporary could not bind to the return type, - don't warn. 
This is for scalars only because for classes - we can't be sure we are not returning its sub-object. */ - if (SCALAR_TYPE_P (TREE_TYPE (arg)) + don't warn. This is for scalars and empty classes only + because for other classes we can't be sure we are not + returning its sub-object. */ + if ((SCALAR_TYPE_P (TREE_TYPE (arg)) +|| is_empty_class (TREE_TYPE (arg))) && TYPE_REF_P (rettype) - && !reference_related_p (TREE_TYPE (arg), -TREE_TYPE (rettype))) + && !reference_related_p (TREE_TYPE (rettype), +TREE_TYPE (arg))) continue; return expr; } diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C b/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C index 5b349e8e6827..1fc426d20d3d 100644 --- a/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C +++ b/gcc/testsuite/g++.dg/ext/attr-no-dangling6.C @@ -2,9 +2,9 @@ // { dg-do compile { target c++20 } } // { dg-options "-Wdangling-reference" } -class X { }; -const X x1; -const X x2; +class X { int i; }; +const X x1 {}; +const X x2 {}; constexpr bool val () { return true; } struct ST { static constexpr bool value = true; }; diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C b/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C index a5fb809e6bdb..04c6badf0b6f 100644 --- a/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C +++ b/gcc/testsuite/g++.dg/ext/attr-no-dangling7.C @@ -2,9 +2,9 @@ // { dg-do compile { target c++20 } } // { dg-options "-Wdangling-reference" } -class X { }; -const X x1; -const X x2; +class X { int i; }; +const X x1 {}; +const X x2 {}; template [[gnu::no_dangling(N)]] const X& get(const int& i); // { dg-error "parameter packs not expanded" } diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C b/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C index 8208d751a4bb..aa196315a38a 100644 --- a/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C +++ b/gcc/testsuite/g++.dg/ext/attr-no-dangling8.C @@ -8,6 +8,7 @@ template constexpr bool is_reference_v = true; template struct [[gnu::no_dangling(is_reference_v)]] S { + int i; int &foo 
(const int &); }; @@ -15,6 +16,7 @@ template struct X { template struct [[gnu::no_dangling(is_reference_v && is_reference_v)]] Y { +int i; int &foo (const int &); }; }; diff --git a/gcc/testsuite/g++.dg/ext/attr-no-dangling9.C b/gcc/testsuite/g++.dg/ext/attr-no-dangling9.C index 65b4f7145a92..d7fd897de539 100644 --- a/gcc/t
[gcc r13-9033] tree-optimization/115197 - fix ICE w/ constant in LC PHI and loop distribution
https://gcc.gnu.org/g:062168c8bd4dbca659a5c6cc581f40e409f7d2ad commit r13-9033-g062168c8bd4dbca659a5c6cc581f40e409f7d2ad Author: Richard Biener Date: Thu May 23 14:36:39 2024 +0200 tree-optimization/115197 - fix ICE w/ constant in LC PHI and loop distribution Forgot a check for an SSA name before trying to replace a PHI arg with its current definition. PR tree-optimization/115197 * tree-loop-distribution.cc (copy_loop_before): Constant PHI args remain the same. * gcc.dg/pr115197.c: New testcase. (cherry picked from commit 2b2476d4d18c92b8aba3567ebccd2100c2f7c258) Diff: --- gcc/testsuite/gcc.dg/pr115197.c | 14 ++ gcc/tree-loop-distribution.cc | 7 +-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.dg/pr115197.c b/gcc/testsuite/gcc.dg/pr115197.c new file mode 100644 index ..00d674b3bd9a --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr115197.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fno-tree-scev-cprop -ftree-pre -ftree-loop-distribute-patterns" } */ + +int a, b[2], c, d, e, f[2]; +int main() { + while (a) +if (d) { + if (e) +return 0; + for (; c; c++) +f[c] = 0 < (b[c] = ~(f[c + 1] < a)); +} + return 0; +} diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index 3d92d1c73b5f..907610d56704 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -963,8 +963,11 @@ copy_loop_before (class loop *loop, bool redirect_lc_phi_defs) if (virtual_operand_p (gimple_phi_result (phi))) continue; use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit); - tree new_def = get_current_def (USE_FROM_PTR (use_p)); - SET_USE (use_p, new_def); + if (TREE_CODE (USE_FROM_PTR (use_p)) == SSA_NAME) + { + tree new_def = get_current_def (USE_FROM_PTR (use_p)); + SET_USE (use_p, new_def); + } } }
[gcc r13-9034] tree-optimization/115278 - fix DSE in if-conversion wrt volatiles
https://gcc.gnu.org/g:22c9080c88cd133e048cd9dcacd2fa13d8fd267f commit r13-9034-g22c9080c88cd133e048cd9dcacd2fa13d8fd267f Author: Richard Biener Date: Fri May 31 10:14:25 2024 +0200 tree-optimization/115278 - fix DSE in if-conversion wrt volatiles The following adds the missing guard for volatile stores to the embedded DSE in the loop if-conversion pass. PR tree-optimization/115278 * tree-if-conv.cc (ifcvt_local_dce): Do not DSE volatile stores. * g++.dg/vect/pr115278.cc: New testcase. (cherry picked from commit 65dbe0ab7cdaf2aa84b09a74e594f0faacf1945c) Diff: --- gcc/testsuite/g++.dg/vect/pr115278.cc | 38 +++ gcc/tree-if-conv.cc | 4 +++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/vect/pr115278.cc b/gcc/testsuite/g++.dg/vect/pr115278.cc new file mode 100644 index ..331075fb2781 --- /dev/null +++ b/gcc/testsuite/g++.dg/vect/pr115278.cc @@ -0,0 +1,38 @@ +// { dg-do compile } +// { dg-require-effective-target c++11 } +// { dg-additional-options "-fdump-tree-optimized" } + +#include + +const int runs = 92; + +union BitfieldStructUnion { +struct { +uint64_t a : 17; +uint64_t padding: 39; +uint64_t b : 8; +} __attribute__((packed)); + +struct { +uint32_t value_low; +uint32_t value_high; +} __attribute__((packed)); + +BitfieldStructUnion(uint32_t value_low, uint32_t value_high) : value_low(value_low), value_high(value_high) {} +}; + +volatile uint32_t *WRITE = (volatile unsigned*)0x42; + +void buggy() { +for (int i = 0; i < runs; i++) { +BitfieldStructUnion rt{*WRITE, *WRITE}; + +rt.a = 99; +rt.b = 1; + +*WRITE = rt.value_low; +*WRITE = rt.value_high; +} +} + +// { dg-final { scan-tree-dump-times "\\\*WRITE\[^\r\n\]* ={v} " 2 "optimized" } } diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index b1dbb8706ed6..71f5d98c2129 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -3194,7 +3194,9 @@ ifcvt_local_dce (class loop *loop) gimple_stmt_iterator gsiprev = gsi; gsi_prev (&gsiprev); stmt = gsi_stmt (gsi); - if 
(gimple_store_p (stmt) && gimple_vdef (stmt)) + if (!gimple_has_volatile_ops (stmt) + && gimple_store_p (stmt) + && gimple_vdef (stmt)) { tree lhs = gimple_get_lhs (stmt); ao_ref write;
[gcc r13-9035] Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned.
https://gcc.gnu.org/g:c4525852c20f10c2027dd8aa4ed9f28df1a9a4be commit r13-9035-gc4525852c20f10c2027dd8aa4ed9f28df1a9a4be Author: Joern Rennecke Date: Wed Aug 7 02:48:45 2024 +0100 Fix vect/pr115278.cc for targets where uint32_t is distinct from unsigned. gcc/testsuite/ * g++.dg/vect/pr115278.cc: Make cast's type agree with assignment destination WRITE. (cherry picked from commit b844775283a620b8826adf734ecfc97d820c3611) Diff: --- gcc/testsuite/g++.dg/vect/pr115278.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/vect/pr115278.cc b/gcc/testsuite/g++.dg/vect/pr115278.cc index 331075fb2781..df521e43a97c 100644 --- a/gcc/testsuite/g++.dg/vect/pr115278.cc +++ b/gcc/testsuite/g++.dg/vect/pr115278.cc @@ -21,7 +21,7 @@ union BitfieldStructUnion { BitfieldStructUnion(uint32_t value_low, uint32_t value_high) : value_low(value_low), value_high(value_high) {} }; -volatile uint32_t *WRITE = (volatile unsigned*)0x42; +volatile uint32_t *WRITE = (volatile uint32_t *)0x42; void buggy() { for (int i = 0; i < runs; i++) {
[gcc r13-9037] tree-optimization/115669 - fix SLP reduction association
https://gcc.gnu.org/g:e630a20d8367eb2e1929edcaaa03ffe2951b0851 commit r13-9037-ge630a20d8367eb2e1929edcaaa03ffe2951b0851 Author: Richard Biener Date: Thu Jun 27 11:26:08 2024 +0200 tree-optimization/115669 - fix SLP reduction association The following avoids associating a reduction path as that might get STMT_VINFO_REDUC_IDX out-of-sync with the SLP operand order. This is a latent issue with SLP reductions but now easily exposed as we're doing single-lane SLP reductions. Once we have achieved SLP-only, we can move and update this meta-data. PR tree-optimization/115669 * tree-vect-slp.cc (vect_build_slp_tree_2): Do not reassociate chains that participate in a reduction. * gcc.dg/vect/pr115669.c: New testcase. (cherry picked from commit 7886830bb45c4f5dca0496d4deae9a45204d78f5) Diff: --- gcc/testsuite/gcc.dg/vect/pr115669.c | 22 ++ gcc/tree-vect-slp.cc | 3 +++ 2 files changed, 25 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/pr115669.c b/gcc/testsuite/gcc.dg/vect/pr115669.c new file mode 100644 index ..361a17a64e68 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115669.c @@ -0,0 +1,22 @@ +/* { dg-additional-options "-fwrapv" } */ + +#include "tree-vect.h" + +int a = 10; +unsigned b; +long long c[100]; +int foo() +{ + long long *d = c; + for (short e = 0; e < a; e++) +b += ~(d ? d[e] : 0); + return b; +} + +int main() +{ + check_vect (); + if (foo () != -10) +abort (); + return 0; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index c01dc02afff6..c228087df734 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1897,6 +1897,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, else if (is_a (vinfo) /* ??? We don't handle !vect_internal_def defs below. */ && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + /* ??? Do not associate a reduction, this will wreck REDUC_IDX + mapping as long as that exists on the stmt_info level. 
*/ + && STMT_VINFO_REDUC_IDX (stmt_info) == -1 && is_gimple_assign (stmt_info->stmt) && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt)) || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR)
[gcc r13-9036] tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield
https://gcc.gnu.org/g:120f25440da533b4c16268a21feb1d864bb1a843 commit r13-9036-g120f25440da533b4c16268a21feb1d864bb1a843 Author: Richard Biener Date: Tue Jun 25 16:13:02 2024 +0200 tree-optimization/115646 - ICE with pow shrink-wrapping from bitfield The following makes analysis and transform agree on constraints. PR tree-optimization/115646 * tree-call-cdce.cc (check_pow): Check for bit_sz values as allowed by transform. * gcc.dg/pr115646.c: New testcase. (cherry picked from commit 453b1d291d1a0f89087ad91cf6b1bed1ec68eff3) Diff: --- gcc/testsuite/gcc.dg/pr115646.c | 13 + gcc/tree-call-cdce.cc | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/pr115646.c b/gcc/testsuite/gcc.dg/pr115646.c new file mode 100644 index ..24bc1e45 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr115646.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern double pow(double x, double y); + +struct S { +unsigned int a : 3, b : 8, c : 21; +}; + +void foo (struct S *p) +{ + pow (p->c, 42); +} diff --git a/gcc/tree-call-cdce.cc b/gcc/tree-call-cdce.cc index 143975dd112f..f8148093058c 100644 --- a/gcc/tree-call-cdce.cc +++ b/gcc/tree-call-cdce.cc @@ -260,7 +260,7 @@ check_pow (gcall *pow_call) /* If the type of the base is too wide, the resulting shrink wrapping condition will be too conservative. */ - if (bit_sz > MAX_BASE_INT_BIT_SIZE) + if (bit_sz != 8 && bit_sz != 16 && bit_sz != MAX_BASE_INT_BIT_SIZE) return false; return true;
[gcc r13-9039] tree-optimization/115701 - factor out maybe_duplicate_ssa_info_at_copy
https://gcc.gnu.org/g:0dee54fabeb1d7bdf74cdeee080c7e5201008d6e commit r13-9039-g0dee54fabeb1d7bdf74cdeee080c7e5201008d6e Author: Richard Biener Date: Sun Jun 30 11:28:11 2024 +0200 tree-optimization/115701 - factor out maybe_duplicate_ssa_info_at_copy The following factors out the code that preserves SSA info of the LHS of a SSA copy LHS = RHS when LHS is about to be eliminated to RHS. PR tree-optimization/115701 * tree-ssanames.h (maybe_duplicate_ssa_info_at_copy): Declare. * tree-ssanames.cc (maybe_duplicate_ssa_info_at_copy): New function, split out from ... * tree-ssa-copy.cc (fini_copy_prop): ... here. * tree-ssa-sccvn.cc (eliminate_dom_walker::eliminate_stmt): ... and here. (cherry picked from commit b5c64b413fd5bc03a1a8ef86d005892071e42cbe) Diff: --- gcc/tree-ssa-copy.cc | 32 ++-- gcc/tree-ssa-sccvn.cc | 21 ++--- gcc/tree-ssanames.cc | 28 gcc/tree-ssanames.h | 3 ++- 4 files changed, 34 insertions(+), 50 deletions(-) diff --git a/gcc/tree-ssa-copy.cc b/gcc/tree-ssa-copy.cc index 4cfa116326f3..8f6954995e0c 100644 --- a/gcc/tree-ssa-copy.cc +++ b/gcc/tree-ssa-copy.cc @@ -527,38 +527,10 @@ fini_copy_prop (void) || copy_of[i].value == var) continue; - /* In theory the points-to solution of all members of the - copy chain is their intersection. For now we do not bother -to compute this but only make sure we do not lose points-to -information completely by setting the points-to solution -of the representative to the first solution we find if -it doesn't have one already. */ + /* Duplicate points-to and range info appropriately. 
*/ if (copy_of[i].value != var && TREE_CODE (copy_of[i].value) == SSA_NAME) - { - basic_block copy_of_bb - = gimple_bb (SSA_NAME_DEF_STMT (copy_of[i].value)); - basic_block var_bb = gimple_bb (SSA_NAME_DEF_STMT (var)); - if (POINTER_TYPE_P (TREE_TYPE (var)) - && SSA_NAME_PTR_INFO (var) - && !SSA_NAME_PTR_INFO (copy_of[i].value)) - { - duplicate_ssa_name_ptr_info (copy_of[i].value, - SSA_NAME_PTR_INFO (var)); - /* Points-to information is cfg insensitive, -but [E]VRP might record context sensitive alignment -info, non-nullness, etc. So reset context sensitive -info if the two SSA_NAMEs aren't defined in the same -basic block. */ - if (var_bb != copy_of_bb) - reset_flow_sensitive_info (copy_of[i].value); - } - else if (!POINTER_TYPE_P (TREE_TYPE (var)) - && SSA_NAME_RANGE_INFO (var) - && !SSA_NAME_RANGE_INFO (copy_of[i].value) - && var_bb == copy_of_bb) - duplicate_ssa_name_range_info (copy_of[i].value, var); - } + maybe_duplicate_ssa_info_at_copy (var, copy_of[i].value); } class copy_folder copy_folder; diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index fa1d8d9214ee..55ae05dc4cd2 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -6771,27 +6771,10 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi) /* If this now constitutes a copy duplicate points-to and range info appropriately. This is especially -important for inserted code. See tree-ssa-copy.cc -for similar code. */ +important for inserted code. */ if (sprime && TREE_CODE (sprime) == SSA_NAME) - { - basic_block sprime_b = gimple_bb (SSA_NAME_DEF_STMT (sprime)); - if (POINTER_TYPE_P (TREE_TYPE (lhs)) - && SSA_NAME_PTR_INFO (lhs) - && ! SSA_NAME_PTR_INFO (sprime)) - { - duplicate_ssa_name_ptr_info (sprime, - SSA_NAME_PTR_INFO (lhs)); - if (b != sprime_b) - reset_flow_sensitive_info (sprime); - } - else if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - && SSA_NAME_RANGE_INFO (lhs) - && ! 
SSA_NAME_RANGE_INFO (sprime) - && b == sprime_b) - duplicate_ssa_name_range_info (sprime, lhs); - } + maybe_duplicate_ssa_info_at_copy (lhs, sprime); /* Inhibit the use of an inserted PHI on a loop header when the address of the memory reference is a simple induction diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc index 08aa166ef176..0181737b8a30 100644 --- a/gcc/tree-ssanames.cc +++ b/gcc/tree-ssanames.cc @@ -752,6 +752,34 @@ duplicate_ssa_name_range_info (tree name, tree src) } } +/* For a SSA copy DEST = SRC duplicate SSA info present on DEST to SRC + to preser
[gcc r13-9040] tree-optimization/115701 - fix maybe_duplicate_ssa_info_at_copy
https://gcc.gnu.org/g:29c236c57f272944b1f9fffbe248689fb86e91f4 commit r13-9040-g29c236c57f272944b1f9fffbe248689fb86e91f4 Author: Richard Biener Date: Sun Jun 30 11:34:43 2024 +0200 tree-optimization/115701 - fix maybe_duplicate_ssa_info_at_copy The following restricts copying of points-to info from defs that might be in regions invoking UB and are never executed. PR tree-optimization/115701 * tree-ssanames.cc (maybe_duplicate_ssa_info_at_copy): Only copy info from within the same BB. * gcc.dg/torture/pr115701.c: New testcase. (cherry picked from commit b77f17c5feec9614568bf2dee7f7d811465ee4a5) Diff: --- gcc/testsuite/gcc.dg/torture/pr115701.c | 22 ++ gcc/tree-ssanames.cc| 22 -- 2 files changed, 30 insertions(+), 14 deletions(-) diff --git a/gcc/testsuite/gcc.dg/torture/pr115701.c b/gcc/testsuite/gcc.dg/torture/pr115701.c new file mode 100644 index ..9b7c34b23d78 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr115701.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* IPA PTA disables local PTA recompute after IPA. */ +/* { dg-additional-options "-fipa-pta" } */ + +int a, c, d; +static int b; +int main() +{ + int *e = &a, **f = &e; + while (1) { +int **g, ***h = &f; +if (c) + *g = e; +else if (!b) + break; +*e = **g; +e = &d; + } + if (e != &a) +__builtin_abort(); + return 0; +} diff --git a/gcc/tree-ssanames.cc b/gcc/tree-ssanames.cc index 0181737b8a30..de1897a33dff 100644 --- a/gcc/tree-ssanames.cc +++ b/gcc/tree-ssanames.cc @@ -758,25 +758,19 @@ duplicate_ssa_name_range_info (tree name, tree src) void maybe_duplicate_ssa_info_at_copy (tree dest, tree src) { + /* While points-to info is flow-insensitive we have to avoid copying + info from not executed regions invoking UB to dominating defs. */ + if (gimple_bb (SSA_NAME_DEF_STMT (src)) + != gimple_bb (SSA_NAME_DEF_STMT (dest))) +return; + if (POINTER_TYPE_P (TREE_TYPE (dest)) && SSA_NAME_PTR_INFO (dest) && ! 
SSA_NAME_PTR_INFO (src)) -{ - duplicate_ssa_name_ptr_info (src, SSA_NAME_PTR_INFO (dest)); - /* Points-to information is cfg insensitive, -but VRP might record context sensitive alignment -info, non-nullness, etc. So reset context sensitive -info if the two SSA_NAMEs aren't defined in the same -basic block. */ - if (gimple_bb (SSA_NAME_DEF_STMT (src)) - != gimple_bb (SSA_NAME_DEF_STMT (dest))) - reset_flow_sensitive_info (src); -} +duplicate_ssa_name_ptr_info (src, SSA_NAME_PTR_INFO (dest)); else if (INTEGRAL_TYPE_P (TREE_TYPE (dest)) && SSA_NAME_RANGE_INFO (dest) - && ! SSA_NAME_RANGE_INFO (src) - && (gimple_bb (SSA_NAME_DEF_STMT (src)) - == gimple_bb (SSA_NAME_DEF_STMT (dest + && ! SSA_NAME_RANGE_INFO (src)) duplicate_ssa_name_range_info (src, dest); }
[gcc r13-9041] tree-optimization/115841 - reduction epilogue placement issue
https://gcc.gnu.org/g:e87c0c7f7ab1e7acd0ffbac0b15e020275f97ca8 commit r13-9041-ge87c0c7f7ab1e7acd0ffbac0b15e020275f97ca8 Author: Richard Biener Date: Tue Jul 16 11:53:17 2024 +0200 tree-optimization/115841 - reduction epilogue placement issue When emitting the compensation to the vectorized main loop for a vector reduction value to be re-used in the vectorized epilogue we fail to place it in the correct block when the main loop is known to be entered (no loop_vinfo->main_loop_edge) but the epilogue is not (a loop_vinfo->skip_this_loop_edge). The code currently disregards this situation. With the recent znver4 cost fix I couldn't trigger this situation with the testcase but I adjusted it so it could eventually trigger on other targets. PR tree-optimization/115841 * tree-vect-loop.cc (vect_transform_cycle_phi): Correctly place the partial vector reduction for the accumulator re-use when the main loop cannot be skipped but the epilogue can. * gcc.dg/vect/pr115841.c: New testcase. (cherry picked from commit 016c947b02e79a5c0c0c2d4ad5cb71aa04db3efd) Diff: --- gcc/testsuite/gcc.dg/vect/pr115841.c | 42 gcc/tree-vect-loop.cc| 7 +++--- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr115841.c b/gcc/testsuite/gcc.dg/vect/pr115841.c new file mode 100644 index ..aa5c66004a03 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr115841.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast -fcommon -fvect-cost-model=dynamic --param vect-partial-vector-usage=1" } */ +/* { dg-additional-options "-mavx512vl" { target avx512vl } } */ + +/* To trigger the bug costing needs to determine that aligning the A170 + accesses with a prologue is good and there should be a vectorized + epilogue with a smaller vector size, re-using the vector accumulator + from the vectorized main loop that's statically known to execute + but the epilogue loop is not. 
*/ + +static unsigned char xl[192]; +unsigned char A170[192*3]; + +void jerate (unsigned char *, unsigned char *); +float foo (unsigned n) +{ + jerate (xl, A170); + + unsigned i = 32; + int kr = 1; + float sfn11s = 0.f; + float sfn12s = 0.f; + do +{ + int krm1 = kr - 1; + long j = krm1; + float a = (*(float(*)[n])A170)[j]; + float b = (*(float(*)[n])xl)[j]; + float c = a * b; + float d = c * 6.93149983882904052734375e-1f; + float e = (*(float(*)[n])A170)[j+48]; + float f = (*(float(*)[n])A170)[j+96]; + float g = d * e; + sfn11s = sfn11s + g; + float h = f * d; + sfn12s = sfn12s + h; + kr++; +} + while (--i != 0); + float tem = sfn11s + sfn12s; + return tem; +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 8bb2e3ff1c82..7a319e7f98ef 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -8148,14 +8148,15 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo, /* And the reduction could be carried out using a different sign. */ if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def))) def = gimple_convert (&stmts, vectype_out, def); - if (loop_vinfo->main_loop_edge) + edge e; + if ((e = loop_vinfo->main_loop_edge) + || (e = loop_vinfo->skip_this_loop_edge)) { /* While we'd like to insert on the edge this will split blocks and disturb bookkeeping, we also will eventually need this on the skip edge. Rely on sinking to fixup optimal placement and insert in the pred. */ - gimple_stmt_iterator gsi - = gsi_last_bb (loop_vinfo->main_loop_edge->src); + gimple_stmt_iterator gsi = gsi_last_bb (e->src); /* Insert before a cond that eventually skips the epilogue. */ if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
[gcc r13-9038] tree-optimization/115694 - ICE with complex store rewrite
https://gcc.gnu.org/g:07c12b394dfb424404019b745b5e4a9e938f6693 commit r13-9038-g07c12b394dfb424404019b745b5e4a9e938f6693 Author: Richard Biener Date: Sun Jun 30 13:07:14 2024 +0200 tree-optimization/115694 - ICE with complex store rewrite The following adds a missed check when forwprop attempts to rewrite a complex store. PR tree-optimization/115694 * tree-ssa-forwprop.cc (pass_forwprop::execute): Check the store is complex before rewriting it. * g++.dg/torture/pr115694.C: New testcase. (cherry picked from commit 543a5b9da964f821b9e723ed9c93d6cdca464d47) Diff: --- gcc/testsuite/g++.dg/torture/pr115694.C | 13 + gcc/tree-ssa-forwprop.cc| 2 ++ 2 files changed, 15 insertions(+) diff --git a/gcc/testsuite/g++.dg/torture/pr115694.C b/gcc/testsuite/g++.dg/torture/pr115694.C new file mode 100644 index ..bbce47decf83 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr115694.C @@ -0,0 +1,13 @@ +// { dg-do compile } + +_Complex a; +typedef struct { + double a[2]; +} b; +void c(b); +void d() +{ + _Complex b1 = a; + b t = __builtin_bit_cast (b, b1); + c(t); +} diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index 862a7f2b92a7..f05016763c08 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -3722,6 +3722,8 @@ pass_forwprop::execute (function *fun) && gimple_store_p (use_stmt) && !gimple_has_volatile_ops (use_stmt) && is_gimple_assign (use_stmt) + && (TREE_CODE (TREE_TYPE (gimple_assign_lhs (use_stmt))) + == COMPLEX_TYPE) && (TREE_CODE (gimple_assign_lhs (use_stmt)) != TARGET_MEM_REF)) {
[gcc r13-9042] tree-optimization/116057 - wrong code with CCP and vector CTORs
https://gcc.gnu.org/g:ef25f1dd600cc9351c80e3e018d7170e16a2c6ff commit r13-9042-gef25f1dd600cc9351c80e3e018d7170e16a2c6ff Author: Richard Biener Date: Wed Jul 24 13:16:35 2024 +0200 tree-optimization/116057 - wrong code with CCP and vector CTORs The following fixes an issue with CCPs likely_value when faced with a vector CTOR containing undef SSA names and constants. This should be classified as CONSTANT and not UNDEFINED. PR tree-optimization/116057 * tree-ssa-ccp.cc (likely_value): Also walk CTORs in stmt operands to look for constants. * gcc.dg/torture/pr116057.c: New testcase. (cherry picked from commit 1ea551514b9c285d801ac5ab8d78b22483ff65af) Diff: --- gcc/testsuite/gcc.dg/torture/pr116057.c | 20 gcc/tree-ssa-ccp.cc | 11 +++ 2 files changed, 31 insertions(+) diff --git a/gcc/testsuite/gcc.dg/torture/pr116057.c b/gcc/testsuite/gcc.dg/torture/pr116057.c new file mode 100644 index ..a7021c8e746e --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr116057.c @@ -0,0 +1,20 @@ +/* { dg-do run } */ +/* { dg-additional-options "-Wno-psabi" } */ + +#define vect8 __attribute__((vector_size(8))) + +vect8 int __attribute__((noipa)) +f(int a) +{ + int b; + vect8 int t={1,1}; + if(a) return t; + return (vect8 int){0, b}; +} + +int main () +{ + if (f(0)[0] != 0) +__builtin_abort (); + return 0; +} diff --git a/gcc/tree-ssa-ccp.cc b/gcc/tree-ssa-ccp.cc index 6c9da603ef95..074edb68679f 100644 --- a/gcc/tree-ssa-ccp.cc +++ b/gcc/tree-ssa-ccp.cc @@ -759,6 +759,17 @@ likely_value (gimple *stmt) continue; if (is_gimple_min_invariant (op)) has_constant_operand = true; + else if (TREE_CODE (op) == CONSTRUCTOR) + { + unsigned j; + tree val; + FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (op), j, val) + if (CONSTANT_CLASS_P (val)) + { + has_constant_operand = true; + break; + } + } } if (has_constant_operand)
[gcc r15-3682] reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS.
https://gcc.gnu.org/g:5bfb91c14f98f6750281217f737b3d95c4e73584 commit r15-3682-g5bfb91c14f98f6750281217f737b3d95c4e73584 Author: Georg-Johann Lay Date: Fri Sep 6 11:23:06 2024 +0200 reload1.cc: rtl-optimization/116326 - Use RELOAD_ELIMINABLE_REGS. The new macro is required because reload and LRA are using different representations for a multi-register frame pointer. As ELIMINABLE_REGS is used to initialize static const objects, it can't depend on -mlra. PR rtl-optimization/116326 gcc/ * reload1.cc (reg_eliminate_1): Initialize from RELOAD_ELIMINABLE_REGS if defined. * config/avr/avr.h (RELOAD_ELIMINABLE_REGS): Copy from ELIMINABLE_REGS. (ELIMINABLE_REGS): Don't mention sub-regnos of the frame pointer. * doc/tm.texi.in (Eliminating Frame Pointer and Arg Pointer) : Add documentation. * doc/tm.texi: Rebuild. gcc/testsuite/ * gcc.target/avr/torture/lra-pr116324.c: New test. * gcc.target/avr/torture/lra-pr116325.c: New test. Diff: --- gcc/config/avr/avr.h | 9 +- gcc/doc/tm.texi| 8 ++ gcc/doc/tm.texi.in | 8 ++ gcc/reload1.cc | 6 ++ .../gcc.target/avr/torture/lra-pr116324.c | 86 +++ .../gcc.target/avr/torture/lra-pr116325.c | 117 + 6 files changed, 233 insertions(+), 1 deletion(-) diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h index 1cf4180e5343..3fa2ee76c435 100644 --- a/gcc/config/avr/avr.h +++ b/gcc/config/avr/avr.h @@ -308,12 +308,19 @@ enum reg_class { #define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 
18 :2) -#define ELIMINABLE_REGS { \ +#define RELOAD_ELIMINABLE_REGS { \ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ { FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } } +#define ELIMINABLE_REGS\ + {\ +{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \ +{ ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \ +{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM } \ + } + #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ OFFSET = avr_initial_elimination_offset (FROM, TO) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index cc33084ed322..9e520429ba91 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -4005,6 +4005,14 @@ Note that the elimination of the argument pointer with the stack pointer is specified first since that is the preferred elimination. @end defmac +@defmac RELOAD_ELIMINABLE_REGS +Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where +it takes precedence over @code{ELIMINABLE_REGS}. This macro can be useful +during the transition to LRA because there are cases where reload and LRA +disagree on how eliminable registers should be represented. For an example, +see @file{avr.h}. +@end defmac + @deftypefn {Target Hook} bool TARGET_CAN_ELIMINATE (const int @var{from_reg}, const int @var{to_reg}) This target hook should return @code{true} if the compiler is allowed to try to replace register number @var{from_reg} with register number diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index 8af3f4145058..a34674e33c99 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -3179,6 +3179,14 @@ Note that the elimination of the argument pointer with the stack pointer is specified first since that is the preferred elimination. @end defmac +@defmac RELOAD_ELIMINABLE_REGS +Like @code{ELIMINABLE_REGS}, but only used in the old reload framework where +it takes precedence over @code{ELIMINABLE_REGS}. 
This macro can be useful +during the transition to LRA because there are cases where reload and LRA +disagree on how eliminable registers should be represented. For an example, +see @file{avr.h}. +@end defmac + @hook TARGET_CAN_ELIMINATE @defmac INITIAL_ELIMINATION_OFFSET (@var{from-reg}, @var{to-reg}, @var{offset-var}) diff --git a/gcc/reload1.cc b/gcc/reload1.cc index 2e059b099703..120328e0f2f7 100644 --- a/gcc/reload1.cc +++ b/gcc/reload1.cc @@ -283,7 +283,13 @@ static const struct elim_table_1 const int to; } reg_eliminate_1[] = + /* Reload and LRA don't agree on how a multi-register frame pointer + is represented for elimination. See avr.h for a use case. */ +#ifdef RELOAD_ELIMINABLE_REGS + RELOAD_ELIMINABLE_REGS; +#else ELIMINABLE_REGS; +#endif #define NUM_ELIMINABLE_REGS ARRAY_SIZE (reg_eliminate_1) diff --git a/gcc/testsuite/gcc.target/avr/torture/lra-pr116324.c b/gcc/testsuite/g
[gcc r12-10716] doc: Add more alias option and reorder Intel CPU -march documentation
https://gcc.gnu.org/g:8483527158024d200b3a9e4edecbe188fa22fdaa commit r12-10716-g8483527158024d200b3a9e4edecbe188fa22fdaa Author: Haochen Jiang Date: Wed Sep 18 11:20:15 2024 +0800 doc: Add more alias option and reorder Intel CPU -march documentation This patch is backported from GCC15 with some tweaks. Since r15-3539, there are requests coming in to add other alias option documentation. This patch will add all of them, including corei7, corei7-avx, core-avx-i, core-avx2, atom and slm. Also in the patch, I reordered that part of documentation, currently all the CPUs/products are just all over the place. I regrouped them by date-to-now products (since the very first CPU to latest Panther Lake), P-core (since the clients become hybrid cores, starting from Sapphire Rapids) and E-core (since Bonnell). In GCC14 and earlier GCC, Xeon Phi CPUs are still there, I put them after E-core CPUs. And in the patch, I refined the product names in documentation. gcc/ChangeLog: * doc/invoke.texi: Add corei7, corei7-avx, core-avx-i, core-avx2, atom, and slm. Reorder the -march documentation by splitting them into date-to-now products, P-core, E-core and Xeon Phi. Refine the product names in documentation. Diff: --- gcc/doc/invoke.texi | 162 +++- 1 file changed, 84 insertions(+), 78 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fbfa3241e7f6..5db66718d10b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31449,6 +31449,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, CX16, SAHF and FXSR instruction set support. @item nehalem +@itemx corei7 Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support. @@ -31457,17 +31458,20 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support. 
@item sandybridge +@itemx corei7-avx Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set support. @item ivybridge +@itemx core-avx-i Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND and F16C instruction set support. @item haswell -Intel Haswell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, +@itemx core-avx2 +Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support. @@ -31483,47 +31487,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support. -@item bonnell -Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 -instruction set support. - -@item silvermont -Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND -instruction set support. - -@item goldmont -Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction -set support. - -@item goldmont-plus -Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, -SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, -RDPID and SGX instruction set support. 
- -@item tremont -Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, -SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set -support. - -@item knl -Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, -AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support. - -@item knm -Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, -AVX512PF, AVX512ER, AVX512F,
[gcc r15-3700] i386: Add missing avx512f-mask-type.h include
https://gcc.gnu.org/g:2b7b8d3bb52a23aa8b1d6e9a2d57c83db2078f73 commit r15-3700-g2b7b8d3bb52a23aa8b1d6e9a2d57c83db2078f73 Author: Haochen Jiang Date: Sat Sep 14 15:55:53 2024 +0800 i386: Add missing avx512f-mask-type.h include Since commit r15-3594, we fixed the bugs in MASK_TYPE for AVX10.2 testcases, but we missed the following four. The tests are not FAIL since the binutils part hasn't been merged yet, which leads to UNSUPPORTED tests. But the avx512f-mask-type.h needs to be included, otherwise, it will be a compile error. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-vpdpbssd-2.c: Include avx512f-mask-type.h. * gcc.target/i386/avx10_2-vminmaxsd-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxsh-2.c: Ditto. * gcc.target/i386/avx10_2-vminmaxss-2.c: Ditto. Diff: --- gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c | 2 ++ gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c| 1 + gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c| 1 + gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c| 1 + 4 files changed, 5 insertions(+) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c index add9de893511..624a1a8e50ea 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vpdpbssd-2.c @@ -13,6 +13,8 @@ #define SRC_SIZE (AVX512F_LEN / 8) #define SIZE (AVX512F_LEN / 32) +#include "avx512f-mask-type.h" + static void CALC (int *r, int *dst, char *s1, char *s2) { diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c index 1e2d78c4068d..f550e09be6c9 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsd-2.c @@ -8,6 +8,7 @@ #include "avx10-helper.h" #include #include "avx10-minmax-helper.h" +#include "avx512f-mask-type.h" void static CALC (double *r, double *s1, double *s2, int R) diff --git 
a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c index e6a93c403b50..dbf1087d9c3b 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxsh-2.c @@ -8,6 +8,7 @@ #include "avx10-helper.h" #include #include "avx10-minmax-helper.h" +#include "avx512f-mask-type.h" void static CALC (_Float16 *r, _Float16 *s1, _Float16 *s2, int R) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c index 47177e696409..7baa396a2d3f 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-vminmaxss-2.c @@ -8,6 +8,7 @@ #include "avx10-helper.h" #include #include "avx10-minmax-helper.h" +#include "avx512f-mask-type.h" void static CALC (float *r, float *s1, float *s2, int R)
[gcc r15-3701] i386: Enhance AVX10.2 convert tests
https://gcc.gnu.org/g:89e62d42f366cd835022f0ba00ba1d10305ae0ce commit r15-3701-g89e62d42f366cd835022f0ba00ba1d10305ae0ce Author: Haochen Jiang Date: Thu Sep 5 11:27:33 2024 +0800 i386: Enhance AVX10.2 convert tests For AVX10.2 convert tests, all of them are missing mask tests previously, this patch will add them in the tests. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c: Enhance mask test. * gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtbiasph2bf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtbiasph2hf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvthf82ph-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c: Ditto. * gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c: Ditto. * gcc.target/i386/avx512f-helper.h: Fix a typo in macro define. 
Diff: --- .../gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c| 35 ++ .../gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c | 25 +--- .../i386/avx10_2-512-vcvtbiasph2bf8s-2.c | 28 + .../gcc.target/i386/avx10_2-512-vcvtbiasph2hf8-2.c | 25 +--- .../i386/avx10_2-512-vcvtbiasph2hf8s-2.c | 25 +--- .../gcc.target/i386/avx10_2-512-vcvthf82ph-2.c | 27 + .../gcc.target/i386/avx10_2-512-vcvtne2ph2bf8-2.c | 25 .../gcc.target/i386/avx10_2-512-vcvtne2ph2bf8s-2.c | 25 .../gcc.target/i386/avx10_2-512-vcvtne2ph2hf8-2.c | 25 .../gcc.target/i386/avx10_2-512-vcvtne2ph2hf8s-2.c | 25 .../gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c | 29 +- .../gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c | 27 + .../gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c | 27 + .../gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c | 27 + gcc/testsuite/gcc.target/i386/avx512f-helper.h | 2 +- 15 files changed, 295 insertions(+), 82 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c index 40dbe18abbe8..5e355ae53d41 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvt2ps2phx-2.c @@ -10,24 +10,25 @@ #include "avx10-helper.h" #include -#define SIZE_RES (AVX512F_LEN / 16) +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" static void CALC (_Float16 *res_ref, float *src1, float *src2) { float fp32; int i; - for (i = 0; i < SIZE_RES / 2; i++) + for (i = 0; i < SIZE / 2; i++) { fp32 = (float) 2 * i + 7 + i * 0.5; res_ref[i] = fp32; src2[i] = fp32; } - for (i = SIZE_RES / 2; i < SIZE_RES; i++) + for (i = SIZE / 2; i < SIZE; i++) { fp32 = (float)2 * i + 7 + i * 0.5; res_ref[i] = fp32; - src1[i - (SIZE_RES / 2)] = fp32; + src1[i - (SIZE / 2)] = fp32; } } @@ -35,17 +36,27 @@ void TEST (void) { int i; - UNION_TYPE (AVX512F_LEN, h) res1; + UNION_TYPE (AVX512F_LEN, h) res1, res2, res3; UNION_TYPE (AVX512F_LEN, ) src1, src2; - _Float16 res_ref[SIZE_RES]; - float fp32; 
- - for (i = 0; i < SIZE_RES; i++) -res1.a[i] = 5; - + MASK_TYPE mask = MASK_VALUE; + _Float16 res_ref[SIZE]; + + for (i = 0; i < SIZE; i++) +res2.a[i] = DEFAULT_VALUE; + CALC (res_ref, src1.a, src2.a); - + res1.x = INTRINSIC (_cvtx2ps_ph) (src1.x, src2.x); if (UNION_CHECK (AVX512F_LEN, h) (res1, res_ref)) abort (); + + res2.x = INTRINSIC (_mask_cvtx2ps_ph) (res2.x, mask, src1.x, src2.x); + MASK_MERGE (h) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, h) (res2, res_ref)) +abort (); + + res3.x = INTRINSIC (_maskz_cvtx2ps_ph) (mask, src1.x, src2.x); + MASK_ZERO (h) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, h) (res3, res_ref)) +abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c index 9ce3c9059f1f..08450418daed 100644 --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtbiasph2bf8-2.c @@ -15,6 +15,9 @@ #define SRC_F16 (AVX512F_LEN / 16) #define DST_F8_I8 (AVX512F_LEN_HALF / 8) #define DST_F16 (AVX512F_LEN_HALF / 16) +#define SIZE SRC_F16 + +#
[gcc r15-3681] AVR: doc/install.texi - Update avr specific installation notes.
https://gcc.gnu.org/g:cdeebc71c48db922b14d34c361e15660c1e31fc1 commit r15-3681-gcdeebc71c48db922b14d34c361e15660c1e31fc1 Author: Georg-Johann Lay Date: Tue Sep 17 11:26:19 2024 +0200 AVR: doc/install.texi - Update avr specific installation notes. gcc/ * doc/install.texi (Host/Target specific installation notes for GCC) [avr]: Update web links to AVR-LibC and AVR Options. Remove outdated note about Binutils. Diff: --- gcc/doc/install.texi | 18 +++--- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 1ca0f14b44d1..e339d736969a 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -2695,7 +2695,7 @@ functions like @code{__addsf3} to be omitted from @file{libgcc.a} on the assumption that it will be provided by @file{libm.a}. For more technical details, cf. @uref{https://gcc.gnu.org/PR54461,,PR54461}. It is not supported for -RTEMS configurations, which currently use newlib. The option is +RTEMS configurations, which currently use Newlib. The option is supported since version 4.7.2 and is the default in 4.8.0 and newer. @item --with-double=@{32|64|32,64|64,32@} @@ -4007,27 +4007,23 @@ applications. There are no standard Unix configurations. Collection (GCC)}, @end ifnothtml @ifhtml -See ``AVR Options'' in the main manual +See @uref{https://gcc.gnu.org/onlinedocs/gcc/AVR-Options.html,,AVR Options} +in the main manual @end ifhtml for the list of supported MCU types. -Use @samp{configure --target=avr --enable-languages="c"} to configure GCC@. +Use @samp{configure --target=avr --enable-languages="c,c++"} to configure GCC@. 
Further installation notes and other useful information about AVR tools can also be obtained from: @itemize @bullet @item -@uref{http://www.nongnu.org/avr/,,http://www.nongnu.org/avr/} +@uref{https://avrdudes.github.io/avr-libc/avr-libc-user-manual/install_tools.html,,AVR-LibC: Building and Installing the GNU Tool Chain} +@item +@uref{https://github.com/sprintersb/atest?tab=readme-ov-file#running-the-avr-gcc-testsuite-using-the-avrtest-simulator,,AVRtest: Running the avr-gcc Testsuite} @end itemize -The following error: -@smallexample -Error: register required -@end smallexample - -indicates that you should upgrade to a newer version of the binutils. - @html @end html
[gcc r15-3686] [PATCH] RISC-V: Allow zero operand for DI variants of vssubu.vx
https://gcc.gnu.org/g:0756f335fb6e455641850a76e68f892f1f82ada2 commit r15-3686-g0756f335fb6e455641850a76e68f892f1f82ada2 Author: Bohan Lei Date: Wed Sep 18 07:20:23 2024 -0600 [PATCH] RISC-V: Allow zero operand for DI variants of vssubu.vx The RISC-V vector machine description relies on the helper function `sew64_scalar_helper` to emit actual insns for the DI variants of vssub.vx and vssubu.vx. This works with vssub.vx, but can cause problems with vssubu.vx with the scalar operand being constant zero, because `has_vi_variant_p` returns false, and the operand will be taken without being loaded into a reg. The attached testcases can cause an internal compiler error as a result. Allowing a constant zero operand in those insns seems to be a simple solution that only affects minimum existing code. gcc/ChangeLog: * config/riscv/vector.md: Allow zero operand for DI variants of vssubu.vx gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/vssubu-1.c: New test. * gcc.target/riscv/rvv/base/vssubu-2.c: New test. 
Diff: --- gcc/config/riscv/vector.md | 8 gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c | 11 +++ gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c | 11 +++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index d0677325ba1d..92e3061c7f85 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -4400,10 +4400,10 @@ (sat_int_minus_binop:VI_D (match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr") (vec_duplicate:VI_D - (match_operand: 4 "register_operand" " r, r, r, r"))) + (match_operand: 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ"))) (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" - "v.vx\t%0,%3,%4%p1" + "v.vx\t%0,%3,%z4%p1" [(set_attr "type" "") (set_attr "mode" "")]) @@ -4422,10 +4422,10 @@ (match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr") (vec_duplicate:VI_D (sign_extend: - (match_operand: 4 "register_operand" " r, r, r, r" + (match_operand: 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ" (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR && !TARGET_64BIT" - "v.vx\t%0,%3,%4%p1" + "v.vx\t%0,%3,%z4%p1" [(set_attr "type" "") (set_attr "mode" "")]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c new file mode 100644 index ..f19b42aed04c --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } */ + +#include + +vuint64m1_t test_vssubu_vx_u64m1(vuint64m1_t op1) +{ + return __riscv_vssubu_vx_u64m1(op1,0,0); +} + +/* { dg-final { scan-assembler-not {\tvssubu} } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c new file mode 100644 index ..cb4e4f48a9b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vssubu-2.c @@ -0,0 +1,11 @@ +/* { 
dg-do compile } */ +/* { dg-options "-O2 -march=rv32gcv -mabi=ilp32d" } */ + +#include + +vuint64m1_t test_vssubu_vx_u64m1(vuint64m1_t op1) +{ + return __riscv_vssubu_vx_u64m1(op1,0,0); +} + +/* { dg-final { scan-assembler-not {\tvssubu} } } */ \ No newline at end of file
[gcc r14-10687] tree-optimization/116460 - ICE with DCE in forwprop
https://gcc.gnu.org/g:bdc5937df68a197032e800fc27361037d016cbf1 commit r14-10687-gbdc5937df68a197032e800fc27361037d016cbf1 Author: Richard Biener Date: Mon Aug 26 13:50:00 2024 +0200 tree-optimization/116460 - ICE with DCE in forwprop The following avoids removing stmts with defs that might still have uses in the IL before calling simple_dce_from_worklist which might remove those as that will wreck debug stmt generation. Instead first perform use-based DCE and then remove stmts which may have uses in code that CFG cleanup will remove. This requires tracking stmts in to_remove by their SSA def so we can check whether it was removed before without running into the issue that PHIs can be ggc_free()d upon removal. So this adds to_remove_defs in addition to to_remove which has to stay to track GIMPLE_NOPs we want to elide. PR tree-optimization/116460 * tree-ssa-forwprop.cc (pass_forwprop::execute): First do simple_dce_from_worklist and then remove stmts in to_remove. Track defs to be removed in to_remove_defs. * g++.dg/torture/pr116460.C: New testcase. 
(cherry picked from commit 172637cf0d9b7b2798f83b9c5f9598b449675cb0) Diff: --- gcc/testsuite/g++.dg/torture/pr116460.C | 609 gcc/tree-ssa-forwprop.cc| 38 +- 2 files changed, 637 insertions(+), 10 deletions(-) diff --git a/gcc/testsuite/g++.dg/torture/pr116460.C b/gcc/testsuite/g++.dg/torture/pr116460.C new file mode 100644 index ..3c7d6372fba2 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr116460.C @@ -0,0 +1,609 @@ +// { dg-do compile } +// { dg-additional-options "-g" } + +namespace std { +typedef __SIZE_TYPE__ size_t; +typedef __PTRDIFF_TYPE__ ptrdiff_t; +void __throw_length_error(const char *) __attribute__((__noreturn__, __cold__)); +} +extern "C++" { +namespace std __attribute__((__visibility__("default"))) { + template struct __is_integer { +enum { __value = 1 }; + }; + template struct __is_nonvolatile_trivially_copyable { +enum { __value = __is_trivially_copyable(_Tp) }; + }; + template struct __memcpyable {}; + template + struct __memcpyable<_Tp *, _Tp *> : __is_nonvolatile_trivially_copyable<_Tp> { + }; + template + struct __memcpyable<_Tp *, const _Tp *> + : __is_nonvolatile_trivially_copyable<_Tp> {}; + template struct __is_move_iterator { +enum { __value = 0 }; + }; + template inline _Iterator __miter_base(_Iterator __it) { +return __it; + } +} // namespace ) +} +namespace __gnu_cxx __attribute__((__visibility__("default"))) { + template + struct __is_integer_nonstrict : public std::__is_integer<_Tp> { +using std::__is_integer<_Tp>::__value; +enum { __width = __value ? sizeof(_Tp) * 8 : 0 }; + }; + template struct __numeric_traits_integer { +static const bool __is_signed = (_Value)(-1) < 0; +static const int __digits = +__is_integer_nonstrict<_Value>::__width - __is_signed; +static const _Value __max = +__is_signed ? 
(_Value)1 << (__digits - 1)) - 1) << 1) + 1) +: ~(_Value)0; + }; + template + struct __numeric_traits : public __numeric_traits_integer<_Value> {}; +} // namespace ) +namespace std __attribute__((__visibility__("default"))) { + template struct integral_constant { +static constexpr _Tp value = __v; +using type = integral_constant<_Tp, __v>; + }; + template using __bool_constant = integral_constant; + using true_type = __bool_constant; + using false_type = __bool_constant; + template struct enable_if {}; + template struct enable_if { using type = _Tp; }; + template + using __enable_if_t = typename enable_if<_Cond, _Tp>::type; + template struct __conditional { +template using type = _Tp; + }; + template + using __conditional_t = + typename __conditional<_Cond>::template type<_If, _Else>; + namespace __detail { + template auto __and_fn(...) -> false_type; + } + template + struct __and_ : decltype(__detail::__and_fn<_Bn...>(0)) {}; + template struct __not_ : __bool_constant {}; + template using __void_t = void; + template + struct is_trivial : public __bool_constant<__is_trivial(_Tp)> {}; + template _Up __declval(int); + template auto declval() noexcept->decltype(__declval<_Tp>(0)); + template + using __is_constructible_impl = + __bool_constant<__is_constructible(_Tp, _Args...)>; + template + struct __add_lvalue_reference_helper { +using type = _Tp &; + }; + template + using __add_lval_ref_t = typename __add_lvalue_reference_helper<_Tp>::type; + template + struct is_copy_constructible + : public __is_constructible_impl<_Tp, __add_lval_ref_t> {}; + template + struct __add_rvalue_reference_helper { +using type = _Tp; + }; + template + using __add_rval_ref_t = typename __add_rvalue_reference_helper<_Tp>::type; + template + struct is_move_
[gcc r15-3696] libstdc++: add braces
https://gcc.gnu.org/g:aa338bdd46a4946e9d5ac0923ce9bf9bc621c852 commit r15-3696-gaa338bdd46a4946e9d5ac0923ce9bf9bc621c852 Author: Jason Merrill Date: Sun Sep 15 11:48:46 2024 +0200 libstdc++: add braces GCC compiles with -fno-exceptions, so __throw_exception_again is a no-op, and compilation gives a -Wempty-body warning here, so let's wrap it as is already done in a few other files. libstdc++-v3/ChangeLog: * include/bits/basic_ios.h: Add braces. Diff: --- libstdc++-v3/include/bits/basic_ios.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/include/bits/basic_ios.h b/libstdc++-v3/include/bits/basic_ios.h index bc3be4d2e371..2c2334d0fe3c 100644 --- a/libstdc++-v3/include/bits/basic_ios.h +++ b/libstdc++-v3/include/bits/basic_ios.h @@ -171,7 +171,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Turn this on without causing an ios::failure to be thrown. _M_streambuf_state |= __state; if (this->exceptions() & __state) - __throw_exception_again; + { __throw_exception_again; } } /**
[gcc r15-3702] doc: Add more alias option and reorder Intel CPU -march documentation
https://gcc.gnu.org/g:877fb9bdb06d18df51c6043f74dde66fe6f46b78 commit r15-3702-g877fb9bdb06d18df51c6043f74dde66fe6f46b78 Author: Haochen Jiang Date: Wed Sep 18 11:20:15 2024 +0800 doc: Add more alias option and reorder Intel CPU -march documentation Since r15-3539, there are requests coming in to add other alias option documentation. This patch will add all of them, including corei7, corei7-avx, core-avx-i, core-avx2, atom, slm, gracemont and emeraldrapids. Also in the patch, I reordered that part of documentation, currently all the CPUs/products are just all over the place. I regrouped them by date-to-now products (since the very first CPU to latest Panther Lake), P-core (since the clients become hybrid cores, starting from Sapphire Rapids) and E-core (since Bonnell to latest Clearwater Forest). And in the patch, I refined the product names in documentation. gcc/ChangeLog: * doc/invoke.texi: Add corei7, corei7-avx, core-avx-i, core-avx2, atom, slm, gracemont and emeraldrapids. Reorder the -march documentation by splitting them into date-to-now products, P-core and E-core. Refine the product names in documentation. Diff: --- gcc/doc/invoke.texi | 234 +++- 1 file changed, 121 insertions(+), 113 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b91fb9c9cca6..2bcf71c35f7b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -34601,6 +34601,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, CX16, SAHF and FXSR instruction set support. @item nehalem +@itemx corei7 Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support. @@ -34609,16 +34610,19 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support. 
@item sandybridge +@itemx corei7-avx Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set support. @item ivybridge +@itemx core-avx-i Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND and F16C instruction set support. @item haswell +@itemx core-avx2 Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support. @@ -34635,61 +34639,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support. -@item bonnell -Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 -instruction set support. - -@item silvermont -Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND -instruction set support. - -@item goldmont -Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction -set support. - -@item goldmont-plus -Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, -SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, -RDPID and SGX instruction set support. 
- -@item tremont -Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, -SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set -support. - -@item sierraforest -Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, -XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set -support. - -@item grandridge -Intel Grand Ridge CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, -XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -MOVDIR64B, CLDEMOTE
[gcc r15-3704] i386: Add ssemov2, sseicvt2 for some load instructions that use memory on operand2
https://gcc.gnu.org/g:1cf1bf7899985df31e1ebccb5d6f1ca762991dcf commit r15-3704-g1cf1bf7899985df31e1ebccb5d6f1ca762991dcf Author: Hu, Lin1 Date: Wed Sep 11 10:10:40 2024 +0800 i386: Add ssemov2, sseicvt2 for some load instructions that use memory on operand2 The memory attr of some instructions should be 'load', but these are 'none', currently. gcc/ChangeLog: * config/i386/i386.md: Add ssemov2, sseicvt2. * config/i386/sse.md (sse2_cvtsi2sd): Apply sseicvt2. (sse2_cvtsi2sdq): Ditto. (vec_set_0): Apply ssemov2 for 4, 6. Diff: --- gcc/config/i386/i386.md | 11 +++ gcc/config/i386/sse.md | 6 -- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index c04415149490..9c2a0aa61126 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -539,10 +539,10 @@ str,bitmanip, fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, fxch,fistp,fisttp,frndint, - sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, + sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1, ssemul,sseimul,ssediv,sselog,sselog1, sseishft,sseishft1,ssecmp,ssecomi, - ssecvt,ssecvt1,sseicvt,sseins, + ssecvt,ssecvt1,sseicvt,sseicvt2,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg, lwp,mskmov,msklog, mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft" @@ -560,10 +560,10 @@ (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp, fxch,fistp,fisttp,frndint") (const_string "i387") -(eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1, +(eq_attr "type" "sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1, ssemul,sseimul,ssediv,sselog,sselog1, sseishft,sseishft1,ssecmp,ssecomi, - ssecvt,ssecvt1,sseicvt,sseins, + ssecvt,ssecvt1,sseicvt,sseicvt2,sseins, sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov") (const_string "sse") (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft") @@ -858,6 +858,9 @@ mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog") (match_operand 2 "memory_operand")) (const_string "load") +(and (eq_attr "type" "ssemov2,sseicvt2") + (match_operand 2 
"memory_operand")) + (const_string "load") (and (eq_attr "type" "icmov,ssemuladd,sse4arg") (match_operand 3 "memory_operand")) (const_string "load") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 1ae61182d0cc..ff4f33b7b637 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -8876,7 +8876,7 @@ cvtsi2sd{l}\t{%2, %0|%0, %2} vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,noavx,avx") - (set_attr "type" "sseicvt") + (set_attr "type" "sseicvt2") (set_attr "athlon_decode" "double,direct,*") (set_attr "amdfam10_decode" "vector,double,*") (set_attr "bdver1_decode" "double,direct,*") @@ -8898,7 +8898,7 @@ cvtsi2sd{q}\t{%2, %0|%0, %2} vcvtsi2sd{q}\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,noavx,avx") - (set_attr "type" "sseicvt") + (set_attr "type" "sseicvt2") (set_attr "athlon_decode" "double,direct,*") (set_attr "amdfam10_decode" "vector,double,*") (set_attr "bdver1_decode" "double,direct,*") @@ -11808,6 +11808,8 @@ (const_string "imov") (eq_attr "alternative" "14") (const_string "fmov") + (eq_attr "alternative" "4,6") + (const_string "ssemov2") ] (const_string "ssemov"))) (set (attr "addr")
[gcc(refs/users/aoliva/heads/testme)] rework truth_andor folding into tree-ssa-ifcombine
https://gcc.gnu.org/g:d041471d649c47763535d673ad689654d3630223 commit d041471d649c47763535d673ad689654d3630223 Author: Alexandre Oliva Date: Tue Sep 17 20:15:22 2024 -0300 rework truth_andor folding into tree-ssa-ifcombine Diff: --- gcc/fold-const.cc | 1048 + gcc/gimple-fold.cc| 1149 + gcc/tree-ssa-ifcombine.cc |7 +- 3 files changed, 1170 insertions(+), 1034 deletions(-) diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 6dbb9208dc29..552a706ab6de 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -137,7 +137,6 @@ static tree range_successor (tree); static tree fold_range_test (location_t, enum tree_code, tree, tree, tree); static tree fold_cond_expr_with_comparison (location_t, tree, enum tree_code, tree, tree, tree, tree); -static tree unextend (tree, int, int, tree); static tree extract_muldiv (tree, tree, enum tree_code, tree, bool *); static tree extract_muldiv_1 (tree, tree, enum tree_code, tree, bool *); static tree fold_binary_op_with_conditional_arg (location_t, @@ -4701,7 +4700,7 @@ invert_truthvalue_loc (location_t loc, tree arg) is the original memory reference used to preserve the alias set of the access. */ -static tree +tree make_bit_field_ref (location_t loc, tree inner, tree orig_inner, tree type, HOST_WIDE_INT bitsize, poly_int64 bitpos, int unsignedp, int reversep) @@ -4951,212 +4950,6 @@ optimize_bit_field_compare (location_t loc, enum tree_code code, return lhs; } -/* If *R_ARG is a constant zero, and L_ARG is a possibly masked - BIT_XOR_EXPR, return 1 and set *r_arg to l_arg. - Otherwise, return 0. - - The returned value should be passed to decode_field_reference for it - to handle l_arg, and then doubled for r_arg. 
*/ -static int -prepare_xor (tree l_arg, tree *r_arg) -{ - int ret = 0; - - if (!integer_zerop (*r_arg)) -return ret; - - tree exp = l_arg; - STRIP_NOPS (exp); - - if (TREE_CODE (exp) == BIT_AND_EXPR) -{ - tree and_mask = TREE_OPERAND (exp, 1); - exp = TREE_OPERAND (exp, 0); - STRIP_NOPS (exp); STRIP_NOPS (and_mask); - if (TREE_CODE (and_mask) != INTEGER_CST) - return ret; -} - - if (TREE_CODE (exp) == BIT_XOR_EXPR) -{ - *r_arg = l_arg; - return 1; -} - - return ret; -} - -/* Subroutine for fold_truth_andor_1: decode a field reference. - - If EXP is a comparison reference, we return the innermost reference. - - *PBITSIZE is set to the number of bits in the reference, *PBITPOS is - set to the starting bit number. - - If the innermost field can be completely contained in a mode-sized - unit, *PMODE is set to that mode. Otherwise, it is set to VOIDmode. - - *PVOLATILEP is set to 1 if the any expression encountered is volatile; - otherwise it is not changed. - - *PUNSIGNEDP is set to the signedness of the field. - - *PREVERSEP is set to the storage order of the field. - - *PMASK is set to the mask used. This is either contained in a - BIT_AND_EXPR or derived from the width of the field. - - *PAND_MASK is set to the mask found in a BIT_AND_EXPR, if any. - - XOR_WHICH is 1 or 2 if EXP was found to be a (possibly masked) - BIT_XOR_EXPR compared with zero. We're to take the first or second - operand thereof if so. It should be zero otherwise. - - Return 0 if this is not a component reference or is one that we can't - do anything with. 
*/ - -static tree -decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize, - HOST_WIDE_INT *pbitpos, machine_mode *pmode, - int *punsignedp, int *preversep, int *pvolatilep, - tree *pmask, tree *pand_mask, int xor_which) -{ - tree exp = *exp_; - tree outer_type = 0; - tree and_mask = 0; - tree mask, inner, offset; - tree unsigned_type; - unsigned int precision; - HOST_WIDE_INT shiftrt = 0; - - /* All the optimizations using this function assume integer fields. - There are problems with FP fields since the type_for_size call - below can fail for, e.g., XFmode. */ - if (! INTEGRAL_TYPE_P (TREE_TYPE (exp))) -return NULL_TREE; - - /* We are interested in the bare arrangement of bits, so strip everything - that doesn't affect the machine mode. However, record the type of the - outermost expression if it may matter below. */ - if (CONVERT_EXPR_P (exp) - || TREE_CODE (exp) == NON_LVALUE_EXPR) -outer_type = TREE_TYPE (exp); - STRIP_NOPS (exp); - - if (TREE_CODE (exp) == BIT_AND_EXPR) -{ - and_mask = TREE_OPERAND (exp, 1); - exp = TREE_OPERAND (exp, 0); - STRIP_NOPS (exp); STRIP_NOPS (and_mask); - if (TREE_CODE (and_mask) != INTEGER_CST) - return NULL_TREE; -} - - if (xor_which) -{ - gcc_checking_assert (TREE_CODE (exp) == BIT_XOR_EXPR); -
[gcc(refs/users/aoliva/heads/testme)] check for mergeable loads, choose insertion points accordingly
https://gcc.gnu.org/g:6ce741d00f03f73e1fb3e797e85707aef9cfd832 commit 6ce741d00f03f73e1fb3e797e85707aef9cfd832 Author: Alexandre Oliva Date: Tue Sep 17 20:15:28 2024 -0300 check for mergeable loads, choose insertion points accordingly Diff: --- gcc/gimple-fold.cc | 253 ++--- 1 file changed, 219 insertions(+), 34 deletions(-) diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 64426bd76977..85a0ec028030 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -69,6 +69,7 @@ along with GCC; see the file COPYING3. If not see #include "varasm.h" #include "internal-fn.h" #include "gimple-range.h" +#include "tree-ssa-loop-niter.h" // stmt_dominates_stmt_p /* ??? Move this to some header, it's defined in fold-const.c. */ extern tree @@ -7395,7 +7396,7 @@ maybe_fold_comparisons_from_match_pd (tree type, enum tree_code code, Same as ssa_is_replaceable_p, except that we don't insist it has a single use. */ -bool +static bool ssa_is_substitutable_p (gimple *stmt) { #if 0 @@ -7476,9 +7477,10 @@ is_cast_p (tree *name) if (gimple_num_ops (def) != 2) break; - if (get_gimple_rhs_class (gimple_expr_code (def)) - == GIMPLE_SINGLE_RHS) + if (gimple_assign_single_p (def)) { + if (gimple_assign_load_p (def)) + break; *name = gimple_assign_rhs1 (def); continue; } @@ -7515,8 +7517,7 @@ is_binop_p (enum tree_code code, tree *name) return 0; case 2: - if (get_gimple_rhs_class (gimple_expr_code (def)) - == GIMPLE_SINGLE_RHS) + if (gimple_assign_single_p (def) && !gimple_assign_load_p (def)) { *name = gimple_assign_rhs1 (def); continue; @@ -7524,7 +7525,7 @@ is_binop_p (enum tree_code code, tree *name) return 0; case 3: - ; + break; } if (gimple_assign_rhs_code (def) != code) @@ -7569,6 +7570,26 @@ prepare_xor (tree l_arg, tree *r_arg) return ret; } +/* If EXP is a SSA_NAME whose DEF is a load stmt, set *LOAD to it and + return its RHS, otherwise return EXP. 
*/ + +static tree +follow_load (tree exp, gimple **load) +{ + if (TREE_CODE (exp) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (exp)) +{ + gimple *def = SSA_NAME_DEF_STMT (exp); + if (gimple_assign_load_p (def)) + { + *load = def; + exp = gimple_assign_rhs1 (def); + } +} + + return exp; +} + /* Subroutine for fold_truth_andor_1: decode a field reference. If EXP is a comparison reference, we return the innermost reference. @@ -7595,6 +7616,9 @@ prepare_xor (tree l_arg, tree *r_arg) BIT_XOR_EXPR compared with zero. We're to take the first or second operand thereof if so. It should be zero otherwise. + *LOAD is set to the load stmt of the innermost reference, if any, + *and NULL otherwise. + Return 0 if this is not a component reference or is one that we can't do anything with. */ @@ -7602,7 +7626,8 @@ static tree decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize, HOST_WIDE_INT *pbitpos, machine_mode *pmode, int *punsignedp, int *preversep, int *pvolatilep, - tree *pmask, tree *pand_mask, int xor_which) + tree *pmask, tree *pand_mask, int xor_which, + gimple **load) { tree exp = *exp_; tree outer_type = 0; @@ -7612,11 +7637,13 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize, unsigned int precision; HOST_WIDE_INT shiftrt = 0; + *load = NULL; + /* All the optimizations using this function assume integer fields. There are problems with FP fields since the type_for_size call below can fail for, e.g., XFmode. */ if (! INTEGRAL_TYPE_P (TREE_TYPE (exp))) -return 0; +return NULL_TREE; /* We are interested in the bare arrangement of bits, so strip everything that doesn't affect the machine mode. 
However, record the type of the @@ -7626,7 +7653,7 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize, if ((and_mask = is_binop_p (BIT_AND_EXPR, &exp))) { if (TREE_CODE (and_mask) != INTEGER_CST) - return 0; + return NULL_TREE; } if (xor_which) @@ -7644,16 +7671,18 @@ decode_field_reference (location_t loc, tree *exp_, HOST_WIDE_INT *pbitsize, if (tree shift = is_binop_p (RSHIFT_EXPR, &exp)) { if (TREE_CODE (shift) != INTEGER_CST || !tree_fits_shwi_p (shift)) - return 0; + return NULL_TREE; shiftrt = tree_to_shwi (shift); if (shiftrt <= 0) - return 0; + return NULL_TREE; } if (tree t = is_cast_p (&exp)) if (!outer_type) outer_type = t; + exp = follow_load (exp, load); + poly_int64 poly_bitsize, poly_bitpos; inner = ge
[gcc/aoliva/heads/testme] (46 commits) support noncontiguous ifcombine
The branch 'aoliva/heads/testme' was updated to point to: a29037a8f9c7... support noncontiguous ifcombine It previously pointed to: 8a7e9581280c... support noncontiguous ifcombine Diff: !!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST): --- 8a7e958... support noncontiguous ifcombine 4f6753d... support noncontiguous ifcombine 7b7dfff... relax ifcombine to accept vuses e731ae8... fold truth-and only in ifcombine fbf1f80... check for mergeable loads, choose insertion points accordin b4b872b... rework truth_andor folding into tree-ssa-ifcombine 8aa412b... assorted improvements for fold_truth_andor_1 Summary of changes (added commits): --- a29037a... support noncontiguous ifcombine 3ed1ed8... refactor ifcombine b0b68cb... support noncontiguous ifcombine 575a4da... relax ifcombine to accept vuses 15a55a9... fold truth-and only in ifcombine 6ce741d... check for mergeable loads, choose insertion points accordin d041471... rework truth_andor folding into tree-ssa-ifcombine d675d49... assorted improvements for fold_truth_andor_1 d6d8445... c++: fix constexpr cast from void* diag issue [PR116741] (*) 7ca4868... c++: ICE with -Wtautological-compare in template [PR116534] (*) dfe0d43... c++: crash with anon VAR_DECL [PR116676] (*) e311dd1... SVE intrinsics: Fold svdiv with all-zero operands to zero v (*) 008f451... Daily bump. (*) a92f54f... aarch64: Improve vector constant generation using SVE INDEX (*) 58bc39c... modula2: gcc/m2/Make-lang.in fix includes during bootstrap (*) f544838... AVR: Update weblinks to AVR-LibC. (*) 4af196b... aarch64: Emit ADD X, Y, Y instead of SHL X, Y, #1 for SVE i (*) f6e629a... PR modula2/116181 Use GCC tree location_t and separate poin (*) 7fb1117... AVR: Tweak >= and < compares with consts that are 0 mod 256 (*) 952df9c... riscv: Fix duplicate assmbler label in @tlsdesc insn (*) eb67e23... libstdc++: Add .editorconfig files (*) 48a0f69... vect: Set pattern_stmt_p on the newly created stmt_vec_info (*) 8d402c3... 
AVR: Tidy up enum and struct tags. (*) 9f8e182... AVR: Partially revert r15-3623. (*) 719edcb... libstdc++: Update link to installation docs (*) 4f2cd25... Daily bump. (*) d204bee... fortran: Remove useless nested end of scalarization chain h (*) a9f9391... c++: __extension__ and -Wconditionally-supported (*) 5ef73ba... c++: conversion location (*) 2af87d9... libstdc++: Adjust std::span::iterator to be ADL-proof (*) 1dde83f... libstdc++: Enable most of for freestanding (*) f91fe35... libstdc++: Add assertion for valid facet type arguments (*) c5fd1a4... libstdc++: Make PSTL algorithms accept C++20 iterators [PR1 (*) 368ba7a... c++, coroutines: Fix handling of bool await_suspend() [PR11 (*) 6e4244e... phi-opt: Improve heuristics for factoring out with constant (*) 0b31335... vect: release defs of removed statement (*) d2f10fc... Mark the copy/move constructor/operator= of auto_bitmap as (*) e07fbc9... Daily bump. (*) 1dd6dd1... testsuite; Fix execute/pr52286.c for 16bit (*) 8b5e547... c++: avoid init_priority warning in system header (*) 005f717... c++: Don't mix timevar_start and auto_cond_timevar for TV_N (*) a900349... AVR: Use rtx code copysign. (*) 99b8be4... libstdc++: Tweak localized formatting for floating-point ty (*) 01670a4... libstdc++: Refactor loops in std::__platform_semaphore (*) 49cb715... testsuite: adjust pragma-diag-17.c diagnostics (*) bec1f2c... c++: Fix g++.dg/ext/sve-sizeless-1.C regression (*) (*) This commit already exists in another branch. Because the reference `refs/users/aoliva/heads/testme' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc/aoliva/heads/testbase] (38 commits) c++: fix constexpr cast from void* diag issue [PR116741]
The branch 'aoliva/heads/testbase' was updated to point to: d6d8445c8550... c++: fix constexpr cast from void* diag issue [PR116741] It previously pointed to: b56bd542942b... testsuite: a few more hostedlib adjustments Diff: Summary of changes (added commits): --- d6d8445... c++: fix constexpr cast from void* diag issue [PR116741] (*) 7ca4868... c++: ICE with -Wtautological-compare in template [PR116534] (*) dfe0d43... c++: crash with anon VAR_DECL [PR116676] (*) e311dd1... SVE intrinsics: Fold svdiv with all-zero operands to zero v (*) 008f451... Daily bump. (*) a92f54f... aarch64: Improve vector constant generation using SVE INDEX (*) 58bc39c... modula2: gcc/m2/Make-lang.in fix includes during bootstrap (*) f544838... AVR: Update weblinks to AVR-LibC. (*) 4af196b... aarch64: Emit ADD X, Y, Y instead of SHL X, Y, #1 for SVE i (*) f6e629a... PR modula2/116181 Use GCC tree location_t and separate poin (*) 7fb1117... AVR: Tweak >= and < compares with consts that are 0 mod 256 (*) 952df9c... riscv: Fix duplicate assmbler label in @tlsdesc insn (*) eb67e23... libstdc++: Add .editorconfig files (*) 48a0f69... vect: Set pattern_stmt_p on the newly created stmt_vec_info (*) 8d402c3... AVR: Tidy up enum and struct tags. (*) 9f8e182... AVR: Partially revert r15-3623. (*) 719edcb... libstdc++: Update link to installation docs (*) 4f2cd25... Daily bump. (*) d204bee... fortran: Remove useless nested end of scalarization chain h (*) a9f9391... c++: __extension__ and -Wconditionally-supported (*) 5ef73ba... c++: conversion location (*) 2af87d9... libstdc++: Adjust std::span::iterator to be ADL-proof (*) 1dde83f... libstdc++: Enable most of for freestanding (*) f91fe35... libstdc++: Add assertion for valid facet type arguments (*) c5fd1a4... libstdc++: Make PSTL algorithms accept C++20 iterators [PR1 (*) 368ba7a... c++, coroutines: Fix handling of bool await_suspend() [PR11 (*) 6e4244e... phi-opt: Improve heuristics for factoring out with constant (*) 0b31335... 
vect: release defs of removed statement (*) d2f10fc... Mark the copy/move constructor/operator= of auto_bitmap as (*) e07fbc9... Daily bump. (*) 1dd6dd1... testsuite; Fix execute/pr52286.c for 16bit (*) 8b5e547... c++: avoid init_priority warning in system header (*) 005f717... c++: Don't mix timevar_start and auto_cond_timevar for TV_N (*) a900349... AVR: Use rtx code copysign. (*) 99b8be4... libstdc++: Tweak localized formatting for floating-point ty (*) 01670a4... libstdc++: Refactor loops in std::__platform_semaphore (*) 49cb715... testsuite: adjust pragma-diag-17.c diagnostics (*) bec1f2c... c++: Fix g++.dg/ext/sve-sizeless-1.C regression (*) (*) This commit already exists in another branch. Because the reference `refs/users/aoliva/heads/testbase' matches your hooks.email-new-commits-only configuration, no separate email is sent for this commit.
[gcc(refs/users/aoliva/heads/testme)] relax ifcombine to accept vuses
https://gcc.gnu.org/g:575a4da1213668119e0e60326a7b18f7c1a342d6 commit 575a4da1213668119e0e60326a7b18f7c1a342d6 Author: Alexandre Oliva Date: Tue Sep 17 20:15:46 2024 -0300 relax ifcombine to accept vuses Diff: --- gcc/config/i386/t-i386 | 2 ++ gcc/testsuite/gcc.dg/field-merge-6.c | 26 ++ gcc/tree-ssa-ifcombine.cc| 2 +- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 index bf4ae109af98..1b904787ec62 100644 --- a/gcc/config/i386/t-i386 +++ b/gcc/config/i386/t-i386 @@ -79,3 +79,5 @@ s-i386-bt: $(srcdir)/config/i386/i386-builtin-types.awk \ $(AWK) -f $^ > tmp-bt.inc $(SHELL) $(srcdir)/../move-if-change tmp-bt.inc i386-builtin-types.inc $(STAMP) $@ + +insn-attrtab.o-warn = -Wno-error diff --git a/gcc/testsuite/gcc.dg/field-merge-6.c b/gcc/testsuite/gcc.dg/field-merge-6.c new file mode 100644 index ..7fd48a138d14 --- /dev/null +++ b/gcc/testsuite/gcc.dg/field-merge-6.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-O" } */ +/* { dg-shouldfail } */ + +/* Check that the third compare won't be pulled ahead of the second one, + which would prevent the NULL pointer dereference that should cause + the execution to fail. */ + +struct s { + char a, b; + int *p; +}; + +struct s a = { 0, 1, 0 }; +struct s b = { 0, 0, 0 }; + +int f () { + return (a.a != b.a + || *b.p != *a.p + || a.b != b.b); +} + +int main() { + f (); + return 0; +} diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 61480e5fa894..7678c87e0170 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -129,7 +129,7 @@ bb_no_side_effects_p (basic_block bb) enum tree_code rhs_code; if (gimple_has_side_effects (stmt) || gimple_could_trap_p (stmt) - || gimple_vuse (stmt) + /* || gimple_vuse (stmt) */ /* We need to rewrite stmts with undefined overflow to use unsigned arithmetic but cannot do so for signed division. */ || ((ass = dyn_cast (stmt))
[gcc(refs/users/aoliva/heads/testme)] support noncontiguous ifcombine
https://gcc.gnu.org/g:a29037a8f9c752e41a906f0eac66ff3792e98bcc commit a29037a8f9c752e41a906f0eac66ff3792e98bcc Author: Alexandre Oliva Date: Tue Sep 17 20:15:55 2024 -0300 support noncontiguous ifcombine Diff: --- gcc/tree-ssa-ifcombine.cc | 33 ++--- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 3d57c615d827..79ccc70b2678 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -779,13 +779,13 @@ tree_ssa_ifcombine_bb_1 (basic_block inner_cond_bb, basic_block outer_cond_bb, if-conversion helper. We start with BB as the innermost worker basic-block. Returns true if a transformation was done. */ -static bool +static basic_block tree_ssa_ifcombine_bb (basic_block inner_cond_bb) { basic_block then_bb = NULL, else_bb = NULL; if (!recognize_if_then_else (inner_cond_bb, &then_bb, &else_bb)) -return false; +return NULL; /* Recognize && and || of two conditions with a common then/else block which entry edges we can merge. That is: @@ -802,7 +802,7 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb) if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, then_bb, else_bb, inner_cond_bb)) - return true; + return bb; if (forwarder_block_to (else_bb, then_bb)) { @@ -814,7 +814,7 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb) edge from outer_cond_bb and the forwarder block. */ if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, else_bb, then_bb, else_bb)) - return true; + return bb; } else if (forwarder_block_to (then_bb, else_bb)) { @@ -826,11 +826,11 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb) edge from outer_cond_bb and the forwarder block. */ if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, else_bb, then_bb, then_bb)) - return true; + return bb; } } - return false; + return NULL; } /* Main entry for the tree if-conversion pass. 
*/ @@ -881,12 +881,14 @@ pass_tree_ifcombine::execute (function *fun) inner ones, and also that we do not try to visit a removed block. This is opposite of PHI-OPT, because we cascade the combining rather than cascading PHIs. */ + basic_block seen = NULL; + bool changed = false; for (i = n_basic_blocks_for_fn (fun) - NUM_FIXED_BLOCKS - 1; i >= 0; i--) { basic_block bb = bbs[i]; if (safe_is_a (*gsi_last_bb (bb))) - if (tree_ssa_ifcombine_bb (bb)) + if (basic_block outer_bb = tree_ssa_ifcombine_bb (bb)) { /* Clear range info from all stmts in BB which is now executed conditional on a always true/false condition. */ @@ -905,7 +907,24 @@ pass_tree_ifcombine::execute (function *fun) rewrite_to_defined_overflow (&gsi); } cfg_changed |= true; + if (seen) + changed |= true; + else + seen = bb; + /* Go back and check whether the modified outer_bb can be further + optimized. ??? How could it? */ + do + i++; + while (bbs[i] != outer_bb); + continue; } + + if (bb == seen) + { + gcc_assert (!changed); + seen = NULL; + changed = false; + } } free (bbs);
[gcc(refs/users/aoliva/heads/testme)] refactor ifcombine
https://gcc.gnu.org/g:3ed1ed8f0533f3f3f4372a2280c4e1c29304cd78 commit 3ed1ed8f0533f3f3f4372a2280c4e1c29304cd78 Author: Alexandre Oliva Date: Thu Sep 19 02:43:51 2024 -0300 refactor ifcombine Diff: --- gcc/tree-ssa-ifcombine.cc | 181 +++--- 1 file changed, 89 insertions(+), 92 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index eb4317bebdfb..3d57c615d827 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -107,6 +107,14 @@ recognize_if_then_else (basic_block cond_bb, if (!*else_bb) *else_bb = e->dest; + gcond *cond = safe_dyn_cast (*gsi_last_bb (cond_bb)); + if (!cond) +return false; + + if (CONSTANT_CLASS_P (gimple_cond_lhs (cond)) + && CONSTANT_CLASS_P (gimple_cond_rhs (cond))) +return false; + return true; } @@ -407,15 +415,67 @@ fold_truth_andor_maybe_separate (location_t loc, enum tree_code rcode, tree rl_arg, tree rr_arg, tree *separatep); +/* Replace the conditions in INNER_COND and OUTER_COND with COND and COND2. + COND and COND2 are computed for insertion at INNER_COND, with OUTER_COND + replaced with a constant, but if there are intervening blocks, it's best to + adjust COND for insertion at OUTER_COND, placing COND2 at INNER_COND. */ + +static tree +ifcombine_replace_cond (gcond *inner_cond, bool inner_inv, + gcond *outer_cond, bool outer_inv, + tree cond, bool must_canon, + tree cond2) +{ + tree t = cond; + bool result_inv = inner_inv; + + /* ??? Support intervening blocks. */ + if (single_pred (gimple_bb (inner_cond)) != gimple_bb (outer_cond)) +return NULL_TREE; + + /* ??? Use both conditions. */ + if (cond2) +t = fold_build2 (TRUTH_AND_EXPR, TREE_TYPE (t), cond, cond2); + + /* ??? Insert at outer_cond. 
*/ + if (result_inv) +t = fold_build1 (TRUTH_NOT_EXPR, TREE_TYPE (t), t); + tree ret = t; + + if (tree tcanon = canonicalize_cond_expr_cond (t)) +ret = t = tcanon; + else if (must_canon) +return NULL_TREE; + if (!is_gimple_condexpr_for_cond (t)) +{ + gimple_stmt_iterator gsi = gsi_for_stmt (inner_cond); + t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond, + NULL, true, GSI_SAME_STMT); +} + gimple_cond_set_condition_from_tree (inner_cond, t); + update_stmt (inner_cond); + + /* Leave CFG optimization to cfg_cleanup. */ + gimple_cond_set_condition_from_tree (outer_cond, + outer_inv + ? boolean_false_node + : boolean_true_node); + update_stmt (outer_cond); + + update_profile_after_ifcombine (gimple_bb (inner_cond), + gimple_bb (outer_cond)); + + return ret; +} + /* If-convert on a and pattern with a common else block. The inner if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB. - inner_inv, outer_inv and result_inv indicate whether the conditions - are inverted. + inner_inv, outer_inv indicate whether the conditions are inverted. Returns true if the edges to the common else basic-block were merged. */ static bool ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, - basic_block outer_cond_bb, bool outer_inv, bool result_inv) + basic_block outer_cond_bb, bool outer_inv) { gimple_stmt_iterator gsi; tree name1, name2, bit1, bit2, bits1, bits2; @@ -454,26 +514,13 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, t2 = fold_build2 (BIT_AND_EXPR, TREE_TYPE (name1), name1, t); t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE, true, GSI_SAME_STMT); - t = fold_build2 (result_inv ? 
NE_EXPR : EQ_EXPR, - boolean_type_node, t2, t); - t = canonicalize_cond_expr_cond (t); - if (!t) - return false; - if (!is_gimple_condexpr_for_cond (t)) - { - gsi = gsi_for_stmt (inner_cond); - t = force_gimple_operand_gsi_1 (&gsi, t, is_gimple_condexpr_for_cond, - NULL, true, GSI_SAME_STMT); - } - gimple_cond_set_condition_from_tree (inner_cond, t); - update_stmt (inner_cond); - /* Leave CFG optimization to cfg_cleanup. */ - gimple_cond_set_condition_from_tree (outer_cond, - outer_inv ? boolean_false_node : boolean_true_node); - update_stmt (outer_cond); + t = fold_build2 (EQ_EXPR, boolean_type_node, t2, t); - update_profile_after_ifcombine (inner_cond_bb, outer_cond_bb); + if (!ifcombine_replace_cond (inner_cond, inner_inv, + outer_cond, outer_inv, + t, true, NULL_TREE)) + return false; if (dump_f
[gcc(refs/users/aoliva/heads/testme)] support noncontiguous ifcombine
https://gcc.gnu.org/g:b0b68cbc1ed13ee0c61e0e2d768d997e8a1dfaa8 commit b0b68cbc1ed13ee0c61e0e2d768d997e8a1dfaa8 Author: Alexandre Oliva Date: Tue Sep 17 20:15:50 2024 -0300 support noncontiguous ifcombine Diff: --- gcc/tree-ssa-ifcombine.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index 7678c87e0170..eb4317bebdfb 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -798,10 +798,10 @@ tree_ssa_ifcombine_bb (basic_block inner_cond_bb) if (a && b) ; This requires a single predecessor of the inner cond_bb. */ - if (single_pred_p (inner_cond_bb) - && bb_no_side_effects_p (inner_cond_bb)) + for (basic_block bb = inner_cond_bb; + single_pred_p (bb) && bb_no_side_effects_p (bb); ) { - basic_block outer_cond_bb = single_pred (inner_cond_bb); + basic_block outer_cond_bb = bb = single_pred (bb); if (tree_ssa_ifcombine_bb_1 (inner_cond_bb, outer_cond_bb, then_bb, else_bb, inner_cond_bb))