[gcc r15-4358] dce: Remove FIXME that has not been true for years
https://gcc.gnu.org/g:8065982aebe0737300cc036ed30b55868b977b00 commit r15-4358-g8065982aebe0737300cc036ed30b55868b977b00 Author: Andrew Pinski Date: Mon Oct 14 18:50:45 2024 -0700 dce: Remove FIXME that has not been true for years This FIXME: FIXME: Aggressive mode before PRE doesn't work currently because the dominance info is not invalidated after DCE1. Has not been true since at least r0-104723-g5ac60b564faa85 which added a call to calculate_dominance_info. Plus we run aggressive mode before PRE since r0-89162-g11b08ee9118d10 too. And since r0-95499-gb5b8b0ac643d31, dominance information was required even for non-aggressive mode. Also we have been verifying dominance information is correct and not needing to invalidate since ssa branch was merged so this comment has been out of date even before it was merged in. gcc/ChangeLog: * tree-ssa-dce.cc (perform_tree_ssa_dce): Remove FIXME note. Signed-off-by: Andrew Pinski Diff: --- gcc/tree-ssa-dce.cc | 8 +--- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc index 3075459e25f4..015c17984e16 100644 --- a/gcc/tree-ssa-dce.cc +++ b/gcc/tree-ssa-dce.cc @@ -1965,13 +1965,7 @@ make_forwarders_with_degenerate_phis (function *fn) In conservative mode, we ignore control dependence and simply declare all but the most trivially dead branches necessary. This mode is fast. In aggressive mode, control dependences are taken into account, which - results in more dead code elimination, but at the cost of some time. - - FIXME: Aggressive mode before PRE doesn't work currently because - the dominance info is not invalidated after DCE1. This is - not an issue right now because we only run aggressive DCE - as the last tree SSA pass, but keep this in mind when you - start experimenting with pass ordering. */ + results in more dead code elimination, but at the cost of some time. */ static unsigned int perform_tree_ssa_dce (bool aggressive)
[gcc r15-4357] passes: Remove limit on the number of params
https://gcc.gnu.org/g:061a4e35c81f24c6ec5e56214469d229f1808971 commit r15-4357-g061a4e35c81f24c6ec5e56214469d229f1808971 Author: Andrew Pinski Date: Sun Oct 13 15:57:41 2024 -0700 passes: Remove limit on the number of params Having a limit of 2 params for NEXT_PASS was just done because I didn't think there was a way to handle arbitrary number of params. But I found that we can handle this via a static const variable array (constexpr so we know it is true or false at compile time) and just loop over the array. Note I keep around NEXT_PASS_WITH_ARG and NEXT_PASS macros instead of always using NEXT_PASS_WITH_ARGS macro to make sure these cases get optimized for -O0 (stage1). Tested INSERT_PASS_AFTER/INSERT_PASS_BEFORE manually by changing config/i386/i386-passes.def's stv lines to have a 2nd argument and checked the resulting pass-instances.def to see the NEXT_PASS_WITH_ARGS was correctly done. changes from v1: * v2: Handle INSERT_PASS_AFTER/INSERT_PASS_BEFORE too. Bootstrapped and tested on x86_64-linux-gnu. gcc/ChangeLog: * gen-pass-instances.awk: Remove the limit of the params. * pass_manager.h (NEXT_PASS_WITH_ARG2): Rename to ... (NEXT_PASS_WITH_ARGS): This. * passes.cc (NEXT_PASS_WITH_ARG2): Rename to ... (NEXT_PASS_WITH_ARGS): This and support more than 2 params by using a constexpr array. 
Signed-off-by: Andrew Pinski Diff: --- gcc/gen-pass-instances.awk | 22 ++ gcc/pass_manager.h | 2 +- gcc/passes.cc | 13 + 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/gcc/gen-pass-instances.awk b/gcc/gen-pass-instances.awk index def093477658..1e5b3f0c8cc8 100644 --- a/gcc/gen-pass-instances.awk +++ b/gcc/gen-pass-instances.awk @@ -100,7 +100,7 @@ function adjust_linenos(above, increment, p, i) lineno += increment; } -function insert_remove_pass(line, fnname, arg3) +function insert_remove_pass(line, fnname, arg3, i) { parse_line($0, fnname); pass_name = args[1]; @@ -110,8 +110,13 @@ function insert_remove_pass(line, fnname, arg3) arg3 = args[3]; sub(/^[ \t]*/, "", arg3); new_line = prefix "NEXT_PASS (" arg3; - if (args[4]) -new_line = new_line "," args[4]; + # Add the optional params back. + i = 4; + while (args[i]) +{ + new_line = new_line "," args[i]; + i++; +} new_line = new_line ")" postfix; if (!pass_lines[pass_name, pass_num]) { @@ -195,7 +200,6 @@ function replace_pass(line, fnname, num, i) } END { - max_number_args = 2; for (i = 1; i < lineno; i++) { ret = parse_line(lines[i], "NEXT_PASS"); @@ -220,13 +224,8 @@ END { if (num_args > 0) { printf "NEXT_PASS_WITH_ARG"; - if (num_args > max_number_args) - { - print "ERROR: Only supports up to " max_number_args " args to NEXT_PASS"; - exit 1; - } if (num_args != 1) - printf num_args; + printf "S"; } else printf "NEXT_PASS"; @@ -266,8 +265,7 @@ END { print "#undef POP_INSERT_PASSES" print "#undef NEXT_PASS" print "#undef NEXT_PASS_WITH_ARG" - for (i = 2; i <= max_number_args; i++) -print "#undef NEXT_PASS_WITH_ARG" i + print "#undef NEXT_PASS_WITH_ARGS" print "#undef TERMINATE_PASS_LIST" } diff --git a/gcc/pass_manager.h b/gcc/pass_manager.h index f18ae026257a..294cdd0b1f7f 100644 --- a/gcc/pass_manager.h +++ b/gcc/pass_manager.h @@ -130,7 +130,7 @@ private: #define POP_INSERT_PASSES() #define NEXT_PASS(PASS, NUM) opt_pass *PASS ## _ ## NUM #define NEXT_PASS_WITH_ARG(PASS, NUM, ARG) NEXT_PASS 
(PASS, NUM) -#define NEXT_PASS_WITH_ARG2(PASS, NUM, ARG0, ARG1) NEXT_PASS (PASS, NUM) +#define NEXT_PASS_WITH_ARGS(PASS, NUM, ...) NEXT_PASS (PASS, NUM) #define TERMINATE_PASS_LIST(PASS) #include "pass-instances.def" diff --git a/gcc/passes.cc b/gcc/passes.cc index b5475fce5228..ae80f40b96a5 100644 --- a/gcc/passes.cc +++ b/gcc/passes.cc @@ -1589,7 +1589,7 @@ pass_manager::pass_manager (context *ctxt) #define POP_INSERT_PASSES() #define NEXT_PASS(PASS, NUM) PASS ## _ ## NUM = NULL #define NEXT_PASS_WITH_ARG(PASS, NUM, ARG) NEXT_PASS (PASS, NUM) -#define NEXT_PASS_WITH_ARG2(PASS, NUM, ARG0, ARG1) NEXT_PASS (PASS, NUM) +#define NEXT_PASS_WITH_ARGS(PASS, NUM, ...) NEXT_PASS (PASS, NUM) #define TERMINATE_PASS_LIST(PASS) #include "pass-instances.def" @@ -1636,11 +1636,16 @@ pass_manager::pass_manager (context *ctxt) PASS ## _ ## NUM->set_pass_param (0, ARG); \ } while (0) -#define NEXT_PASS_WITH_ARG2(PASS, NUM, ARG0, ARG1) \ +#define NEXT_PASS_WITH_ARGS(PASS, NUM, ...)\ do { \ NEXT_PASS (PASS, NUM);
[gcc r15-4362] C++: Add opindex for -Wchanges-meaning [PR117157]
https://gcc.gnu.org/g:11f0ea45b1b63ec7eb6a52179ca6fd4304e7e312 commit r15-4362-g11f0ea45b1b63ec7eb6a52179ca6fd4304e7e312 Author: Andrew Pinski Date: Tue Oct 15 08:05:26 2024 -0700 C++: Add opindex for -Wchanges-meaning [PR117157] Adds missing opindex for -Wchanges-meaning Pushed as obvious after building the HTML and checking the index. gcc/ChangeLog: PR c++/117157 * doc/invoke.texi (Wno-changes-meaning): Add opindex. Signed-off-by: Andrew Pinski Diff: --- gcc/doc/invoke.texi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 4f4ca6375495..0db754c888a6 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -6448,6 +6448,8 @@ union U @{ @end itemize +@opindex Wchanges-meaning +@opindex Wno-changes-meaning @item -Wno-changes-meaning @r{(C++ and Objective-C++ only)} C++ requires that unqualified uses of a name within a class have the same meaning in the complete scope of the class, so declaring the name
[gcc r15-4360] testsuite, jit: fix test-error-pr63969-missing-driver.c
https://gcc.gnu.org/g:f8dcb559e615dbb4557a23363f9532a3544a7241 commit r15-4360-gf8dcb559e615dbb4557a23363f9532a3544a7241 Author: David Malcolm Date: Tue Oct 15 10:59:48 2024 -0400 testsuite, jit: fix test-error-pr63969-missing-driver.c jit.dg/test-error-pr63969-missing-driver.c tries to break PATH and verify that an error is generated when using an external driver. However it does this by unsetting PATH, and so the test could accidentally find the driver if the system supplies a default and the driver happens to be installed in that path (reported as rhbz#2318021). Fix the test by instead setting PATH to a bogus value. gcc/testsuite/ChangeLog: * jit.dg/test-error-pr63969-missing-driver.c (create_code): When breaking PATH, use setenv with a bogus value, rather than unsetenv, in case the system uses a default path that contains the driver binary. Signed-off-by: David Malcolm Diff: --- gcc/testsuite/jit.dg/test-error-pr63969-missing-driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/jit.dg/test-error-pr63969-missing-driver.c b/gcc/testsuite/jit.dg/test-error-pr63969-missing-driver.c index 733522310deb..152e236443cc 100644 --- a/gcc/testsuite/jit.dg/test-error-pr63969-missing-driver.c +++ b/gcc/testsuite/jit.dg/test-error-pr63969-missing-driver.c @@ -28,7 +28,7 @@ create_code (gcc_jit_context *ctxt, void *user_data) /* Break PATH, so that the driver can't be found by gcc::jit::playback::context::compile () within gcc_jit_context_compile. */ - unsetenv ("PATH"); + setenv ("PATH", "/this/is/not/a/valid/path", 1); } void
[gcc r15-4363] C++: Regenerate c.opt.urls [PR117157]
https://gcc.gnu.org/g:a72064c8c12f2cc3ab6fde7cd42f31f27193b5c4 commit r15-4363-ga72064c8c12f2cc3ab6fde7cd42f31f27193b5c4 Author: Andrew Pinski Date: Tue Oct 15 08:31:32 2024 -0700 C++: Regenerate c.opt.urls [PR117157] I forgot to regenerate the c.opt.urls files after adding the opindex for changes-meaning. Fixed thusly. gcc/c-family/ChangeLog: PR c++/117157 * c.opt.urls: Regenerate. Signed-off-by: Andrew Pinski Diff: --- gcc/c-family/c.opt.urls | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls index c1738095e6dd..d045af14c3f0 100644 --- a/gcc/c-family/c.opt.urls +++ b/gcc/c-family/c.opt.urls @@ -220,6 +220,9 @@ UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wcatch-value) Wcatch-value= UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wcatch-value) +Wchanges-meaning +UrlSuffix(gcc/Warning-Options.html#index-Wchanges-meaning) + Wchar-subscripts UrlSuffix(gcc/Warning-Options.html#index-Wchar-subscripts)
[gcc r15-4364] testsuite/i386: Require AVX2 effective target in pr107432-9.c
https://gcc.gnu.org/g:0fa5017df91731fb276aef5ded8a153e80bae358 commit r15-4364-g0fa5017df91731fb276aef5ded8a153e80bae358 Author: Uros Bizjak Date: Tue Oct 15 17:45:13 2024 +0200 testsuite/i386: Require AVX2 effective target in pr107432-9.c x86-64-v3 requires AVX2 effective target and AVX2 specific avx2-check.h. gcc/testsuite/ChangeLog: * gcc.target/i386/pr107432-9.c: Require AVX2 effective target. Include avx2-check.h instead of avx-check.h. Define TEST to avx2_test. Diff: --- gcc/testsuite/gcc.target/i386/pr107432-9.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/pr107432-9.c b/gcc/testsuite/gcc.target/i386/pr107432-9.c index 90426c030c4e..861db17a5ff7 100644 --- a/gcc/testsuite/gcc.target/i386/pr107432-9.c +++ b/gcc/testsuite/gcc.target/i386/pr107432-9.c @@ -1,11 +1,13 @@ /* { dg-do run } */ /* { dg-options "-march=x86-64-v3 -O2 -flax-vector-conversions" } */ +/* { dg-require-effective-target avx2 } */ + #include -#include "avx-check.h" +#include "avx2-check.h" #ifndef TEST -#define TEST avx_test +#define TEST avx2_test #endif typedef short __v2hi __attribute__ ((__vector_size__ (4)));
[gcc r13-9115] middle-end: Fix ifcvt predicate generation for masked function calls
https://gcc.gnu.org/g:555406227c77abc067edf98d98bd17d6b19c208c commit r13-9115-g555406227c77abc067edf98d98bd17d6b19c208c Author: Victor Do Nascimento Date: Mon Sep 23 17:10:18 2024 +0100 middle-end: Fix ifcvt predicate generation for masked function calls Up until now, due to a latent bug in the code for the ifcvt pass, irrespective of the branch taken in a conditional statement, the original condition for the if statement was used in masking the function call. Thus, for code such as: if (a[i] > limit) b[i] = fixed_const; else b[i] = fn (a[i]); we would generate the following (wrong) if-converted tree code: _1 = a[i_1]; _2 = _1 > limit; _3 = .MASK_CALL (fn, _1, _2); cstore_4 = _2 ? fixed_const : _3; as opposed to the correct expected sequence: _1 = a[i_1]; _2 = _1 > limit; _3 = ~_2; _4 = .MASK_CALL (fn, _1, _3); cstore_5 = _2 ? fixed_const : _4; This patch ensures that the correct predicate mask generation is carried out such that, upon autovectorization, the correct vector lanes are selected in the vectorized function call. gcc/ChangeLog: * tree-if-conv.cc (predicate_statements): Fix handling of predicated function calls. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-fncall-mask.c: New. Diff: --- gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c | 31 gcc/tree-if-conv.cc | 14 - 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c new file mode 100644 index ..554488e06308 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+sve -fdump-tree-ifcvt-raw -Ofast" { target { aarch64*-*-* } } } */ + +extern int __attribute__ ((simd, const)) fn (int); + +const int N = 20; +const float lim = 101.0; +const float cst = -1.0; +float tot = 0.0; + +float b[20]; +float a[20] = { [0 ... 9] = 1.7014118e39, /* If branch. */ + [10 ... 19] = 100.0 };/* Else branch. 
*/ + +int main (void) +{ + #pragma omp simd + for (int i = 0; i < N; i += 1) +{ + if (a[i] > lim) + b[i] = cst; + else + b[i] = fn (a[i]); + tot += b[i]; +} + return (0); +} + +/* { dg-final { scan-tree-dump {gimple_assign } ifcvt } } */ +/* { dg-final { scan-tree-dump {gimple_assign } ifcvt } } */ +/* { dg-final { scan-tree-dump {gimple_call <.MASK_CALL, _3, fn, _2, _34>} ifcvt } } */ diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 71f5d98c2129..d4af5929d926 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -2649,6 +2649,7 @@ predicate_statements (loop_p loop) This will cause the vectorizer to match the "in branch" clone variants, and serves to build the mask vector in a natural way. */ + tree mask = cond; gcall *call = dyn_cast (gsi_stmt (gsi)); tree orig_fn = gimple_call_fn (call); int orig_nargs = gimple_call_num_args (call); @@ -2656,7 +2657,18 @@ predicate_statements (loop_p loop) args.safe_push (orig_fn); for (int i = 0; i < orig_nargs; i++) args.safe_push (gimple_call_arg (call, i)); - args.safe_push (cond); + /* If `swap', we invert the mask used for the if branch for use +when masking the function call. */ + if (swap) + { + gimple_seq stmts = NULL; + tree true_val + = constant_boolean_node (true, TREE_TYPE (mask)); + mask = gimple_build (&stmts, BIT_XOR_EXPR, + TREE_TYPE (mask), mask, true_val); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + } + args.safe_push (mask); /* Replace the call with a IFN_MASK_CALL that has the extra condition parameter. */
[gcc r14-10788] middle-end: Fix ifcvt predicate generation for masked function calls
https://gcc.gnu.org/g:abbfe1e2616d8f18ccd7f9ea65d92aecec7258ea commit r14-10788-gabbfe1e2616d8f18ccd7f9ea65d92aecec7258ea Author: Victor Do Nascimento Date: Mon Sep 23 17:10:18 2024 +0100 middle-end: Fix ifcvt predicate generation for masked function calls Up until now, due to a latent bug in the code for the ifcvt pass, irrespective of the branch taken in a conditional statement, the original condition for the if statement was used in masking the function call. Thus, for code such as: if (a[i] > limit) b[i] = fixed_const; else b[i] = fn (a[i]); we would generate the following (wrong) if-converted tree code: _1 = a[i_1]; _2 = _1 > limit; _3 = .MASK_CALL (fn, _1, _2); cstore_4 = _2 ? fixed_const : _3; as opposed to the correct expected sequence: _1 = a[i_1]; _2 = _1 > limit; _3 = ~_2; _4 = .MASK_CALL (fn, _1, _3); cstore_5 = _2 ? fixed_const : _4; This patch ensures that the correct predicate mask generation is carried out such that, upon autovectorization, the correct vector lanes are selected in the vectorized function call. gcc/ChangeLog: * tree-if-conv.cc (predicate_statements): Fix handling of predicated function calls. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-fncall-mask.c: New. Diff: --- gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c | 31 gcc/tree-if-conv.cc | 14 - 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c new file mode 100644 index ..554488e06308 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-fncall-mask.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { aarch64*-*-* } } } */ +/* { dg-additional-options "-march=armv8.2-a+sve -fdump-tree-ifcvt-raw -Ofast" { target { aarch64*-*-* } } } */ + +extern int __attribute__ ((simd, const)) fn (int); + +const int N = 20; +const float lim = 101.0; +const float cst = -1.0; +float tot = 0.0; + +float b[20]; +float a[20] = { [0 ... 9] = 1.7014118e39, /* If branch. */ + [10 ... 19] = 100.0 };/* Else branch. 
*/ + +int main (void) +{ + #pragma omp simd + for (int i = 0; i < N; i += 1) +{ + if (a[i] > lim) + b[i] = cst; + else + b[i] = fn (a[i]); + tot += b[i]; +} + return (0); +} + +/* { dg-final { scan-tree-dump {gimple_assign } ifcvt } } */ +/* { dg-final { scan-tree-dump {gimple_assign } ifcvt } } */ +/* { dg-final { scan-tree-dump {gimple_call <.MASK_CALL, _3, fn, _2, _34>} ifcvt } } */ diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index c4c3ed41a447..873a5c66629f 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -2827,6 +2827,7 @@ predicate_statements (loop_p loop) This will cause the vectorizer to match the "in branch" clone variants, and serves to build the mask vector in a natural way. */ + tree mask = cond; gcall *call = dyn_cast (gsi_stmt (gsi)); tree orig_fn = gimple_call_fn (call); int orig_nargs = gimple_call_num_args (call); @@ -2834,7 +2835,18 @@ predicate_statements (loop_p loop) args.safe_push (orig_fn); for (int i = 0; i < orig_nargs; i++) args.safe_push (gimple_call_arg (call, i)); - args.safe_push (cond); + /* If `swap', we invert the mask used for the if branch for use +when masking the function call. */ + if (swap) + { + gimple_seq stmts = NULL; + tree true_val + = constant_boolean_node (true, TREE_TYPE (mask)); + mask = gimple_build (&stmts, BIT_XOR_EXPR, + TREE_TYPE (mask), mask, true_val); + gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); + } + args.safe_push (mask); /* Replace the call with a IFN_MASK_CALL that has the extra condition parameter. */
[gcc r15-4369] c: Implement C2Y N3355 - Named Loops [PR117022]
https://gcc.gnu.org/g:50f27896adb272b40ab03a56fd192e74789bef97 commit r15-4369-g50f27896adb272b40ab03a56fd192e74789bef97 Author: Jakub Jelinek Date: Tue Oct 15 20:41:18 2024 +0200 c: Implement C2Y N3355 - Named Loops [PR117022] The following patch implements the C2Y N3355 - Named Loops paper. I've tried to implement it lazily, rather than proactively e.g. push labels to a vector just in case the following statement is iteration statement, switch statement or one of the loop pragmas followed by iteration statement the patch just notes the last statement in cur_stmt_list if any before c_parser_label/c_parser_all_labels and passes it down to the iteration/switch statement parsing routines, which then search backward for LABEL_EXPRs before they reach the given stop statement. The patch then adds one extra argument to {FOR,WHILE,DO,BREAK,CONTINUE,SWITCH}_STMT, which is set to a canonical name LABEL_DECL (the last named label before the construct). If one just refers to the innermost construct with a fancy name, it is in the end parsed the same as break/continue without an identifier (i.e. NULL_TREE argument), and if a loop or switch has name(s) but break/continue to that isn't used, the name is set to NULL_TREE. At c-gimplify.cc time the name is then pushed into a hash map mapping it to a pair of labels. I've implemented it also for ObjC foreach loops (which have break/continue handled during parsing, not during c-gimplify.cc). As for OpenMP/OpenACC, the patch right now pretends no OpenMP loop has a name, until something different is decided in the standard. 
As shown in the testcases, most break identifier/continue identifier cases aren't really useful in OpenMP code, a break identifier or continue identifier jumping out of an OpenMP region is certainly invalid (such regions have to be single entry single exit, so escaping it through goto/break lab/continue lab violates that), similarly break is disallowed in the innermost OpenMP nested loop, just continue is allowed, so the only thing that would make sense for OpenMP (second gomp testcase) would be allowing to give name to the innermost loop in OpenMP canonical loop nest (except that labels aren't allowed in the syntax right now in between the loops) and only continue to that label. For collapse(1) loops that would be a label before the #pragma or [[omp::directive (parallel for)]] etc. And of course, what already works fine in the patch is break/continue to non-OpenMP loops nested in OpenMP loops. 2024-10-12 Jakub Jelinek PR c/117022 gcc/c-family/ * c-common.def (FOR_STMT, WHILE_STMT, DO_STMT, BREAK_STMT, CONTINUE_STMT, SWITCH_STMT): Add an extra operand, *_NAME and document it. * c-common.h (bc_hash_map_t): New typedef. (struct bc_state): Add bc_hash_map member. (WHILE_NAME, DO_NAME, FOR_NAME, BREAK_NAME, CONTINUE_NAME, SWITCH_STMT_NAME): Define. * c-pretty-print.cc (c_pretty_printer::statement): Print BREAK_STMT or CONTINUE_STMT operand if any. * c-gimplify.cc (bc_hash_map): New static variable. (note_named_bc, release_named_bc): New functions. (save_bc_state): Save and clear bc_hash_map. (restore_bc_state): Assert NULL and restore bc_hash_map. (genericize_c_loop): Add NAME argument, call note_named_bc and release_named_bc if non-NULL around the body walk. (genericize_for_stmt, genericize_while_stmt, genericize_do_stmt): Adjust callers of it. (genericize_switch_stmt): Rename break_block variable to blab. Call note_named_bc and release_named_bc if SWITCH_STMT_NAME is non-NULL around the body walk. (genericize_continue_stmt): Handle non-NULL CONTINUE_NAME. 
(genericize_break_stmt): Handle non-NULL BREAK_NAME. (c_genericize): Delete and clear bc_hash_map. gcc/c/ * c-tree.h: Implement C2Y N3355 - Named loops. (C_DECL_LOOP_NAME, C_DECL_SWITCH_NAME, C_DECL_LOOP_SWITCH_NAME_VALID, C_DECL_LOOP_SWITCH_NAME_USED, IN_NAMED_STMT): Define. (c_get_loop_names, c_release_loop_names, c_finish_bc_name): Declare. (c_start_switch): Add NAME argument. (c_finish_bc_stmt): Likewise. * c-lang.h (struct language_function): Add loop_names and loop_names_hash members. * c-parser.cc (c_parser_external_declaration, c_parser_declaration_or_fndef, c_parser_struct_or_union_specifier, c_parser_parameter_declaration): Adjust c_parser_pragma caller. (get_before_labels): New function. (c_parser_compound_statement_nostart): Call get_before_labels when
[gcc r15-4350] middle-end/117137 - expansion issue with vector equality compares
https://gcc.gnu.org/g:79b881df72c946f2ba61879c36ae93b0cb974617 commit r15-4350-g79b881df72c946f2ba61879c36ae93b0cb974617 Author: Richard Biener Date: Tue Oct 15 09:48:10 2024 +0200 middle-end/117137 - expansion issue with vector equality compares When expanding a COND_EXPR with a vector equality compare as condition expand_cond_expr_using_cmove fails to properly go the cbranch path. I failed to massage its twisted logic so the simple fix is to make sure to expand a vector condition separately which also generates the expected code for the testcase: ptest %xmm0, %xmm0 cmovne %edi, %eax PR middle-end/117137 * expr.cc (expand_cond_expr_using_cmove): Make sure to expand vector comparisons separately. * gcc.dg/torture/pr117137.c: New testcase. Diff: --- gcc/expr.cc | 6 -- gcc/testsuite/gcc.dg/torture/pr117137.c | 13 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/gcc/expr.cc b/gcc/expr.cc index 7a471f20e794..da486cf85fdd 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -9524,7 +9524,8 @@ expand_cond_expr_using_cmove (tree treeop0 ATTRIBUTE_UNUSED, EXPAND_NORMAL); if (TREE_CODE (treeop0) == SSA_NAME - && (srcstmt = get_def_for_expr_class (treeop0, tcc_comparison))) + && (srcstmt = get_def_for_expr_class (treeop0, tcc_comparison)) + && !VECTOR_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (srcstmt)))) { type = TREE_TYPE (gimple_assign_rhs1 (srcstmt)); enum tree_code cmpcode = gimple_assign_rhs_code (srcstmt); @@ -9534,7 +9535,8 @@ expand_cond_expr_using_cmove (tree treeop0 ATTRIBUTE_UNUSED, unsignedp = TYPE_UNSIGNED (type); comparison_code = convert_tree_comp_to_rtx (cmpcode, unsignedp); } - else if (COMPARISON_CLASS_P (treeop0)) + else if (COMPARISON_CLASS_P (treeop0) + && !VECTOR_TYPE_P (TREE_TYPE (TREE_OPERAND (treeop0, 0)))) { type = TREE_TYPE (TREE_OPERAND (treeop0, 0)); enum tree_code cmpcode = TREE_CODE (treeop0); diff --git a/gcc/testsuite/gcc.dg/torture/pr117137.c b/gcc/testsuite/gcc.dg/torture/pr117137.c new file mode 100644 index ..b6ce78d86087 --- /dev/null +++ 
b/gcc/testsuite/gcc.dg/torture/pr117137.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-msse4" { target { x86_64-*-* i?86-*-* } } } */ + +long x[2]; + +int +foo (int c) +{ + long x0 = x[0], x1 = x[1]; + int t = x0 != 0 | x1 != 0; + c *= t; + return c; +}
[gcc r15-4349] tree-optimization/117147 - bogus re-use of previous ldst_p
https://gcc.gnu.org/g:8af474aa5d5efdda7def6cdcec3f246df2c7026a commit r15-4349-g8af474aa5d5efdda7def6cdcec3f246df2c7026a Author: Richard Biener Date: Tue Oct 15 09:22:09 2024 +0200 tree-optimization/117147 - bogus re-use of previous ldst_p The following shows that in vect_build_slp_tree_1 we're eventually re-using the previous lane set ldst_p flag. Fixed by some refactoring. PR tree-optimization/117147 * tree-vect-slp.cc (vect_build_slp_tree_1): Put vars and initialization of per-lane data into the per-lane processing loop to avoid re-using previous lane state. Diff: --- gcc/tree-vect-slp.cc | 13 - 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 16332e0b6d74..8727246c27a6 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1072,14 +1072,13 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, stmt_vec_info first_stmt_info = stmts[0]; code_helper first_stmt_code = ERROR_MARK; code_helper alt_stmt_code = ERROR_MARK; - code_helper rhs_code = ERROR_MARK; code_helper first_cond_code = ERROR_MARK; tree lhs; bool need_same_oprnds = false; - tree vectype = NULL_TREE, first_op1 = NULL_TREE; + tree first_op1 = NULL_TREE; stmt_vec_info first_load = NULL, prev_first_load = NULL; - bool first_stmt_ldst_p = false, ldst_p = false; - bool first_stmt_phi_p = false, phi_p = false; + bool first_stmt_ldst_p = false; + bool first_stmt_phi_p = false; int first_reduc_idx = -1; bool maybe_soft_fail = false; tree soft_fail_nunits_vectype = NULL_TREE; @@ -1088,6 +1087,10 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, stmt_vec_info stmt_info; FOR_EACH_VEC_ELT (stmts, i, stmt_info) { + bool ldst_p = false; + bool phi_p = false; + code_helper rhs_code = ERROR_MARK; + swap[i] = 0; matches[i] = false; if (!stmt_info) @@ -1139,7 +1142,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, return false; } - tree nunits_vectype; + tree vectype, nunits_vectype; if 
(!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype, &nunits_vectype, group_size)) {
[gcc r15-4354] Match: Remove dup match pattern for signed_integer_sat_sub [PR117141]
https://gcc.gnu.org/g:97f98855d4157a2511a713129ec77740fe6f88dc commit r15-4354-g97f98855d4157a2511a713129ec77740fe6f88dc Author: Pan Li Date: Tue Oct 15 07:30:13 2024 +0800 Match: Remove dup match pattern for signed_integer_sat_sub [PR117141] This patch would like to fix the warning as below: /home/slyfox/dev/git/gcc/gcc/match.pd:3424:3 warning: duplicate pattern (cond^ (ne (imagpart (IFN_SUB_OVERFLOW:c@2 @0 @1)) integer_zerop) ^ /home/slyfox/dev/git/gcc/gcc/match.pd:3397:3 warning: previous pattern defined here (cond^ (ne (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop) The second has a optional nop_convert which allows for the first one, thus remove the dup one. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. PR middle-end/117141 gcc/ChangeLog: * match.pd: Remove the dup pattern for signed SAT_SUB. Signed-off-by: Pan Li Diff: --- gcc/match.pd | 15 ++- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index ee53c25cef97..22fad1a8757b 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3395,7 +3395,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) The T and UT are type pair like T=int8_t, UT=uint8_t. */ (match (signed_integer_sat_sub @0 @1) (cond^ (ne (imagpart (IFN_SUB_OVERFLOW@2 @0 @1)) integer_zerop) - (bit_xor:c (negate (convert (lt @0 integer_zerop))) + (bit_xor:c (nop_convert? + (negate (nop_convert? (convert (lt @0 integer_zerop) max_value) (realpart @2)) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) @@ -3417,18 +3418,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) @2) (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type -/* Signed saturation sub, case 5: - Z = .SUB_OVERFLOW (X, Y) - SAT_S_SUB = IMAGPART_EXPR (Z) != 0 ? (-(T)(X < 0) ^ MAX) : minus; */ -(match (signed_integer_sat_sub @0 @1) - (cond^ (ne (imagpart (IFN_SUB_OVERFLOW:c@2 @0 @1)) integer_zerop) - (bit_xor:c (nop_convert? - (negate (nop_convert? 
(convert (lt @0 integer_zerop) - max_value) - (realpart @2)) - (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) - && types_match (type, @0, @1 - /* Unsigned saturation truncate, case 1, sizeof (WT) > sizeof (NT). SAT_U_TRUNC = (NT)x | (NT)(-(X > (WT)(NT)(-1))). */ (match (unsigned_integer_sat_trunc @0)
[gcc r15-4352] tree-optimization/117147 - add testcase
https://gcc.gnu.org/g:1aac888fac6cff63a3d6522bcf9b8439e14320ad commit r15-4352-g1aac888fac6cff63a3d6522bcf9b8439e14320ad Author: Richard Biener Date: Tue Oct 15 11:36:33 2024 +0200 tree-optimization/117147 - add testcase The following adds a testcase for the PR. PR tree-optimization/117147 * gcc.dg/vect/pr117147.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/pr117147.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/pr117147.c b/gcc/testsuite/gcc.dg/vect/pr117147.c new file mode 100644 index ..bc20fa8741bc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117147.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-mavx512f" { target { x86_64-*-* i?86-*-* } } } */ + +double Test(int p, double *sum3, double *sum4, double *wX) +{ + double tmp; + double bS1 = 0.; + double bS2 = 0.; + for (int i = 0; i < p; ++i) +{ + tmp = wX[i] * wX[i]; + if (tmp != 0.0) + { + bS1 += sum3[i] * sum3[i] / (tmp * wX[i]); + bS2 += sum4[i] / tmp; + } +} + return (bS2 + bS1); +}
[gcc r15-4356] libstdc++: Implement LWG 3798 for range adaptors [PR106676]
https://gcc.gnu.org/g:7f65f94917866c6b18d9698eec6451c1bf21e0f9 commit r15-4356-g7f65f94917866c6b18d9698eec6451c1bf21e0f9 Author: Jonathan Wakely Date: Sun Oct 13 22:28:16 2024 +0100 libstdc++: Implement LWG 3798 for range adaptors [PR106676] LWG 3798 modified the iterator_category of the iterator types for transform_view, join_with_view, zip_transform_view and adjacent_transform_view, to allow the iterator's reference type to be an rvalue reference. libstdc++-v3/ChangeLog: PR libstdc++/106676 * include/bits/iterator_concepts.h (__cpp17_fwd_iterator): Use is_reference instead of is_lvalue_reference, to allow rvalue references. * include/std/ranges (transform_view:__iter_cat::_S_iter_cat): Likewise. (zip_transform_view::__iter_cat::_S_iter_cat): Likewise. (adjacent_transform_view::__iter_cat::_S_iter_cat): Likewise. (join_with_view::__iter_cat::_S_iter_cat): Likewise. * testsuite/std/ranges/adaptors/transform.cc: Check iterator_category when the transformation function returns an rvalue reference type. Reviewed-by: Patrick Palka Diff: --- libstdc++-v3/include/bits/iterator_concepts.h | 4 +++- libstdc++-v3/include/std/ranges| 18 +- .../testsuite/std/ranges/adaptors/transform.cc | 16 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/libstdc++-v3/include/bits/iterator_concepts.h b/libstdc++-v3/include/bits/iterator_concepts.h index 490a362cdf1c..669d3ddfd1ec 100644 --- a/libstdc++-v3/include/bits/iterator_concepts.h +++ b/libstdc++-v3/include/bits/iterator_concepts.h @@ -333,10 +333,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typename incrementable_traits<_Iter>::difference_type>; }; +// _GLIBCXX_RESOLVE_LIB_DEFECTS +// 3798. 
Rvalue reference and iterator_category template concept __cpp17_fwd_iterator = __cpp17_input_iterator<_Iter> && constructible_from<_Iter> - && is_lvalue_reference_v> + && is_reference_v> && same_as>, typename indirectly_readable_traits<_Iter>::value_type> && requires(_Iter __it) diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index 98442dcfbd51..5b4558885363 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -1892,7 +1892,9 @@ namespace views::__adaptor using _Base = transform_view::_Base<_Const>; using _Res = invoke_result_t<__maybe_const_t<_Const, _Fp>&, range_reference_t<_Base>>; - if constexpr (is_lvalue_reference_v<_Res>) + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 3798. Rvalue reference and iterator_category + if constexpr (is_reference_v<_Res>) { using _Cat = typename iterator_traits>::iterator_category; @@ -5047,7 +5049,9 @@ namespace views::__adaptor using __detail::__range_iter_cat; using _Res = invoke_result_t<__maybe_const_t<_Const, _Fp>&, range_reference_t<__maybe_const_t<_Const, _Vs>>...>; - if constexpr (!is_lvalue_reference_v<_Res>) + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 3798. Rvalue reference and iterator_category + if constexpr (!is_reference_v<_Res>) return input_iterator_tag{}; else if constexpr ((derived_from<__range_iter_cat<_Vs, _Const>, random_access_iterator_tag> && ...)) @@ -5820,7 +5824,9 @@ namespace views::__adaptor using _Res = invoke_result_t<__unarize<__maybe_const_t<_Const, _Fp>&, _Nm>, range_reference_t<_Base>>; using _Cat = typename iterator_traits>::iterator_category; - if constexpr (!is_lvalue_reference_v<_Res>) + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 3798. 
Rvalue reference and iterator_category + if constexpr (!is_reference_v<_Res>) return input_iterator_tag{}; else if constexpr (derived_from<_Cat, random_access_iterator_tag>) return random_access_iterator_tag{}; @@ -7228,8 +7234,10 @@ namespace views::__adaptor using _OuterCat = typename iterator_traits<_OuterIter>::iterator_category; using _InnerCat = typename iterator_traits<_InnerIter>::iterator_category; using _PatternCat = typename iterator_traits<_PatternIter>::iterator_category; - if constexpr (!is_lvalue_reference_v, - iter_reference_t<_PatternIter>>>) + // _GLIBCXX_RESOLVE_LIB_DEFECTS + // 3798. Rvalue reference and iterator_category + if constexpr (!is_reference_v, + iter_reference
[gcc r15-4353] AArch64: re-enable memory access costing after SLP change.
https://gcc.gnu.org/g:a1540bb843fd1a3e87f50d3f713386eaae454d1c commit r15-4353-ga1540bb843fd1a3e87f50d3f713386eaae454d1c Author: Tamar Christina Date: Tue Oct 15 11:22:26 2024 +0100 AArch64: re-enable memory access costing after SLP change. While chasing down a costing difference between SLP and non-SLP for memory access costing I noticed that at some point the SLP and non-SLP costing have diverged. It used to be we only supported LOAD_LANES in SLP and so the non-SLP costing was working fine. But with the change to SLP only we now lost costing. It looks like the vectorizer for non-SLP stores the VMAT type in STMT_VINFO_MEMORY_ACCESS_TYPE on the stmt_info, but for SLP it stores it in SLP_TREE_MEMORY_ACCESS_TYPE which is on the SLP node itself. While my first attempt of a patch was to just also store the VMAT in the stmt_info https://gcc.gnu.org/pipermail/gcc-patches/2024-October/665295.html Richi pointed out that this goes wrong when the same access is used Hybrid. And so we have to do a backend specific fix. To help out other backends this also introduces a generic helper function suggested by Richi in that patch (I hope that's ok.. I didn't want to split out just the helper.) This successfully restores VMAT based costing in the new SLP only world. gcc/ChangeLog: * tree-vectorizer.h (vect_mem_access_type): New. * config/aarch64/aarch64.cc (aarch64_ld234_st234_vectors): Use it. (aarch64_detect_vector_stmt_subtype): Likewise. (aarch64_adjust_stmt_cost): Likewise. (aarch64_vector_costs::count_ops): Likewise. (aarch64_vector_costs::add_stmt_cost): Make SLP node named. 
Diff: --- gcc/config/aarch64/aarch64.cc | 54 +++ gcc/tree-vectorizer.h | 12 ++ 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 102680a0efca..5770491b30ce 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -16278,7 +16278,7 @@ public: private: void record_potential_advsimd_unrolling (loop_vec_info); void analyze_loop_vinfo (loop_vec_info); - void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, + void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, slp_tree, aarch64_vec_op_count *); fractional_cost adjust_body_cost_sve (const aarch64_vec_op_count *, fractional_cost, unsigned int, @@ -16595,11 +16595,13 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, } } -/* Return true if an access of kind KIND for STMT_INFO represents one - vector of an LD[234] or ST[234] operation. Return the total number of - vectors (2, 3 or 4) if so, otherwise return a value outside that range. */ +/* Return true if an access of kind KIND for STMT_INFO (or NODE if SLP) + represents one vector of an LD[234] or ST[234] operation. Return the total + number of vectors (2, 3 or 4) if so, otherwise return a value outside that + range. 
*/ static int -aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info) +aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info, +slp_tree node) { if ((kind == vector_load || kind == unaligned_load @@ -16609,7 +16611,7 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info) { stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); if (stmt_info - && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES) + && vect_mem_access_type (stmt_info, node) == VMAT_LOAD_STORE_LANES) return DR_GROUP_SIZE (stmt_info); } return 0; @@ -16847,14 +16849,15 @@ aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, } /* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost - for the vectorized form of STMT_INFO, which has cost kind KIND and which - when vectorized would operate on vector type VECTYPE. Try to subdivide - the target-independent categorization provided by KIND to get a more - accurate cost. WHERE specifies where the cost associated with KIND - occurs. */ + for the vectorized form of STMT_INFO possibly using SLP node NODE, which has + cost kind KIND and which when vectorized would operate on vector type + VECTYPE. Try to subdivide the target-independent categorization provided by + KIND to get a more accurate cost. WHERE specifies where the cost associated + with KIND occurs. */ static fractional_cost aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, tree vectype, + stmt_vec_info stmt_info, slp_tree
[gcc r15-4351] tree-optimization/117138 - fix ICE with vector comparison in COND_EXPR
https://gcc.gnu.org/g:28982c271cbbed3580e4c7c784892694c3b6b2de commit r15-4351-g28982c271cbbed3580e4c7c784892694c3b6b2de Author: Richard Biener Date: Tue Oct 15 10:23:06 2024 +0200 tree-optimization/117138 - fix ICE with vector comparison in COND_EXPR The range folding code of COND_EXPRs missed a check whether the comparison operand type is supported. PR tree-optimization/117138 * gimple-range-fold.cc (fold_using_range::condexpr_adjust): Check if the comparison operand type is supported. * gcc.dg/torture/pr117138.c: New testcase. Diff: --- gcc/gimple-range-fold.cc| 3 ++- gcc/testsuite/gcc.dg/torture/pr117138.c | 13 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index 65d31adde54c..dcd0cae03517 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -1139,7 +1139,8 @@ fold_using_range::condexpr_adjust (vrange &r1, vrange &r2, gimple *, tree cond, || TREE_CODE_CLASS (gimple_assign_rhs_code (cond_def)) != tcc_comparison) return false; tree type = TREE_TYPE (gimple_assign_rhs1 (cond_def)); - if (!range_compatible_p (type, TREE_TYPE (gimple_assign_rhs2 (cond_def + if (!value_range::supports_type_p (type) + || !range_compatible_p (type, TREE_TYPE (gimple_assign_rhs2 (cond_def return false; range_op_handler hand (gimple_assign_rhs_code (cond_def)); if (!hand) diff --git a/gcc/testsuite/gcc.dg/torture/pr117138.c b/gcc/testsuite/gcc.dg/torture/pr117138.c new file mode 100644 index ..b32585d3a563 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr117138.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-msse4" { target { x86_64-*-* i?86-*-* } } } */ + +int a, b; +_Complex long c; + +void +foo () +{ + do +b = c || a; + while (a); +}
[gcc r15-4348] Fortran: Use OpenACC's acc_on_device builtin, fix OpenMP' __builtin_is_initial_device: Fix 'is_builtin' initialization
https://gcc.gnu.org/g:fa90febea9801d4255bf6a1e9f0fd998629c3c7c commit r15-4348-gfa90febea9801d4255bf6a1e9f0fd998629c3c7c Author: Thomas Schwinge Date: Tue Oct 15 09:29:53 2024 +0200 Fortran: Use OpenACC's acc_on_device builtin, fix OpenMP' __builtin_is_initial_device: Fix 'is_builtin' initialization Bug fix for commit 3269a722b7a03613e9c4e2862bc5088c4a17cc11 "Fortran: Use OpenACC's acc_on_device builtin, fix OpenMP' __builtin_is_initial_device". PR fortran/82250 PR fortran/82251 PR fortran/117136 gcc/fortran/ * trans-expr.cc (gfc_conv_procedure_call): Initialize 'is_builtin'. (conv_function_val): Clean up. Co-authored-by: Harald Anlauf Diff: --- gcc/fortran/trans-expr.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index b9f585d0d2f1..569b92a48ab4 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -4437,7 +4437,6 @@ conv_function_val (gfc_se * se, bool *is_builtin, gfc_symbol * sym, { tree tmp; - *is_builtin = false; if (gfc_is_proc_ptr_comp (expr)) tmp = get_proc_ptr_comp (expr); else if (sym->attr.dummy) @@ -8218,6 +8217,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, arglist = retargs; /* Generate the actual call. */ + is_builtin = false; if (base_object == NULL_TREE) conv_function_val (se, &is_builtin, sym, expr, args); else
[gcc r15-4355] tree-optimization/116907 - stale BLOCK reference from DECL_VALUE_EXPR
https://gcc.gnu.org/g:7d15248d41dc45a4ba2d38ff532b672a5c0651d0 commit r15-4355-g7d15248d41dc45a4ba2d38ff532b672a5c0651d0 Author: Richard Biener Date: Sun Oct 13 12:44:04 2024 +0200 tree-optimization/116907 - stale BLOCK reference from DECL_VALUE_EXPR When we remove unused BLOCKs we fail to clean references to them from DECL_VALUE_EXPRs of variables in other BLOCKs which in the PR causes LTO streaming to walk into pointers to GGC freed blocks. There's the question of whether such DECL_VALUE_EXPRs should keep variables and blocks referenced live (it doesn't seem to do that) and whether such DECL_VALUE_EXPRs should have survived in the first place. PR tree-optimization/116907 * tree-ssa-live.cc (clear_unused_block_pointer_in_block): New helper. (clear_unused_block_pointer): Call it. Diff: --- gcc/tree-ssa-live.cc | 20 1 file changed, 20 insertions(+) diff --git a/gcc/tree-ssa-live.cc b/gcc/tree-ssa-live.cc index 0739faa022ef..484698899cfe 100644 --- a/gcc/tree-ssa-live.cc +++ b/gcc/tree-ssa-live.cc @@ -612,6 +612,22 @@ clear_unused_block_pointer_1 (tree *tp, int *, void *) return NULL_TREE; } +/* Clear references to unused BLOCKs from DECL_VALUE_EXPRs of variables + in BLOCK. */ + +static void +clear_unused_block_pointer_in_block (tree block) +{ + for (tree t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) +if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) + { + tree val = DECL_VALUE_EXPR (t); + walk_tree (&val, clear_unused_block_pointer_1, NULL, NULL); + } + for (tree t = BLOCK_SUBBLOCKS (block); t; t = BLOCK_CHAIN (t)) +clear_unused_block_pointer_in_block (t); +} + /* Set all block pointer in debug or clobber stmt to NULL if the block is unused, so that they will not be streamed out. */ @@ -667,6 +683,10 @@ clear_unused_block_pointer (void) walk_tree (gimple_op_ptr (stmt, i), clear_unused_block_pointer_1, NULL, NULL); } + + /* Walk all variables mentioned in the functions BLOCK tree and clear + DECL_VALUE_EXPR from unused blocks where present. 
*/ + clear_unused_block_pointer_in_block (DECL_INITIAL (current_function_decl)); } /* Dump scope blocks starting at SCOPE to FILE. INDENT is the
[gcc r15-4365] c++: checking ICE w/ constexpr if and lambda as def targ [PR117054]
https://gcc.gnu.org/g:bb2bfdb2048aed18ef7dc01b51816a800e83ce54 commit r15-4365-gbb2bfdb2048aed18ef7dc01b51816a800e83ce54 Author: Patrick Palka Date: Tue Oct 15 13:13:15 2024 -0400 c++: checking ICE w/ constexpr if and lambda as def targ [PR117054] Here we're tripping over the assert in extract_locals_r which enforces that an extra-args tree appearing inside another extra-args tree doesn't actually have extra args. This invariant doesn't always hold for lambdas (which recently gained the extra-args mechanism) but that should be harmless since cp_walk_subtrees doesn't walk LAMBDA_EXPR_EXTRA_ARGS and so should be immune to the PR114303 issue for now. So let's just disable this assert for lambdas. PR c++/117054 gcc/cp/ChangeLog: * pt.cc (extract_locals_r): Disable tree_extra_args assert for LAMBDA_EXPR. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/lambda-targ9.C: New test. Reviewed-by: Jason Merrill Diff: --- gcc/cp/pt.cc | 7 ++- gcc/testsuite/g++.dg/cpp2a/lambda-targ9.C | 16 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index c0a37a51cba3..c9219d5b3a5a 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -13480,7 +13480,12 @@ extract_locals_r (tree *tp, int *walk_subtrees, void *data_) outermost tree. Nested *_EXTRA_ARGS should naturally be empty since the outermost (extra-args) tree will intercept any substitution before a nested tree can. */ -gcc_checking_assert (tree_extra_args (*tp) == NULL_TREE); +gcc_checking_assert (tree_extra_args (*tp) == NULL_TREE + /* Except a lambda nested inside an extra-args tree + can have extra args if we deferred partial + substitution into it at template parse time. But + we don't walk LAMBDA_EXPR_EXTRA_ARGS anyway. 
*/ +|| TREE_CODE (*tp) == LAMBDA_EXPR); if (TREE_CODE (*tp) == DECL_EXPR) { diff --git a/gcc/testsuite/g++.dg/cpp2a/lambda-targ9.C b/gcc/testsuite/g++.dg/cpp2a/lambda-targ9.C new file mode 100644 index ..41f8526184a8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/lambda-targ9.C @@ -0,0 +1,16 @@ +// PR c++/117054 +// { dg-do compile { target c++20 } } + +template +constexpr bool v = true; + +template +void f() { + [](auto) { +if constexpr (v<>) { } + }(0); +} + +int main() { + f(); +}
[gcc r15-4366] c++: unifying lvalue vs rvalue (non-forwarding) ref [PR116710]
https://gcc.gnu.org/g:8161c4adea7f1842f9d28633d82e912ebb7a4cf9 commit r15-4366-g8161c4adea7f1842f9d28633d82e912ebb7a4cf9 Author: Patrick Palka Date: Tue Oct 15 13:23:17 2024 -0400 c++: unifying lvalue vs rvalue (non-forwarding) ref [PR116710] When unifying two (non-forwarding) reference types, unify immediately recurses into the referenced type without first comparing rvalueness. (Note that at this point forwarding references and other reference parameters have already been stripped to their referenced type by maybe_adjust_types_for_deduction, so this code path applies only to nested reference types.) PR c++/116710 gcc/cp/ChangeLog: * pt.cc (unify) <case REFERENCE_TYPE>: Compare rvalueness. gcc/testsuite/ChangeLog: * g++.dg/template/unify12.C: New test. Reviewed-by: Jason Merrill Diff: --- gcc/cp/pt.cc| 3 ++- gcc/testsuite/g++.dg/template/unify12.C | 24 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index c9219d5b3a5a..0141c53b617c 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -25161,7 +25161,8 @@ unify (tree tparms, tree targs, tree parm, tree arg, int strict, } case REFERENCE_TYPE: - if (!TYPE_REF_P (arg)) + if (!TYPE_REF_P (arg) + || TYPE_REF_IS_RVALUE (parm) != TYPE_REF_IS_RVALUE (arg)) return unify_type_mismatch (explain_p, parm, arg); return unify (tparms, targs, TREE_TYPE (parm), TREE_TYPE (arg), strict & UNIFY_ALLOW_MORE_CV_QUAL, explain_p); diff --git a/gcc/testsuite/g++.dg/template/unify12.C b/gcc/testsuite/g++.dg/template/unify12.C new file mode 100644 index ..bed52d0fa36c --- /dev/null +++ b/gcc/testsuite/g++.dg/template/unify12.C @@ -0,0 +1,24 @@ +// PR c++/116710 +// { dg-do compile { target c++11 } } + +template struct A : T {}; + +template +void f(void (*)(T &), typename A::type * = 0); + +void f(...); + +void g(int &&); + +void q() { f(g); } // OK + +template +struct B { operator B(); }; + +template +void h(B); + +int main() { + B b; + h(b); // { dg-error "no match" } +}
[gcc] Created branch 'jmelcr/heads/omp-cp' in namespace 'refs/users'
The branch 'jmelcr/heads/omp-cp' was created in namespace 'refs/users' pointing to: 0fa5017df917... testsuite/i386: Require AVX2 effective target in pr107432-9
[gcc r15-4368] c++: add fixed testcase [PR80637]
https://gcc.gnu.org/g:b110e092d260906432931796c1d96cba305c60e4 commit r15-4368-gb110e092d260906432931796c1d96cba305c60e4 Author: Patrick Palka Date: Tue Oct 15 13:43:29 2024 -0400 c++: add fixed testcase [PR80637] Fixed by r15-4340-gcacbb4daac3e9a. PR c++/80637 gcc/testsuite/ChangeLog: * g++.dg/cpp2a/concepts-fn9.C: New test. Diff: --- gcc/testsuite/g++.dg/cpp2a/concepts-fn9.C | 15 +++ 1 file changed, 15 insertions(+) diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-fn9.C b/gcc/testsuite/g++.dg/cpp2a/concepts-fn9.C new file mode 100644 index ..eb2963afcc96 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-fn9.C @@ -0,0 +1,15 @@ +// PR c++/80637 +// { dg-do compile { target c++20 } } + +template +concept same_as = __is_same(T, U); + +template +struct A { + void f(int) requires same_as; + void f(...) requires (!same_as); +}; + +auto fptr = &A::f; +using type = decltype(fptr); +using type = void (A::*)(int);
[gcc r15-4367] match.pd: Further fma negation fixes [PR116891]
https://gcc.gnu.org/g:4366f0c7e296ea0d7279343c9b0a1d597588a1da commit r15-4367-g4366f0c7e296ea0d7279343c9b0a1d597588a1da Author: Jakub Jelinek Date: Tue Oct 15 19:38:46 2024 +0200 match.pd: Further fma negation fixes [PR116891] On Mon, Oct 14, 2024 at 08:53:29AM +0200, Jakub Jelinek wrote: > > PR middle-end/116891 > > * match.pd ((negate (IFN_FNMS@3 @0 @1 @2)) -> (IFN_FMA @0 @1 @2)): > > Only enable for !HONOR_SIGN_DEPENDENT_ROUNDING. > > Guess it would be nice to have a testcase which FAILs without the patch and > PASSes with it, but it can be added later. I've added such a testcase now, and additionally found the fix only fixed one of the 4 problematic similar cases. Here is a patch which fixes the others too and adds the testcases. fma-pr116891.c FAILed without your patch, FAILs with your patch too (but only due to the bar/baz/qux checks) and PASSes with the patch. 2024-10-15 Jakub Jelinek PR middle-end/116891 * match.pd ((negate (fmas@3 @0 @1 @2)) -> (IFN_FNMS @0 @1 @2)): Only enable for !HONOR_SIGN_DEPENDENT_ROUNDING. ((negate (IFN_FMS@3 @0 @1 @2)) -> (IFN_FNMA @0 @1 @2)): Likewise. ((negate (IFN_FNMA@3 @0 @1 @2)) -> (IFN_FMS @0 @1 @2)): Likewise. * gcc.dg/pr116891.c: New test. * gcc.target/i386/fma-pr116891.c: New test. 
Diff: --- gcc/match.pd | 6 ++-- gcc/testsuite/gcc.dg/pr116891.c | 47 gcc/testsuite/gcc.target/i386/fma-pr116891.c | 19 +++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 22fad1a8757b..12d81fcac0de 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -9441,7 +9441,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (IFN_FNMS @0 @1 @2)) (simplify (negate (fmas@3 @0 @1 @2)) - (if (single_use (@3)) + (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) && single_use (@3)) (IFN_FNMS @0 @1 @2 (simplify @@ -9455,7 +9455,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (IFN_FNMA @0 @1 @2)) (simplify (negate (IFN_FMS@3 @0 @1 @2)) - (if (single_use (@3)) + (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) && single_use (@3)) (IFN_FNMA @0 @1 @2))) (simplify @@ -9469,7 +9469,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (IFN_FMS @0 @1 @2)) (simplify (negate (IFN_FNMA@3 @0 @1 @2)) - (if (single_use (@3)) + (if (!HONOR_SIGN_DEPENDENT_ROUNDING (type) && single_use (@3)) (IFN_FMS @0 @1 @2))) (simplify diff --git a/gcc/testsuite/gcc.dg/pr116891.c b/gcc/testsuite/gcc.dg/pr116891.c new file mode 100644 index ..446e5ec5a4aa --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr116891.c @@ -0,0 +1,47 @@ +/* PR middle-end/116891 */ +/* { dg-do run } */ +/* { dg-require-effective-target fenv } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-require-effective-target c99_runtime } */ +/* { dg-options "-O2 -frounding-math" } */ + +#include + +__attribute__((noipa)) double +foo (double x, double y, double z) +{ + return -__builtin_fma (-x, y, -z); +} + +__attribute__((noipa)) double +bar (double x, double y, double z) +{ + return -__builtin_fma (-x, y, z); +} + +__attribute__((noipa)) double +baz (double x, double y, double z) +{ + return -__builtin_fma (x, y, -z); +} + +__attribute__((noipa)) double +qux (double x, double y, double z) +{ + return -__builtin_fma (x, y, z); +} + +int +main () +{ +#if defined (FE_DOWNWARD) && __DBL_MANT_DIG__ == 53 && __DBL_MAX_EXP__ == 1024 + 
fesetround (FE_DOWNWARD); + double a = foo (-0x1.p256, 0x1.p256, 0x1.p-256); + if (a != -__builtin_nextafter (0x1p256 * 0x1p256, 0.)) +__builtin_abort (); + if (a != bar (-0x1.p256, 0x1.p256, -0x1.p-256) + || a != baz (0x1.p256, 0x1.p256, 0x1.p-256) + || a != qux (0x1.p256, 0x1.p256, -0x1.p-256)) +__builtin_abort (); +#endif +} diff --git a/gcc/testsuite/gcc.target/i386/fma-pr116891.c b/gcc/testsuite/gcc.target/i386/fma-pr116891.c new file mode 100644 index ..34689f44c419 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/fma-pr116891.c @@ -0,0 +1,19 @@ +/* PR middle-end/116891 */ +/* { dg-do run } */ +/* { dg-require-effective-target fenv } */ +/* { dg-require-effective-target hard_float } */ +/* { dg-require-effective-target c99_runtime } */ +/* { dg-require-effective-target fma } */ +/* { dg-options "-O2 -mfma -frounding-math" } */ + +#include +#include "fma-check.h" + +#define main() do_main () +#include "../../gcc.dg/pr116891.c" + +static void +fma_test (void) +{ + do_main (); +}
[gcc r15-4361] testsuite: simplify analyzer_cpython_plugin.c
https://gcc.gnu.org/g:77076d85e9aa5e15e1fe5b7db0241fbfc18cf06d commit r15-4361-g77076d85e9aa5e15e1fe5b7db0241fbfc18cf06d Author: David Malcolm Date: Tue Oct 15 11:03:08 2024 -0400 testsuite: simplify analyzer_cpython_plugin.c No functional change intended. gcc/testsuite/ChangeLog: * gcc.dg/plugin/analyzer_cpython_plugin.c: Use success_call_info in a couple of places to avoid reimplementing get_desc. Signed-off-by: David Malcolm Diff: --- .../gcc.dg/plugin/analyzer_cpython_plugin.c| 22 -- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c index d0fe110f20e9..c1510e441e6f 100644 --- a/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c +++ b/gcc/testsuite/gcc.dg/plugin/analyzer_cpython_plugin.c @@ -963,17 +963,10 @@ public: void kf_PyList_New::impl_call_post (const call_details &cd) const { - class success : public call_info + class success : public success_call_info { public: -success (const call_details &cd) : call_info (cd) {} - -label_text -get_desc (bool can_colorize) const final override -{ - return make_label_text (can_colorize, "when %qE succeeds", - get_fndecl ()); -} +success (const call_details &cd) : success_call_info (cd) {} bool update_model (region_model *model, const exploded_edge *, @@ -1104,17 +1097,10 @@ public: void kf_PyLong_FromLong::impl_call_post (const call_details &cd) const { - class success : public call_info + class success : public success_call_info { public: -success (const call_details &cd) : call_info (cd) {} - -label_text -get_desc (bool can_colorize) const final override -{ - return make_label_text (can_colorize, "when %qE succeeds", - get_fndecl ()); -} +success (const call_details &cd) : success_call_info (cd) {} bool update_model (region_model *model, const exploded_edge *,
[gcc r15-4359] i386: Fix expand_vector_set for VEC_MERGE/VEC_DUPLICATE RTX [PR117116]
https://gcc.gnu.org/g:80d7032067a3a5b76aecd657d9b35b0a8f5a941d commit r15-4359-g80d7032067a3a5b76aecd657d9b35b0a8f5a941d Author: Uros Bizjak Date: Tue Oct 15 16:51:33 2024 +0200 i386: Fix expand_vector_set for VEC_MERGE/VEC_DUPLICATE RTX [PR117116] Middle end can generate SYMBOL_REF RTX as a value "val" in the call to expand_vector_set, but SYMBOL_REF RTX is not accepted in _pinsr insn pattern, generated via VEC_MERGE/VEC_DUPLICATE RTX path. Force the value into a register before VEC_MERGE/VEC_DUPLICATE RTX is generated if it doesn't satisfy nonimmediate_operand predicate. PR target/117116 gcc/ChangeLog: * config/i386/i386-expand.cc (expand_vector_set): Force "val" into a register before VEC_MERGE/VEC_DUPLICATE RTX is generated if it doesn't satisfy nonimmediate_operand predicate. gcc/testsuite/ChangeLog: * gcc.target/i386/pr117116.c: New test. Diff: --- gcc/config/i386/i386-expand.cc | 2 ++ gcc/testsuite/gcc.target/i386/pr117116.c | 18 ++ 2 files changed, 20 insertions(+) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 2b774ff7c4e1..63f5e348d64e 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -18263,6 +18263,8 @@ quarter: else if (use_vec_merge) { do_vec_merge: + if (!nonimmediate_operand (val, inner_mode)) + val = force_reg (inner_mode, val); tmp = gen_rtx_VEC_DUPLICATE (mode, val); tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (HOST_WIDE_INT_1U << elt)); diff --git a/gcc/testsuite/gcc.target/i386/pr117116.c b/gcc/testsuite/gcc.target/i386/pr117116.c new file mode 100644 index ..d6e28848a4b3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117116.c @@ -0,0 +1,18 @@ +/* PR target/117116 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx2" } */ + +typedef void (*StmFct)(); +typedef struct { + StmFct fct_getc; + StmFct fct_putc; + StmFct fct_flush; + StmFct fct_close; +} StmInf; + +StmInf TTY_Getc_pstm; + +void TTY_Getc() { + TTY_Getc_pstm.fct_getc = TTY_Getc; + 
TTY_Getc_pstm.fct_putc = TTY_Getc_pstm.fct_flush = TTY_Getc_pstm.fct_close = (StmFct)1; +}
[gcc r15-4372] MAINTAINERS: Fix name order
https://gcc.gnu.org/g:2813a5bc7af2865ee4d2e94bce59a7fdefeea0b3 commit r15-4372-g2813a5bc7af2865ee4d2e94bce59a7fdefeea0b3 Author: Filip Kastl Date: Wed Oct 16 08:50:46 2024 +0200 MAINTAINERS: Fix name order ChangeLog: * MAINTAINERS: Fix Write After Approval name order. Signed-off-by: Filip Kastl Diff: --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index cf1cf78e16cb..269ac2ea6b49 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -760,7 +760,6 @@ Ankur Saini arsenic Hariharan Sandanagobalane hariharans Richard Sandiford rsandifo Iain Sandoe iains -Feng Xuefxue Duncan Sandsbaldrick Sujoy Saraswati ssaraswati Trevor Saunders tbsaunde @@ -880,6 +879,7 @@ Ruoyao Xi xry111 Mingjie Xingxmj Chenghua Xu paulhua Li Xu - +Feng Xuefxue Canqun Yang canqun Fei Yangfyang Jeffrey Yasskin jyasskin
[gcc r15-4371] Adjust testcase to avoid scan FIX in REG_EQUIV.
https://gcc.gnu.org/g:a8b4ea1bcc10b5253992f4b932aec6862aef32fa commit r15-4371-ga8b4ea1bcc10b5253992f4b932aec6862aef32fa Author: liuhongt Date: Tue Oct 15 11:17:20 2024 +0800 Adjust testcase to avoid scan FIX in REG_EQUIV. Also add hard_float target to avoid failures on arm-eabi. gcc/testsuite/ChangeLog: PR testsuite/115365 * gcc.dg/pr100927.c: Adjust testcase to avoid scan FIX in REG_EQUIV. Diff: --- gcc/testsuite/gcc.dg/pr100927.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/pr100927.c b/gcc/testsuite/gcc.dg/pr100927.c index 8a7d69c38311..28a168d35184 100644 --- a/gcc/testsuite/gcc.dg/pr100927.c +++ b/gcc/testsuite/gcc.dg/pr100927.c @@ -1,7 +1,8 @@ /* { dg-do compile } */ +/* { dg-require-effective-target hard_float } */ /* { dg-options "-O2 -ftrapping-math -fdump-tree-optimized -fdump-rtl-final" } */ /* { dg-final { scan-tree-dump-times {(?n)= \(int\)} 3 "optimized" } } */ -/* { dg-final { scan-rtl-dump-times {(?n)^[ \t]*\(fix:SI} 3 "final" } } */ +/* { dg-final { scan-rtl-dump-times {(?n)^(?!.*REG_EQUIV)(?=.*\(fix:SI)} 3 "final" } } */ int foo_ofr ()
[gcc r15-4370] Provide new GCC builtin __builtin_counted_by_ref [PR116016]
https://gcc.gnu.org/g:e7380688fa5917011c3fb85b5e06fb00f776a95d commit r15-4370-ge7380688fa5917011c3fb85b5e06fb00f776a95d Author: Qing Zhao Date: Tue Oct 15 17:55:22 2024 + Provide new GCC builtin __builtin_counted_by_ref [PR116016] With the addition of the 'counted_by' attribute and its wide roll-out within the Linux kernel, a use case has been found that would be very nice to have for object allocators: being able to set the counted_by counter variable without knowing its name. For example, given: struct foo { ... int counter; ... struct bar array[] __attribute__((counted_by (counter))); } *p; The existing Linux object allocators are roughly: #define MAX(A, B) (A > B) ? (A) : (B) #define alloc(P, FAM, COUNT) ({ \ __auto_type __p = &(P); \ size_t __size = MAX (sizeof(*P), __builtin_offsetof (__typeof(*P), FAM) + sizeof (*(P->FAM)) * COUNT); \ *__p = kmalloc(__size); \ }) Right now, any addition of a counted_by annotation must also include an open-coded assignment of the counter variable after the allocation: p = alloc(p, array, how_many); p->counter = how_many; In order to avoid the tedious and error-prone work of manually adding the open-coded counted-by initializations everywhere in the Linux kernel, a new GCC builtin __builtin_counted_by_ref will be very useful to be added to help the adoption of the counted-by attribute. -- Built-in Function: TYPE __builtin_counted_by_ref (PTR) The built-in function '__builtin_counted_by_ref' checks whether the array object pointed to by the pointer PTR has another object associated with it that represents the number of elements in the array object through the 'counted_by' attribute (i.e. the counted-by object). If so, returns a pointer to the corresponding counted-by object. If such counted-by object does not exist, returns a null pointer. This built-in function is only available in C for now. The argument PTR must be a pointer to an array. 
The TYPE of the returned value is a pointer type pointing to the corresponding type of the counted-by object or a void pointer type in case of a null pointer being returned. With this new builtin, the central allocator could be updated to: #define MAX(A, B) (A > B) ? (A) : (B) #define alloc(P, FAM, COUNT) ({ \ __auto_type __p = &(P); \ __auto_type __c = (COUNT); \ size_t __size = MAX (sizeof (*(*__p)),\ __builtin_offsetof (__typeof(*(*__p)),FAM) \ + sizeof (*((*__p)->FAM)) * __c); \ if ((*__p = kmalloc(__size))) { \ __auto_type ret = __builtin_counted_by_ref((*__p)->FAM); \ *_Generic(ret, void *: &(size_t){0}, default: ret) = __c; \ } \ }) And then structs can gain the counted_by attribute without needing additional open-coded counter assignments for each struct, and unannotated structs could still use the same allocator. PR c/116016 gcc/c-family/ChangeLog: * c-common.cc: Add new __builtin_counted_by_ref. * c-common.h (enum rid): Add RID_BUILTIN_COUNTED_BY_REF. gcc/c/ChangeLog: * c-decl.cc (names_builtin_p): Add RID_BUILTIN_COUNTED_BY_REF. * c-parser.cc (has_counted_by_object): New routine. (get_counted_by_ref): New routine. (c_parser_postfix_expression): Handle New RID_BUILTIN_COUNTED_BY_REF. * c-tree.h: New routine handle_counted_by_for_component_ref. * c-typeck.cc (handle_counted_by_for_component_ref): New routine. (build_component_ref): Call the new routine. gcc/ChangeLog: * doc/extend.texi: Add documentation for __builtin_counted_by_ref. gcc/testsuite/ChangeLog: * gcc.dg/builtin-counted-by-ref-1.c: New test. * gcc.dg/builtin-counted-by-ref.c: New test. 
Diff: --- gcc/c-family/c-common.cc| 1 + gcc/c-family/c-common.h | 1 + gcc/c/c-decl.cc | 1 + gcc/c/c-parser.cc | 79 ++ gcc/c/c-tree.h | 1 + gcc/c/c-typeck.cc | 33 -- gcc/doc/extend.texi | 55 ++ gcc/testsuite/gcc.dg/builtin-counted-by-ref-1.c | 135 gcc/testsuite/gcc.dg/builtin-counted-by-ref.c | 61 +++ 9 files changed, 358 insertions(+), 9 deletions(-) diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc index ec6a5da892dd..8ad9b99