[PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
Hi! The following patch implements the C++23 P2290R3 paper. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-08-17 Jakub Jelinek PR c++/106645 libcpp/ * include/cpplib.h (struct cpp_options): Implement P2290R3 - Delimited escape sequences. Add delimited_escape_seqs member. * init.cc (struct lang_flags): Likewise. (lang_defaults): Add delim column. (cpp_set_lang): Copy over delimited_escape_seqs. * charset.cc (_cpp_valid_ucn): Handle delimited escape sequences. (convert_hex): Likewise. (convert_oct): Likewise. (convert_escape): Call convert_oct even for \o. (_cpp_interpret_identifier): Handle delimited escape sequences. * lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in. (get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to compute num_bytes. gcc/testsuite/ * c-c++-common/cpp/delimited-escape-seq-1.c: New test. * c-c++-common/cpp/delimited-escape-seq-2.c: New test. * c-c++-common/cpp/delimited-escape-seq-3.c: New test. * c-c++-common/Wbidi-chars-24.c: New test. * gcc.dg/cpp/delimited-escape-seq-1.c: New test. * gcc.dg/cpp/delimited-escape-seq-2.c: New test. * g++.dg/cpp/delimited-escape-seq-1.C: New test. * g++.dg/cpp/delimited-escape-seq-2.C: New test. --- libcpp/include/cpplib.h.jj 2022-08-10 09:06:53.268209449 +0200 +++ libcpp/include/cpplib.h 2022-08-15 19:32:53.743213474 +0200 @@ -519,6 +519,9 @@ struct cpp_options /* Nonzero for C++23 size_t literals. */ unsigned char size_t_literals; + /* Nonzero for C++23 delimited escape sequences. */ + unsigned char delimited_escape_seqs; + /* Holds the name of the target (execution) character set. 
*/ const char *narrow_charset; --- libcpp/init.cc.jj 2022-08-10 09:06:53.268209449 +0200 +++ libcpp/init.cc 2022-08-15 16:09:01.403020485 +0200 @@ -96,34 +96,35 @@ struct lang_flags char dfp_constants; char size_t_literals; char elifdef; + char delimited_escape_seqs; }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,1, 1, 0, 1, 1, 1, 1, 0, 1 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,1, 1, 1, 1, 0, 1, 1, 0, 1 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX98*/ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX11*/ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX14*/ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX17*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 0, 1, 0, 0, 0 }, - /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX20*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 
0 }, - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 1, 1 }, - /* CXX23*/ { 1, 1, 1,
Re: [PATCH] fortran: Add -static-libquadmath support [PR46539]
On Tue, Aug 16, 2022 at 04:31:03PM +0200, Jakub Jelinek via Gcc-patches wrote: > So far slightly tested on x86_64-linux (and will bootstrap/regtest > it there tonight), but I unfortunately don't have a way to test it FYI, successfully bootstrapped/regtested on x86_64-linux and i686-linux. > 2022-08-16 Francois-Xavier Coudert > Jakub Jelinek > > PR fortran/46539 > gcc/ > * common.opt (static-libquadmath): New option. > * gcc.c (driver_handle_option): Always accept -static-libquadmath. > * config/darwin.h (LINK_SPEC): Handle -static-libquadmath. > gcc/fortran/ > * lang.opt (static-libquadmath): New option. > * invoke.texi (-static-libquadmath): Document it. > * options.c (gfc_handle_option): Error out if -static-libquadmath > is passed but we do not support it. > libgfortran/ > * acinclude.m4 (LIBQUADSPEC): From $FC -static-libgfortran -### > output determine -Bstatic/-Bdynamic, -bstatic/-bdynamic, > -aarchive_shared/-adefault linker support or Darwin remapping > of -lgfortran to libgfortran.a%s and use that around or instead > of -lquadmath in LIBQUADSPEC. > * configure: Regenerated. Jakub
[PATCH] RISC-V: Add runtime invariant support
From: zhongjuzhe RISC-V 'V' Extension support scalable vector like ARM SVE. To support RVV, we need to introduce runtime invariant. - For zve32*, the runtime invariant uses 32-bit chunk. - For zve64*, the runtime invariant uses 64-bit chunk. [1] https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#sec-vector-extensions This patch is preparing patch for RVV support. Because we didn't introduce vector machine_mode yet, it safe to just change HOST_WIDE_INT into poly_int. Also it safe to use "to_constant()" function for scalar operation. This patch has been tested by full dejagnu regression. gcc/ChangeLog: * config/riscv/predicates.md: Adjust runtime invariant. * config/riscv/riscv-modes.def (MAX_BITSIZE_MODE_ANY_MODE): New. (NUM_POLY_INT_COEFFS): New. * config/riscv/riscv-protos.h (riscv_initial_elimination_offset): Adjust runtime invariant. * config/riscv/riscv-sr.cc (riscv_remove_unneeded_save_restore_calls): Adjust runtime invariant. * config/riscv/riscv.cc (struct riscv_frame_info): Adjust runtime invariant. (enum riscv_microarchitecture_type): Ditto. (riscv_valid_offset_p): Ditto. (riscv_valid_lo_sum_p): Ditto. (riscv_address_insns): Ditto. (riscv_load_store_insns): Ditto. (riscv_legitimize_move): Ditto. (riscv_binary_cost): Ditto. (riscv_rtx_costs): Ditto. (riscv_output_move): Ditto. (riscv_extend_comparands): Ditto. (riscv_flatten_aggregate_field): Ditto. (riscv_get_arg_info): Ditto. (riscv_pass_by_reference): Ditto. (riscv_elf_select_rtx_section): Ditto. (riscv_stack_align): Ditto. (riscv_compute_frame_info): Ditto. (riscv_initial_elimination_offset): Ditto. (riscv_set_return_address): Ditto. (riscv_for_each_saved_reg): Ditto. (riscv_first_stack_step): Ditto. (riscv_expand_prologue): Ditto. (riscv_expand_epilogue): Ditto. (riscv_can_use_return_insn): Ditto. (riscv_secondary_memory_needed): Ditto. (riscv_hard_regno_nregs): Ditto. (riscv_convert_vector_bits): New. (riscv_option_override): Adjust runtime invariant. (riscv_promote_function_mode): Ditto. 
* config/riscv/riscv.h (POLY_SMALL_OPERAND_P): New. (BITS_PER_RISCV_VECTOR): New. (BYTES_PER_RISCV_VECTOR): New. * config/riscv/riscv.md: Adjust runtime invariant. --- gcc/config/riscv/predicates.md | 2 +- gcc/config/riscv/riscv-modes.def | 12 ++ gcc/config/riscv/riscv-protos.h | 2 +- gcc/config/riscv/riscv-sr.cc | 2 +- gcc/config/riscv/riscv.cc| 181 --- gcc/config/riscv/riscv.h | 9 ++ gcc/config/riscv/riscv.md| 2 +- 7 files changed, 142 insertions(+), 68 deletions(-) diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index e98db2cb574..79e0c1d5589 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -71,7 +71,7 @@ { /* Don't handle multi-word moves this way; we don't want to introduce the individual word-mode moves until after reload. */ - if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) + if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD) return false; /* Check whether the constant can be loaded in a single diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def index 5cf2fc8e9e6..b47909ef78b 100644 --- a/gcc/config/riscv/riscv-modes.def +++ b/gcc/config/riscv/riscv-modes.def @@ -21,3 +21,15 @@ along with GCC; see the file COPYING3. If not see FLOAT_MODE (HF, 2, ieee_half_format); FLOAT_MODE (TF, 16, ieee_quad_format); + +/* TODO: According to RISC-V 'V' ISA spec, the maximum vector length can + be 65536 for a single vector register which means the vector mode in + GCC can be maximum = 65536 * 8 bits (LMUL=8). However, 'GET_MODE_SIZE' + is using poly_uint16/unsigned short which will overflow if we specify + vector-length = 65536. To support this feature, we need to change the + code outside the RISC-V port. We will support it in the future. */ +#define MAX_BITSIZE_MODE_ANY_MODE (4096 * 8) + +/* Coefficient 1 is multiplied by the number of 64-bit/32-bit chunks in a vector + * minus one. 
*/ +#define NUM_POLY_INT_COEFFS 2 \ No newline at end of file diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 20c2381c21a..2bc0ef06f93 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -64,7 +64,7 @@ extern rtx riscv_legitimize_call_address (rtx); extern void riscv_set_return_address (rtx, rtx); extern bool riscv_expand_block_move (rtx, rtx, rtx); extern rtx riscv_return_addr (int, rtx); -extern HOST_WIDE_INT riscv_initial_elimination_offset (int, int); +extern poly_int64 riscv_initial_elimination_offset (int, int); extern void riscv_expand_prologue (void); extern void riscv_e
Re: [PATCH] Refactor back_threader_profitability
I just have a few high level comments. On Tue, Aug 16, 2022 at 4:05 PM Richard Biener wrote: > > The following refactors profitable_path_p in the backward threader, > splitting out parts that can be computed once the exit block is known, > parts that contiguously update and that can be checked allowing > for the path to be later identified as FSM with larger limits, > possibly_profitable_path_p, and final checks done when the whole > path is known, profitable_path_p. I thought we were removing references to FSM, as they were leftovers from some previous incarnation. For that matter, I don't think I ever understood what they are, so if we're gonna keep them, could you comment what makes FSM threads different from other threads? In your possibly_profitable_path_p function, could you document a bit better what's the difference between profitable_path_p and possibly_profitable_path_p? > > I've removed the back_threader_profitability instance from the > back_threader class and instead instantiate it once per path > discovery. I've kept the size compute non-incremental to simplify > the patch and not worry about unwinding. > > There's key changes to previous behavior - namely we apply > the param_max_jump_thread_duplication_stmts early only when > we know the path cannot become an FSM one (multiway + thread through > latch) but make sure to elide the path query when we we didn't > yet discover that but are over this limit. Similarly the > speed limit is now used even when we did not yet discover a > hot BB on the path. Basically the idea is to only stop path > discovery when we know the path will never become profitable > but avoid the expensive path range query when we know it's > currently not. > > I've done a few cleanups, merging functions, on the way. > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > Statistics show an overall slight increase in threading but > looking at different files theres noise up and down. 
That's > somewhat expected since we now are applying the "more correct" > limits in the end. Unless I made big mistakes of course. > > The next thing cost-wise would be to raise the backwards > threading limit to the limit of DOM so we don't get > artificial high counts for that. The DOM threader has limits? I thought most of those limits were just due to the fact that it couldn't determine long enough paths? Either way, I like that we're merging the necessary forward threader bits here, in preparation for its demise ;-). Looks good. Thanks. Aldy
Re: [PATCH] Support threading of just the exit edge
On Tue, 16 Aug 2022, Andrew MacLeod wrote: > > On 8/16/22 05:18, Richard Biener wrote: > > On Mon, 15 Aug 2022, Aldy Hernandez wrote: > > > >> On Mon, Aug 15, 2022 at 9:24 PM Andrew MacLeod wrote: > >>> heh. or just > >>> > >>> > >>> + int_range<2> r; > >>> + if (!fold_range (r, const_cast (cond_stmt)) > >>> + || !r.singleton_p (&val)) > >>> > >>> > >>> if you do not provide a range_query to any of the fold_using_range code, > >>> it defaults to: > >>> > >>> fur_source::fur_source (range_query *q) > >>> { > >>> if (q) > >>> m_query = q; > >>> else if (cfun) > >>> m_query = get_range_query (cfun); > >>> else > >>> m_query = get_global_range_query (); > >>> m_gori = NULL; > >>> } > >>> > >> Sweet. Even better! > > So when I do the following incremental change ontop of the posted > > patch then I see that the path query is able to simplify more > > "single BB paths" than the global range folding. > > > > diff --git a/gcc/tree-ssa-threadbackward.cc > > b/gcc/tree-ssa-threadbackward.cc > > index 669098e4ec3..777e778037f 100644 > > --- a/gcc/tree-ssa-threadbackward.cc > > +++ b/gcc/tree-ssa-threadbackward.cc > > @@ -314,6 +314,12 @@ back_threader::find_taken_edge_cond (const > > vec &path, > > { > > int_range_max r; > > + int_range<2> rf; > > + if (path.length () == 1) > > +{ > > + fold_range (rf, cond); > > +} > > + > > m_solver->compute_ranges (path, m_imports); > > m_solver->range_of_stmt (r, cond); > > @@ -325,6 +331,8 @@ back_threader::find_taken_edge_cond (const > > vec &path, > > > > if (r == true_range || r == false_range) > > { > > + if (path.length () == 1) > > + gcc_assert (r == rf); > > edge e_true, e_false; > > basic_block bb = gimple_bb (cond); > > extract_true_false_edges_from_block (bb, &e_true, &e_false); > > > > Even doing the following (not sure what's the difference and in > > particular expense over the path range query) results in missed > > simplifications (checking my set of cc1files). 
> > > > diff --git a/gcc/tree-ssa-threadbackward.cc > > b/gcc/tree-ssa-threadbackward.cc > > index 669098e4ec3..1d43a179d08 100644 > > --- a/gcc/tree-ssa-threadbackward.cc > > +++ b/gcc/tree-ssa-threadbackward.cc > > @@ -99,6 +99,7 @@ private: > > > > back_threader_registry m_registry; > > back_threader_profitability m_profit; > > + gimple_ranger *m_ranger; > > path_range_query *m_solver; > > > > // Current path being analyzed. > > @@ -146,12 +147,14 @@ back_threader::back_threader (function *fun, > > unsigned flags, bool first) > > // The path solver needs EDGE_DFS_BACK in resolving mode. > > if (flags & BT_RESOLVE) > > mark_dfs_back_edges (); > > - m_solver = new path_range_query (flags & BT_RESOLVE); > > + m_ranger = new gimple_ranger; > > + m_solver = new path_range_query (flags & BT_RESOLVE, m_ranger); > > } > > > > back_threader::~back_threader () > > { > > delete m_solver; > > + delete m_ranger; > > > > loop_optimizer_finalize (); > > } > > @@ -314,6 +317,12 @@ back_threader::find_taken_edge_cond (const > > vec &path, > > { > > int_range_max r; > > + int_range<2> rf; > > + if (path.length () == 1) > > +{ > > + fold_range (rf, cond, m_ranger); > > +} > > + > > m_solver->compute_ranges (path, m_imports); > > m_solver->range_of_stmt (r, cond); > > @@ -325,6 +334,8 @@ back_threader::find_taken_edge_cond (const > > vec &path, > > > > if (r == true_range || r == false_range) > > { > > + if (path.length () == 1) > > + gcc_assert (r == rf); > > edge e_true, e_false; > > basic_block bb = gimple_bb (cond); > > extract_true_false_edges_from_block (bb, &e_true, &e_false); > > > > one example is > > > > [local count: 14414059]: > > _128 = node_177(D)->typed.type; > > pretmp_413 = MEM[(const union tree_node *)_128].base.code; > > _431 = pretmp_413 + 65519; > > if (_128 == 0B) > >goto ; [18.09%] > > else > >goto ; [81.91%] > > > > where m_imports for the path is just _128 and the range computed is > > false while the ranger query returns VARYING. 
But > > path_range_query::range_defined_in_block does > > > >if (bb && POINTER_TYPE_P (TREE_TYPE (name))) > > m_ranger->m_cache.m_exit.maybe_adjust_range (r, name, bb); > This is the coarse grained "side effect applies somewhere in the block" > mechanism. There is no understanding of where in the block it happens. > > > > which adjusts the range to ~[0, 0], probably because of the > > dereference in the following stmt. > > > > Why does fold_range not do this when folding the exit test? Is there > > a way to make it do so? It looks like the only routine using this > > in gimple-range.cc is range_on_edge and there it's used for e->src > > after calling range_on_exit for e->src (why's it not done in > > range_on_exit?). A testcase for this is > > Fold_range doesnt do this beca
Re: [PATCH] Refactor back_threader_profitability
On Wed, 17 Aug 2022, Aldy Hernandez wrote: > I just have a few high level comments. > > On Tue, Aug 16, 2022 at 4:05 PM Richard Biener wrote: > > > > The following refactors profitable_path_p in the backward threader, > > splitting out parts that can be computed once the exit block is known, > > parts that contiguously update and that can be checked allowing > > for the path to be later identified as FSM with larger limits, > > possibly_profitable_path_p, and final checks done when the whole > > path is known, profitable_path_p. > > I thought we were removing references to FSM, as they were leftovers > from some previous incarnation. For that matter, I don't think I ever > understood what they are, so if we're gonna keep them, could you > comment what makes FSM threads different from other threads? I don't know exactly what 'FSM' stands for but the FSM threader specifically tried to cover for (;;) { switch (state) { case 1: /* sth */ state = 3; break; ... case 3: ... } } so optimizing state machine transitions. That is, these are all multiway (switch or goto, but goto support has been removed with the ranger rewrite it seems) and the thread path going through the loop backedge. This is why FSM has different size limits because it was thought those threadings are especially worthwhile and they tend to be larger. To make discovery cheaper a TODO item would be to skip to the loop backedge when we reach the regular thread limit and only continue the real search there (and pick the "smallest" ways through any diamonds on the way). > In your possibly_profitable_path_p function, could you document a bit > better what's the difference between profitable_path_p and > possibly_profitable_path_p? Sure. > > > > I've removed the back_threader_profitability instance from the > > back_threader class and instead instantiate it once per path > > discovery. I've kept the size compute non-incremental to simplify > > the patch and not worry about unwinding. 
> > > > There's key changes to previous behavior - namely we apply > > the param_max_jump_thread_duplication_stmts early only when > > we know the path cannot become an FSM one (multiway + thread through > > latch) but make sure to elide the path query when we we didn't > > yet discover that but are over this limit. Similarly the > > speed limit is now used even when we did not yet discover a > > hot BB on the path. Basically the idea is to only stop path > > discovery when we know the path will never become profitable > > but avoid the expensive path range query when we know it's > > currently not. > > > > I've done a few cleanups, merging functions, on the way. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > Statistics show an overall slight increase in threading but > > looking at different files theres noise up and down. That's > > somewhat expected since we now are applying the "more correct" > > limits in the end. Unless I made big mistakes of course. > > > > The next thing cost-wise would be to raise the backwards > > threading limit to the limit of DOM so we don't get > > artificial high counts for that. > > The DOM threader has limits? I thought most of those limits were just > due to the fact that it couldn't determine long enough paths? Either > way, I like that we're merging the necessary forward threader bits > here, in preparation for its demise ;-). Both use param_max_jump_thread_duplication_stmts, but the backwards threader applies this limit to non-FSM threads with /* The generic copier used by the backthreader does not re-use an existing threading path to reduce code duplication. So for that case, drastically reduce the number of statements we are allowed to copy. */ if (!(threaded_through_latch && threaded_multiway_branch) && (n_insns * param_fsm_scale_path_stmts >= param_max_jump_thread_duplication_stmts)) and param_fsm_scale_path_stmts happens to be two. 
I have no idea why we apply this scaling, the scaling is otherwise used in /* We avoid creating irreducible inner loops unless we thread through a multiway branch, in which case we have deemed it worth losing other loop optimizations later. We also consider it worth creating an irreducible inner loop if the number of copied statement is low relative to the length of the path -- in that case there's little the traditional loop optimizer would have done anyway, so an irreducible loop is not so bad. */ if (!threaded_multiway_branch && creates_irreducible_loop && *creates_irreducible_loop && (n_insns * (unsigned) param_fsm_scale_path_stmts > (m_path.length () * (unsigned) param_fsm_scale_path_blocks))) so my idea is to drop the scaling from applying the param_max_jump_thread_duplication_stmts limit which raises the effective default limit from 15 / 2 to 15, just like what DOM applies (where DOM
Re: [PATCH] fortran: Add -static-libquadmath support [PR46539]
On 16.08.22 16:31, Jakub Jelinek via Gcc-patches wrote: The following patch is a revival of the https://gcc.gnu.org/legacy-ml/gcc-patches/2014-10/msg00771.html patch. While trunk configured against recent glibc and with linker --as-needed support doesn't really need to link against -lquadmath anymore, there are still other targets where libquadmath is still in use. As has been discussed, making -static-libgfortran imply statically linking both libgfortran and libquadmath is undesirable because of the significant licensing differences between the 2 libraries. [...] So far slightly tested on x86_64-linux (and will bootstrap/regtest it there tonight), but I unfortunately don't have a way to test it e.g. on Darwin. Thoughts on this? Looks good to me – with the caveat of having only limited knowledge of .spec and Darwin. --- gcc/fortran/invoke.texi.jj2022-05-09 09:09:20.312473272 +0200 +++ gcc/fortran/invoke.texi 2022-08-16 16:12:47.638203577 +0200 ... +Please note that the @file{libquadmath} runtime library is licensed under the +GNU Lesser General Public License (LGPL), and linking it statically introduces +requirements on redistributing the resulting binaries. "on redistributing" sounds odd to me – "when redistributing" or "on (the) redistribution of" sounds better to me. Tobias - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [PATCH] Refactor back_threader_profitability
On Wed, Aug 17, 2022 at 9:54 AM Richard Biener wrote: > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > I just have a few high level comments. > > > > On Tue, Aug 16, 2022 at 4:05 PM Richard Biener wrote: > > > > > > The following refactors profitable_path_p in the backward threader, > > > splitting out parts that can be computed once the exit block is known, > > > parts that contiguously update and that can be checked allowing > > > for the path to be later identified as FSM with larger limits, > > > possibly_profitable_path_p, and final checks done when the whole > > > path is known, profitable_path_p. > > > > I thought we were removing references to FSM, as they were leftovers > > from some previous incarnation. For that matter, I don't think I ever > > understood what they are, so if we're gonna keep them, could you > > comment what makes FSM threads different from other threads? > > I don't know exactly what 'FSM' stands for but the FSM threader > specifically tried to cover It probably stands for finite state machine then? > > for (;;) > { >switch (state) > { > case 1: > /* sth */ > state = 3; > break; > ... > case 3: > ... > } > } > > so optimizing state machine transitions. That is, these are all > multiway (switch or goto, but goto support has been removed with the > ranger rewrite it seems) and the thread path going through the ISTR going through the computed goto path in the old code, and it never got triggered. I just didn't get around to removing the references to GIMPLE_GOTO. At least, the old threader not once got a thread we didn't get with the new code, even through a full Fedora build. I could be wrong though, but that's my recollection. > loop backedge. This is why FSM has different size limits because > it was thought those threadings are especially worthwhile and > they tend to be larger. 
To make discovery cheaper a TODO item > would be to skip to the loop backedge when we reach the regular > thread limit and only continue the real search there (and > pick the "smallest" ways through any diamonds on the way). Ah, thanks. If you could, add some similar blurb, it'd be great. I'm sure we'll all forget it at some time (well, I will :)). Aldy > > > In your possibly_profitable_path_p function, could you document a bit > > better what's the difference between profitable_path_p and > > possibly_profitable_path_p? > > Sure. > > > > > > > I've removed the back_threader_profitability instance from the > > > back_threader class and instead instantiate it once per path > > > discovery. I've kept the size compute non-incremental to simplify > > > the patch and not worry about unwinding. > > > > > > There's key changes to previous behavior - namely we apply > > > the param_max_jump_thread_duplication_stmts early only when > > > we know the path cannot become an FSM one (multiway + thread through > > > latch) but make sure to elide the path query when we we didn't > > > yet discover that but are over this limit. Similarly the > > > speed limit is now used even when we did not yet discover a > > > hot BB on the path. Basically the idea is to only stop path > > > discovery when we know the path will never become profitable > > > but avoid the expensive path range query when we know it's > > > currently not. > > > > > > I've done a few cleanups, merging functions, on the way. > > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > > > Statistics show an overall slight increase in threading but > > > looking at different files theres noise up and down. That's > > > somewhat expected since we now are applying the "more correct" > > > limits in the end. Unless I made big mistakes of course. > > > > > > The next thing cost-wise would be to raise the backwards > > > threading limit to the limit of DOM so we don't get > > > artificial high counts for that. 
> > > > The DOM threader has limits? I thought most of those limits were just > > due to the fact that it couldn't determine long enough paths? Either > > way, I like that we're merging the necessary forward threader bits > > here, in preparation for its demise ;-). > > Both use param_max_jump_thread_duplication_stmts, but the backwards > threader applies this limit to non-FSM threads with > > /* The generic copier used by the backthreader does not re-use an > existing threading path to reduce code duplication. So for that > case, drastically reduce the number of statements we are allowed > to copy. */ > if (!(threaded_through_latch && threaded_multiway_branch) > && (n_insns * param_fsm_scale_path_stmts > >= param_max_jump_thread_duplication_stmts)) > > and param_fsm_scale_path_stmts happens to be two. I have no idea > why we apply this scaling, the scaling is otherwise used in > > /* We avoid creating irreducible inner loops unless we thread through > a multiway branch, in which case we have deemed it worth losing >
Re: [PATCH] fortran: Add -static-libquadmath support [PR46539]
Hello, Tobias approved it already, but I spotted what looks like typos. See below. Mikael gcc/ * common.opt (static-libquadmath): New option. * gcc.c (driver_handle_option): Always accept -static-libquadmath. * config/darwin.h (LINK_SPEC): Handle -static-libquadmath. (...) --- gcc/config/darwin.h.jj 2022-08-16 14:51:14.529544492 +0200 +++ gcc/config/darwin.h 2022-08-16 14:53:54.402460097 +0200 @@ -443,6 +443,7 @@ extern GTY(()) int darwin_ms_struct; %:replace-outfile(-lobjc libobjc-gnu.a%s); \ :%:replace-outfile(-lobjc -lobjc-gnu )}}\ %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran libgfortran.a%s)}\ + %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lquadmath libquadmath.a%s)}\ s/static-libgfortran/static-libquadmath/ I guess? Otherwise I don’t understand the corresponding ChangeLog description. %{static|static-libgcc|static-libphobos:%:replace-outfile(-lgphobos libgphobos.a%s)}\ %{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\ %{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\ (...) --- libgfortran/acinclude.m4.jj 2022-06-29 17:05:45.478790781 +0200 +++ libgfortran/acinclude.m42022-08-16 16:06:50.047814043 +0200 @@ -356,18 +356,39 @@ AC_DEFUN([LIBGFOR_CHECK_FLOAT128], [ ac_[]_AC_LANG_ABBREV[]_werror_flag=$ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag ]) +dnl Determine -Bstatic ... -Bdynamic etc. support from gfortran -### stderr. 
+touch conftest1.$ac_objext conftest2.$ac_objext +LQUADMATH=-lquadmath +$FC -static-libgfortran -### -o conftest \ + conftest1.$ac_objext -lgfortran conftest2.$ac_objext 2>&1 >/dev/null \ + | grep "conftest1.$ac_objext.*conftest2.$ac_objext" > conftest.cmd +if grep "conftest1.$ac_objext.* -Bstatic -lgfortran -Bdynamic .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-Bstatic} -lquadmath %{static-libquadmath:-Bdynamic}" +elif grep "conftest1.$ac_objext.* -bstatic -lgfortran -bdynamic .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-bstatic} -lquadmath %{static-libquadmath:-bdynamic}" +elif grep "conftest1.$ac_objext.* -aarchive_shared -lgfortran -adefault .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-aarchive_shared} -lquadmath %{static-libquadmath:-adefault}" +elif grep "conftest1.$ac_objext.* ligfortran.a .*conftest2.$ac_objext" \ s/ligfortran.a/libgfortran.a/ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:libquadmath.a%s;:-lquadmath}" +fi +rm -f conftest1.$ac_objext conftest2.$ac_objext conftest conftest.cmd + dnl For static libgfortran linkage, depend on libquadmath only if needed. dnl If using *f128 APIs from libc/libm, depend on libquadmath only if needed dnl even for dynamic libgfortran linkage, and don't link libgfortran against dnl -lquadmath. 
if test "x$libgfor_cv_have_as_needed" = xyes; then if test "x$USE_IEC_60559" = xyes; then - LIBQUADSPEC="$libgfor_cv_as_needed_option -lquadmath $libgfor_cv_no_as_needed_option" + LIBQUADSPEC="$libgfor_cv_as_needed_option $LQUADMATH $libgfor_cv_no_as_needed_option" else - LIBQUADSPEC="%{static-libgfortran:$libgfor_cv_as_needed_option} -lquadmath %{static-libgfortran:$libgfor_cv_no_as_needed_option}" + LIBQUADSPEC="%{static-libgfortran:$libgfor_cv_as_needed_option} $LQUADMATH %{static-libgfortran:$libgfor_cv_no_as_needed_option}" fi else - LIBQUADSPEC="-lquadmath" + LIBQUADSPEC="$LQUADMATH" fi if test "x$USE_IEC_60559" != xyes; then if test -f ../libquadmath/libquadmath.la; then
[PATCH] arm: Define with_float to hard when target name ends with hf
On arm, the --with-float= configure option is used to define include files search path (among other things). However, when targeting arm-linux-gnueabihf, one would expect to automatically default to the hard-float ABI, but this is not the case. As a consequence, GCC bootstrap fails on an arm-linux-gnueabihf target if --with-float=hard is not used. This patch checks if the target name ends with 'hf' and defines with_float to hard if not already defined. This is achieved in gcc/config.gcc, just before selecting the default CPU depending on the $with_float value. 2022-08-17 Christophe Lyon gcc/ * config.gcc (arm): Define with_float to hard if target name ends with 'hf'. --- gcc/config.gcc | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/config.gcc b/gcc/config.gcc index 4e3b15bb5e9..02f58970db0 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -1314,6 +1314,13 @@ arm*-*-linux-* | arm*-*-uclinuxfdpiceabi) tm_file="$tm_file arm/uclinuxfdpiceabi.h" ;; esac + # Define with_float to "hard" if not already defined and + # target name ends with "hf" + case $target:$with_float in + arm*-*-*hf:) + with_float=hard + ;; + esac # Generation of floating-point instructions requires at least ARMv5te. if [ "$with_float" = "hard" -o "$with_float" = "softfp" ] ; then target_cpu_cname="arm10e" -- 2.25.1
Re: [PATCH] Refactor back_threader_profitability
On Wed, 17 Aug 2022, Aldy Hernandez wrote: > On Wed, Aug 17, 2022 at 9:54 AM Richard Biener wrote: > > > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > > > I just have a few high level comments. > > > > > > On Tue, Aug 16, 2022 at 4:05 PM Richard Biener wrote: > > > > > > > > The following refactors profitable_path_p in the backward threader, > > > > splitting out parts that can be computed once the exit block is known, > > > > parts that contiguously update and that can be checked allowing > > > > for the path to be later identified as FSM with larger limits, > > > > possibly_profitable_path_p, and final checks done when the whole > > > > path is known, profitable_path_p. > > > > > > I thought we were removing references to FSM, as they were leftovers > > > from some previous incarnation. For that matter, I don't think I ever > > > understood what they are, so if we're gonna keep them, could you > > > comment what makes FSM threads different from other threads? > > > > I don't know exactly what 'FSM' stands for but the FSM threader > > specifically tried to cover > > It probably stands for finite state machine then? > > > > > for (;;) > > { > >switch (state) > > { > > case 1: > > /* sth */ > > state = 3; > > break; > > ... > > case 3: > > ... > > } > > } > > > > so optimizing state machine transitions. That is, these are all > > multiway (switch or goto, but goto support has been removed with the > > ranger rewrite it seems) and the thread path going through the > > ISTR going through the computed goto path in the old code, and it > never got triggered. I just didn't get around to removing the > references to GIMPLE_GOTO. At least, the old threader not once got a > thread we didn't get with the new code, even through a full Fedora > build. I could be wrong though, but that's my recollection. 
When one massages the threader to even consider gotos we eventually run into find_taken_edge not handling them because range_of_expr computes the range of 'state' as [irange] void * [1, +INF]$4 = void rather than &&E. A testcase would be unsigned g; void FSM (int start) { void *states[] = { &&A, &&B, &&C, &&E }; void *state = states[start]; do { goto *state; A: g += 1; state = g & 1 ? &&B : &&E; continue; B: g += 2; state = &&C; continue; C: g += 3; state = states[g & 3]; continue; E: break; } while (1); } where we'd expect to thread the B -> C state transition. I checked gcc 7 and it doesn't do that - not sure if it broke somewhen on the way or if it was just never working. I'll file a bugreport, OTOH &label and goto *p are both GNU extensions, so not sure if it is worth optimizing. I'll attach the "simple" enabler I have. > > loop backedge. This is why FSM has different size limits because > > it was thought those threadings are especially worthwhile and > > they tend to be larger. To make discovery cheaper a TODO item > > would be to skip to the loop backedge when we reach the regular > > thread limit and only continue the real search there (and > > pick the "smallest" ways through any diamonds on the way). > > Ah, thanks. If you could, add some similar blurb, it'd be great. I'm > sure we'll all forget it at some time (well, I will :)). Sure ;) Richard. > Aldy > > > > > > In your possibly_profitable_path_p function, could you document a bit > > > better what's the difference between profitable_path_p and > > > possibly_profitable_path_p? > > > > Sure. > > > > > > > > > > I've removed the back_threader_profitability instance from the > > > > back_threader class and instead instantiate it once per path > > > > discovery. I've kept the size compute non-incremental to simplify > > > > the patch and not worry about unwinding. 
> > > > > > > > There's key changes to previous behavior - namely we apply > > > > the param_max_jump_thread_duplication_stmts early only when > > > > we know the path cannot become an FSM one (multiway + thread through > > > > latch) but make sure to elide the path query when we we didn't > > > > yet discover that but are over this limit. Similarly the > > > > speed limit is now used even when we did not yet discover a > > > > hot BB on the path. Basically the idea is to only stop path > > > > discovery when we know the path will never become profitable > > > > but avoid the expensive path range query when we know it's > > > > currently not. > > > > > > > > I've done a few cleanups, merging functions, on the way. > > > > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > > > > > Statistics show an overall slight increase in threading but > > > > looking at different files theres noise up and down. That's > > > > somewhat expected since we now are applying the "more correct" > > > > limits in the end. Unless I made big mistakes of course. > > > > > > > > The next thing cost-wise would be to raise the backwards > > > > threading limit to the limit of DOM so we
Re: [PATCH] Refactor back_threader_profitability
On Wed, Aug 17, 2022 at 10:38 AM Richard Biener wrote: > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > On Wed, Aug 17, 2022 at 9:54 AM Richard Biener wrote: > > > > > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > > > > > I just have a few high level comments. > > > > > > > > On Tue, Aug 16, 2022 at 4:05 PM Richard Biener > > > > wrote: > > > > > > > > > > The following refactors profitable_path_p in the backward threader, > > > > > splitting out parts that can be computed once the exit block is known, > > > > > parts that contiguously update and that can be checked allowing > > > > > for the path to be later identified as FSM with larger limits, > > > > > possibly_profitable_path_p, and final checks done when the whole > > > > > path is known, profitable_path_p. > > > > > > > > I thought we were removing references to FSM, as they were leftovers > > > > from some previous incarnation. For that matter, I don't think I ever > > > > understood what they are, so if we're gonna keep them, could you > > > > comment what makes FSM threads different from other threads? > > > > > > I don't know exactly what 'FSM' stands for but the FSM threader > > > specifically tried to cover > > > > It probably stands for finite state machine then? > > > > > > > > for (;;) > > > { > > >switch (state) > > > { > > > case 1: > > > /* sth */ > > > state = 3; > > > break; > > > ... > > > case 3: > > > ... > > > } > > > } > > > > > > so optimizing state machine transitions. That is, these are all > > > multiway (switch or goto, but goto support has been removed with the > > > ranger rewrite it seems) and the thread path going through the > > > > ISTR going through the computed goto path in the old code, and it > > never got triggered. I just didn't get around to removing the > > references to GIMPLE_GOTO. At least, the old threader not once got a > > thread we didn't get with the new code, even through a full Fedora > > build. I could be wrong though, but that's my recollection. 
> > When one massages the threader to even consider gotos we eventually > run into find_taken_edge not handling them because range_of_expr > computes the range of 'state' as > > [irange] void * [1, +INF]$4 = void > > rather than &&E. > > A testcase would be > > unsigned g; > void FSM (int start) > { > void *states[] = { &&A, &&B, &&C, &&E }; > void *state = states[start]; > > do { > goto *state; > > A: > g += 1; > state = g & 1 ? &&B : &&E; > continue; > > B: > g += 2; > state = &&C; > continue; > > C: > g += 3; > state = states[g & 3]; > continue; > > E: > break; > } while (1); > } > > where we'd expect to thread the B -> C state transition. I checked > gcc 7 and it doesn't do that - not sure if it broke somewhen > on the way or if it was just never working. I'll file a bugreport, > OTOH &label and goto *p are both GNU extensions, so not sure if it > is worth optimizing. I'll attach the "simple" enabler I have. Yeah, that's what I thought. I seem to remember tracing through the old code and never finding a path that would handle computed gotos. Either way, thanks for distilling a testcase. If you could file a PR and CC me on it, that'd be great. When I contribute prange (irange for pointers), the plan is to have it handle pointer equivalences, and we should be able to get the address from the prange itself. Aldy > > > > loop backedge. This is why FSM has different size limits because > > > it was thought those threadings are especially worthwhile and > > > they tend to be larger. To make discovery cheaper a TODO item > > > would be to skip to the loop backedge when we reach the regular > > > thread limit and only continue the real search there (and > > > pick the "smallest" ways through any diamonds on the way). > > > > Ah, thanks. If you could, add some similar blurb, it'd be great. I'm > > sure we'll all forget it at some time (well, I will :)). > > Sure ;) > > Richard. 
> > > Aldy > > > > > > > > > In your possibly_profitable_path_p function, could you document a bit > > > > better what's the difference between profitable_path_p and > > > > possibly_profitable_path_p? > > > > > > Sure. > > > > > > > > > > > > > I've removed the back_threader_profitability instance from the > > > > > back_threader class and instead instantiate it once per path > > > > > discovery. I've kept the size compute non-incremental to simplify > > > > > the patch and not worry about unwinding. > > > > > > > > > > There's key changes to previous behavior - namely we apply > > > > > the param_max_jump_thread_duplication_stmts early only when > > > > > we know the path cannot become an FSM one (multiway + thread through > > > > > latch) but make sure to elide the path query when we we didn't > > > > > yet discover that but are over this limit. Similarly the > > > > > speed limit is now used even when we did not yet discover a > > > > > hot BB on the path. Basically the idea
Re: [PATCH] Refactor back_threader_profitability
On Wed, 17 Aug 2022, Aldy Hernandez wrote: > On Wed, Aug 17, 2022 at 10:38 AM Richard Biener wrote: > > > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > > > On Wed, Aug 17, 2022 at 9:54 AM Richard Biener wrote: > > > > > > > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > > > > > > > I just have a few high level comments. > > > > > > > > > > On Tue, Aug 16, 2022 at 4:05 PM Richard Biener > > > > > wrote: > > > > > > > > > > > > The following refactors profitable_path_p in the backward threader, > > > > > > splitting out parts that can be computed once the exit block is > > > > > > known, > > > > > > parts that contiguously update and that can be checked allowing > > > > > > for the path to be later identified as FSM with larger limits, > > > > > > possibly_profitable_path_p, and final checks done when the whole > > > > > > path is known, profitable_path_p. > > > > > > > > > > I thought we were removing references to FSM, as they were leftovers > > > > > from some previous incarnation. For that matter, I don't think I ever > > > > > understood what they are, so if we're gonna keep them, could you > > > > > comment what makes FSM threads different from other threads? > > > > > > > > I don't know exactly what 'FSM' stands for but the FSM threader > > > > specifically tried to cover > > > > > > It probably stands for finite state machine then? > > > > > > > > > > > for (;;) > > > > { > > > >switch (state) > > > > { > > > > case 1: > > > > /* sth */ > > > > state = 3; > > > > break; > > > > ... > > > > case 3: > > > > ... > > > > } > > > > } > > > > > > > > so optimizing state machine transitions. That is, these are all > > > > multiway (switch or goto, but goto support has been removed with the > > > > ranger rewrite it seems) and the thread path going through the > > > > > > ISTR going through the computed goto path in the old code, and it > > > never got triggered. I just didn't get around to removing the > > > references to GIMPLE_GOTO. 
At least, the old threader not once got a > > > thread we didn't get with the new code, even through a full Fedora > > > build. I could be wrong though, but that's my recollection. > > > > When one massages the threader to even consider gotos we eventually > > run into find_taken_edge not handling them because range_of_expr > > computes the range of 'state' as > > > > [irange] void * [1, +INF]$4 = void > > > > rather than &&E. > > > > A testcase would be > > > > unsigned g; > > void FSM (int start) > > { > > void *states[] = { &&A, &&B, &&C, &&E }; > > void *state = states[start]; > > > > do { > > goto *state; > > > > A: > > g += 1; > > state = g & 1 ? &&B : &&E; > > continue; > > > > B: > > g += 2; > > state = &&C; > > continue; > > > > C: > > g += 3; > > state = states[g & 3]; > > continue; > > > > E: > > break; > > } while (1); > > } > > > > where we'd expect to thread the B -> C state transition. I checked > > gcc 7 and it doesn't do that - not sure if it broke somewhen > > on the way or if it was just never working. I'll file a bugreport, > > OTOH &label and goto *p are both GNU extensions, so not sure if it > > is worth optimizing. I'll attach the "simple" enabler I have. > > Yeah, that's what I thought. I seem to remember tracing through the > old code and never finding a path that would handle computed gotos. > Either way, thanks for distilling a testcase. If you could file a PR > and CC me on it, that'd be great. > > When I contribute prange (irange for pointers), the plan is to have it > handle pointer equivalences, and we should be able to get the address > from the prange itself. 
Well, ISTR seeing &decl in integer ranges so I wondered why it doesn't just work ;) tracing ranger shows Checking profitability of path (backwards): bb:11 (0 insns) bb:4 (latch) Control statement insns: 0 Overall: 0 insns 1range_of_expr(state_9) at stmt state_9 = PHI <&E(6), state_23(9), &C(8), &B(5)> 2 range_of_stmt (state_9) at stmt state_9 = PHI <&E(6), state_23(9), &C(8), &B(5)> 3ROS dependence fill ROS dep fill (state_9) at stmt state_9 = PHI <&E(6), state_23(9), &C(8), &B(5)> FALSE : (3) ROS (state_9) 4range_on_edge (&E) on edge 6->4 TRUE : (4) range_on_edge (&E) [irange] void * [1, +INF] so why's the range not &E but non-NULL instead? Seems to be range_query::get_tree_range doing. Richard. > Aldy > > > > > > > loop backedge. This is why FSM has different size limits because > > > > it was thought those threadings are especially worthwhile and > > > > they tend to be larger. To make discovery cheaper a TODO item > > > > would be to skip to the loop backedge when we reach the regular > > > > thread limit and only continue the real search there (and > > > > pick the "smallest" ways through any diamonds on the way). > > > > > > Ah, thanks. If you could, add some similar blurb, it'd
Re: [PATCH] Refactor back_threader_profitability
On Wed, Aug 17, 2022 at 10:59 AM Richard Biener wrote: > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > On Wed, Aug 17, 2022 at 10:38 AM Richard Biener wrote: > > > > > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > > > > > On Wed, Aug 17, 2022 at 9:54 AM Richard Biener > > > > wrote: > > > > > > > > > > On Wed, 17 Aug 2022, Aldy Hernandez wrote: > > > > > > > > > > > I just have a few high level comments. > > > > > > > > > > > > On Tue, Aug 16, 2022 at 4:05 PM Richard Biener > > > > > > wrote: > > > > > > > > > > > > > > The following refactors profitable_path_p in the backward > > > > > > > threader, > > > > > > > splitting out parts that can be computed once the exit block is > > > > > > > known, > > > > > > > parts that contiguously update and that can be checked allowing > > > > > > > for the path to be later identified as FSM with larger limits, > > > > > > > possibly_profitable_path_p, and final checks done when the whole > > > > > > > path is known, profitable_path_p. > > > > > > > > > > > > I thought we were removing references to FSM, as they were leftovers > > > > > > from some previous incarnation. For that matter, I don't think I > > > > > > ever > > > > > > understood what they are, so if we're gonna keep them, could you > > > > > > comment what makes FSM threads different from other threads? > > > > > > > > > > I don't know exactly what 'FSM' stands for but the FSM threader > > > > > specifically tried to cover > > > > > > > > It probably stands for finite state machine then? > > > > > > > > > > > > > > for (;;) > > > > > { > > > > >switch (state) > > > > > { > > > > > case 1: > > > > > /* sth */ > > > > > state = 3; > > > > > break; > > > > > ... > > > > > case 3: > > > > > ... > > > > > } > > > > > } > > > > > > > > > > so optimizing state machine transitions. 
That is, these are all > > > > > multiway (switch or goto, but goto support has been removed with the > > > > > ranger rewrite it seems) and the thread path going through the > > > > > > > > ISTR going through the computed goto path in the old code, and it > > > > never got triggered. I just didn't get around to removing the > > > > references to GIMPLE_GOTO. At least, the old threader not once got a > > > > thread we didn't get with the new code, even through a full Fedora > > > > build. I could be wrong though, but that's my recollection. > > > > > > When one massages the threader to even consider gotos we eventually > > > run into find_taken_edge not handling them because range_of_expr > > > computes the range of 'state' as > > > > > > [irange] void * [1, +INF]$4 = void > > > > > > rather than &&E. > > > > > > A testcase would be > > > > > > unsigned g; > > > void FSM (int start) > > > { > > > void *states[] = { &&A, &&B, &&C, &&E }; > > > void *state = states[start]; > > > > > > do { > > > goto *state; > > > > > > A: > > > g += 1; > > > state = g & 1 ? &&B : &&E; > > > continue; > > > > > > B: > > > g += 2; > > > state = &&C; > > > continue; > > > > > > C: > > > g += 3; > > > state = states[g & 3]; > > > continue; > > > > > > E: > > > break; > > > } while (1); > > > } > > > > > > where we'd expect to thread the B -> C state transition. I checked > > > gcc 7 and it doesn't do that - not sure if it broke somewhen > > > on the way or if it was just never working. I'll file a bugreport, > > > OTOH &label and goto *p are both GNU extensions, so not sure if it > > > is worth optimizing. I'll attach the "simple" enabler I have. > > > > Yeah, that's what I thought. I seem to remember tracing through the > > old code and never finding a path that would handle computed gotos. > > Either way, thanks for distilling a testcase. If you could file a PR > > and CC me on it, that'd be great. 
> > > > When I contribute prange (irange for pointers), the plan is to have it > > handle pointer equivalences, and we should be able to get the address > > from the prange itself. > > Well, ISTR seeing &decl in integer ranges so I wondered why it doesn't > just work ;) tracing ranger shows &decl in integer ranges are leftovers from legacy evrp/VRP. Iranges are constant only, but we make an exception when irange::legacy_mode_p() is true so legacy VRP can fit in whatever it needs. But ranger will never put a non-integer in an irange. There's probably an assert to make sure a symbolic never slips into a "modern" irange. The proof of concept I have for prange still has integer endpoints (well, zero, nonzero, since that's what we care about). But range-ops will also keep track of equivalences that we can query with prange::get_equiv() or some such. So yeah, ranger will never give you &decl with irange, but prange may give us what we need. Aldy > > Checking profitability of path (backwards): bb:11 (0 insns) bb:4 (latch) > Control statement insns: 0 > Overall: 0 insns > 1range_of_expr(state_9) at stmt state_9 = PHI <&E(6), state_23(9), >
Re: [PATCH] xtensa: Prevent emitting integer additions of constant zero
On 2022/08/17 4:58, Max Filippov wrote: > Hi Suwa-san, Hi! > > On Tue, Aug 16, 2022 at 5:42 AM Takayuki 'January June' Suwa > wrote: >> >> In a few cases, obviously omitable add instructions can be emitted via >> invoking gen_addsi3. >> >> gcc/ChangeLog: >> >> * config/xtensa/xtensa.md (addsi3_internal): Rename from "addsi3". >> (addsi3): New define_expand in order to reject integer additions of >> constant zero. >> --- >> gcc/config/xtensa/xtensa.md | 14 +- >> 1 file changed, 13 insertions(+), 1 deletion(-) > > with this change a bunch of tests fail to build with the following error: Ah, sorry, I want to withdraw this patch. >> In a few cases As a matter of fact, "in a few cases" is just only one: [xtensa_expand_epilogue() in /gcc/config/xtensa/xtensa.cc] > if (cfun->machine->current_frame_size > 0) > { > if (frame_pointer_needed || /* always reachable with addi */ > cfun->machine->current_frame_size > 1024 || > cfun->machine->current_frame_size <= 127) > { > if (cfun->machine->current_frame_size <= 127) > offset = cfun->machine->current_frame_size; > else > offset = cfun->machine->callee_save_size; > > emit_insn (gen_addsi3 (stack_pointer_rtx, >stack_pointer_rtx, >GEN_INT (offset)));// offset can > be zero! > } And adding "define_expand" only to deal with one rare case had too much impact, as you saw... > undefined reference to `__addsi3' > > E.g. gcc.c-torture/execute/2519-1.c > or gcc.c-torture/execute/20070919-1.c >
Re: add more C++ name hints
Any comment on this? These changes are low-risk so the only real objection I can see is that people don't deem these messages useful in the first place. I would hope, though, that some editors/IDEs provide one-click solutions to add the #includes. On Fri, Aug 5, 2022 at 9:35 PM Ulrich Drepper wrote: > How about adding a few more names from the std namespace to get > appropriate hints? This patch compiles and the appropriate messages are > printed. Is there a problem with just adding more or even at some point > all the symbols of the standard library? > > gcc/ChangeLog: > > * cp/name-lookup.cc (get_std_name_hint): Add more symbols from the > , , and headers. >
[PATCH] fortran, v2: Add -static-libquadmath support [PR46539]
On Wed, Aug 17, 2022 at 10:28:29AM +0200, Mikael Morin wrote: > Tobias approved it already, but I spotted what looks like typos. > See below. Thanks for catching that. > > --- gcc/config/darwin.h.jj 2022-08-16 14:51:14.529544492 +0200 > > +++ gcc/config/darwin.h 2022-08-16 14:53:54.402460097 +0200 > > @@ -443,6 +443,7 @@ extern GTY(()) int darwin_ms_struct; > >%:replace-outfile(-lobjc libobjc-gnu.a%s); \ > > :%:replace-outfile(-lobjc -lobjc-gnu )}}\ > > %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran > > libgfortran.a%s)}\ > > + %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lquadmath > > libquadmath.a%s)}\ > > s/static-libgfortran/static-libquadmath/ I guess? Otherwise I don’t > understand the corresponding ChangeLog description. Yeah. I just copied this part from the 2014 patch and didn't spot that. > > +elif grep "conftest1.$ac_objext.* -aarchive_shared -lgfortran > > -adefault .*conftest2.$ac_objext" \ > > + conftest.cmd >/dev/null 2>&1; then > > + LQUADMATH="%{static-libquadmath:-aarchive_shared} -lquadmath > > %{static-libquadmath:-adefault}" > > +elif grep "conftest1.$ac_objext.* ligfortran.a .*conftest2.$ac_objext" > > \ > > s/ligfortran.a/libgfortran.a/ Indeed. Also removed the space before libgfortran.a because I'm not sure if it won't have because of the %s full path in front of that. This is Darwin only stuff. I must say I don't know if even this elif ... LQUADMATH part is needed, maybe replace-outfile will rename even the -lquadmath from the spec file. Here is an updated version (but nothing relevant to Linux changed, so there is no point for me to bootstrap/regtest it again): 2022-08-17 Francois-Xavier Coudert Jakub Jelinek PR fortran/46539 gcc/ * common.opt (static-libquadmath): New option. * gcc.c (driver_handle_option): Always accept -static-libquadmath. * config/darwin.h (LINK_SPEC): Handle -static-libquadmath. gcc/fortran/ * lang.opt (static-libquadmath): New option. 
* invoke.texi (-static-libquadmath): Document it. * options.c (gfc_handle_option): Error out if -static-libquadmath is passed but we do not support it. libgfortran/ * acinclude.m4 (LIBQUADSPEC): From $FC -static-libgfortran -### output determine -Bstatic/-Bdynamic, -bstatic/-bdynamic, -aarchive_shared/-adefault linker support or Darwin remapping of -lgfortran to libgfortran.a%s and use that around or instead of -lquadmath in LIBQUADSPEC. * configure: Regenerated. --- gcc/common.opt.jj 2022-06-27 11:18:02.050066582 +0200 +++ gcc/common.opt 2022-08-16 14:51:04.611673800 +0200 @@ -3601,6 +3601,10 @@ static-libphobos Driver ; Documented for D, but always accepted by driver. +static-libquadmath +Driver +; Documented for Fortran, but always accepted by driver. + static-libstdc++ Driver --- gcc/gcc.cc.jj 2022-08-11 09:57:24.765334380 +0200 +++ gcc/gcc.cc 2022-08-16 14:57:54.708327024 +0200 @@ -4585,12 +4585,14 @@ driver_handle_option (struct gcc_options case OPT_static_libgcc: case OPT_shared_libgcc: case OPT_static_libgfortran: +case OPT_static_libquadmath: case OPT_static_libphobos: case OPT_static_libstdc__: /* These are always valid, since gcc.cc itself understands the first two, gfortranspec.cc understands -static-libgfortran, -d-spec.cc understands -static-libphobos, and g++spec.cc -understands -static-libstdc++ */ +d-spec.cc understands -static-libphobos, g++spec.cc +understands -static-libstdc++ and libgfortran.spec handles +-static-libquadmath. 
*/ validated = true; break; --- gcc/config/darwin.h.jj 2022-08-16 14:51:14.529544492 +0200 +++ gcc/config/darwin.h 2022-08-16 14:53:54.402460097 +0200 @@ -443,6 +443,7 @@ extern GTY(()) int darwin_ms_struct; %:replace-outfile(-lobjc libobjc-gnu.a%s); \ :%:replace-outfile(-lobjc -lobjc-gnu )}}\ %{static|static-libgcc|static-libgfortran:%:replace-outfile(-lgfortran libgfortran.a%s)}\ + %{static|static-libgcc|static-libquadmath:%:replace-outfile(-lquadmath libquadmath.a%s)}\ %{static|static-libgcc|static-libphobos:%:replace-outfile(-lgphobos libgphobos.a%s)}\ %{static|static-libgcc|static-libstdc++|static-libgfortran:%:replace-outfile(-lgomp libgomp.a%s)}\ %{static|static-libgcc|static-libstdc++:%:replace-outfile(-lstdc++ libstdc++.a%s)}\ --- gcc/fortran/lang.opt.jj 2022-02-04 14:36:55.050604670 +0100 +++ gcc/fortran/lang.opt2022-08-16 14:52:52.459267705 +0200 @@ -863,6 +863,10 @@ static-libgfortran Fortran Statically link the GNU Fortran helper library (libgfortran). +static-libquadmath +Fortran +Statically link the GCC Quad-Precision Math Library (libquadmath). + std=f2003 Fortran Conform to the ISO Fortran 2003 s
[PATCH] Support bitmap_copy across representations
The following started as making the backward threader m_imports use the tree representation. Since that interfaces to a list representation bitmap in ranger by copying rewriting the tree to list to perform the copy is inefficient in that it loses balancing. The following adds bitmap_copy_tree_to_list and integrates it with the generic bitmap_copy routine. For symmetry I also added list to tree copy, relying on auto-balancing, and tree to tree copy which I didn't optimize to preserve the source balancing but instead use bitmap_copy_tree_to_list and have the result auto-balanced again. I've only exercised the tree to list path and I won't actually end up using it but it's at least worth posting. Bootstrapped and tested on x86_64-unknown-linux-gnu. Worth pushing? * bitmap.h: Document set_copy aka bitmap_copy as usable for tree representation. * bitmap.cc (bitmap_copy_tree_to_list): New helper. (bitmap_copy): Support copying all bitmap representation combinations. --- gcc/bitmap.cc | 78 +-- gcc/bitmap.h | 6 +++- 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/gcc/bitmap.cc b/gcc/bitmap.cc index 88c329f9325..d9520397992 100644 --- a/gcc/bitmap.cc +++ b/gcc/bitmap.cc @@ -847,6 +847,58 @@ bitmap_element_zerop (const bitmap_element *element) #endif } +/* Copy bitmap FROM which is in tree form to bitmap TO in list form. */ + +static void +bitmap_copy_tree_to_list (bitmap to, const_bitmap from) +{ + bitmap_element *to_ptr = nullptr; + + gcc_checking_assert (!to->tree_form && from->tree_form); + + /* Do like bitmap_tree_to_vec but populate the destination bitmap + instead of a vector. 
*/ + auto_vec stack; + bitmap_element *e = from->first; + while (true) +{ + while (e != NULL) + { + stack.safe_push (e); + e = e->prev; + } + if (stack.is_empty ()) + break; + + bitmap_element *from_ptr = stack.pop (); + { + bitmap_element *to_elt = bitmap_element_allocate (to); + + to_elt->indx = from_ptr->indx; + memcpy (to_elt->bits, from_ptr->bits, sizeof (to_elt->bits)); + + /* Here we have a special case of bitmap_list_link_element, +for the case where we know the links are being entered +in sequence. */ + if (to_ptr == nullptr) + { + to->first = to->current = to_elt; + to->indx = from_ptr->indx; + to_elt->next = to_elt->prev = nullptr; + } + else + { + to_elt->prev = to_ptr; + to_elt->next = nullptr; + to_ptr->next = to_elt; + } + + to_ptr = to_elt; + } + e = from_ptr->next; +} +} + /* Copy a bitmap to another bitmap. */ void @@ -854,11 +906,29 @@ bitmap_copy (bitmap to, const_bitmap from) { const bitmap_element *from_ptr; bitmap_element *to_ptr = 0; - - gcc_checking_assert (!to->tree_form && !from->tree_form); + bool to_tree = to->tree_form; bitmap_clear (to); + /* From tree form first copy to list form, avoiding debalancing from, + then if the result is in tree form have it rebalance itself. */ + if (from->tree_form) +{ + if (to_tree) + bitmap_list_view (to); + bitmap_copy_tree_to_list (to, from); + if (to_tree) + bitmap_tree_view (to); + return; +} + + /* We are copying from list form - first copy the list representation + so temporarily switch the destination to list form. */ + if (to_tree) +bitmap_list_view (to); + + gcc_checking_assert (!to->tree_form && !from->tree_form); + /* Copy elements in forward direction one at a time. */ for (from_ptr = from->first; from_ptr; from_ptr = from_ptr->next) { @@ -885,6 +955,10 @@ bitmap_copy (bitmap to, const_bitmap from) to_ptr = to_elt; } + + /* To tree, result will balance itself. */ + if (to_tree) +bitmap_tree_view (to); } /* Move a bitmap to another bitmap. 
*/ diff --git a/gcc/bitmap.h b/gcc/bitmap.h index 7fba443aff1..9a007855d0c 100644 --- a/gcc/bitmap.h +++ b/gcc/bitmap.h @@ -139,11 +139,15 @@ along with GCC; see the file COPYING3. If not see * add_member * remove_member + Additionally both representations support enumeration of the + members in O(E) time for the following operations: + + * set_copy: bitmap_copy + Additionally, the linked-list sparse set representation supports enumeration of the members in O(E) time: * forall : EXECUTE_IF_SET_IN_BITMAP - * set_copy: bitmap_copy * set_intersection: bitmap_intersect_p / bitmap_and / bitmap_and_into / EXECUTE_IF_AND_IN_BITMAP -- 2.35.3
Re: [PATCH] Fix bogus -Wstringop-overflow warning in Ada
On Tue, Aug 16, 2022 at 3:57 PM Eric Botcazou via Gcc-patches wrote: > > Hi, > > the following bogus warning: > > In function 'lto26', > inlined from 'main' at /home/eric/gnat/bugs/V721-018/b~lto26.adb:237:7: > lto26.adb:11:13: warning: writing 1 byte into a region of size 0 [-Wstringop- > overflow=] >11 | Set (R, (7, 0, 84, Stream_Element (I), 0, 0, 0), 1); > | ^ > lto26.adb: In function 'main': > lto26.adb:11:50: note: at offset -9223372036854775808 into destination object > 'A.0' of size 7 >11 | Set (R, (7, 0, 84, Stream_Element (I), 0, 0, 0), 1); > | ^ > > comes from a discrepancy between get_offset_range, which uses a signed type, > and handle_array_ref, which uses an unsigned one, to do offset computations. > > Tested on x86-64/Linux, OK for the mainline? Hmm, can we instead do if (!integer_zerop (lowbnd) && tree_fits_shwi_p (lowbnd)) { const offset_int lb = offset_int::from (lowbnd, SIGNED); ... ? In particular interpreting the unsigned lowbnd as SIGNED when not wlb.get_precision () < TYPE_PRECISION (sizetype), offset_int should handle all positive and negative byte offsets since it can also represent them measured in bits. Unfortunately the wide_int classes do not provide the maximum precision they can handle. That said, the check, if any, should guard the whole orng adjustment, no? (in fact I wonder why we just ignore lowbnd if it doesn't fit or is variable...) > > > 2022-08-16 Eric Botcazou > > * pointer-query.cc (handle_array_ref): Fix handling of low bound. > > > 2022-08-16 Eric Botcazou > > * gnat.dg/lto26.adb: New test. > * gnat.dg/lto26_pkg1.ads, gnat.dg/lto26_pkg1.adb: New helper. > * gnat.dg/lto26_pkg2.ads, gnat.dg/lto26_pkg2.adb: Likewise. > > -- > Eric Botcazou
Re: ICE after folding svld1rq to vec_perm_expr duing forwprop
On Tue, Aug 16, 2022 at 6:30 PM Richard Sandiford wrote: > > Prathamesh Kulkarni writes: > > On Tue, 9 Aug 2022 at 18:42, Richard Biener > > wrote: > >> > >> On Tue, Aug 9, 2022 at 12:10 PM Prathamesh Kulkarni > >> wrote: > >> > > >> > On Mon, 8 Aug 2022 at 14:27, Richard Biener > >> > w>> > > > >> > > > >> > > /* If result vector has greater length than input vector, > >> > > + then allow permuting two vectors as long as: > >> > > + a) sel.nelts_per_pattern == 1 > >> > > + b) sel.npatterns == len of input vector. > >> > > + The intent is to permute input vectors, and > >> > > + dup the elements in resulting vector to target vector length. */ > >> > > + > >> > > + if (maybe_gt (TYPE_VECTOR_SUBPARTS (type), > >> > > + TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0 > >> > > +{ > >> > > + nelts = sel.encoding ().npatterns (); > >> > > + if (sel.encoding ().nelts_per_pattern () != 1 > >> > > + || (!known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE > >> > > (arg0) > >> > > + return NULL_TREE; > >> > > +} > >> > > > >> > > so the only case you add is non-VLA to VLA and there > >> > > explicitely only the case of a period that's same as the > >> > > element count in the input vectors. > >> > > > >> > > > >> > > @@ -2602,6 +2602,9 @@ dump_generic_node (pretty_printer *pp, tree > >> > > node, int spc, dump_flags_t flags, > >> > > pp_space (pp); > >> > > } > >> > > } > >> > > + if (VECTOR_TYPE_P (TREE_TYPE (node)) > >> > > + && !TYPE_VECTOR_SUBPARTS (TREE_TYPE (node)).is_constant ()) > >> > > + pp_string (pp, ", ... "); > >> > > pp_right_brace (pp); > >> > > > >> > > btw, I do wonder if VLA CONSTRUCTORs are a "thing"? Are they? 
> >> > Well, it got created for the following case after folding: > >> > svint32_t f2(int a, int b, int c, int d) > >> > { > >> > int32x4_t v = {a, b, c, d}; > >> > return svld1rq_s32 (svptrue_b8 (), &v[0]); > >> > } > >> > > >> > The svld1rq_s32 call gets folded to: > >> > v = {a, b, c, d} > >> > lhs = VEC_PERM_EXPR > >> > > >> > fold_vec_perm then folds the above VEC_PERM_EXPR to > >> > VLA constructor, since elements in v (in_elts) are not constant, and > >> > need_ctor is thus true: > >> > lhs = {a, b, c, d, ...} > >> > I added "..." to make it more explicit that it's a VLA constructor. > >> > >> But I doubt we do anything reasonable with such a beast? Do we? > >> I suppose it's like a vec_duplicate if you view it as V1TImode > >> but do we actually make sure to do this duplication? > > I am not sure. As mentioned above, the current code-gen for VLA > > constructor looks pretty bad. > > Should we avoid folding VLA constructors for now ? > > VLA constructors aren't really a thing. At least, the only VLA vector > you could represent with current CONSTRUCTOR nodes is a fixed-length > sequence at the start of an otherwise zero vector. I'm not sure > we even use that though (perhaps we do and I've forgotten). > > > I guess these are 2 different issues: > > (a) Resolving ICE with VEC_PERM_EXPR for above aarch64 tests. > > (b) Extending fold_vec_perm to handle vectors with differing lengths. > > > > For (a), I think the issue with using: > > res_type = gimple_assign_lhs (stmt) > > in previous patch, was that op2's type will change to match tgt_units, > > if we go thru > > (code == VIEW_CONVERT_EXPR || code2 == VIEW_CONVERT_EXPR) branch, > > and may thus not be same as len(lhs_type) anymore, and hit the assert > > in fold_vec_perm. > > > > IIUC, for lhs = VEC_PERM_EXPR, we now have the > > following semantics: > > (1) Element types for lhs, rhs1 and rhs2 should be the same. > > (2) len(lhs) == len(mask) and len(rhs1) == len(rhs2). > > Yeah. 
> > > The attached patch changes res_type from TREE_TYPE (arg0) to following: > > res_type = build_vector_type (TREE_TYPE (TREE_TYPE (arg0)), > > TYPE_VECTOR_SUBPARTS (op2)) > > so it has same element type as arg0 (and arg1) and len of op2. > > Does that look reasonable ? > > > > If we need a cast from res_type to lhs_type, then both would be fixed > > width vectors > > with len(lhs_type) being a multiple of len(res_type). > > IIUC, we don't support casting from VLA vector to/from fixed width vector, > > Yes, that's not supported as a cast. If the compiler knows the > length of the "VLA" vector then it's not VLA. If it doesn't > know the length of the VLA vector then the sizes could be different > (preventing VIEW_CONVERT_EXPR) and the number of elements could be > different (preventing pointwise CONVERT_EXPRs). > > > or from VLA vector of one type to VLA vector of other type ? > > That's supported though. They work just like VLS vectors: if the sizes > are the same then we can use VIEW_CONVERT_EXPR, if the number of elements > are the same then we can do pointwise conversions (e.g. element-by-element > extensions, truncations, conversions to float, convers
Re: Restore 'GOMP_offload_unregister_ver' functionality (was: [Patch][v5] OpenMP: Move omp requires checks to libgomp)
On Wed, Jul 06, 2022 at 03:59:59PM +0200, Tobias Burnus wrote: > > @@ -2436,12 +2452,15 @@ GOMP_offload_unregister_ver (unsigned version, > > const void *host_table, > > } > > > > /* Remove image from array of pending images. */ > > + bool found = false; > > for (i = 0; i < num_offload_images; i++) > > if (offload_images[i].target_data == target_data) > > { > > offload_images[i] = offload_images[--num_offload_images]; > > + found = true; > > break; > > } > > + assert (found); > > > > gomp_mutex_unlock (®ister_lock); > > } > > ... I don't like that libgomp crashes without any helpful message in that > case. > > In my opinion: > * Either we assume that it is unlikely to occur - ignore it. > (Matches the current implementation: do nothing.) I think we don't need any asserts here, nor gomp_error/gomp_fatal. The GOMP_* APIs aren't meant for direct user uses, they are to be only called by compiler generated code or support object files, and in the case of GOMP_offload_{,un}register_ver the crt file makes sure it will be registered and unregistered in pairs. Jakub
Re: [PATCH] arm: Define with_float to hard when target name ends with hf
On 17/08/2022 09:35, Christophe Lyon via Gcc-patches wrote: On arm, the --with-float= configure option is used to define include files search path (among other things). However, when targeting arm-linux-gnueabihf, one would expect to automatically default to the hard-float ABI, but this is not the case. As a consequence, GCC bootstrap fails on an arm-linux-gnueabihf target if --with-float=hard is not used. This patch checks if the target name ends with 'hf' and defines with_float to hard if not already defined. This is achieved in gcc/config.gcc, just before selecting the default CPU depending on the $with_float value. 2022-08-17 Christophe Lyon gcc/ * config.gcc (arm): Define with_float to hard if target name ends with 'hf'. --- gcc/config.gcc | 7 +++ 1 file changed, 7 insertions(+) diff --git a/gcc/config.gcc b/gcc/config.gcc index 4e3b15bb5e9..02f58970db0 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -1314,6 +1314,13 @@ arm*-*-linux-* | arm*-*-uclinuxfdpiceabi) tm_file="$tm_file arm/uclinuxfdpiceabi.h" ;; esac + # Define with_float to "hard" if not already defined and + # target name ends with "hf" + case $target:$with_float in + arm*-*-*hf:) + with_float=hard + ;; + esac # Generation of floating-point instructions requires at least ARMv5te. if [ "$with_float" = "hard" -o "$with_float" = "softfp" ] ; then target_cpu_cname="arm10e" OK. R.
Re: [Patch] OpenMP: Fix folding with simd's linear clause [PR106492]
On Thu, Aug 04, 2022 at 09:32:22AM +0200, Tobias Burnus wrote: > Rather obvious fix and similar to PR106449. > > OK for mainline and backporting (how far?). I would like to backport it > at least to GCC 12. It can go even to 11 and 10 if you are willing to test the backports there (after a while). Jakub
[PATCH] IBM zSystems: Fix function_ok_for_sibcall [PR106355]
For a parameter with BLKmode we cannot use REG_NREGS in order to determine the number of consecutive registers. Streamlined this with the implementation of s390_function_arg. Fix some indentation whitespace, too. Assuming bootstrap and regtest are ok for mainline and gcc-{10,11,12}, ok to install for all of those? PR target/106355 gcc/ChangeLog: * config/s390/s390.cc (s390_call_saved_register_used): For a parameter with BLKmode fix determining number of consecutive registers. gcc/testsuite/ChangeLog: * gcc.target/s390/pr106355.h: Common code for new tests. * gcc.target/s390/pr106355-1.c: New test. * gcc.target/s390/pr106355-2.c: New test. * gcc.target/s390/pr106355-3.c: New test. --- gcc/config/s390/s390.cc| 47 +++--- gcc/testsuite/gcc.target/s390/pr106355-1.c | 42 +++ gcc/testsuite/gcc.target/s390/pr106355-2.c | 8 gcc/testsuite/gcc.target/s390/pr106355-3.c | 8 gcc/testsuite/gcc.target/s390/pr106355.h | 18 + 5 files changed, 100 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.target/s390/pr106355-1.c create mode 100644 gcc/testsuite/gcc.target/s390/pr106355-2.c create mode 100644 gcc/testsuite/gcc.target/s390/pr106355-3.c create mode 100644 gcc/testsuite/gcc.target/s390/pr106355.h diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 5aaf76a9490..85e5b2cb2a2 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -13712,36 +13712,37 @@ s390_call_saved_register_used (tree call_expr) function_arg_info arg (TREE_TYPE (parameter), /*named=*/true); apply_pass_by_reference_rules (&cum_v, arg); - parm_rtx = s390_function_arg (cum, arg); + parm_rtx = s390_function_arg (cum, arg); - s390_function_arg_advance (cum, arg); + s390_function_arg_advance (cum, arg); - if (!parm_rtx) -continue; - - if (REG_P (parm_rtx)) -{ - for (reg = 0; reg < REG_NREGS (parm_rtx); reg++) -if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx))) - return true; -} + if (!parm_rtx) + continue; - if (GET_CODE (parm_rtx) == PARALLEL) -{ - int i; + if 
(REG_P (parm_rtx)) + { + int size = s390_function_arg_size (arg.mode, arg.type); + int nregs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG; - for (i = 0; i < XVECLEN (parm_rtx, 0); i++) -{ - rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0); + for (reg = 0; reg < nregs; reg++) + if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx))) + return true; + } + else if (GET_CODE (parm_rtx) == PARALLEL) + { + int i; - gcc_assert (REG_P (r)); + for (i = 0; i < XVECLEN (parm_rtx, 0); i++) + { + rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0); - for (reg = 0; reg < REG_NREGS (r); reg++) -if (!call_used_or_fixed_reg_p (reg + REGNO (r))) - return true; -} -} + gcc_assert (REG_P (r)); + gcc_assert (REG_NREGS (r) == 1); + if (!call_used_or_fixed_reg_p (REGNO (r))) + return true; + } + } } return false; } diff --git a/gcc/testsuite/gcc.target/s390/pr106355-1.c b/gcc/testsuite/gcc.target/s390/pr106355-1.c new file mode 100644 index 000..1ec0f6b25ac --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/pr106355-1.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-foptimize-sibling-calls" } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,bar4} } } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,bar8} } } */ + +/* Parameter E is passed in GPR 6 which is call-saved which prohibits + sibling call optimization. This must hold true also if the mode of the + parameter is BLKmode. 
*/ + +/* 4 byte */ + +typedef struct +{ + char x; + char y[3]; +} t4; + +extern t4 e4; + +extern void bar4 (int a, int b, int c, int d, t4 e4); + +void foo4 (int a, int b, int c, int d) +{ + bar4 (a, b, c, d, e4); +} + +/* 8 byte */ + +typedef struct +{ + short x; + char y[6]; +} t8; + +extern t8 e8; + +extern void bar8 (int a, int b, int c, int d, t8 e8); + +void foo8 (int a, int b, int c, int d) +{ + bar8 (a, b, c, d, e8); +} diff --git a/gcc/testsuite/gcc.target/s390/pr106355-2.c b/gcc/testsuite/gcc.target/s390/pr106355-2.c new file mode 100644 index 000..ddbdba5d278 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/pr106355-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { s390-*-* } } } */ +/* { dg-options "-foptimize-sibling-calls -mzarch" } */ +/* { dg-final { scan-assembler {brasl\t%r\d+,bar} } } */ + +/* This tests function s390_call_saved_register_used where + GET_CODE (parm_rtx) == PARALLEL holds. */ + +#include "pr106355.h" diff --git a/gcc/testsuite/gcc.target
[PATCH] Make path_range_query standalone and add reset_path.
These are a bunch of cleanups inspired by Richi's suggestion of making path_range_query standalone, instead of having to call compute_ranges() for each path. I've made the ranger need explicit, and moved the responsibility for its creation to the caller. I've also investigated and documented why the forward threader needs its own compute exit dependencies variant. I can't wait for it to go away :-/. I've also added constructors that take a path and dependencies, and made compute_ranges() private. Unfortunately, reinstantiating path_range_query in the forward threader caused a 14% performance regression in DOM, because the old threader calls it over and over on the same path to simplify statements (some of which not even in the IL, but that's old news). In the meantime, I've left the ability to reset a path, but this time appropriately called reset_path(). I've moved the verify_marked_backedges call to the threader. Is this ok? Interestingly, comparing the thread counts for the patch yielded more threads. I narrowed this down to a bug in the path oracle reset code that's not cleaning things up as expected. I'll fix that before committing this, but figured I'd post for comments in the meantime. Thoughts? gcc/ChangeLog: * gimple-range-path.cc (path_range_query::path_range_query): Add various constructors to take a path. (path_range_query::~path_range_query): Remove m_alloced_ranger. (path_range_query::range_on_path_entry): Adjust for m_ranger being a reference. (path_range_query::set_path): Rename to... (path_range_query::reset_path): ...this and call compute_ranges. (path_range_query::ssa_range_in_phi): Adjust for m_ranger reference. (path_range_query::range_defined_in_block): Same. (path_range_query::compute_ranges_in_block): Same. (path_range_query::adjust_for_non_null_uses): Same. (path_range_query::compute_exit_dependencies): Use m_path instead of argument. (path_range_query::compute_ranges): Remove path argument. 
(path_range_query::range_of_stmt): Adjust for m_ranger reference. (path_range_query::compute_outgoing_relations): Same. * gimple-range-path.h (class path_range_query): Add various constructors. Make compute_ranges and compute_exit_dependencies private. Rename set_path to reset_path. Make m_ranger a reference. Remove m_alloced_ranger. * tree-ssa-dom.cc (pass_dominator::execute): Adjust constructor to path_range_query. * tree-ssa-loop-ch.cc (entry_loop_condition_is_static): Take a ranger and instantiate a new path_range_query every time. (ch_base::copy_headers): Pass ranger instead of path_range_query. * tree-ssa-threadbackward.cc (class back_threader): Remove m_solver. (back_threader::~back_threader): Remove m_solver. (back_threader::find_taken_edge_switch): Adjust for m_ranger reference. (back_threader::find_taken_edge_cond): Same. (back_threader::dump): Remove m_solver. (back_threader::back_threader): Move verify_marked_backedges here from the path_range_query constructor. * tree-ssa-threadedge.cc (hybrid_jt_simplifier::simplify): Move some code from compute_ranges_from_state here. (hybrid_jt_simplifier::compute_ranges_from_state): Rename... (hybrid_jt_simplifier::compute_exit_dependencies): ...to this. * tree-ssa-threadedge.h (class hybrid_jt_simplifier): Rename compute_ranges_from_state to compute_exit_dependencies. Remove m_path. --- gcc/gimple-range-path.cc | 112 + gcc/gimple-range-path.h| 22 +++ gcc/tree-ssa-dom.cc| 2 +- gcc/tree-ssa-loop-ch.cc| 12 ++-- gcc/tree-ssa-threadbackward.cc | 27 gcc/tree-ssa-threadedge.cc | 30 + gcc/tree-ssa-threadedge.h | 5 +- 7 files changed, 111 insertions(+), 99 deletions(-) diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc index c99d77dd340..d37605f4539 100644 --- a/gcc/gimple-range-path.cc +++ b/gcc/gimple-range-path.cc @@ -36,33 +36,52 @@ along with GCC; see the file COPYING3. If not see // Internal construct to help facilitate debugging of solver. 
#define DEBUG_SOLVER (dump_file && (param_threader_debug == THREADER_DEBUG_ALL)) -path_range_query::path_range_query (bool resolve, gimple_ranger *ranger) +path_range_query::path_range_query (gimple_ranger &ranger, + const vec &path, + const bitmap_head *dependencies, + bool resolve) : m_cache (new ssa_global_cache), m_has_cache_entry (BITMAP_ALLOC (NULL)), -m_resolve (resolve), -m_alloced_ranger (!ranger) +m_ranger (ranger), +m_resolve (resolve) { - if (m_alloced_ranger) -m_ranger = new gimple_rang
Re: [PATCH] IBM zSystems: Fix function_ok_for_sibcall [PR106355]
On Wed, Aug 17, 2022 at 1:57 PM Stefan Schulze Frielinghaus via Gcc-patches wrote: > > For a parameter with BLKmode we cannot use REG_NREGS in order to > determine the number of consecutive registers. Streamlined this with > the implementation of s390_function_arg. > > Fix some indentation whitespace, too. > > Assuming bootstrap and regtest are ok for mainline and gcc-{10,11,12}, Note the GCC 12 branch is currently frozen. Richard. > ok to install for all of those? > > PR target/106355 > > gcc/ChangeLog: > > * config/s390/s390.cc (s390_call_saved_register_used): For a > parameter with BLKmode fix determining number of consecutive > registers. > > gcc/testsuite/ChangeLog: > > * gcc.target/s390/pr106355.h: Common code for new tests. > * gcc.target/s390/pr106355-1.c: New test. > * gcc.target/s390/pr106355-2.c: New test. > * gcc.target/s390/pr106355-3.c: New test. > --- > gcc/config/s390/s390.cc| 47 +++--- > gcc/testsuite/gcc.target/s390/pr106355-1.c | 42 +++ > gcc/testsuite/gcc.target/s390/pr106355-2.c | 8 > gcc/testsuite/gcc.target/s390/pr106355-3.c | 8 > gcc/testsuite/gcc.target/s390/pr106355.h | 18 + > 5 files changed, 100 insertions(+), 23 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/s390/pr106355-1.c > create mode 100644 gcc/testsuite/gcc.target/s390/pr106355-2.c > create mode 100644 gcc/testsuite/gcc.target/s390/pr106355-3.c > create mode 100644 gcc/testsuite/gcc.target/s390/pr106355.h > > diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc > index 5aaf76a9490..85e5b2cb2a2 100644 > --- a/gcc/config/s390/s390.cc > +++ b/gcc/config/s390/s390.cc > @@ -13712,36 +13712,37 @@ s390_call_saved_register_used (tree call_expr) >function_arg_info arg (TREE_TYPE (parameter), /*named=*/true); >apply_pass_by_reference_rules (&cum_v, arg); > > - parm_rtx = s390_function_arg (cum, arg); > + parm_rtx = s390_function_arg (cum, arg); > > - s390_function_arg_advance (cum, arg); > + s390_function_arg_advance (cum, arg); > > - if (!parm_rtx) > -continue; > - > 
- if (REG_P (parm_rtx)) > -{ > - for (reg = 0; reg < REG_NREGS (parm_rtx); reg++) > -if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx))) > - return true; > -} > + if (!parm_rtx) > + continue; > > - if (GET_CODE (parm_rtx) == PARALLEL) > -{ > - int i; > + if (REG_P (parm_rtx)) > + { > + int size = s390_function_arg_size (arg.mode, arg.type); > + int nregs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG; > > - for (i = 0; i < XVECLEN (parm_rtx, 0); i++) > -{ > - rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0); > + for (reg = 0; reg < nregs; reg++) > + if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx))) > + return true; > + } > + else if (GET_CODE (parm_rtx) == PARALLEL) > + { > + int i; > > - gcc_assert (REG_P (r)); > + for (i = 0; i < XVECLEN (parm_rtx, 0); i++) > + { > + rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0); > > - for (reg = 0; reg < REG_NREGS (r); reg++) > -if (!call_used_or_fixed_reg_p (reg + REGNO (r))) > - return true; > -} > -} > + gcc_assert (REG_P (r)); > + gcc_assert (REG_NREGS (r) == 1); > > + if (!call_used_or_fixed_reg_p (REGNO (r))) > + return true; > + } > + } > } >return false; > } > diff --git a/gcc/testsuite/gcc.target/s390/pr106355-1.c > b/gcc/testsuite/gcc.target/s390/pr106355-1.c > new file mode 100644 > index 000..1ec0f6b25ac > --- /dev/null > +++ b/gcc/testsuite/gcc.target/s390/pr106355-1.c > @@ -0,0 +1,42 @@ > +/* { dg-do compile } */ > +/* { dg-options "-foptimize-sibling-calls" } */ > +/* { dg-final { scan-assembler {brasl\t%r\d+,bar4} } } */ > +/* { dg-final { scan-assembler {brasl\t%r\d+,bar8} } } */ > + > +/* Parameter E is passed in GPR 6 which is call-saved which prohibits > + sibling call optimization. This must hold true also if the mode of the > + parameter is BLKmode. 
*/ > + > +/* 4 byte */ > + > +typedef struct > +{ > + char x; > + char y[3]; > +} t4; > + > +extern t4 e4; > + > +extern void bar4 (int a, int b, int c, int d, t4 e4); > + > +void foo4 (int a, int b, int c, int d) > +{ > + bar4 (a, b, c, d, e4); > +} > + > +/* 8 byte */ > + > +typedef struct > +{ > + short x; > + char y[6]; > +} t8; > + > +extern t8 e8; > + > +extern void bar8 (int a, int b, int c, int d, t8 e8); > + > +void foo8 (int a, int b, int c, int d) > +{ > + bar8 (a, b, c, d, e8); > +} > diff --git a/gcc/testsuite/gcc.target/s390/pr106355-2.c > b/gcc/testsuite/gcc.target/s390/pr106355-2.c > new file mode 100644 > index 000..dd
[PATCH] config/rs6000/t-float128: Don't encode full build paths into headers
Avoid encoding full build paths into headers, just use the basename of the file. This aids build reproducibility where the build paths vary and source is saved for debugging purposes. libgcc/ChangeLog: * config/rs6000/t-float128: Don't encode full build paths into headers Signed-off-by: Richard Purdie --- libgcc/config/rs6000/t-float128 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libgcc/config/rs6000/t-float128 b/libgcc/config/rs6000/t-float128 index b09b5664af0..513e63748f1 100644 --- a/libgcc/config/rs6000/t-float128 +++ b/libgcc/config/rs6000/t-float128 @@ -103,7 +103,7 @@ $(ibm128_dec_objs) : INTERNAL_CFLAGS += $(IBM128_CFLAGS_DECIMAL) $(fp128_softfp_src) : $(srcdir)/soft-fp/$(subst -sw,,$(subst kf,tf,$@)) $(fp128_dep) @src="$(srcdir)/soft-fp/$(subst -sw,,$(subst kf,tf,$@))"; \ echo "Create $@"; \ - (echo "/* file created from $$src */"; \ + (echo "/* file created from `basename $$src` */"; \ echo; \ sed -f $(fp128_sed) < $$src) > $@ -- 2.34.1
Re: [PATCH, OpenMP, C++] Allow classes with static members to be mappable
On Wed, Jul 27, 2022 at 01:45:30PM +0200, Tobias Burnus wrote: > OpenMP/C++: Allow classes with static members to be mappable > > As this is the last lang-specific user of the omp_mappable_type hook, > the hook is removed, keeping only a generic omp_mappable_type for > incomplete types (or error_node). > > gcc/c/ChangeLog: > > * c-decl.cc (c_decl_attributes, finish_decl): Call omp_mappable_type > instead of removed langhook. > * c-typeck.cc (c_finish_omp_clauses): Likewise. > > gcc/cp/ChangeLog: > > * cp-objcp-common.h (LANG_HOOKS_OMP_MAPPABLE_TYPE): Remove. > * cp-tree.h (cp_omp_mappable_type, cp_omp_emit_unmappable_type_notes): > Remove. > * decl2.cc (cp_omp_mappable_type_1, cp_omp_mappable_type, > cp_omp_emit_unmappable_type_notes): Remove. > (cplus_decl_attributes): Call omp_mappable_type instead of > removed langhook. > * decl.cc (cp_finish_decl): Likewise; call cxx_incomplete_type_inform > in lieu of cp_omp_emit_unmappable_type_notes. > * semantics.cc (finish_omp_clauses): Likewise. > > gcc/ChangeLog: > > * gimplify.cc (omp_notice_variable): Call omp_mappable_type > instead of removed langhook. > * omp-general.h (omp_mappable_type): New prototype. > * omp-general.cc (omp_mappable_type): New; moved from ... > * langhooks.cc (lhd_omp_mappable_type): ... here. > * langhooks-def.h (lhd_omp_mappable_type, > LANG_HOOKS_OMP_MAPPABLE_TYPE): Remove. > (LANG_HOOKS_FOR_TYPES_INITIALIZER): Remote the latter. > * langhooks.h (struct lang_hooks_for_types): Remove > omp_mappable_type. > > gcc/testsuite/ChangeLog: > > * g++.dg/gomp/unmappable-1.C: Remove dg-error; remove dg-note no > longer shown as TYPE_MAIN_DECL is NULL. > * c-c++-common/gomp/map-incomplete-type.c: New test. > > Co-authored-by: Chung-Lin Tang Ok, thanks. Jakub
[PATCH 1/2] gcc/file-prefix-map: Allow remapping of relative paths
Relative paths currently aren't remapped by -ffile-prefix-map and friends. When cross compiling with separate 'source' and 'build' directories, the same relative paths between directories may not be available on target as compared to build time. In order to be able to remap these relative build paths to paths that would work on target, resolve paths within the file-prefix-map function using realpath(). This does cause a change of behaviour if users were previously relying upon symlinks or absolute paths not being resolved. Use basename to ensure plain filenames don't have paths added. gcc/ChangeLog: * file-prefix-map.cc (remap_filename): Allow remapping of relative paths Signed-off-by: Richard Purdie --- gcc/file-prefix-map.cc | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/gcc/file-prefix-map.cc b/gcc/file-prefix-map.cc index 24733f831d6..50d5d724a8f 100644 --- a/gcc/file-prefix-map.cc +++ b/gcc/file-prefix-map.cc @@ -70,19 +70,28 @@ remap_filename (file_prefix_map *maps, const char *filename) file_prefix_map *map; char *s; const char *name; + char *realname; size_t name_len; + if (lbasename (filename) == filename) +return filename; + + realname = lrealpath (filename); + for (map = maps; map; map = map->next) -if (filename_ncmp (filename, map->old_prefix, map->old_len) == 0) +if (filename_ncmp (realname, map->old_prefix, map->old_len) == 0) break; - if (!map) + if (!map) { +free (realname); return filename; - name = filename + map->old_len; + } + name = realname + map->old_len; name_len = strlen (name) + 1; s = (char *) ggc_alloc_atomic (name_len + map->new_len); memcpy (s, map->new_prefix, map->new_len); memcpy (s + map->new_len, name, name_len); + free (realname); return s; }
[PATCH 2/2] libcpp: Avoid remapping filenames within directives
Code such as: #include __FILE__ can interact poorly with file-prefix-map options when cross compiling. In general you want to remap filenames for use in target context but the local paths should be used to find include files at compile time. Ignoring filename remapping for directives is one way to avoid such failures. libcpp/ChangeLog: * macro.cc (_cpp_builtin_macro_text): Don't remap filenames within directives Signed-off-by: Richard Purdie --- libcpp/macro.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) I wasn't sure if this should apply to all directives or whether this may need filtering to includes (along the lines of pfile->directive.flags & INCL). That gets a little more complex as the flag isn't available outside of directives.cc and would need a helper function. diff --git a/libcpp/macro.cc b/libcpp/macro.cc index 8ebf360c03c..7d5a0d0fd2e 100644 --- a/libcpp/macro.cc +++ b/libcpp/macro.cc @@ -563,7 +563,7 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node, if (!name) abort (); } - if (pfile->cb.remap_filename) + if (pfile->cb.remap_filename && !pfile->state.in_directive) name = pfile->cb.remap_filename (name); len = strlen (name); buf = _cpp_unaligned_alloc (pfile, len * 2 + 3);
Re: add more C++ name hints
On Fri, Aug 05, 2022 at 09:35:33PM +0200, Ulrich Drepper via Gcc-patches wrote: > How about adding a few more names from the std namespace to get appropriate > hints? This patch compiles and the appropriate messages are printed. Is > there a problem with just adding more or even at some point all the symbols > of the standard library? IMHO if the table in get_std_name_hint gets too big, we should probably see if we can replace the O(n) search with something more efficient. > gcc/ChangeLog: > > * cp/name-lookup.cc (get_std_name_hint): Add more symbols from the > , , and headers. Please drop the "cp/" prefix here. Patch LGTM, but I can't approve. Thanks and sorry for the delay, Marek
Re: [Patch] OpenMP: Fix var replacement with 'simd' and linear-step vars [PR106548]
On Tue, Aug 16, 2022 at 05:28:40PM +0200, Tobias Burnus wrote: > The testcase is just a copy of linear-1 with 'omp ... for' replaced by 'omp > ... for simd', > matching what the PR report referred to. > > The problem occurs for 'omp ... for simd linear( i : step)' when 'step' is a > variable > when a omp_fn... is generated - as in this case, the original variable is > used (in the > reduced example of the PR, the PARM_DECL of 'f') instead of the replacement. > > OK for mainline? Thoughts on backporting (and for which versions)? > > Tobias > - > Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 > München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas > Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht > München, HRB 106955 > OpenMP: Fix var replacement with 'simd' and linear-step vars [PR106548] > > gcc/ChangeLog: > > PR middle-end/106548 > * omp-low.cc (lower_rec_input_clauses): Use build_outer_var_ref > for 'simd' linear-step values that are variable. > > libgomp/ChangeLog: > > PR middle-end/106548 > * testsuite/libgomp.c/linear-2.c: New test. > > diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc > index 3c4b8593c8b..d6d6ff372a1 100644 > --- a/gcc/omp-low.cc > +++ b/gcc/omp-low.cc > @@ -6188,6 +6188,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq > *ilist, gimple_seq *dlist, > && gimple_omp_for_combined_into_p (ctx->stmt)) > { > tree t = OMP_CLAUSE_LINEAR_STEP (c); > + if (VAR_P (t) > + || TREE_CODE (t) == PARM_DECL > + || TREE_CODE (t) == RESULT_DECL) > + t = build_outer_var_ref (t, ctx); I think this should be just if (DECL_P (t)) t = build_outer_var_ref (t, ctx); Ok with that change. With backports I'd wait a week or two (12.2 is frozen anyway), can be applied to any open release branches you are willing to backport it to. > --- /dev/null > +++ b/libgomp/testsuite/libgomp.c/linear-2.c > @@ -0,0 +1,251 @@ > +/* PR middle-end/106548. 
*/ Usually simd related runtime tests in libgomp use something like /* { dg-do run } */ /* { dg-additional-options "-msse2" { target sse2_runtime } } */ /* { dg-additional-options "-mavx" { target avx_runtime } } */ to test some actual vectorization if possible. Jakub
Extend fold_vec_perm to fold VEC_PERM_EXPR in VLA manner
Hi, The attached prototype patch extends fold_vec_perm to fold VEC_PERM_EXPR in VLA manner, and currently handles the following cases: (a) fixed len arg0, arg1 and fixed len sel. (b) fixed len arg0, arg1 and vla sel (c) vla arg0, arg1 and vla sel with arg0, arg1 being VECTOR_CST. It seems to work for the VLA tests written in test_vec_perm_vla_folding (), and am working thru the fallout observed in regression testing. Does the approach taken in the patch look in the right direction ? I am not sure if I have got the conversion from "sel_index" to index of either arg0, or arg1 entirely correct. I would be grateful for suggestions on the patch. Thanks, Prathamesh diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 4f4ec81c8d4..5e12260211e 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -85,6 +85,9 @@ along with GCC; see the file COPYING3. If not see #include "vec-perm-indices.h" #include "asan.h" #include "gimple-range.h" +#include "tree-pretty-print.h" +#include "gimple-pretty-print.h" +#include "print-tree.h" /* Nonzero if we are folding constants inside an initializer or a C++ manifestly-constant-evaluated context; zero otherwise. @@ -10496,40 +10499,6 @@ fold_mult_zconjz (location_t loc, tree type, tree expr) build_zero_cst (itype)); } - -/* Helper function for fold_vec_perm. Store elements of VECTOR_CST or - CONSTRUCTOR ARG into array ELTS, which has NELTS elements, and return - true if successful. 
*/ - -static bool -vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) -{ - unsigned HOST_WIDE_INT i, nunits; - - if (TREE_CODE (arg) == VECTOR_CST - && VECTOR_CST_NELTS (arg).is_constant (&nunits)) -{ - for (i = 0; i < nunits; ++i) - elts[i] = VECTOR_CST_ELT (arg, i); -} - else if (TREE_CODE (arg) == CONSTRUCTOR) -{ - constructor_elt *elt; - - FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (arg), i, elt) - if (i >= nelts || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE) - return false; - else - elts[i] = elt->value; -} - else -return false; - for (; i < nelts; i++) -elts[i] - = fold_convert (TREE_TYPE (TREE_TYPE (arg)), integer_zero_node); - return true; -} - /* Attempt to fold vector permutation of ARG0 and ARG1 vectors using SEL selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful, NULL_TREE otherwise. */ @@ -10537,45 +10506,149 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) tree fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel) { - unsigned int i; - unsigned HOST_WIDE_INT nelts; - bool need_ctor = false; + poly_uint64 arg0_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + poly_uint64 arg1_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)); + + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), + sel.length ())); + gcc_assert (known_eq (arg0_len, arg1_len)); - if (!sel.length ().is_constant (&nelts)) -return NULL_TREE; - gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)), nelts) - && known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg1)), nelts)); if (TREE_TYPE (TREE_TYPE (arg0)) != TREE_TYPE (type) || TREE_TYPE (TREE_TYPE (arg1)) != TREE_TYPE (type)) return NULL_TREE; - tree *in_elts = XALLOCAVEC (tree, nelts * 2); - if (!vec_cst_ctor_to_array (arg0, nelts, in_elts) - || !vec_cst_ctor_to_array (arg1, nelts, in_elts + nelts)) + unsigned input_npatterns = 0; + unsigned out_npatterns = sel.encoding ().npatterns (); + unsigned 
out_nelts_per_pattern = sel.encoding ().nelts_per_pattern (); + + /* FIXME: How to reshape fixed length vector_cst, so that + npatterns == vector.length () and nelts_per_pattern == 1 ? + It seems the vector is canonicalized to minimize npatterns. */ + + if (arg0_len.is_constant ()) +{ + /* If arg0, arg1 are fixed width vectors, and sel is VLA, + ensure that it is a dup sequence and has same period +as input vector. */ + + if (!sel.length ().is_constant () + && (sel.encoding ().nelts_per_pattern () > 2 + || !known_eq (arg0_len, sel.encoding ().npatterns ( + return NULL_TREE; + + input_npatterns = arg0_len.to_constant (); + + if (sel.length ().is_constant ()) + { + out_npatterns = sel.length ().to_constant (); + out_nelts_per_pattern = 1; + } +} + else if (TREE_CODE (arg0) == VECTOR_CST + && TREE_CODE (arg1) == VECTOR_CST) +{ + unsigned npatterns = VECTOR_CST_NPATTERNS (arg0); + unsigned input_nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (arg0); + + /* If arg0, arg1 are VLA, then ensure that, +(a) sel also has same length as input vectors. +(b) arg0 and arg1 have same encoding. +(c) sel has same number of patterns as input vectors. +(d) if se
Re: [PATCH] Teach vectorizer to deal with bitfield accesses (was: [RFC] Teach vectorizer to deal with bitfield reads)
On Tue, 16 Aug 2022, Andre Vieira (lists) wrote: > Hi, > > New version of the patch attached, but haven't recreated the ChangeLog yet, > just waiting to see if this is what you had in mind. See also some replies to > your comments in-line below: > > On 09/08/2022 15:34, Richard Biener wrote: > > > @@ -2998,7 +3013,7 @@ ifcvt_split_critical_edges (class loop *loop, bool > > aggressive_if_conv) > > auto_vec critical_edges; > > > > /* Loop is not well formed. */ > > - if (num <= 2 || loop->inner || !single_exit (loop)) > > + if (num <= 2 || loop->inner) > > return false; > > > > body = get_loop_body (loop); > > > > this doesn't appear in the ChangeLog nor is it clear to me why it's > > needed? Likewise > So both these and... > > > > - /* Save BB->aux around loop_version as that uses the same field. */ > > - save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes; > > - void **saved_preds = XALLOCAVEC (void *, save_length); > > - for (unsigned i = 0; i < save_length; i++) > > -saved_preds[i] = ifc_bbs[i]->aux; > > + void **saved_preds = NULL; > > + if (any_complicated_phi || need_to_predicate) > > +{ > > + /* Save BB->aux around loop_version as that uses the same field. > > */ > > + save_length = loop->inner ? loop->inner->num_nodes : > > loop->num_nodes; > > + saved_preds = XALLOCAVEC (void *, save_length); > > + for (unsigned i = 0; i < save_length; i++) > > + saved_preds[i] = ifc_bbs[i]->aux; > > +} > > > > is that just premature optimization? > > .. these changes are to make sure we can still use the loop versioning code > even for cases where there are bitfields to lower but no ifcvts (i.e. num of > BBs <= 2). > I wasn't sure about the loop-inner condition and the small examples I tried it > seemed to work, that is loop version seems to be able to handle nested loops. > > The single_exit condition is still required for both, because the code to > create the loop versions depends on it. It does look like I missed this in the > ChangeLog... 
> > > + /* BITSTART and BITEND describe the region we can safely load from > > inside the > > + structure. BITPOS is the bit position of the value inside the > > + representative that we will end up loading OFFSET bytes from the > > start > > + of the struct. BEST_MODE is the mode describing the optimal size of > > the > > + representative chunk we load. If this is a write we will store the > > same > > + sized representative back, after we have changed the appropriate > > bits. */ > > + get_bit_range (&bitstart, &bitend, comp_ref, &bitpos, &offset); > > > > I think you need to give up when get_bit_range sets bitstart = bitend to > > zero > > > > + if (get_best_mode (bitsize, bitpos.to_constant (), bitstart, bitend, > > +TYPE_ALIGN (TREE_TYPE (struct_expr)), > > +INT_MAX, false, &best_mode)) > > > > + tree rep_decl = build_decl (UNKNOWN_LOCATION, FIELD_DECL, > > + NULL_TREE, rep_type); > > + /* Load from the start of 'offset + bitpos % alignment'. */ > > + uint64_t extra_offset = bitpos.to_constant (); > > > > you shouldn't build a new FIELD_DECL. Either you use > > DECL_BIT_FIELD_REPRESENTATIVE directly or you use a > > BIT_FIELD_REF accessing the "representative". > > DECL_BIT_FIELD_REPRESENTATIVE exists so it can maintain > > a variable field offset, you can also subset that with an > > intermediate BIT_FIELD_REF if DECL_BIT_FIELD_REPRESENTATIVE is > > too large for your taste. > > > > I'm not sure all the offset calculation you do is correct, but > > since you shouldn't invent a new FIELD_DECL it probably needs > > to change anyway ... > I can use the DECL_BIT_FIELD_REPRESENTATIVE, but I'll still need some offset > calculation/extraction. It's easier to example with an example: > > In vect-bitfield-read-3.c the struct: > typedef struct { > int c; > int b; > bool a : 1; > } struct_t; > > and field access 'vect_false[i].a' or 'vect_true[i].a' will lead to a > DECL_BIT_FIELD_REPRESENTATIVE of TYPE_SIZE of 8 (and TYPE_PRECISION is also 8 > as expected). 
However, the DECL_FIELD_OFFSET of either the original field > decl, the actual bitfield member, or the DECL_BIT_FIELD_REPRESENTATIVE is 0 > and the DECL_FIELD_BIT_OFFSET is 64. These will lead to the correct load: > _1 = vect_false[i].D; > > D here being the representative is an 8-bit load from vect_false[i] + 64bits. > So all good there. However, when we construct BIT_FIELD_REF we can't simply > use DECL_FIELD_BIT_OFFSET (field_decl) as the BIT_FIELD_REF's bitpos. During > `verify_gimple` it checks that bitpos + bitsize < TYPE_SIZE (TREE_TYPE (load)) > where BIT_FIELD_REF (load, bitsize, bitpos). Yes, of course. What you need to do is subtract DECL_FIELD_BIT_OFFSET of the representative from DECL_FIELD_BIT_OFFSET of the original bitfield access - that's the offset within the representative (by construction both fields
Re: [PATCH] PR106342 - IBM zSystems: Provide vsel for all vector modes
On Thu, 2022-08-11 at 07:45 +0200, Andreas Krebbel wrote: > On 8/10/22 13:42, Ilya Leoshkevich wrote: > > On Wed, 2022-08-03 at 12:20 +0200, Ilya Leoshkevich wrote: > > > Bootstrapped and regtested on s390x-redhat-linux. Ok for master? > > > > > > > > > > > > dg.exp=pr104612.c fails with an ICE on s390x, because > > > copysignv2sf3 > > > produces an insn that vsel is supposed to recognize, but > > > can't, > > > because it's not defined for V2SF. Fix by defining it for all > > > vector > > > modes supported by copysign3. > > > > > > gcc/ChangeLog: > > > > > > * config/s390/vector.md (V_HW_FT): New iterator. > > > * config/s390/vx-builtins.md (vsel): Use V instead > > > of > > > V_HW. > > > --- > > > gcc/config/s390/vector.md | 6 ++ > > > gcc/config/s390/vx-builtins.md | 12 ++-- > > > 2 files changed, 12 insertions(+), 6 deletions(-) > > > > Jakub pointed out that this is broken in gcc-12 as well. > > The patch applies cleanly, and I started a bootstrap/regtest. > > Ok for gcc-12? > > Yes. Thanks! > > Andreas Hi, I've committed this today without realizing that gcc-12 branch is closed. Sorry! Please let me know if I should revert this. Best regards, Ilya
Re: [Patch] Fortran: OpenMP fix declare simd inside modules and absent linear step [PR106566]
On Tue, Aug 16, 2022 at 04:45:07PM +0200, Tobias Burnus wrote: > Fixed subject line: "absent linear" should be "absent linear step" in the > subject line; > i.e. with "step" added: "Fortran: OpenMP fix declare simd inside modules and > absent linear step [PR106566]" > > I have also decided to move the 'step = 1' to openmp.cc, which also set it > before with > the old pre-OpenMP 5.2 syntax. > > I also added a pre-OpenMP-5.2-syntax example. > > * * * > > For GCC 12 (and GCC 11), only the '%s' fix and the third, now added example > apply; > for the 5.1 syntax, 'step' was already set. > > OK? And thoughts regarding the backports (none? Only 12? Or 11+12?)? > > Tobias > - > Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 > München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas > Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht > München, HRB 106955 > Fortran: OpenMP fix declare simd inside modules and absent linear step > [PR106566] > > gcc/fortran/ChangeLog: > > PR fortran/106566 > * openmp.cc (gfc_match_omp_clauses): Fix setting linear-step value > to 1 when not specified. > (gfc_match_omp_declare_simd): Accept module procedures. > > gcc/testsuite/ChangeLog: > > PR fortran/106566 > * gfortran.dg/gomp/declare-simd-4.f90: New test. > * gfortran.dg/gomp/declare-simd-5.f90: New test. > * gfortran.dg/gomp/declare-simd-6.f90: New test. 
> > gcc/fortran/openmp.cc | 10 +++-- > gcc/testsuite/gfortran.dg/gomp/declare-simd-4.f90 | 42 +++ > gcc/testsuite/gfortran.dg/gomp/declare-simd-5.f90 | 49 > +++ > gcc/testsuite/gfortran.dg/gomp/declare-simd-6.f90 | 42 +++ > 4 files changed, 140 insertions(+), 3 deletions(-) > > diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc > index a7eb6c3e8f4..594907714ff 100644 > --- a/gcc/fortran/openmp.cc > +++ b/gcc/fortran/openmp.cc > @@ -2480,7 +2480,7 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const > omp_mask mask, > goto error; > } > } > - else > + if (step == NULL) > { > step = gfc_get_constant_expr (BT_INTEGER, > gfc_default_integer_kind, Ah, didn't know that gfc_match ("%e ) ", &step) will free and clear step if it successfully matched it first and then doesn't match ) after it. So ok. > @@ -4213,9 +4213,13 @@ gfc_match_omp_declare_simd (void) >gfc_omp_declare_simd *ods; >bool needs_space = false; > > - switch (gfc_match (" ( %s ) ", &proc_name)) > + switch (gfc_match (" ( ")) > { > -case MATCH_YES: break; > +case MATCH_YES: > + if (gfc_match_symbol (&proc_name, /* host assoc = */ true) != MATCH_YES > + || gfc_match (" ) ") != MATCH_YES) > + return MATCH_ERROR; > + break; > case MATCH_NO: proc_name = NULL; needs_space = true; break; > case MATCH_ERROR: return MATCH_ERROR; > } LGTM. > diff --git a/gcc/testsuite/gfortran.dg/gomp/declare-simd-4.f90 > b/gcc/testsuite/gfortran.dg/gomp/declare-simd-4.f90 > new file mode 100644 > index 000..44132525963 > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/gomp/declare-simd-4.f90 > @@ -0,0 +1,42 @@ > +! { dg-do compile } > +! { dg-additional-options "-fdump-tree-gimple" } > +! > +! PR fortran/106566 > +! > +! { dg-final { scan-tree-dump-times "__attribute__\\(\\(omp declare simd > \\(linear\\(0:ref,step\\(4\\)\\) simdlen\\(8\\)\\)\\)\\)" 2 "gimple" } } > +! 
{ dg-final { scan-tree-dump-times "__attribute__\\(\\(omp declare simd > \\(linear\\(0:ref,step\\(8\\)\\) simdlen\\(8\\)\\)\\)\\)" 2 "gimple" } } > + > +subroutine add_one2(p) > + implicit none > + !$omp declare simd(add_one2) linear(p: ref) simdlen(8) > + integer :: p Wonder if it wouldn't be better to use integer(kind=4) explicitly when you try to match the size of that multiplied by 1 or 2 in dg-final, as say with -fdefault-integer-8 this will fail miserably otherwise. Ditto in other spots in this as well as other tests. Ok with/without that change. As for backports, I'd wait some time with just trunk and then backport wherever you are willing to test it. Jakub
Re: [patch] libgomp/splay-tree.h: Fix splay_tree_prefix handling
On Fri, Jul 22, 2022 at 11:47:42AM +0200, Tobias Burnus wrote: > As is, it is only a cleanup/consistency patch. > > However, I did run into this issue when working on the reverse-offload > implementation (to handle reverse lookups of vars and functions). > > OK for mainline? > > Tobias > - > Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 > München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas > Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht > München, HRB 106955 > libgomp/splay-tree.h: Fix splay_tree_prefix handling > > When splay_tree_prefix is defined, the .h file > defines splay_* macros to add the prefix. However, > before those were only unset when additionally > splay_tree_c was defined. > Additionally, for consistency undefine splay_tree_c > also when no splay_tree_prefix is defined - there > is no interdependence either. > > libgomp/ChangeLog: > > * splay-tree.h: Fix splay_* macro unsetting if > splay_tree_prefix is defined. > > libgomp/splay-tree.h | 30 +++--- > 1 file changed, 15 insertions(+), 15 deletions(-) Ok. Jakub
Re: [Patch] Fortran: OpenMP fix declare simd inside modules and absent linear step [PR106566]
On 17.08.22 15:09, Jakub Jelinek wrote: On Tue, Aug 16, 2022 at 04:45:07PM +0200, Tobias Burnus wrote: Fortran: OpenMP fix declare simd inside modules and absent linear step [PR106566] ... LGTM. +! { dg-final { scan-tree-dump-times "__attribute__\\(\\(omp declare simd \\(linear\\(0:ref,step\\(4\\)\\) simdlen\\(8\\)\\)\\)\\)" 2 "gimple" } } ... + integer :: p Wonder if it wouldn't be better to use integer(kind=4) explicitly when you try to match the size of that multiplied by 1 or 2 in dg-final, as say with -fdefault-integer-8 this will fail miserably otherwise. Ditto in other spots in this as well as other tests. I assume(d) that no one would compile with -fdefault-integer-8. But possibly it should be changed to permit testing with that flag, given that API routines should be able to handle different default-kind integers. On the other hand, API routines implies 'omp_lib', which is only available in libgomp/testsuite/. Ok with/without that change. Thanks for the review. Tobias - Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
Re: [PATCH] Fix bogus -Wstringop-overflow warning in Ada
> Hmm, can we instead do > > if (!integer_zerop (lowbnd) && tree_fits_shwi_p (lowbnd)) >{ > const offset_int lb = offset_int::from (lowbnd, SIGNED); > ... > > ? Apparently not: In file included from /home/eric/cvs/gcc/gcc/coretypes.h:460, from /home/eric/cvs/gcc/gcc/pointer-query.cc:23: /home/eric/cvs/gcc/gcc/wide-int.h: In instantiation of 'wide_int_ref_storage::wide_int_ref_storage(const T&) [with T = tree_node*; bool SE = false; bool HDP = true]': /home/eric/cvs/gcc/gcc/wide-int.h:782:15: required from 'generic_wide_int::generic_wide_int(const T&) [with T = tree_node*; storage = wide_int_ref_storage]' /home/eric/cvs/gcc/gcc/pointer-query.cc:1803:46: required from here /home/eric/cvs/gcc/gcc/wide-int.h:1024:48: error: incomplete type 'wi::int_traits' used in nested name specifier 1024 | : storage_ref (wi::int_traits ::decompose (scratch, | ~~^ 1025 | wi::get_precision (x), x)) | ~ And: const offset_int lb = wi::to_offset (lowbnd); compiles but does *not* fix the problem since it does a zero-extension. [Either extension is fine, as long as it's the same in get_offset_range]. > In particular interpreting the unsigned lowbnd as SIGNED when > not wlb.get_precision () < TYPE_PRECISION (sizetype), offset_int > should handle all positive and negative byte offsets since it can > also represent them measured in bits. Unfortunately the > wide_int classes do not provide the maximum precision they can > handle. That said, the check, if any, should guard the whole > orng adjustment, no? (in fact I wonder why we just ignore lowbnd > if it doesn't fit or is variable...) Yes, tree_fits_uhwi_p (or tree_fits_shwi_p) is bogus here, but the test against INTEGER_CST is used everywhere else and should be good enough. -- Eric Botcazou
Re: [Patch] OpenMP requires: Fix diagnostic filename corner case
On Fri, Jul 22, 2022 at 12:38:31PM +0200, Tobias Burnus wrote: > OpenMP requires: Fix diagnostic filename corner case > > The issue occurs when there is, e.g., main._omp_fn.0 in two files with > different OpenMP requires clauses. The function entries in the offload > table end up having the same decl tree and, hence, the diagnostic showed > the same filename for both. Solution: Use the .o filename in this case. > > Note that the issue does not occur with same-named 'static' functions and > without the fatal error from the requires diagnostic, there would be > later a linker error due to having two 'main'. > > gcc/ > * lto-cgraph.cc (input_offload_tables): Improve requires diagnostic > when filenames come out identically. > > diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc > index 062677a32eb..350195d86db 100644 > --- a/gcc/lto-cgraph.cc > +++ b/gcc/lto-cgraph.cc > @@ -1893,6 +1893,11 @@ input_offload_tables (bool do_force_output) > if (tmp_decl != NULL_TREE) > fn2 = IDENTIFIER_POINTER (DECL_NAME (tmp_decl)); > } > + if (fn1 == fn2) > + { > + fn1 = requires_fn; > + fn2 = file_data->file_name; > + } Ugly but ok. Jakub
[PATCH] Add further FOR_EACH_ macros
For my current use case only some FOR_EACH_MODE macros were missing. Though I thought I will give it a try and grep'ed through the source code and added further ones. I didn't manually check all of them but so far it looks good to me. Ok for mainline? contrib/ChangeLog: * clang-format: Add further FOR_EACH_ macros. --- contrib/clang-format | 63 1 file changed, 63 insertions(+) diff --git a/contrib/clang-format b/contrib/clang-format index ceb5c1d524f..57cec1e6947 100644 --- a/contrib/clang-format +++ b/contrib/clang-format @@ -63,17 +63,33 @@ ForEachMacros: [ 'FOR_BB_INSNS_SAFE', 'FOR_BODY', 'FOR_COND', +'FOR_EACH_2XWIDER_MODE', +'FOR_EACH_ACTUAL_CHILD', 'FOR_EACH_AGGR_INIT_EXPR_ARG', 'FOR_EACH_ALIAS', 'FOR_EACH_ALLOCNO', +'FOR_EACH_ALLOCNO_CONFLICT', +'FOR_EACH_ALLOCNO_IN_ALLOCNO_SET', 'FOR_EACH_ALLOCNO_OBJECT', 'FOR_EACH_ARTIFICIAL_DEF', 'FOR_EACH_ARTIFICIAL_USE', +'FOR_EACH_BB', 'FOR_EACH_BB_FN', +'FOR_EACH_BB_IN_BITMAP', +'FOR_EACH_BB_IN_BITMAP_REV', +'FOR_EACH_BB_IN_REGION', +'FOR_EACH_BB_IN_SBITMAP', +'FOR_EACH_BB_REVERSE', 'FOR_EACH_BB_REVERSE_FN', +'FOR_EACH_BB_REVERSE_IN_REGION', 'FOR_EACH_BIT_IN_MINMAX_SET', +'FOR_EACH_BSI_IN_REVERSE', 'FOR_EACH_CALL_EXPR_ARG', +'FOR_EACH_CHILD', 'FOR_EACH_CLONE', +'FOR_EACH_CODE_MAPPING', +'FOR_EACH_COND_FN_PAIR', +'FOR_EACH_CONFLICT', 'FOR_EACH_CONST_CALL_EXPR_ARG', 'FOR_EACH_CONSTRUCTOR_ELT', 'FOR_EACH_CONSTRUCTOR_VALUE', @@ -83,16 +99,27 @@ ForEachMacros: [ 'FOR_EACH_DEFINED_SYMBOL', 'FOR_EACH_DEFINED_VARIABLE', 'FOR_EACH_DEP', +'FOR_EACH_DEP_LINK', 'FOR_EACH_EDGE', +'FOR_EACH_ELEMENT', +'FOR_EACH_ELIM_GRAPH_PRED', +'FOR_EACH_ELIM_GRAPH_SUCC', 'FOR_EACH_EXPR', 'FOR_EACH_EXPR_1', +'FOR_EACH_EXPR_ID_IN_SET', +'FOR_EACH_FLOAT_OPERATOR', +'FOR_EACH_FP_TYPE', 'FOR_EACH_FUNCTION', 'FOREACH_FUNCTION_ARGS', 'FOREACH_FUNCTION_ARGS_PTR', 'FOR_EACH_FUNCTION_WITH_GIMPLE_BODY', +'FOR_EACH_GORI_EXPORT_NAME', +'FOR_EACH_GORI_IMPORT_NAME', 'FOR_EACH_HASH_TABLE_ELEMENT', +'FOR_EACH_HTAB_ELEMENT', 'FOR_EACH_IMM_USE_FAST', 
'FOR_EACH_IMM_USE_ON_STMT', +'FOR_EACH_IMM_USE_SAFE', 'FOR_EACH_IMM_USE_STMT', 'FOR_EACH_INSN', 'FOR_EACH_INSN_1', @@ -103,32 +130,68 @@ ForEachMacros: [ 'FOR_EACH_INSN_INFO_MW', 'FOR_EACH_INSN_INFO_USE', 'FOR_EACH_INSN_USE', +'FOR_EACH_INT_OPERATOR', +'FOR_EACH_INT_TYPE', +'FOR_EACH_INV', +'FOR_EACH_LOAD_BROADCAST', +'FOR_EACH_LOAD_BROADCAST_IMM', 'FOR_EACH_LOCAL_DECL', +'FOR_EACH_LOG_LINK', 'FOR_EACH_LOOP', +'FOR_EACH_LOOP_BREAK', 'FOR_EACH_LOOP_FN', +'FOR_EACH_MODE', +'FOR_EACH_MODE_FROM', +'FOR_EACH_MODE_IN_CLASS', +'FOR_EACH_MODE_UNTIL', +'FOR_EACH_NEST_INFO', 'FOR_EACH_OBJECT', 'FOR_EACH_OBJECT_CONFLICT', +'FOR_EACH_OP', +'FOR_EACH_PARTITION_PAIR', +'FOR_EACH_PHI', 'FOR_EACH_PHI_ARG', 'FOR_EACH_PHI_OR_STMT_DEF', 'FOR_EACH_PHI_OR_STMT_USE', +'FOR_EACH_POP', 'FOR_EACH_PREF', +'FOR_EACH_REF', +'FOR_EACH_REFERENCED_VAR', +'FOR_EACH_REFERENCED_VAR_IN_BITMAP', +'FOR_EACH_REFERENCED_VAR_SAFE', +'FOR_EACH_REF_REV', +'FOR_EACH_REGNO', 'FOR_EACH_SCALAR', +'FOR_EACH_SIGNED_TYPE', +'FOR_EACH_SSA', 'FOR_EACH_SSA_DEF_OPERAND', +'FOR_EACH_SSA_MAYDEF_OPERAND', +'FOR_EACH_SSA_MUST_AND_MAY_DEF_OPERAND', +'FOR_EACH_SSA_MUSTDEF_OPERAND', +'FOR_EACH_SSA_NAME', 'FOR_EACH_SSA_TREE_OPERAND', 'FOR_EACH_SSA_USE_OPERAND', +'FOR_EACH_SSA_VDEF_OPERAND', 'FOR_EACH_STATIC_INITIALIZER', +'FOR_EACH_STATIC_VARIABLE', +'FOR_EACH_STMT_IN_REVERSE', +'FOR_EACH_SUBINSN', 'FOR_EACH_SUBRTX', 'FOR_EACH_SUBRTX_PTR', 'FOR_EACH_SUBRTX_VAR', 'FOR_EACH_SUCC', 'FOR_EACH_SUCC_1', 'FOR_EACH_SYMBOL', +'FOR_EACH_TYPE', +'FOR_EACH_UNSIGNED_TYPE', +'FOR_EACH_VALUE_ID_IN_SET', 'FOR_EACH_VARIABLE', 'FOR_EACH_VEC_ELT', 'FOR_EACH_VEC_ELT_FROM', 'FOR_EACH_VEC_ELT_REVERSE', 'FOR_EACH_VEC_SAFE_ELT', 'FOR_EACH_VEC_SAFE_ELT_REVERSE', +'FOR_EACH_WIDER_MODE', 'FOR_EXPR', 'FOR_INIT_STMT', 'FOR_SCOPE' -- 2.35.3
Re: [PATCH 2/2] libcpp: Avoid remapping filenames within directives
On Wed, 2022-08-17 at 13:15 +0100, Richard Purdie via Gcc-patches wrote: > Code such as: > > can interact poorly with file-prefix-map options when cross compiling. In > general you're after to remap filenames for use in target context but the > local paths should be used to find include files at compile time. Ingoring > filename remapping for directives is one way to avoid such failures. > > libcpp/ChangeLog: > > * macro.cc (_cpp_builtin_macro_text): Don't remap filenames within > directives > > Signed-off-by: Richard Purdie > --- > libcpp/macro.cc | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > I wasn't sure if this should apply to all directives or whether this may need > filtering to includes (along the lines of pfile->directive.flags & INCL). That > gets a little more complex as the flag isn't available outside of > directives.cc > and would need a helper function. The above should have said Code such as: #include __FILE__ can interact poorly with... Cheers, Richard
Re: [PATCH] Support threading of just the exit edge
On 8/17/22 03:42, Richard Biener wrote: On Tue, 16 Aug 2022, Andrew MacLeod wrote: On 8/16/22 05:18, Richard Biener wrote: On Mon, 15 Aug 2022, Aldy Hernandez wrote: On Mon, Aug 15, 2022 at 9:24 PM Andrew MacLeod wrote: heh. or just + int_range<2> r; + if (!fold_range (r, const_cast (cond_stmt)) + || !r.singleton_p (&val)) if you do not provide a range_query to any of the fold_using_range code, it defaults to: fur_source::fur_source (range_query *q) { if (q) m_query = q; else if (cfun) m_query = get_range_query (cfun); else m_query = get_global_range_query (); m_gori = NULL; } Sweet. Even better! So when I do the following incremental change ontop of the posted patch then I see that the path query is able to simplify more "single BB paths" than the global range folding. diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc index 669098e4ec3..777e778037f 100644 --- a/gcc/tree-ssa-threadbackward.cc +++ b/gcc/tree-ssa-threadbackward.cc @@ -314,6 +314,12 @@ back_threader::find_taken_edge_cond (const vec &path, { int_range_max r; + int_range<2> rf; + if (path.length () == 1) +{ + fold_range (rf, cond); +} + m_solver->compute_ranges (path, m_imports); m_solver->range_of_stmt (r, cond); @@ -325,6 +331,8 @@ back_threader::find_taken_edge_cond (const vec &path, if (r == true_range || r == false_range) { + if (path.length () == 1) + gcc_assert (r == rf); edge e_true, e_false; basic_block bb = gimple_bb (cond); extract_true_false_edges_from_block (bb, &e_true, &e_false); Even doing the following (not sure what's the difference and in particular expense over the path range query) results in missed simplifications (checking my set of cc1files). 
diff --git a/gcc/tree-ssa-threadbackward.cc b/gcc/tree-ssa-threadbackward.cc index 669098e4ec3..1d43a179d08 100644 --- a/gcc/tree-ssa-threadbackward.cc +++ b/gcc/tree-ssa-threadbackward.cc @@ -99,6 +99,7 @@ private: back_threader_registry m_registry; back_threader_profitability m_profit; + gimple_ranger *m_ranger; path_range_query *m_solver; // Current path being analyzed. @@ -146,12 +147,14 @@ back_threader::back_threader (function *fun, unsigned flags, bool first) // The path solver needs EDGE_DFS_BACK in resolving mode. if (flags & BT_RESOLVE) mark_dfs_back_edges (); - m_solver = new path_range_query (flags & BT_RESOLVE); + m_ranger = new gimple_ranger; + m_solver = new path_range_query (flags & BT_RESOLVE, m_ranger); } back_threader::~back_threader () { delete m_solver; + delete m_ranger; loop_optimizer_finalize (); } @@ -314,6 +317,12 @@ back_threader::find_taken_edge_cond (const vec &path, { int_range_max r; + int_range<2> rf; + if (path.length () == 1) +{ + fold_range (rf, cond, m_ranger); +} + m_solver->compute_ranges (path, m_imports); m_solver->range_of_stmt (r, cond); @@ -325,6 +334,8 @@ back_threader::find_taken_edge_cond (const vec &path, if (r == true_range || r == false_range) { + if (path.length () == 1) + gcc_assert (r == rf); edge e_true, e_false; basic_block bb = gimple_bb (cond); extract_true_false_edges_from_block (bb, &e_true, &e_false); one example is [local count: 14414059]: _128 = node_177(D)->typed.type; pretmp_413 = MEM[(const union tree_node *)_128].base.code; _431 = pretmp_413 + 65519; if (_128 == 0B) goto ; [18.09%] else goto ; [81.91%] where m_imports for the path is just _128 and the range computed is false while the ranger query returns VARYING. But path_range_query::range_defined_in_block does if (bb && POINTER_TYPE_P (TREE_TYPE (name))) m_ranger->m_cache.m_exit.maybe_adjust_range (r, name, bb); This is the coarse grained "side effect applies somewhere in the block" mechanism. 
There is no understanding of where in the block it happens. which adjusts the range to ~[0, 0], probably because of the dereference in the following stmt. Why does fold_range not do this when folding the exit test? Is there a way to make it do so? It looks like the only routine using this in gimple-range.cc is range_on_edge and there it's used for e->src after calling range_on_exit for e->src (why's it not done in range_on_exit?). A testcase for this is Fold_range doesn't do this because it is simply another statement. It makes no attempt to understand the context in which you are folding something. You could be folding that stmt from a different location (i.e. recomputing). If your context is that you are looking for the range after the last statement has been executed, then one needs to check to see if there are any side effects. Hmm, but I'm asking it to fold a specific statement - how can that ever be folded "from a different context"? In fact it traces as " at stmt "
[GCC13][Patch][V3][0/2]Add a new option -fstrict-flex-array[=n] and attribute strict_flex_array(n) and use it in PR101836
Hi, This is the 3rd version of the patch set. Compared to the 2nd version, the following are the major changes: 1. change the name of the option from -fstrict-flex-array to -fstrict-flex-arrays (per Kees' suggestion, this will be consistent with LLVM's option); 2. -std=c89 and ISO C++ will disable all -fstrict-flex-arrays; -std=gnu89 and C++ with GNU extension will disable -fstrict-flex-arrays=3 the attribute has the same impact. Issue warnings when the feature is disabled; 3. add testing cases for these new changes; 4. update documentation; 5. delete the common utility routine flexible_array_member_p, zero_length_array_p, one_element_array_p from tree.[ch], and move them to c/c-decl.cc since they are C/C++ specific concepts, should not be exposed in middle end. 6. not update component_ref_size routine in tree.cc, keep the original one; 7. handle the new IR bit in LTO streamer in/out per Richard's request; 8. handle the new IR bit in C++ modules per Richard's request with Nathan's help; 9. Print the new IR bit; 10. update array_at_struct_end_p to check the new IR bit DECL_NOT_FLEXARRAY to still keep array_at_struct_end_p as the common utility in middle-end; I have bootstrapped and regression tested on both aarch64 and x86, no issues. Let me know if you have any comments on the patches. thanks. Qing
[[GCC13][Patch][V3] 2/2] Use array_at_struct_end_p in __builtin_object_size [PR101836]
Use array_at_struct_end_p to determine whether the trailing array of a structure is flexible array member in __builtin_object_size. gcc/ChangeLog: PR tree-optimization/101836 * tree-object-size.cc (addr_object_size): Use array_at_struct_end_p to determine a flexible array member reference. gcc/testsuite/ChangeLog: PR tree-optimization/101836 * gcc.dg/pr101836.c: New test. * gcc.dg/pr101836_1.c: New test. * gcc.dg/pr101836_2.c: New test. * gcc.dg/pr101836_3.c: New test. * gcc.dg/pr101836_4.c: New test. * gcc.dg/pr101836_5.c: New test. * gcc.dg/strict-flex-array-5.c: New test. * gcc.dg/strict-flex-array-6.c: New test. --- gcc/testsuite/gcc.dg/pr101836.c| 60 ++ gcc/testsuite/gcc.dg/pr101836_1.c | 60 ++ gcc/testsuite/gcc.dg/pr101836_2.c | 60 ++ gcc/testsuite/gcc.dg/pr101836_3.c | 60 ++ gcc/testsuite/gcc.dg/pr101836_4.c | 60 ++ gcc/testsuite/gcc.dg/pr101836_5.c | 60 ++ gcc/testsuite/gcc.dg/strict-flex-array-5.c | 60 ++ gcc/testsuite/gcc.dg/strict-flex-array-6.c | 60 ++ gcc/tree-object-size.cc| 16 +++--- 9 files changed, 487 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr101836.c create mode 100644 gcc/testsuite/gcc.dg/pr101836_1.c create mode 100644 gcc/testsuite/gcc.dg/pr101836_2.c create mode 100644 gcc/testsuite/gcc.dg/pr101836_3.c create mode 100644 gcc/testsuite/gcc.dg/pr101836_4.c create mode 100644 gcc/testsuite/gcc.dg/pr101836_5.c create mode 100644 gcc/testsuite/gcc.dg/strict-flex-array-5.c create mode 100644 gcc/testsuite/gcc.dg/strict-flex-array-6.c diff --git a/gcc/testsuite/gcc.dg/pr101836.c b/gcc/testsuite/gcc.dg/pr101836.c new file mode 100644 index ..efad02cfe899 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101836.c @@ -0,0 +1,60 @@ +/* -fstrict-flex-arrays is aliased with -ftrict-flex-arrays=3, which is the + strictest, only [] is treated as flexible array. 
*/ +/* PR tree-optimization/101836 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fstrict-flex-arrays" } */ + +#include + +#define expect(p, _v) do { \ +size_t v = _v; \ +if (p == v) \ +printf("ok: %s == %zd\n", #p, p); \ +else \ + { \ + printf("WAT: %s == %zd (expected %zd)\n", #p, p, v); \ + __builtin_abort (); \ + } \ +} while (0); + +struct trailing_array_1 { +int a; +int b; +int c[4]; +}; + +struct trailing_array_2 { +int a; +int b; +int c[1]; +}; + +struct trailing_array_3 { +int a; +int b; +int c[0]; +}; +struct trailing_array_4 { +int a; +int b; +int c[]; +}; + +void __attribute__((__noinline__)) stuff( +struct trailing_array_1 *normal, +struct trailing_array_2 *trailing_1, +struct trailing_array_3 *trailing_0, +struct trailing_array_4 *trailing_flex) +{ +expect(__builtin_object_size(normal->c, 1), 16); +expect(__builtin_object_size(trailing_1->c, 1), 4); +expect(__builtin_object_size(trailing_0->c, 1), 0); +expect(__builtin_object_size(trailing_flex->c, 1), -1); +} + +int main(int argc, char *argv[]) +{ +stuff((void *)argv[0], (void *)argv[0], (void *)argv[0], (void *)argv[0]); + +return 0; +} diff --git a/gcc/testsuite/gcc.dg/pr101836_1.c b/gcc/testsuite/gcc.dg/pr101836_1.c new file mode 100644 index ..e2931ce1012e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr101836_1.c @@ -0,0 +1,60 @@ +/* -fstrict-flex-arrays=3 is the strictest, only [] is treated as + flexible array. 
*/ +/* PR tree-optimization/101836 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fstrict-flex-arrays=3" } */ + +#include + +#define expect(p, _v) do { \ +size_t v = _v; \ +if (p == v) \ +printf("ok: %s == %zd\n", #p, p); \ +else \ + { \ + printf("WAT: %s == %zd (expected %zd)\n", #p, p, v); \ + __builtin_abort (); \ + } \ +} while (0); + +struct trailing_array_1 { +int a; +int b; +int c[4]; +}; + +struct trailing_array_2 { +int a; +int b; +int c[1]; +}; + +struct trailing_array_3 { +int a; +int b; +int c[0]; +}; +struct trailing_array_4 { +int a; +int b; +int c[]; +}; + +void __attribute__((__noinline__)) stuff( +struct trailing_array_1 *normal, +struct trailing_array_2 *trailing_1, +struct trailing_array_3 *trailing_0, +struct trailing_array_4 *trailing_flex) +{ +expect(__builtin_object_size(normal->c, 1), 16); +expect(__builtin_object_size(trailing_1->c, 1), 4); +expect(__builtin_object_size(trailing_0->c, 1), 0); +expect(__builtin_object_size(trailing_flex->c, 1), -1); +} + +int main(int argc, char *argv[]) +{ +stuff((void *)argv[0], (void *)argv[0], (void *)argv[0], (void *)argv[0]); + +
[[GCC13][Patch][V3] 1/2] Add a new option -fstrict-flex-array[=n] and new attribute strict_flex_array
Add the following new option -fstrict-flex-array[=n] and a corresponding attribute strict_flex_array to GCC: '-fstrict-flex-array' Treat the trailing array of a structure as a flexible array member in a stricter way. The positive form is equivalent to '-fstrict-flex-array=3', which is the strictest. A trailing array is treated as a flexible array member only when it is declared as a flexible array member per C99 standard onwards. The negative form is equivalent to '-fstrict-flex-array=0', which is the least strict. All trailing arrays of structures are treated as flexible array members. '-fstrict-flex-array=LEVEL' Treat the trailing array of a structure as a flexible array member in a stricter way. The value of LEVEL controls the level of strictness. The possible values of LEVEL are the same as for the 'strict_flex_array' attribute (*note Variable Attributes::). You can control this behavior for a specific trailing array field of a structure by using the variable attribute 'strict_flex_array' attribute (*note Variable Attributes::). This option is only valid when flexible array member is supported in the language. For ISO C before C99 and ISO C++, there is no language support for the flexible array member at all, this option will be invalid and a warning will be issued. When -std=gnu89 is specified or C++ with GNU extension, only zero-length array extension and one-size array are supported, as a result, LEVEL=3 will be invalid and a warning will be issued. 'strict_flex_array (LEVEL)' The 'strict_flex_array' attribute should be attached to the trailing array field of a structure. It specifies the level of strictness of treating the trailing array field of a structure as a flexible array member. LEVEL must be an integer between 0 and 3. LEVEL=0 is the least strict level, all trailing arrays of structures are treated as flexible array members. 
LEVEL=3 is the strictest level, only when the trailing array is declared as a flexible array member per C99 standard onwards ([]), it is treated as a flexible array member. There are two more levels in between 0 and 3, which are provided to support older code that uses GCC zero-length array extension ([0]) or one-size array as flexible array member ([1]): When LEVEL is 1, the trailing array is treated as a flexible array member when it is declared as either [], [0], or [1]; When LEVEL is 2, the trailing array is treated as a flexible array member when it is declared as either [], or [0]. This attribute can be used with or without '-fstrict-flex-array'. When both the attribute and the option are present at the same time, the level of the strictness for the specific trailing array field is determined by the attribute. This attribute is only valid when flexible array member is supported in the language. For ISO C before C99 and ISO C++, no language support for the flexible array member at all, this attribute will be invalid and a warning is issued. When -std=gnu89 is specified or C++ with GNU extension, only zero-length array extension and one-size array are supported, as a result, LEVEL=3 will be invalid and a warning is issued. gcc/c-family/ChangeLog: * c-attribs.cc (handle_strict_flex_arrays_attribute): New function. (c_common_attribute_table): New item for strict_flex_array. * c-opts.cc (c_common_post_options): Handle the combination of -fstrict-flex-arrays and -std specially. * c.opt: (fstrict-flex-array): New option. (fstrict-flex-array=): New option. gcc/c/ChangeLog: * c-decl.cc (flexible_array_member_type_p): New function. (one_element_array_type_p): Likewise. (zero_length_array_type_p): Likewise. (add_flexible_array_elts_to_size): Call new utility routine flexible_array_member_type_p. (is_flexible_array_member_p): New function. (finish_struct): Set the new DECL_NOT_FLEXARRAY flag. 
gcc/cp/ChangeLog: * module.cc (trees_out::core_bools): Stream out new bit decl_not_flexarray. (trees_in::core_bools): Stream in new bit decl_not_flexarray. gcc/ChangeLog: * doc/extend.texi: Document strict_flex_array attribute. * doc/invoke.texi: Document -fstrict-flex-array[=n] option. * print-tree.cc (print_node): Print new bit decl_not_flexarray. * tree-core.h (struct tree_decl_common): New bit field decl_not_flexarray. * tree-streamer-in.cc (unpack_ts_decl_common_value_fields): Stream in new bit decl_not_flexarray. * tree-streamer-out.cc (pack_ts_decl_common_value_fields): Stream out new bit decl_not_flexarray. * tree.cc (array_at_struct_end_p): Update it with the new bit field decl_not_flexarray. * tree.h (DECL
Re: [PATCH] openmp: fix max_vf setting for amdgcn offloading
On Tue, Jul 12, 2022 at 03:16:35PM +0100, Andrew Stubbs wrote: > --- a/gcc/gimple-loop-versioning.cc > +++ b/gcc/gimple-loop-versioning.cc > @@ -555,7 +555,10 @@ loop_versioning::loop_versioning (function *fn) > unvectorizable code, since it is the largest size that can be > handled efficiently by scalar code. omp_max_vf calculates the > maximum number of bytes in a vector, when such a value is relevant > - to loop optimization. */ > + to loop optimization. > + FIXME: this probably needs to use omp_max_simd_vf when in a target > + region, but how to tell? (And MAX_FIXED_MODE_SIZE is large enough that > + it doesn't actually matter.) */ >m_maximum_scale = estimated_poly_value (omp_max_vf ()); >m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); I think this shouldn't have the comment added, the use here actually isn't much OpenMP related, it just uses the function because it implements what it wants. > --- a/gcc/omp-general.cc > +++ b/gcc/omp-general.cc > @@ -994,6 +994,24 @@ omp_max_simt_vf (void) >return 0; > } > > +/* Return maximum SIMD width if offloading may target SIMD hardware. */ > + > +int > +omp_max_simd_vf (void) The name is just confusing. omp_max_vf is about the SIMD maximum VF, so if you really want, rename omp_max_vf to omp_max_simd_vf. For the offloading related stuff, IMHO either we put it into that single omp-general.cc function and add a bool argument to it whether it is or might be in offloading region (ordered_maximum from the returned value and the offloading one, but only after the initialy return 1; conditions and adjust callers), or have this separate function, but then IMHO the if (!optimize) return 0; initial test should be if (!optimize || optimize_debug || !flag_tree_loop_optimize || (!flag_tree_loop_vectorize && OPTION_SET_P (flag_tree_loop_vectorize))) return 1; because without that nothing is vectorized, on host nor on offloading targets, and the function should be called omp_max_target_vf or omp_max_target_simd_vf. 
> +{ > + if (!optimize) > +return 0; > --- a/gcc/omp-low.cc > +++ b/gcc/omp-low.cc > @@ -4646,7 +4646,14 @@ lower_rec_simd_input_clauses (tree new_var, > omp_context *ctx, > { >if (known_eq (sctx->max_vf, 0U)) > { > - sctx->max_vf = sctx->is_simt ? omp_max_simt_vf () : omp_max_vf (); > + /* If we are compiling for multiple devices choose the largest VF. */ > + sctx->max_vf = omp_max_vf (); > + if (omp_maybe_offloaded_ctx (ctx)) > + { > + if (sctx->is_simt) > + sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simt_vf ()); > + sctx->max_vf = ordered_max (sctx->max_vf, omp_max_simd_vf ()); > + } This is wrong. If sctx->is_simt, we know it is the SIMT version. So we want to use omp_max_simt_vf (), not maximum of that and something unrelated. Only if !sctx->is_simt, we want to use maximum of omp_max_vf and if omp_maybe_offloaded_ctx also omp_max_target_vf or how it is called (or pass that as argument to omp_max_vf). We have another omp_max_vf () call though, in omp-expand.cc (omp_adjust_chunk_size). That is for schedule (simd: dynamic, 32) and similar, though unlike the omp-low.cc case (where using a larger VF in that case doesn't hurt, it is used for sizing of the magic arrays that are afterwards resized to the actual size), using too large values in that case is harmful. So dunno if it should take into account offloading vf or not. Maybe if maybe offloading maybe not it should fold to some internal fn call dependent expression that folds to omp_max_vf of the actual target after IPA. Jakub
[PATCH] Reset root oracle from path_oracle::reset_path.
When we cross a backedge in the path solver, we reset the path relations and nuke the root oracle. However, we forget to reset it for the next path. This is causing us to miss threads because subsequent paths will have no root oracle to use. With this patch we get 201 more threads in the threadfull passes in my .ii files and 118 more overall (DOM gets less because threadfull runs before). Normally, I'd recommend this for the GCC 12 branch, but considering how sensitive other passes are to jump threading, and that there is no PR associated with this, perhaps we should leave this out. Up to the release maintainers of course. Andrew, you OK with this for trunk? gcc/ChangeLog: * gimple-range-path.cc (path_range_query::compute_ranges_in_block): Remove set_root_oracle call. (path_range_query::compute_ranges): Pass ranger oracle to reset_path. * value-relation.cc (path_oracle::reset_path): Set root oracle. * value-relation.h (path_oracle::reset_path): Add root oracle argument. --- gcc/gimple-range-path.cc | 8 +--- gcc/value-relation.cc| 6 -- gcc/value-relation.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc index c99d77dd340..73e248b7e0b 100644 --- a/gcc/gimple-range-path.cc +++ b/gcc/gimple-range-path.cc @@ -435,11 +435,10 @@ path_range_query::compute_ranges_in_block (basic_block bb) e->src->index, e->dest->index); path_oracle *p = get_path_oracle (); - p->reset_path (); // ?? Instead of nuking the root oracle altogether, we could // reset the path oracle to search for relations from the top of // the loop with the root oracle. Something for future development. 
- p->set_root_oracle (nullptr); + p->reset_path (); } gori_compute &g = m_ranger->gori (); @@ -615,7 +614,10 @@ path_range_query::compute_ranges (const vec &path, compute_exit_dependencies (m_exit_dependencies, m_path); if (m_resolve) -get_path_oracle ()->reset_path (); +{ + path_oracle *p = get_path_oracle (); + p->reset_path (m_ranger->oracle ()); +} if (DEBUG_SOLVER) { diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc index 3f0957ccdd6..7fc22d30126 100644 --- a/gcc/value-relation.cc +++ b/gcc/value-relation.cc @@ -1513,11 +1513,13 @@ path_oracle::query_relation (basic_block bb, tree ssa1, tree ssa2) return query_relation (bb, equiv_1, equiv_2); } -// Reset any relations registered on this path. +// Reset any relations registered on this path. ORACLE is the root +// oracle to use. void -path_oracle::reset_path () +path_oracle::reset_path (relation_oracle *oracle) { + set_root_oracle (oracle); m_equiv.m_next = NULL; bitmap_clear (m_equiv.m_names); m_relations.m_head = NULL; diff --git a/gcc/value-relation.h b/gcc/value-relation.h index 77e12085eea..64884a8eea2 100644 --- a/gcc/value-relation.h +++ b/gcc/value-relation.h @@ -242,7 +242,7 @@ public: relation_kind query_relation (basic_block, tree, tree) final override; relation_kind query_relation (basic_block, const_bitmap, const_bitmap) final override; - void reset_path (); + void reset_path (relation_oracle *oracle = NULL); void set_root_oracle (relation_oracle *oracle) { m_root = oracle; } void dump (FILE *, basic_block) const final override; void dump (FILE *) const final override; -- 2.37.1
Re: [PATCH] c++: Extend -Wpessimizing-move to other contexts
On 8/16/22 14:09, Marek Polacek wrote: On Tue, Aug 16, 2022 at 03:23:18PM -0400, Jason Merrill wrote: On 8/2/22 16:04, Marek Polacek wrote: In my recent patch which enhanced -Wpessimizing-move so that it warns about class prvalues too I said that I'd like to extend it so that it warns in more contexts where a std::move can prevent copy elision, such as: T t = std::move(T()); T t(std::move(T())); T t{std::move(T())}; T t = {std::move(T())}; void foo (T); foo (std::move(T())); This patch does that by adding two maybe_warn_pessimizing_move calls. These must happen before we've converted the initializers otherwise the std::move will be buried in a TARGET_EXPR. Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk? PR c++/106276 gcc/cp/ChangeLog: * call.cc (build_over_call): Call maybe_warn_pessimizing_move. * cp-tree.h (maybe_warn_pessimizing_move): Declare. * decl.cc (build_aggr_init_full_exprs): Call maybe_warn_pessimizing_move. * typeck.cc (maybe_warn_pessimizing_move): Handle TREE_LIST and CONSTRUCTOR. Add a bool parameter and use it. Adjust a diagnostic message. (check_return_expr): Adjust the call to maybe_warn_pessimizing_move. gcc/testsuite/ChangeLog: * g++.dg/cpp0x/Wpessimizing-move7.C: Add dg-warning. * g++.dg/cpp0x/Wpessimizing-move8.C: New test. 
--- gcc/cp/call.cc| 5 +- gcc/cp/cp-tree.h | 1 + gcc/cp/decl.cc| 3 +- gcc/cp/typeck.cc | 58 - .../g++.dg/cpp0x/Wpessimizing-move7.C | 16 ++--- .../g++.dg/cpp0x/Wpessimizing-move8.C | 65 +++ 6 files changed, 120 insertions(+), 28 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move8.C diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 01a7be10077..370137ebd6d 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -9627,10 +9627,13 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain) if (!conversion_warning) arg_complain &= ~tf_warning; + if (arg_complain & tf_warning) + maybe_warn_pessimizing_move (arg, type, /*return_p*/false); + val = convert_like_with_context (conv, arg, fn, i - is_method, arg_complain); val = convert_for_arg_passing (type, val, arg_complain); - + if (val == error_mark_node) return error_mark_node; else diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 3278b4114bd..5a8af22b509 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -8101,6 +8101,7 @@ extern tree finish_right_unary_fold_expr (tree, int); extern tree finish_binary_fold_expr (tree, tree, int); extern tree treat_lvalue_as_rvalue_p (tree, bool); extern bool decl_in_std_namespace_p (tree); +extern void maybe_warn_pessimizing_move (tree, tree, bool); /* in typeck2.cc */ extern void require_complete_eh_spec_types (tree, tree); diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 70ad681467e..dc6853a7de1 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -7220,9 +7220,10 @@ check_array_initializer (tree decl, tree type, tree init) static tree build_aggr_init_full_exprs (tree decl, tree init, int flags) - { gcc_assert (stmts_are_full_exprs_p ()); + if (init) +maybe_warn_pessimizing_move (init, TREE_TYPE (decl), /*return_p*/false); This is a surprising place to add this. Why here rather than in build_aggr_init or check_initializer? 
IIRC it just felt appropriate since we only want to invoke maybe_warn_ on the full expression, not any subexpressions -- we're looking to see if the outermost expr is a std::move. Also, we want to warn for all types, not just classes. Makes sense. The patch is OK. But I can move the call into some place in check_initializer if you prefer. Marek
Re: [PATCH] Avoid further recomputations in path_range_query once path is finalized.
On Tue, Aug 16, 2022 at 4:12 PM Richard Biener wrote: > > On Tue, Aug 16, 2022 at 3:59 PM Aldy Hernandez wrote: > > > > [Richi, I'm trying to make things more obvious for others working on the > > code base. What do you think?] > > > > This makes a few things explicit to avoid misuse. First, we add a > > flag to differentiate between a path whose dependency ranges are being > > computed, and a path that is finalized and no further calculations > > should be done. I've also enhanced the comments to document what > > queries are supported. Finally, I added some asserts to make sure > > range_of_expr is only used on the final conditional once the path has > > been finalized. > > > > Unfortunately, the forward threader builds dummy statements it passes > > to the path solver. Most of these don't have a BB associated with > > them. I've cleared the BB field for the one that still had one, to > > make the asserts above work. > > > > No difference in thread counts over my cc1 .ii files. > > > > gcc/ChangeLog: > > > > * gimple-range-path.cc (path_range_query::path_range_query): > > Initialized m_path_finalized. > > (path_range_query::internal_range_of_expr): Avoid further > > recomputations once path is finalized. > > Take basic_block instead of stmt as argument. > > (path_range_query::range_of_expr): Document. Add asserts. > > (path_range_query::compute_ranges): Set m_path_finalized. > > * gimple-range-path.h (path_range_query::internal_range_of_expr): > > Replace statement argument with basic block. > > (class path_range_query): Add m_path_finalized. > > * tree-ssa-threadedge.cc > > (jump_threader::simplify_control_stmt_condition): Clear BB field > > in dummy_switch. 
> > --- > > gcc/gimple-range-path.cc | 50 ++ > > gcc/gimple-range-path.h| 7 +- > > gcc/tree-ssa-threadedge.cc | 1 + > > 3 files changed, 52 insertions(+), 6 deletions(-) > > > > diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc > > index c99d77dd340..a8412dd090b 100644 > > --- a/gcc/gimple-range-path.cc > > +++ b/gcc/gimple-range-path.cc > > @@ -40,7 +40,8 @@ path_range_query::path_range_query (bool resolve, > > gimple_ranger *ranger) > >: m_cache (new ssa_global_cache), > > m_has_cache_entry (BITMAP_ALLOC (NULL)), > > m_resolve (resolve), > > -m_alloced_ranger (!ranger) > > +m_alloced_ranger (!ranger), > > +m_path_finalized (false) > > { > >if (m_alloced_ranger) > > m_ranger = new gimple_ranger; > > @@ -159,7 +160,7 @@ path_range_query::range_on_path_entry (vrange &r, tree > > name) > > // Return the range of NAME at the end of the path being analyzed. > > > > bool > > -path_range_query::internal_range_of_expr (vrange &r, tree name, gimple > > *stmt) > > +path_range_query::internal_range_of_expr (vrange &r, tree name, > > basic_block bb) > > { > >if (!r.supports_type_p (TREE_TYPE (name))) > > return false; > > @@ -174,8 +175,16 @@ path_range_query::internal_range_of_expr (vrange &r, > > tree name, gimple *stmt) > >return true; > > } > > > > - if (stmt > > - && range_defined_in_block (r, name, gimple_bb (stmt))) > > + // Avoid further recomputations once the path has been finalized. > > + if (m_path_finalized) > > +{ > > + gimple_range_global (r, name); > > I suppose we can't assert here instead? > > > + return true; > > +} > > + > > + // We're being called as part of the calculation of ranges for exit > > + // dependencies. Set the cache as we traverse the path top-down. 
> > + if (bb && range_defined_in_block (r, name, bb)) > > { > >if (TREE_CODE (name) == SSA_NAME) > > { > > @@ -192,10 +201,37 @@ path_range_query::internal_range_of_expr (vrange &r, > > tree name, gimple *stmt) > >return true; > > } > > > > +// This, as well as range_of_stmt, are the main entry points for > > +// making queries about a path. > > +// > > +// Once the ranges for the exit dependencies have been pre-calculated, > > +// the path is considered finalized, and the only valid query is > > +// asking the range of a NAME at the end of the path. > > +// > > +// Note that this method can also be called internally before the path > > +// is finalized, as part of the path traversal pre-calculating the > > +// ranges for exit dependencies. In this case, it may be called on > > +// statements that are not the final conditional as described above. > > + > > bool > > path_range_query::range_of_expr (vrange &r, tree name, gimple *stmt) > > { > > - if (internal_range_of_expr (r, name, stmt)) > > + basic_block bb = stmt ? gimple_bb (stmt) : NULL; > > + > > + // Once a path is finalized, the only valid queries are of the final > > + // statement in the exit block. > > why? shouldn't we support queries for all dependent ranges (what > the user specified as imports argument to compu
Re: [PATCH] Reset root oracle from path_oracle::reset_path.
works for me. On 8/17/22 12:09, Aldy Hernandez wrote: When we cross a backedge in the path solver, we reset the path relations and nuke the root oracle. However, we forget to reset it for the next path. This is causing us to miss threads because subsequent paths will have no root oracle to use. With this patch we get 201 more threads in the threadfull passes in my .ii files and 118 more overall (DOM gets less because threadfull runs before). Normally, I'd recommend this for the GCC 12 branch, but considering how sensitive other passes are to jump threading, and that there is no PR associated with this, perhaps we should leave this out. Up to the release maintainers of course. Andrew, you OK with this for trunk? gcc/ChangeLog: * gimple-range-path.cc (path_range_query::compute_ranges_in_block): Remove set_root_oracle call. (path_range_query::compute_ranges): Pass ranger oracle to reset_path. * value-relation.cc (path_oracle::reset_path): Set root oracle. * value-relation.h (path_oracle::reset_path): Add root oracle argument. --- gcc/gimple-range-path.cc | 8 +--- gcc/value-relation.cc| 6 -- gcc/value-relation.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc index c99d77dd340..73e248b7e0b 100644 --- a/gcc/gimple-range-path.cc +++ b/gcc/gimple-range-path.cc @@ -435,11 +435,10 @@ path_range_query::compute_ranges_in_block (basic_block bb) e->src->index, e->dest->index); path_oracle *p = get_path_oracle (); - p->reset_path (); // ?? Instead of nuking the root oracle altogether, we could // reset the path oracle to search for relations from the top of // the loop with the root oracle. Something for future development. 
- p->set_root_oracle (nullptr); + p->reset_path (); } gori_compute &g = m_ranger->gori (); @@ -615,7 +614,10 @@ path_range_query::compute_ranges (const vec &path, compute_exit_dependencies (m_exit_dependencies, m_path); if (m_resolve) -get_path_oracle ()->reset_path (); +{ + path_oracle *p = get_path_oracle (); + p->reset_path (m_ranger->oracle ()); +} if (DEBUG_SOLVER) { diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc index 3f0957ccdd6..7fc22d30126 100644 --- a/gcc/value-relation.cc +++ b/gcc/value-relation.cc @@ -1513,11 +1513,13 @@ path_oracle::query_relation (basic_block bb, tree ssa1, tree ssa2) return query_relation (bb, equiv_1, equiv_2); } -// Reset any relations registered on this path. +// Reset any relations registered on this path. ORACLE is the root +// oracle to use. void -path_oracle::reset_path () +path_oracle::reset_path (relation_oracle *oracle) { + set_root_oracle (oracle); m_equiv.m_next = NULL; bitmap_clear (m_equiv.m_names); m_relations.m_head = NULL; diff --git a/gcc/value-relation.h b/gcc/value-relation.h index 77e12085eea..64884a8eea2 100644 --- a/gcc/value-relation.h +++ b/gcc/value-relation.h @@ -242,7 +242,7 @@ public: relation_kind query_relation (basic_block, tree, tree) final override; relation_kind query_relation (basic_block, const_bitmap, const_bitmap) final override; - void reset_path (); + void reset_path (relation_oracle *oracle = NULL); void set_root_oracle (relation_oracle *oracle) { m_root = oracle; } void dump (FILE *, basic_block) const final override; void dump (FILE *) const final override;
[PATCH] wwwdocs: Add D language changes and caveats to gcc-12/changes.html
Hi, This patch belatedly adds the new features and changes to the D front-end during the GCC 12 development cycle, as well as a bullet in the caveat section for D's new bootstrapping requirements. If nothing stands out being really wrong, I'll go ahead and commit it by end of week. OK? Regards, Iain. --- htdocs/gcc-12/changes.html | 78 +- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/htdocs/gcc-12/changes.html b/htdocs/gcc-12/changes.html index ef957204..5bbad9bf 100644 --- a/htdocs/gcc-12/changes.html +++ b/htdocs/gcc-12/changes.html @@ -72,6 +72,17 @@ You may also want to check out our pool arguments. Those configurations had been broken for some time. + +D: +Building and bootstrapping GDC, the D compiler, now requires a working GDC +compiler (GCC version 9.1 or later) and D runtime library, libphobos, as +the D front end is written in D. On some targets, libphobos isn't enabled +by default, but compiles and works if --enable-libphobos is +used. Other targets may require a more recent minimum version of GCC to +bootstrap. Specifics are documented for affected targets in the +https://gcc.gnu.org/install/specific.html";>manual for +installing GCC. + Fortran: OpenMP code using the omp_lib.h include file can no longer be @@ -509,7 +520,72 @@ function Multiply (S1, S2 : Sign) return Sign is - +D + + New features: + + Support for the D programming language has been updated to version + 2.100.1 of the language and run-time library. Full changelog for this + release and previous releases can be found on the + https://dlang.org/changelog/2.100.1.html";>dlang.org + website. + + On supported targets, the __traits(compiles) expression + can now be used to determine whether a target-specific built-in is + available without error during CTFE + (https://gcc.gnu.org/PR101127";>PR101127). + + Partial support for directly importing C99 sources into a D + compilation (https://dlang.org/spec/importc.html";>ImportC) + has been added to the language. 
A notable missing feature is support + for preprocessing C imports, which can be worked around by + preprocessing all C sources used for importing ahead of time. + + + + New language options: + + -fcheck=, enables or disables the code generation of + specific run-time contract checks. + + -fcheckaction=, controls the run-time behaviour on an + assert, array bounds check, or final switch contract failure. The + default is -fcheckaction=throw. + + -fdump-c++-spec=, dumps all compiled + extern(C++) declarations as C++ code to a file. + The supplementary option -fdump-c++-spec-verbose turns on + emission of comments for ignored declarations in the generated C++ spec. + + -fextern-std=, controls which C++ standard + extern(C++) declarations are compiled to be compatible + with. The default is -fextern-std=c++17. + + -fpreview=, added to enable upcoming D language features + in the compiler. + + -frevert=, added to revert D language changes to support + older D codebases that need more time to transition. + + -fsave-mixins=, saves mixins expanded at compile-time to + a file. + + + + Deprecated and removed features: + + The -Wtemplates compiler switch has been removed, as it + had been superseded by -ftransition=templates, which more + accurately reports on which templates have been instantiated. + + The -ftransition=dip25 and + -ftransition=dip1000 compiler switches have been renamed + to -fpreview=dip25 and -fpreview=dip1000. + + + + + Fortran -- 2.34.1
Re: [PATCH] xtensa: Prevent emitting integer additions of constant zero
On Wed, Aug 17, 2022 at 2:52 AM Takayuki 'January June' Suwa wrote: > As a matter of fact, "in a few cases" is just only one: > > [xtensa_expand_epilogue() in /gcc/config/xtensa/xtensa.cc] > > if (cfun->machine->current_frame_size > 0) > > { > > if (frame_pointer_needed || /* always reachable with addi */ > > cfun->machine->current_frame_size > 1024 || > > cfun->machine->current_frame_size <= 127) > > { > > if (cfun->machine->current_frame_size <= 127) > > offset = cfun->machine->current_frame_size; > > else > > offset = cfun->machine->callee_save_size; > > > > emit_insn (gen_addsi3 (stack_pointer_rtx, > >stack_pointer_rtx, > >GEN_INT (offset)));// offset can > > be zero! > > } Oh, nice catch! I'll post a patch that adds a check for non-zero offset here. -- Thanks. -- Max
Re: [PATCH] bug in emergency cxa pool free()
Thank you! I was working on a modified version that I could LD_PRELOAD to investigate some issues I was having in my asynchronous application code. It used a higher order collection of pool instances instead of malloc/free, with extra context (an array of pc register values) packed into the buffer before the refcounted exception. in_pool on the pools in the collection is the safety check to know that the extra context can be safely pulled from the pointer. With that implementation, memory was being leaked because the destruction of one of the pools in the collection requires there to be no more uses of it and quickly checking for that to be the case also needed the ordered invariant to hold (just checking if the freelist is the whole arena). On Tue, Aug 16, 2022 at 11:45 PM Richard Biener wrote: > On Tue, Aug 16, 2022 at 9:15 PM Keef Aragon > wrote: > > > > This probably has never actually affected anyone in practice. The normal > > ABI implementation just uses malloc and only falls back to the pool on > > malloc failure. But if that happens a bunch of times the freelist gets > out > > of order which violates some of the invariants of the freelist (as well > as > > the comments that follow the bug). The bug is just a comparison reversal > > when traversing the freelist in the case where the pointer being returned > > to the pool is after the existing freelist. > > > > I'm not sure what to do as far as the test suite is concerned. It's a > > private part of the implementation of the exception handling ABI and it > can > > only ever be triggered if malloc fails (repeatedly). So it seems like > > reproducing it from the external interface will require hooking malloc to > > forcibly return NULL. > > > > But I'm a newb on these lists, so will obediently do as instructed. > > Oops, that's my fault. 
> For consistency it's probably best written > as reinterpret_cast (e) + sz > reinterpret_cast ((*fe)) > thus > > diff --git a/libstdc++-v3/libsupc++/eh_alloc.cc > b/libstdc++-v3/libsupc++/eh_alloc.cc > index c85b9aed40b..68f319869f9 100644 > --- a/libstdc++-v3/libsupc++/eh_alloc.cc > +++ b/libstdc++-v3/libsupc++/eh_alloc.cc > @@ -224,8 +224,8 @@ namespace > free_entry **fe; > for (fe = &first_free_entry; >(*fe)->next > - && (reinterpret_cast ((*fe)->next) > - > reinterpret_cast (e) + sz); > + && (reinterpret_cast (e) + sz > + > reinterpret_cast ((*fe)->next)); >fe = &(*fe)->next) > ; > // If we can merge the next block into us do so and continue > > The change is OK with that adjustment. I see you do not have write access > so > I'll test & push it for you. > > I'm curious how you noticed? > > Thanks, > Richard. >
Re: [PATCH v2] c++: Extend -Wredundant-move for const-qual objects [PR90428]
On Mon, Aug 15, 2022 at 03:43:38PM -0400, Jason Merrill wrote: > On 8/8/22 13:27, Marek Polacek wrote: > > This is to warn about this: > > > > T f5(const T t) > > { > >return std::move(t); // { dg-warning "redundant move" } > > } > > > > where OR fails because there's no T(const T&&) (or it's deleted in which > > case > > convert_for_initialization also returns error_mark_node). This OR is going > > to > > fail with std::move but also without std::move when we're trying to treat > > an lvalue > > as an rvalue. So the std::move has no effect, because T(const T&) will be > > called in either case. > > > > Now, if there was a T(const T&&), we'd *still* warn, because the std::move > > would > > still be redundant. Does that make sense? > > Ah, so this is because LOOKUP_PREFER_RVALUE returns an error if a const T& > overload is selected. The comment should mention that. OK with that > change. Thanks, pushed. > Since you've been thinking about this area, I wonder if you want to look at > implementing P2266? Yup, I can take a look. It's also come up recently in a discussion with Davis in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89780#c3. Since P2266 was approved, I've updated our C++23 status table as well. Marek
[PATCH] xtensa: Optimize stack pointer updates in function pro/epilogue under certain conditions
This patch enforces the use of "addmi" machine instruction instead of addition/subtraction with two source registers for adjusting the stack pointer, if the adjustment fits into a signed 16-bit and is also a multiple of 256. /* example */ void test(void) { char buffer[4096]; __asm__(""::"m"(buffer)); } ;; before test: movi.n a9, 1 sllia9, a9, 12 sub sp, sp, a9 movi.n a9, 1 sllia9, a9, 12 add.n sp, sp, a9 addisp, sp, 0 ret.n ;; after test: addmi sp, sp, -0x1000 addmi sp, sp, 0x1000 ret.n gcc/ChangeLog: * config/xtensa/xtensa.cc (xtensa_expand_prologue): Use an "addmi" machine instruction for updating the stack pointer rather than addition/subtraction via hard register A9, if the amount of change satisfies the literal value conditions of that instruction when the CALL0 ABI is used. (xtensa_expand_epilogue): Ditto. And also inhibit the stack pointer addition of constant zero. --- gcc/config/xtensa/xtensa.cc | 79 + 1 file changed, 54 insertions(+), 25 deletions(-) diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 6ac879c38fb..b673b6764da 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -3150,7 +3150,6 @@ xtensa_expand_prologue (void) rtx_insn *insn = NULL; rtx note_rtx; - total_size = compute_frame_size (get_frame_size ()); if (flag_stack_usage_info) @@ -3206,10 +3205,17 @@ xtensa_expand_prologue (void) } else { - rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); - emit_move_insn (tmp_reg, GEN_INT (total_size)); - insn = emit_insn (gen_subsi3 (stack_pointer_rtx, - stack_pointer_rtx, tmp_reg)); + if (xtensa_simm8x256 (-total_size)) + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (-total_size))); + else + { + rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); + emit_move_insn (tmp_reg, GEN_INT (total_size)); + insn = emit_insn (gen_subsi3 (stack_pointer_rtx, + stack_pointer_rtx, tmp_reg)); + } RTX_FRAME_RELATED_P (insn) = 1; note_rtx = gen_rtx_SET (stack_pointer_rtx, plus_constant (Pmode, 
stack_pointer_rtx, @@ -3237,11 +3243,19 @@ xtensa_expand_prologue (void) if (total_size > 1024 || (!callee_save_size && total_size > 128)) { - rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); - emit_move_insn (tmp_reg, GEN_INT (total_size - - callee_save_size)); - insn = emit_insn (gen_subsi3 (stack_pointer_rtx, - stack_pointer_rtx, tmp_reg)); + if (xtensa_simm8x256 (callee_save_size - total_size)) + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, + GEN_INT (callee_save_size - + total_size))); + else + { + rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); + emit_move_insn (tmp_reg, GEN_INT (total_size - + callee_save_size)); + insn = emit_insn (gen_subsi3 (stack_pointer_rtx, + stack_pointer_rtx, tmp_reg)); + } RTX_FRAME_RELATED_P (insn) = 1; note_rtx = gen_rtx_SET (stack_pointer_rtx, plus_constant (Pmode, stack_pointer_rtx, @@ -3315,12 +3329,21 @@ xtensa_expand_epilogue (bool sibcall_p) if (cfun->machine->current_frame_size > (frame_pointer_needed ? 127 : 1024)) { - rtx tmp_reg = gen_rtx_REG (Pmode, A9_REG); - emit_move_insn (tmp_reg, GEN_INT (cfun->machine->current_frame_size - - cfun->machine->callee_save_size)); - emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? -hard_frame_pointer_rtx : stack_pointer_rtx, -tmp_reg)); + if (xtensa_simm8x256 (cfun->machine->current_frame_size - + cfun->machine->callee_save_size)) + emit_insn (gen_addsi3 (stack_pointer_rtx, frame_pointer_needed ? + hard_frame_pointer_rtx : stack_pointer_rtx, + GEN_INT (cfun->machine->current_frame_size - + cfun
[pushed] c++: Add new std::move test [PR67906]
As discussed in 67906, let's make sure we don't warn about a std::move when initializing when there's a T(const T&&) ctor. Tested x86_64-pc-linux-gnu, applying to trunk. PR c++/67906 gcc/testsuite/ChangeLog: * g++.dg/cpp0x/Wredundant-move11.C: New test. --- .../g++.dg/cpp0x/Wredundant-move11.C | 32 +++ 1 file changed, 32 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wredundant-move11.C diff --git a/gcc/testsuite/g++.dg/cpp0x/Wredundant-move11.C b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move11.C new file mode 100644 index 000..5dfa37f1686 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/Wredundant-move11.C @@ -0,0 +1,32 @@ +// PR c++/67906 +// { dg-do compile { target c++11 } } +// { dg-options "-Wall -Wextra" } + +// Define std::move. +namespace std { + template +struct remove_reference +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&> +{ typedef _Tp type; }; + + template +struct remove_reference<_Tp&&> +{ typedef _Tp type; }; + + template +constexpr typename std::remove_reference<_Tp>::type&& +move(_Tp&& __t) noexcept +{ return static_cast::type&&>(__t); } +} + +struct X { + X() { } + X(const X&) { } + X(const X&&) { } +}; + +const X x; +const X y = std::move(x); base-commit: 5adfb6540db95da5faf1f77fbe9ec38b4cf8eb1f -- 2.37.2
Re: [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
On 8/17/22 00:17, Jakub Jelinek wrote: Hi! The following patch implements the C++23 P2290R3 paper. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-08-17 Jakub Jelinek PR c++/106645 libcpp/ * include/cpplib.h (struct cpp_options): Implement P2290R3 - Delimited escape sequences. Add delimite_escape_seqs member. * init.cc (struct lang_flags): Likewise. (lang_defaults): Add delim column. (cpp_set_lang): Copy over delimite_escape_seqs. * charset.cc (_cpp_valid_ucn): Handle delimited escape sequences. (convert_hex): Likewise. (convert_oct): Likewise. (convert_escape): Call convert_oct even for \o. (_cpp_interpret_identifier): Handle delimited escape sequences. * lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in. (get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to compute num_bytes. gcc/testsuite/ * c-c++-common/cpp/delimited-escape-seq-1.c: New test. * c-c++-common/cpp/delimited-escape-seq-2.c: New test. * c-c++-common/cpp/delimited-escape-seq-3.c: New test. * c-c++-common/Wbidi-chars-24.c: New test. * gcc.dg/cpp/delimited-escape-seq-1.c: New test. * gcc.dg/cpp/delimited-escape-seq-2.c: New test. * g++.dg/cpp/delimited-escape-seq-1.C: New test. * g++.dg/cpp/delimited-escape-seq-2.C: New test. --- libcpp/include/cpplib.h.jj 2022-08-10 09:06:53.268209449 +0200 +++ libcpp/include/cpplib.h 2022-08-15 19:32:53.743213474 +0200 @@ -519,6 +519,9 @@ struct cpp_options /* Nonzero for C++23 size_t literals. */ unsigned char size_t_literals; + /* Nonzero for C++23 delimited escape sequences. */ + unsigned char delimited_escape_seqs; + /* Holds the name of the target (execution) character set. 
*/ const char *narrow_charset; --- libcpp/init.cc.jj 2022-08-10 09:06:53.268209449 +0200 +++ libcpp/init.cc 2022-08-15 16:09:01.403020485 +0200 @@ -96,34 +96,35 @@ struct lang_flags char dfp_constants; char size_t_literals; char elifdef; + char delimited_escape_seqs; }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,1, 1, 0, 1, 1, 1, 1, 0, 1 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0,1, 1, 1, 1, 0, 1, 1, 0, 1 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX98*/ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0,0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX11*/ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX14*/ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX17*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 0, 1, 0, 0, 0 }, - /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX20*/ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 0, 1, 1, 1, 0, 0, 
0 }, - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,1, 1, 0, 1, 1,
Re: [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
On Wed, Aug 17, 2022 at 04:47:19PM -0400, Jason Merrill via Gcc-patches wrote: > > + length = 32; > > /* Magic value to indicate no digits seen. */ Indeed, will add the comment. > > + delimited = true; > > + if (loc_reader) > > + char_range->m_finish = loc_reader->get_next ().m_finish; > > + } > > +} > > else if (str[-1] == 'U') > > length = 8; > > else > > @@ -1107,6 +1118,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const > > result = 0; > > do > > { > > + if (str == limit) > > + break; > > c = *str; > > if (!ISXDIGIT (c)) > > break; > > @@ -1116,9 +1129,41 @@ _cpp_valid_ucn (cpp_reader *pfile, const > > gcc_assert (char_range); > > char_range->m_finish = loc_reader->get_next ().m_finish; > > } > > + if (delimited) > > + { > > + if (!result) > > + /* Accept arbitrary number of leading zeros. */ > > + length = 16; > > + else if (length == 8) > > + { > > + /* Make sure we detect overflows. */ > > + result |= 0x800; > > + ++length; > > + } > > 16 above so that this case happens after we read 8 digits after leading > zeroes? Another magic value less than the no digits seen one and > 8, so that it can count 8 digits with the first non-zero one after which to or in the overflow flag. The intent is not to break the loop if there are further digits, just that there will be overflow. Another option would be those overflow |= n ^ (n << 4 >> 4); tests that convert_hex does and just making sure length is never decremented (except we need a way to distinguish between \u{} and at least one digit). > > + if (loc_reader) > > + char_range->m_finish = loc_reader->get_next ().m_finish; > > Here and in other functions, the pattern of increment the input pointer and > update m_finish seems like it should be a macro? Perhaps or inline function. Before my patch, there are 5 such ifs (some with char_range.m_finish and others char_range->m_finish), the patch adds another 7 such spots. 
> > @@ -2119,15 +2255,23 @@ _cpp_interpret_identifier (cpp_reader *p > > cppchar_t value = 0; > > size_t bufleft = len - (bufp - buf); > > int rval; > > + bool delimited = false; > > idp += 2; > > + if (length == 4 && id[idp] == '{') > > + { > > + delimited = true; > > + idp++; > > + } > > while (length && idp < len && ISXDIGIT (id[idp])) > > { > > value = (value << 4) + hex_value (id[idp]); > > idp++; > > - length--; > > + if (!delimited) > > + length--; > > } > > - idp--; > > + if (!delimited) > > + idp--; > > Don't we need to check that the first non-xdigit is a }? The comments and my understanding of the code say that we first check what is a valid identifier and the above is only called on a valid identifier. So, if it would be delimited \u{ not terminated with }, then it would fail forms_identifier_p and wouldn't be included in the range. Thus e.g. the ISXDIGIT (id[id]) test is probably not needed unless delimited is true because we've checked earlier that it has 4 or 8 hex digits. But sure, if you want a id[idp] == '}' test or assertion, it can be added. Jakub
[Committed] PR target/106640: Fix use of XINT in TImode compute_convert_gain.
Thanks to Zdenek Sojka for reporting PR target/106640 where an RTL checking build reveals a thinko in my recent patch to support TImode shifts/rotates in STV. My "senior moment" was to inappropriately use XINT where I should be using INTVAL of XEXP. I can't imagine what I was thinking. Corrected by the patch below, tested on x86_64-pc-linux-gnu with make bootstrap, both with and without --enable-checking=rtl, and regression tested, both with and without --target_board=unix{-m32}, with no new failures. Committed to mainline as obvious. 2022-08-17 Roger Sayle gcc/ChangeLog PR target/106640 * config/i386/i386-features.cc (timde_scalar_chain::compute_convert_gain): Replace incorrect use of XINT with INTVAL (XEXP (src, 1)). Roger -- diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 821d8c7..d6bb66c 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -1248,7 +1248,7 @@ timode_scalar_chain::compute_convert_gain () case ASHIFT: case LSHIFTRT: /* See ix86_expand_v1ti_shift. */ - op1val = XINT (src, 1); + op1val = INTVAL (XEXP (src, 1)); if (optimize_insn_for_size_p ()) { if (op1val == 64 || op1val == 65) @@ -1282,7 +1282,7 @@ timode_scalar_chain::compute_convert_gain () case ASHIFTRT: /* See ix86_expand_v1ti_ashiftrt. */ - op1val = XINT (src, 1); + op1val = INTVAL (XEXP (src, 1)); if (optimize_insn_for_size_p ()) { if (op1val == 64 || op1val == 127) @@ -1355,7 +1355,7 @@ timode_scalar_chain::compute_convert_gain () case ROTATE: case ROTATERT: /* See ix86_expand_v1ti_rotate. */ - op1val = XINT (src, 1); + op1val = INTVAL (XEXP (src, 1)); if (optimize_insn_for_size_p ()) { scost = COSTS_N_BYTES (13);
[PATCH v2] stack-protector: Check stack canary before throwing exception
Check stack canary before throwing exception to avoid stack corruption. gcc/ PR middle-end/58245 * calls.cc: Include "tree-eh.h". (expand_call): Check stack canary before throwing exception. gcc/testsuite/ PR middle-end/58245 * g++.dg/fstack-protector-strong.C: Adjusted. * g++.dg/pr58245-1.C: New test. --- gcc/calls.cc | 6 +- gcc/testsuite/g++.dg/fstack-protector-strong.C | 2 +- gcc/testsuite/g++.dg/pr58245-1.C | 10 ++ 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/pr58245-1.C diff --git a/gcc/calls.cc b/gcc/calls.cc index bc96aff38f0..6dd6f73e978 100644 --- a/gcc/calls.cc +++ b/gcc/calls.cc @@ -60,6 +60,7 @@ along with GCC; see the file COPYING3. If not see #include "attr-fnspec.h" #include "value-query.h" #include "tree-pretty-print.h" +#include "tree-eh.h" /* Like PREFERRED_STACK_BOUNDARY but in units of bytes, not bits. */ #define STACK_BYTES (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) @@ -3154,7 +3155,10 @@ expand_call (tree exp, rtx target, int ignore) if (pass && (flags & ECF_MALLOC)) start_sequence (); - if (pass == 0 + /* Check the canary value for sibcall or function which doesn't +return and could throw. 
*/ + if ((pass == 0 + || ((flags & ECF_NORETURN) != 0 && tree_could_throw_p (exp))) && crtl->stack_protect_guard && targetm.stack_protect_runtime_enabled_p ()) stack_protect_epilogue (); diff --git a/gcc/testsuite/g++.dg/fstack-protector-strong.C b/gcc/testsuite/g++.dg/fstack-protector-strong.C index ae6d2fdb8df..034af2ce9ab 100644 --- a/gcc/testsuite/g++.dg/fstack-protector-strong.C +++ b/gcc/testsuite/g++.dg/fstack-protector-strong.C @@ -85,4 +85,4 @@ int foo7 (B *p) return p->return_slot ().a1; } -/* { dg-final { scan-assembler-times "stack_chk_fail" 7 } } */ +/* { dg-final { scan-assembler-times "stack_chk_fail" 8 } } */ diff --git a/gcc/testsuite/g++.dg/pr58245-1.C b/gcc/testsuite/g++.dg/pr58245-1.C new file mode 100644 index 000..1439bc62e71 --- /dev/null +++ b/gcc/testsuite/g++.dg/pr58245-1.C @@ -0,0 +1,10 @@ +/* { dg-do compile { target i?86-*-* x86_64-*-* rs6000-*-* s390x-*-* } } */ +/* { dg-options "-O2 -fstack-protector-all" } */ + +void +bar (void) +{ + throw 1; +} + +/* { dg-final { scan-assembler-times "stack_chk_fail" 1 } } */ -- 2.37.2
Re: [PATCH] stack-protector: Check stack canary for noreturn function
On Wed, Aug 3, 2022 at 10:27 AM H.J. Lu wrote: > > On Tue, Aug 2, 2022 at 4:34 PM Jeff Law wrote: > > > > > > > > On 8/2/2022 11:43 AM, H.J. Lu wrote: > > > On Sat, Jul 30, 2022 at 1:30 PM Jeff Law via Gcc-patches > > > wrote: > > >> > > >> > > >> On 7/14/2022 3:55 PM, H.J. Lu via Gcc-patches wrote: > > >>> Check stack canary for noreturn function to catch stack corruption > > >>> before calling noreturn function. For C++, check stack canary when > > >>> throwing exception or resuming stack unwind to avoid corrupted stack. > > >>> > > >>> gcc/ > > >>> > > >>>PR middle-end/58245 > > >>>* calls.cc (expand_call): Check stack canary for noreturn > > >>>function. > > >>> > > >>> gcc/testsuite/ > > >>> > > >>>PR middle-end/58245 > > >>>* c-c++-common/pr58245-1.c: New test. > > >>>* g++.dg/pr58245-1.C: Likewise. > > >>>* g++.dg/fstack-protector-strong.C: Adjusted. > > >> But is this really something we want? I'd actually lean towards > > >> eliminating the useless load -- I don't necessarily think we should be > > >> treating non-returning paths specially here. > > >> > > >> The whole point of the stack protector is to prevent the *return* path > > >> from going to an attacker controlled location. I'm not sure checking > > >> the protector at this point actually does anything particularly useful. > > > throw is marked no return. Since the unwind library may read > > > the stack contents to unwind stack, it the stack is corrupted, the > > > exception handling may go wrong. Should we handle this case? > > That's the question I think we need to answer. The EH paths are a known > > security issue on Windows and while ours are notably different I'm not > > sure if there's a real attack surface in those paths. My sense is that > > if we need to tackle this that doing so on the throw side might be > > better as it's closer conceptually to when//how we check the canary for > > a normal return. > > Like this? 
> > @@ -3154,7 +3155,10 @@ expand_call (tree exp, rtx target, int ignore) >if (pass && (flags & ECF_MALLOC)) >start_sequence (); > > - if (pass == 0 > + /* Check the canary value for sibcall or function which doesn't > + return and could throw. */ > + if ((pass == 0 > + || ((flags & ECF_NORETURN) != 0 && tree_could_throw_p (exp))) > && crtl->stack_protect_guard > && targetm.stack_protect_runtime_enabled_p ()) >stack_protect_epilogue (); Here is the patch: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599916.html > > jeff > > > > > > -- > > > H.J. > > > > > -- > H.J. -- H.J.
Re: [PATCH] RISC-V/testsuite: Restrict remaining `fmin'/`fmax' tests to hard float
On Thu, 11 Aug 2022, Kito Cheng wrote: > LGTM, thanks :) I have committed this change now. Thank you for your review. Maciej
Re: [PATCH, GCC, AARCH64, 5/6] Enable BTI : Add new pass for BTI.
On Fri, Nov 2, 2018 at 11:39 AM Sudakshina Das wrote: > > Hi > > This patch is part of a series that enables ARMv8.5-A in GCC and > adds Branch Target Identification Mechanism. > (https://developer.arm.com/products/architecture/cpu-architecture/a-profile/exploration-tools) > > This patch adds a new pass called "bti" which is triggered by the > command line argument -mbranch-protection whenever "bti" is turned on. > > The pass iterates through the instructions and adds appropriated BTI > instructions based on the following: > * Add a new "BTI C" at the beginning of a function, unless its already > protected by a "PACIASP/PACIBSP". We exempt the functions that are > only called directly. Coming back to this because the check only_called_directly_p does not work if the linker will insert a veneer as the compiler does not know about that. This is recorded as https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106671 . Thanks, Andrew Pinski > * Add a new "BTI J" for every target of an indirect jump, jump table > targets, non-local goto targets or labels that might be referenced > by variables, constant pools, etc (NOTE_INSN_DELETED_LABEL) > > Since we have already changed the use of indirect tail calls to only x16 > and x17, we do not have to use "BTI JC". > (check patch 3/6). > > Bootstrapped and regression tested with aarch64-none-linux-gnu. Added > new tests. > Is this ok for trunk? > > Thanks > Sudi > > *** gcc/ChangeLog *** > > 2018-xx-xx Sudakshina Das > Ramana Radhakrishnan > > * config.gcc (aarch64*-*-*): Add aarch64-bti-insert.o. > * gcc/config/aarch64/aarch64.h: Update comment for > TRAMPOLINE_SIZE. > * config/aarch64/aarch64.c (aarch64_asm_trampoline_template): > Update if bti is enabled. > * config/aarch64/aarch64-bti-insert.c: New file. > * config/aarch64/aarch64-passes.def (INSERT_PASS_BEFORE): Insert > bti pass. > * config/aarch64/aarch64-protos.h (make_pass_insert_bti): > Declare the new bti pass. > * config/aarch64/aarch64.md (bti_nop): Define. 
> * config/aarch64/t-aarch64: Add rule for aarch64-bti-insert.o. > > *** gcc/testsuite/ChangeLog *** > > 2018-xx-xx Sudakshina Das > > * gcc.target/aarch64/bti-1.c: New test. > * gcc.target/aarch64/bti-2.c: New test. > * lib/target-supports.exp > (check_effective_target_aarch64_bti_hw): Add new check for > BTI hw. >
[r13-2098 Regression] FAIL: gcc.dg/tree-prof/cmpsf-1.c scan-tree-dump-not dom2 "Invalid sum" on Linux/x86_64
On Linux/x86_64, 5adfb6540db95da5faf1f77fbe9ec38b4cf8eb1f is the first bad commit commit 5adfb6540db95da5faf1f77fbe9ec38b4cf8eb1f Author: Aldy Hernandez Date: Wed Aug 17 17:47:21 2022 +0200 Reset root oracle from path_oracle::reset_path. caused FAIL: gcc.dg/tree-prof/cmpsf-1.c scan-tree-dump-not dom2 "Invalid sum" with GCC configured with ../../gcc/configure --prefix=/export/users/haochenj/src/gcc-bisect/master/master/r13-2098/usr --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl --enable-libmpx x86_64-linux --disable-bootstrap To reproduce: $ cd {build_dir}/gcc && make check RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/cmpsf-1.c --target_board='unix{-m32}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/cmpsf-1.c --target_board='unix{-m32\ -march=cascadelake}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/cmpsf-1.c --target_board='unix{-m64}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="tree-prof.exp=gcc.dg/tree-prof/cmpsf-1.c --target_board='unix{-m64\ -march=cascadelake}'" (Please do not reply to this email, for question about this report, contact me at haochen dot jiang at intel.com)
Re: [PATCH] xtensa: Optimize stack pointer updates in function pro/epilogue under certain conditions
On Wed, Aug 17, 2022 at 12:32 PM Takayuki 'January June' Suwa wrote: > > This patch enforces the use of "addmi" machine instruction instead of > addition/subtraction with two source registers for adjusting the stack > pointer, if the adjustment fits into a signed 16-bit and is also a multiple > of 256. > > /* example */ > void test(void) { > char buffer[4096]; > __asm__(""::"m"(buffer)); > } > > ;; before > test: > movi.n a9, 1 > sllia9, a9, 12 > sub sp, sp, a9 > movi.n a9, 1 > sllia9, a9, 12 > add.n sp, sp, a9 > addisp, sp, 0 > ret.n > > ;; after > test: > addmi sp, sp, -0x1000 > addmi sp, sp, 0x1000 > ret.n > > gcc/ChangeLog: > > * config/xtensa/xtensa.cc (xtensa_expand_prologue): > Use an "addmi" machine instruction for updating the stack pointer > rather than addition/subtraction via hard register A9, if the amount > of change satisfies the literal value conditions of that instruction > when the CALL0 ABI is used. > (xtensa_expand_epilogue): Ditto. > And also inhibit the stack pointer addition of constant zero. > --- > gcc/config/xtensa/xtensa.cc | 79 + > 1 file changed, 54 insertions(+), 25 deletions(-) Regtested for target=xtensa-linux-uclibc, no new regressions. Committed to master. -- Thanks. -- Max
Re: [PATCH] Add pattern to convert vector shift + bitwise and + multiply to vector compare in some cases.
On Sat, 13 Aug 2022, mtsamis wrote: > When using SWAR (SIMD in a register) techniques a comparison operation within *within > such a register can be made by using a combination of shifts, bitwise and and > multiplication. If code using this scheme is vectorized then there is > potential > to replace all these operations with a single vector comparison, by > reinterpreting > the vector types to match the width of the SWAR register. Hadn't it been for "all" modern processors these days having SIMD instructions, adding general SWAR handling to the vectorizer would be worth promoting to someones favorite decision-maker. (This is a wonderful first step.) Random typo spotted: > +++ b/gcc/match.pd > @@ -301,6 +301,63 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (view_convert (bit_and:itype (view_convert @0) >(ne @1 { build_zero_cst (type); }))) > > +/* In SWAR (SIMD in a register) code a comparison of packed data can > + be consturcted with a particular combination of shift, bitwise and, *constructed brgds, H-P
Re: [PATCH] Add pattern to convert vector shift + bitwise and + multiply to vector compare in some cases.
On Sat, Aug 13, 2022 at 2:59 AM mtsamis wrote: > > When using SWAR (SIMD in a register) techniques a comparison operation within > such a register can be made by using a combination of shifts, bitwise and and > multiplication. If code using this scheme is vectorized then there is > potential > to replace all these operations with a single vector comparison, by > reinterpreting > the vector types to match the width of the SWAR register. > > For example, for the test function packed_cmp_16_32, the original generated > code is: > > ldr q0, [x0] > add w1, w1, 1 > ushrv0.4s, v0.4s, 15 > and v0.16b, v0.16b, v2.16b > shl v1.4s, v0.4s, 16 > sub v0.4s, v1.4s, v0.4s > str q0, [x0], 16 > cmp w2, w1 > bhi .L20 > > with this pattern the above can be optimized to: > > ldr q0, [x0] > add w1, w1, 1 > cmltv0.8h, v0.8h, #0 > str q0, [x0], 16 > cmp w2, w1 > bhi .L20 > > The effect is similar for x86-64. > > gcc/ChangeLog: > > * match.pd: Simplify vector shift + bit_and + multiply in some cases. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/swar_to_vec_cmp.c: New test. > > Signed-off-by: mtsamis > --- > gcc/match.pd | 57 +++ > .../gcc.target/aarch64/swar_to_vec_cmp.c | 72 +++ > 2 files changed, 129 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/swar_to_vec_cmp.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 8bbc0dbd5cd..5c768a94846 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -301,6 +301,63 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (view_convert (bit_and:itype (view_convert @0) > (ne @1 { build_zero_cst (type); }))) > > +/* In SWAR (SIMD in a register) code a comparison of packed data can > + be consturcted with a particular combination of shift, bitwise and, > + and multiplication by constants. If that code is vectorized we can > + convert this pattern into a more efficient vector comparison. 
*/ > +(simplify > + (mult (bit_and (rshift @0 @1) @2) @3) > + (with { > + tree op_type = TREE_TYPE (@0); > + tree rshift_cst = NULL_TREE; > + tree bit_and_cst = NULL_TREE; > + tree mult_cst = NULL_TREE; > + } > + /* Make sure we're working with vectors and uniform vector constants. */ > + (if (VECTOR_TYPE_P (op_type) I think you can use type here and don't need op_type. Maybe change this to: (simplify (mult (bit_and (rshift @0 uniform_integer_cst_p@1) uniform_integer_cst_p@2) uniform_integer_cst_p@3) (if (VECTOR_TYPE_P (type)) (with { tree rshift_cst = uniform_integer_cst_p (@1); tree bit_and_cst = uniform_integer_cst_p (@2); tree mult_cst = uniform_integer_cst_p (@3); } ... Similar to the patterns under "Simplifications of comparisons". Thanks, Andrew Pinski > + && (rshift_cst = uniform_integer_cst_p (@1)) > + && (bit_and_cst = uniform_integer_cst_p (@2)) > + && (mult_cst = uniform_integer_cst_p (@3))) > + /* Compute what constants would be needed for this to represent a packed > + comparison based on the shift amount denoted by RSHIFT_CST. 
*/ > + (with { > + HOST_WIDE_INT vec_elem_bits = vector_element_bits (op_type); > + HOST_WIDE_INT vec_nelts = TYPE_VECTOR_SUBPARTS (op_type).to_constant (); > + HOST_WIDE_INT vec_bits = vec_elem_bits * vec_nelts; > + > + unsigned HOST_WIDE_INT cmp_bits_i, bit_and_i, mult_i; > + unsigned HOST_WIDE_INT target_mult_i, target_bit_and_i; > + cmp_bits_i = tree_to_uhwi (rshift_cst) + 1; > + target_mult_i = (HOST_WIDE_INT_1U << cmp_bits_i) - 1; > + > + mult_i = tree_to_uhwi (mult_cst); > + bit_and_i = tree_to_uhwi (bit_and_cst); > + target_bit_and_i = 0; > + > + for (unsigned i = 0; i < vec_elem_bits / cmp_bits_i; i++) > + target_bit_and_i = (target_bit_and_i << cmp_bits_i) | 1U; > +} > +(if ((exact_log2 (cmp_bits_i)) >= 0 > +&& cmp_bits_i < HOST_BITS_PER_WIDE_INT > +&& vec_elem_bits <= HOST_BITS_PER_WIDE_INT > +&& tree_fits_uhwi_p (rshift_cst) > +&& tree_fits_uhwi_p (mult_cst) > +&& tree_fits_uhwi_p (bit_and_cst) > +&& target_mult_i == mult_i > +&& target_bit_and_i == bit_and_i) > + /* Compute the vector shape for the comparison and check if the target > is > + able to expand the comparison with that type. */ > + (with { > + tree bool_type = build_nonstandard_boolean_type (cmp_bits_i); > + int vector_type_nelts = vec_bits / cmp_bits_i; > + tree vector_type = build_vector_type (bool_type, vector_type_nelts); > + tree zeros = build_zero_cst (vector_type); > + tree mask_type = truth_type_for (vector_type); > + } > + (if (expand_vec_cmp_expr_p (vector_type, mask_type, LT_EXPR)) > + (view_convert
Re: [committed PATCH] LoongArch: Get __tls_get_addr address through got table when plt is disabled.
2022/8/11 下午7:35, Lulu Cheng 写道: thread.c: __attribute__ ((tls_model ("global-dynamic"))) __thread int a; void test (void) { a = 10; } Compile the tests with -fno-plt, error message is as follows: thread.c: In function 'test': thread.c:7:1: error: unrecognizable insn: 7 | } | ^ (call_insn/u 7 6 8 2 (parallel [ (set (reg:DI 4 $r4) (call (mem:SI (symbol_ref:DI ("__tls_get_addr") [flags 0x41] ) [0 S4 A8]) (const_int 0 [0]))) (clobber (reg:SI 1 $r1)) ]) "thread.c":5:5 -1 (expr_list:REG_EH_REGION (const_int -2147483648 [0x8000]) (nil)) (expr_list (use (reg:DI 4 $r4)) (nil))) during RTL pass: vregs thread.c:7:1: internal compiler error: in extract_insn, at recog.cc:2791 - Fix bug, ICE with tls gd var with -fno-plt. gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_call_tls_get_addr): Get __tls_get_addr address through got table when disable plt. gcc/testsuite/ChangeLog: * gcc.target/loongarch/tls-gd-noplt.c: New test. --- gcc/config/loongarch/loongarch.cc | 14 -- gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c | 12 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 5c9a33c14f7..8d8232e5805 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -2297,8 +2297,18 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) else gcc_unreachable (); - insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, - const0_rtx)); + if (flag_plt) +insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, + const0_rtx)); + else +{ + rtx dest = gen_reg_rtx (Pmode); + rtx high = gen_reg_rtx (Pmode); + loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol)); + emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol)); + insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx)); +} + 
RTL_CONST_CALL_P (insn) = 1; use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); insn = get_insns (); diff --git a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c new file mode 100644 index 000..a71bb48676d --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-plt" } */ +/* { dg-final { scan-assembler "pcalau12i\t.*%got_pc_hi20\\(__tls_get_addr\\)" } } */ + +__attribute__ ((tls_model ("global-dynamic"))) __thread int a; + +void +test (void) +{ + a = 10; +} + Pushed r13-2104.
[PATCH v3] rs6000: Rework ELFv2 support for -fpatchable-function-entry* [PR99888]
Hi, As PR99888 and its related show, the current support for -fpatchable-function-entry on powerpc ELFv2 doesn't work well with global entry existence. For example, with one command line option -fpatchable-function-entry=3,2, it got below w/o this patch: .LPFE1: nop nop .type foo, @function foo: nop .LFB0: .cfi_startproc .LCF0: 0: addis 2,12,.TOC.-.LCF0@ha addi 2,2,.TOC.-.LCF0@l .localentry foo,.-foo , the assembly is unexpected since the patched nops have no effects when being entered from local entry. This patch is to update the nops patched before and after local entry, it looks like: .type foo, @function foo: .LFB0: .cfi_startproc .LCF0: 0: addis 2,12,.TOC.-.LCF0@ha addi 2,2,.TOC.-.LCF0@l nop nop .localentry foo,.-foo nop Bootstrapped and regtested on powerpc64-linux-gnu P7 & P8, and powerpc64le-linux-gnu P9 & P10. v3: Modify NOP(s) to nop(s), update test case patchable_function_entry-default.c with extra comments, pr99888-{1,2,3} by removing required targets as Segher's review comments. v2: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599617.html v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599461.html Is it ok for trunk? BR, Kewen - PR target/99888 PR target/105649 gcc/ChangeLog: * config/rs6000/rs6000-internal.h (rs6000_print_patchable_function_entry): New function declaration. * config/rs6000/rs6000-logue.cc (rs6000_output_function_prologue): Support patchable-function-entry by emitting nops before and after local entry for the function that needs global entry. * config/rs6000/rs6000.cc (rs6000_print_patchable_function_entry): Skip the function that needs global entry till global entry has been emitted. * config/rs6000/rs6000.h (struct machine_function): New bool member global_entry_emitted. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr99888-1.c: New test. * gcc.target/powerpc/pr99888-2.c: New test. * gcc.target/powerpc/pr99888-3.c: New test. * gcc.target/powerpc/pr99888-4.c: New test. * gcc.target/powerpc/pr99888-5.c: New test. 
* gcc.target/powerpc/pr99888-6.c: New test. * c-c++-common/patchable_function_entry-default.c: Adjust for powerpc_elfv2 to avoid compilation error. --- gcc/config/rs6000/rs6000-internal.h | 5 +++ gcc/config/rs6000/rs6000-logue.cc | 32 ++ gcc/config/rs6000/rs6000.cc | 10 - gcc/config/rs6000/rs6000.h| 4 ++ .../patchable_function_entry-default.c| 3 ++ gcc/testsuite/gcc.target/powerpc/pr99888-1.c | 43 +++ gcc/testsuite/gcc.target/powerpc/pr99888-2.c | 43 +++ gcc/testsuite/gcc.target/powerpc/pr99888-3.c | 11 + gcc/testsuite/gcc.target/powerpc/pr99888-4.c | 13 ++ gcc/testsuite/gcc.target/powerpc/pr99888-5.c | 13 ++ gcc/testsuite/gcc.target/powerpc/pr99888-6.c | 14 ++ 11 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-6.c diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h index b9e82c0468d..da809d1ac8b 100644 --- a/gcc/config/rs6000/rs6000-internal.h +++ b/gcc/config/rs6000/rs6000-internal.h @@ -182,10 +182,15 @@ extern tree rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED, tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED); +extern void rs6000_print_patchable_function_entry (FILE *, + unsigned HOST_WIDE_INT, + bool); + extern bool rs6000_passes_float; extern bool rs6000_passes_long_double; extern bool rs6000_passes_vector; extern bool rs6000_returns_struct; extern bool cpu_builtin_p; + #endif diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc index 59fe1c8cb8b..3e2b1773154 100644 --- a/gcc/config/rs6000/rs6000-logue.cc +++ b/gcc/config/rs6000/rs6000-logue.cc @@ -4013,11 +4013,43 @@ rs6000_output_function_prologue (FILE 
*file) fprintf (file, "\tadd 2,2,12\n"); } + unsigned short patch_area_size = crtl->patch_area_size; + unsigned short patch_area_entry = crtl->patch_area_entry; + /* Need to emit the patching area. */ + if (patch_area_size > 0) + { + cfun->machine-
Re: [PATCH] libcpp: Implement C++23 P2290R3 - Delimited escape sequences [PR106645]
On 8/17/22 14:19, Jakub Jelinek wrote: On Wed, Aug 17, 2022 at 04:47:19PM -0400, Jason Merrill via Gcc-patches wrote: + length = 32; /* Magic value to indicate no digits seen. */ Indeed, will add the comment. + delimited = true; + if (loc_reader) + char_range->m_finish = loc_reader->get_next ().m_finish; + } +} else if (str[-1] == 'U') length = 8; else @@ -1107,6 +1118,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const result = 0; do { + if (str == limit) + break; c = *str; if (!ISXDIGIT (c)) break; @@ -1116,9 +1129,41 @@ _cpp_valid_ucn (cpp_reader *pfile, const gcc_assert (char_range); char_range->m_finish = loc_reader->get_next ().m_finish; } + if (delimited) + { + if (!result) + /* Accept arbitrary number of leading zeros. */ + length = 16; + else if (length == 8) + { + /* Make sure we detect overflows. */ + result |= 0x800; + ++length; + } 16 above so that this case happens after we read 8 digits after leading zeroes? Another magic value less than the no digits seen one and >8, so that it can count 8 digits with the first non-zero one after which to or in the overflow flag. The intent is not to break the loop if there are further digits, just that there will be overflow. Another option would be those overflow |= n ^ (n << 4 >> 4); tests that convert_hex does and just making sure length is never decremented (except we need a way to distinguish between \u{} and at least one digit). This way is fine, could just use more comment. + if (loc_reader) + char_range->m_finish = loc_reader->get_next ().m_finish; Here and in other functions, the pattern of increment the input pointer and update m_finish seems like it should be a macro? Perhaps or inline function. Before my patch, there are 5 such ifs (some with char_range.m_finish and others char_range->m_finish), the patch adds another 7 such spots. Either way is fine. 
@@ -2119,15 +2255,23 @@ _cpp_interpret_identifier (cpp_reader *p cppchar_t value = 0; size_t bufleft = len - (bufp - buf); int rval; + bool delimited = false; idp += 2; + if (length == 4 && id[idp] == '{') + { + delimited = true; + idp++; + } while (length && idp < len && ISXDIGIT (id[idp])) { value = (value << 4) + hex_value (id[idp]); idp++; - length--; + if (!delimited) + length--; } - idp--; + if (!delimited) + idp--; Don't we need to check that the first non-xdigit is a }? The comments and my understanding of the code say that we first check what is a valid identifier and the above is only called on a valid identifier. So, if it would be delimited \u{ not terminated with }, then it would fail forms_identifier_p and wouldn't be included in the range. Thus e.g. the ISXDIGIT (id[idp]) test is probably not needed unless delimited is true because we've checked earlier that it has 4 or 8 hex digits. But sure, if you want a id[idp] == '}' test or assertion, it can be added. OK, a comment mentioning this should be sufficient. Jason
Re: [PATCH v6] LoongArch: add addr_global attribute
On Mon, 15 Aug 2022, Xi Ruoyao via Gcc-patches wrote: > Can we make a final solution to this soon? Now the merge window of > Linux 6.0 is closed and we have two Linux kernel releases that cannot > be built with Binutils or GCC using the new relocation types. This is > just ugly... > > On Fri, 2022-08-12 at 17:17 +0800, Xi Ruoyao via Gcc-patches wrote: > > v5 -> v6: > > > > * still use "addr_global" as we don't have a better name. "far"? brgds, H-P
Re: [PATCH v6] LoongArch: add addr_global attribute
在 2022/8/18 上午10:56, Hans-Peter Nilsson 写道: On Mon, 15 Aug 2022, Xi Ruoyao via Gcc-patches wrote: Can we make a final solution to this soon? Now the merge window of Linux 6.0 is closed and we have two Linux kernel releases that cannot be built with Binutils or GCC using the new relocation types. This is just ugly... On Fri, 2022-08-12 at 17:17 +0800, Xi Ruoyao via Gcc-patches wrote: v5 -> v6: * still use "addr_global" as we don't have a better name. "far"? brgds, H-P Sorry, I have already added the code for cmodel=extreme, and I am sorting out the code and test cases. I still insist on using __attribute__(model(extreme)) to describe per-CPU variables. I will send my patch today.
Re: [PATCH v6] LoongArch: add addr_global attribute
On Thu, 2022-08-18 at 11:54 +0800, Lulu Cheng wrote: > Sorry, I have already added the code for cmodel=extreme, and I am sorting out > the code and test cases. > I still insist on using __attribute__(model(extreme)) to describe > per-CPU variables. > I will send my patch today. Ok, I can live with it. A possible enhancement (we can add it later or abandon it if it won't really produce any benefit): The address of a per-CPU variable is per_cpu_buf_addr + cpu_offset + var_offset Or, B + C + V as an abbreviation. Currently we have $r21 = C, and the faked address of the per-cpu symbol is (B + V). As (B + V) is large, we have to use five (PC-rel) or four (Absolute) instructions to address it. But in the future we can make $r21 = B + C, and the faked address V. As V <= 2GiB we can add model(low) and use a lu12i/addi pair to address V. -- Xi Ruoyao School of Aerospace Science and Technology, Xidian University
Re: [PATCH] PR106342 - IBM zSystems: Provide vsel for all vector modes
On Wed, 17 Aug 2022, Ilya Leoshkevich wrote: > On Thu, 2022-08-11 at 07:45 +0200, Andreas Krebbel wrote: > > On 8/10/22 13:42, Ilya Leoshkevich wrote: > > > On Wed, 2022-08-03 at 12:20 +0200, Ilya Leoshkevich wrote: > > > > Bootstrapped and regtested on s390x-redhat-linux. Ok for master? > > > > > > > > > > > > > > > > dg.exp=pr104612.c fails with an ICE on s390x, because > > > > copysignv2sf3 > > > > produces an insn that vsel is supposed to recognize, but > > > > can't, > > > > because it's not defined for V2SF. Fix by defining it for all > > > > vector > > > > modes supported by copysign3. > > > > > > > > gcc/ChangeLog: > > > > > > > > * config/s390/vector.md (V_HW_FT): New iterator. > > > > * config/s390/vx-builtins.md (vsel): Use V instead > > > > of > > > > V_HW. > > > > --- > > > > gcc/config/s390/vector.md | 6 ++ > > > > gcc/config/s390/vx-builtins.md | 12 ++-- > > > > 2 files changed, 12 insertions(+), 6 deletions(-) > > > > > > Jakub pointed out that this is broken in gcc-12 as well. > > > The patch applies cleanly, and I started a bootstrap/regtest. > > > Ok for gcc-12? > > > > Yes. Thanks! > > > > Andreas > > Hi, > > I've committed this today without realizing that gcc-12 branch is > closed. Sorry! Please let me know if I should revert this. It doesn't look too risky, so leave it in. Richard.
[PATCH] Enhance final_value_replacement_loop to handle bitop with an invariant induction.[PR105735]
Hi, This patch is for pr105735/pr101991. It will enable below optimization: { - long unsigned int bit; - - [local count: 32534376]: - - [local count: 1041207449]: - # tmp_10 = PHI - # bit_12 = PHI - tmp_7 = bit2_6(D) & tmp_10; - bit_8 = bit_12 + 1; - if (bit_8 != 32) -goto ; [96.97%] - else -goto ; [3.03%] - - [local count: 1009658865]: - goto ; [100.00%] - - [local count: 32534376]: - # tmp_11 = PHI - return tmp_11; + tmp_11 = tmp_4(D) & bit2_6(D); + return tmp_11; } Ok for master ? gcc/ChangeLog: PR middle-end/105735 * match.pd (bitop_with_inv_p): New match. * tree-scalar-evolution.cc (gimple_bitop_with_inv_p): Declare. (analyze_and_compute_bitop_with_inv_effect): New function. (final_value_replacement_loop): Enhanced to handle bitop with inv induction. gcc/testsuite/ChangeLog: * gcc.target/i386/pr105735-1.c: New test. * gcc.target/i386/pr105735-2.c: New test. --- gcc/match.pd | 4 + gcc/testsuite/gcc.target/i386/pr105735-1.c | 88 ++ gcc/testsuite/gcc.target/i386/pr105735-2.c | 28 +++ gcc/tree-scalar-evolution.cc | 59 +++ 4 files changed, 179 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-1.c create mode 100644 gcc/testsuite/gcc.target/i386/pr105735-2.c diff --git a/gcc/match.pd b/gcc/match.pd index 562138a8034..cfe593ebb02 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8050,6 +8050,10 @@ and, (bit_not (nop_convert1? (bit_xor@0 (convert2? (lshift integer_onep@1 @2)) @3 +(for bit_op (bit_and bit_ior bit_xor) + (match (bitop_with_inv_p @0 @1) + (bit_op:c @0 @1))) + /* n - (((n > C1) ? n : C1) & -C2) -> n & C1 for unsigned case. n - (((n > C1) ? n : C1) & -C2) -> (n <= C1) ? n : (n & C1) for signed case. 
*/ (simplify diff --git a/gcc/testsuite/gcc.target/i386/pr105735-1.c b/gcc/testsuite/gcc.target/i386/pr105735-1.c new file mode 100644 index 000..8d2123ed351 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105735-1.c @@ -0,0 +1,88 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-tree-sccp-details" } */ +/* { dg-final { scan-tree-dump-times {final value replacement} 8 "sccp" +} } */ + +unsigned long long +__attribute__((noipa)) +foo (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 64; bit++) +tmp &= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo1 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 63; bit >= 0; bit -=3) +tmp &= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo2 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 64; bit++) +tmp |= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo3 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 63; bit >= 0; bit -=3) +tmp |= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo4 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 64; bit++) +tmp ^= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +foo5 (unsigned long long tmp, unsigned long long bit2) { + for (int bit = 0; bit < 63; bit++) +tmp ^= bit2; + return tmp; +} + +unsigned long long +__attribute__((noipa)) +f (unsigned long long tmp, long long bit, unsigned long long bit2) { + unsigned long long res = tmp; + for (long long i = 0; i < bit; i++) +res &= bit2; + return res; +} + +unsigned long long +__attribute__((noipa)) +f1 (unsigned long long tmp, long long bit, unsigned long long bit2) { + unsigned long long res = tmp; + for (long long i = 0; i < bit; i++) +res |= bit2; + return res; +} + +unsigned long long +__attribute__((noipa)) +f2 (unsigned long long tmp, long long bit, unsigned long long bit2) { + unsigned 
long long res = tmp; + for (long long i = 0; i < bit; i++) +res ^= bit2; + return res; +} + diff --git a/gcc/testsuite/gcc.target/i386/pr105735-2.c b/gcc/testsuite/gcc.target/i386/pr105735-2.c new file mode 100644 index 000..79c1d300b1b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105735-2.c @@ -0,0 +1,28 @@ +/* { dg-do run } */ +/* { dg-options "-O1" } */ + +#include "pr105735-1.c" + +int main() +{ + unsigned long long tmp = 0x1101101ULL; + unsigned long long bit2 = 0x11100111ULL; + if (foo (tmp, bit2) != 0x1100101ULL) +__builtin_abort (); + if (foo1 (tmp, bit2) != 0x1100101ULL) +__builtin_abort (); + if (foo2 (tmp, bit2) != 0x1110ULL) +__builtin_abort (); + if (foo3 (tmp, bit2) != 0x1110ULL) +__builtin_abort (); + if (foo4 (tmp, bit2) != 0x1101101ULL) +__builtin_abort (); + if (foo5 (tmp, bit2) != 0x111010011010ULL) +__builtin_abort (); + if (f (tmp, 64, bit2) != 0x1100101ULL) +__builtin_abort (); + if (f1 (tmp, 64, bit2) != 0x1110ULL) +__bui