[PATCH] Fix fallout from VRP strict-overflow changes
I was notifed I broke proper handling of undefined overflow in multiplicative ops handling. The following resurrects previous behavior (and adds a testcase). Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2017-08-17 Richard Biener * tree-vrp.c (vrp_int_const_binop): Do not set *overflow_p to true when overflow is undefined and we saturated the result. * gcc.dg/tree-ssa/vrp117.c: New testcase. Index: gcc/tree-vrp.c === --- gcc/tree-vrp.c (revision 251084) +++ gcc/tree-vrp.c (working copy) @@ -1614,6 +1614,8 @@ vrp_int_const_binop (enum tree_code code signop sign = TYPE_SIGN (TREE_TYPE (val1)); wide_int res; + *overflow_p = false; + switch (code) { case RSHIFT_EXPR: @@ -1685,8 +1687,6 @@ vrp_int_const_binop (enum tree_code code gcc_unreachable (); } - *overflow_p = overflow; - if (overflow && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (val1))) { @@ -1730,6 +1730,8 @@ vrp_int_const_binop (enum tree_code code TYPE_SIGN (TREE_TYPE (val1))); } + *overflow_p = overflow; + return res; } Index: gcc/testsuite/gcc.dg/tree-ssa/vrp117.c === --- gcc/testsuite/gcc.dg/tree-ssa/vrp117.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/vrp117.c (working copy) @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-evrp" } */ + +void link_error (void); + +void foo (int i) +{ + if (i > __INT_MAX__ - 10) +{ + int j = i * 10; + if (j < i) + link_error (); +} +} + +/* { dg-final { scan-tree-dump-not "link_error" "evrp" } } */
[PATCH] Fix PR81827
I am currently testing the following patch to improve bitmap density during points-to analysis. This improves the testcase in PR81827 from > /usr/bin/time /space/rguenther/install/gcc-7.2/bin/gfortran -S t.f90 -O 1139.91user 1.99system 19:02.37elapsed 99%CPU (0avgtext+0avgdata 8035048maxresident)k 40816inputs+144outputs (49major+2004165minor)pagefaults 0swaps to (patched GCC 7 branch): > /usr/bin/time /abuild/rguenther/obj2/gcc/f951 t.f90 -O -quiet 61.02user 0.68system 1:01.76elapsed 99%CPU (0avgtext+0avgdata 1629172maxresident)k note both compile-time and memory use improve significantly (PTA still uses a lot of memory though). Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2017-08-17 Richard Biener PR tree-optimization/81827 * tree-ssa-structalias.c (struct variable_info): Add is_reg_var flag. (new_var_info): Initialize it conservatively. (get_call_vi): Mark register vars. (new_scalar_tmp_constraint_exp): Likewise. (handle_rhs_call): Likewise. (handle_const_call): Likewise. (create_function_info_for): Likewise. (solve_constraints): Sort varinfos to separate register from non-register vars to pack points-to solution bitmaps during iteration. Index: gcc/tree-ssa-structalias.c === --- gcc/tree-ssa-structalias.c (revision 251119) +++ gcc/tree-ssa-structalias.c (working copy) @@ -257,6 +257,9 @@ struct variable_info /* True if this is a heap variable. */ unsigned int is_heap_var : 1; + /* True if this is a register variable. */ + unsigned int is_reg_var : 1; + /* True if this field may contain pointers. */ unsigned int may_have_pointers : 1; @@ -389,6 +392,7 @@ new_var_info (tree t, const char *name, /* We have to treat even local register variables as escape points. */ || (VAR_P (t) && DECL_HARD_REGISTER (t))); + ret->is_reg_var = (t && TREE_CODE (t) == SSA_NAME); ret->solution = BITMAP_ALLOC (&pta_obstack); ret->oldsolution = NULL; ret->next = 0; @@ -422,12 +426,14 @@ get_call_vi (gcall *call) vi->size = 1; vi->fullsize = 2; vi->is_full_var = true; + vi->is_reg_var = true; vi2 = new_var_info (NULL_TREE, "CALLCLOBBERED", true); vi2->offset = 1; vi2->size = 1; vi2->fullsize = 2; vi2->is_full_var = true; + vi2->is_reg_var = true; vi->next = vi2->id; @@ -2892,6 +2898,7 @@ new_scalar_tmp_constraint_exp (const cha vi->size = -1; vi->fullsize = -1; vi->is_full_var = 1; + vi->is_reg_var = 1; tmp.var = vi->id; tmp.type = SCALAR; @@ -3930,6 +3937,7 @@ handle_rhs_call (gcall *stmt, vec { varinfo_t uses = get_call_use_vi (stmt); varinfo_t tem = new_var_info (NULL_TREE, "callarg", true); + tem->is_reg_var = true; make_constraint_to (tem->id, arg); make_any_offset_constraints (tem); if (!(flags & EAF_DIRECT)) @@ -3943,6 +3951,7 @@ handle_rhs_call (gcall *stmt, vec varinfo_t uses = get_call_use_vi (stmt); varinfo_t clobbers = get_call_clobber_vi (stmt); varinfo_t tem = new_var_info (NULL_TREE, "callarg", true); + tem->is_reg_var = true; make_constraint_to (tem->id, arg); make_any_offset_constraints (tem); if (!(flags & EAF_DIRECT)) @@ -4110,7 +4119,10 @@ handle_const_call (gcall *stmt, vecis_reg_var = true; +} for (k = 0; k < gimple_call_num_args (stmt); ++k) { tree arg = gimple_call_arg (stmt, k); @@ -5712,6 +5724,7 @@ create_function_info_for (tree decl, con clobbervi->fullsize = vi->fullsize; clobbervi->is_full_var = true; clobbervi->is_global_var = false; + clobbervi->is_reg_var = true; gcc_assert (prev_vi->offset < clobbervi->offset); prev_vi->next = clobbervi->id; @@ -5727,6 +5740,7 @@ create_function_info_for (tree decl, con usevi->fullsize = vi->fullsize; usevi->is_full_var = true; usevi->is_global_var = false; + usevi->is_reg_var = true; gcc_assert (prev_vi->offset < usevi->offset); prev_vi->next = usevi->id; @@ -7075,6 +7089,39 @@ solve_constraints (void) { struct scc_info *si; + /* Sort varinfos so that ones that cannot be pointed to are last. + This makes bitmaps more efficient. */ + unsigned int *map = XNEWVEC (unsigned int, varmap.length ()); + for (unsigned i = 0; i < integer_id + 1; ++i) +map[i] = i; + /* Start with non-register vars (as possibly address-taken), followed + by register vars as conservative set of vars never appearing in + the points-to solution bitmaps. */ + unsigned j = integer_id + 1; + for (unsigned i = integer_id + 1; i < varmap.length (); ++i) +if (! varmap[i]->is_reg_var) + map[i] = j++; + for (unsigned i = integer_id + 1; i < varmap.length (); ++i)
Re: [PATCH] Fix always true condition in gimplify_adjust_omp_clauses
Ping. On Thu, Aug 10, 2017 at 08:50:44PM +0200, Marek Polacek wrote: > My new warning triggered here and said "bitwise comparison always evaluates to > true" and I believe it's right: GOVD_WRITTEN = 131072 which is 10... in > binary, so n->value & GOVD_WRITTEN can never be 1; I suppose 0 was meant to > be used here instead. Too bad Jakub's away, so he can't confirm. > > Bootstrapped/regtested on x86_64-linux and ppc64le-linux, ok for trunk? > > 2017-08-10 Marek Polacek > > * gimplify.c (gimplify_adjust_omp_clauses): Compare with 0 instead of > 1. > > diff --git gcc/gimplify.c gcc/gimplify.c > index 86623e09f5d..e52d7dcddaf 100644 > --- gcc/gimplify.c > +++ gcc/gimplify.c > @@ -8915,7 +8915,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, > gimple_seq body, tree *list_p, > OMP_CLAUSE_SHARED_READONLY (c) = 1; > else if (DECL_P (decl) > && ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED > - && (n->value & GOVD_WRITTEN) != 1) > + && (n->value & GOVD_WRITTEN) != 0) > || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR > && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c))) > && omp_shared_to_firstprivate_optimizable_decl_p (decl)) Marek
[PATCH] Speed up PTA solving
The following patch resulted from PR81827 analysis where I was on the wrong track initially. It still results in a small speedup because we avoid useless duplicate work in case nodes are unified and reduce the number of graph edges. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2017-08-17 Richard Biener * tree-ssa-structalias.c (solve_graph): When propagating to successors update the graphs succ edges and avoid duplicate work. Index: gcc/tree-ssa-structalias.c === --- gcc/tree-ssa-structalias.c (revision 251119) +++ gcc/tree-ssa-structalias.c (working copy) @@ -2759,20 +2759,35 @@ solve_graph (constraint_graph_t graph) unsigned eff_escaped_id = find (escaped_id); /* Propagate solution to all successors. */ + unsigned to_remove = ~0U; EXECUTE_IF_IN_NONNULL_BITMAP (graph->succs[i], 0, j, bi) { - bitmap tmp; - bool flag; - + if (to_remove != ~0U) + { + bitmap_clear_bit (graph->succs[i], to_remove); + to_remove = ~0U; + } unsigned int to = find (j); - tmp = get_varinfo (to)->solution; - flag = false; - + if (to != j) + { + /* Update the succ graph, avoiding duplicate +work. */ + to_remove = j; + if (! bitmap_set_bit (graph->succs[i], to)) + continue; + /* We eventually end up processing 'to' twice +as it is undefined whether bitmap iteration +iterates over bits set during iteration. +Play safe instead of doing tricks. */ + } /* Don't try to propagate to ourselves. */ if (to == i) continue; + bitmap tmp = get_varinfo (to)->solution; + bool flag = false; + /* If we propagate from ESCAPED use ESCAPED as placeholder. */ if (i == eff_escaped_id) @@ -2783,6 +2798,8 @@ solve_graph (constraint_graph_t graph) if (flag) bitmap_set_bit (changed, to); } + if (to_remove != ~0U) + bitmap_clear_bit (graph->succs[i], to_remove); } } }
Re: [PATCH] Fix always true condition in gimplify_adjust_omp_clauses
On Thu, Aug 17, 2017 at 10:57 AM, Marek Polacek wrote: > Ping. Ok. Richard. > On Thu, Aug 10, 2017 at 08:50:44PM +0200, Marek Polacek wrote: >> My new warning triggered here and said "bitwise comparison always evaluates >> to >> true" and I believe it's right: GOVD_WRITTEN = 131072 which is 10... in >> binary, so n->value & GOVD_WRITTEN can never be 1; I suppose 0 was meant to >> be used here instead. Too bad Jakub's away, so he can't confirm. >> Bootstrapped/regtested on x86_64-linux and ppc64le-linux, ok for trunk? >> >> 2017-08-10 Marek Polacek >> >> * gimplify.c (gimplify_adjust_omp_clauses): Compare with 0 instead of >> 1. >> >> diff --git gcc/gimplify.c gcc/gimplify.c >> index 86623e09f5d..e52d7dcddaf 100644 >> --- gcc/gimplify.c >> +++ gcc/gimplify.c >> @@ -8915,7 +8915,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, >> gimple_seq body, tree *list_p, >> OMP_CLAUSE_SHARED_READONLY (c) = 1; >> else if (DECL_P (decl) >> && ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED >> - && (n->value & GOVD_WRITTEN) != 1) >> + && (n->value & GOVD_WRITTEN) != 0) >> || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR >> && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c))) >> && omp_shared_to_firstprivate_optimizable_decl_p (decl)) > > Marek
Re: [PATCH] Factor out division by squares and remove division around comparisons (1/2)
On Tue, Aug 15, 2017 at 4:11 PM, Wilco Dijkstra wrote: > Richard Biener wrote: >> > We also change the association of >> > >> > x / (y * C) -> (x / C) / y >> > >> > If C is a constant. >> >> Why's that profitable? > > It enables (x * C1) / (y * C2) -> (x * C1/C2) / y for example. > Also 1/y is now available to the reciprocal optimization, see > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026 for details. Sure, but on its own it's going to be slower. So this isn't the correct way to enable those followup transforms. >> > x / (- y) -> (-x) / y >> >> Why? (it's only one of the possible canonicalizations) > > Same here, y is now available for reciprocal optimization. The > negate may now be optimized, for example (a * b) / -y -> (-a*b) / y > will use a negated multiple on various targets. Fair enough. Though if it were x / -(a*b) you'd regress that case. Richard. > > Wilco
Add missing ECF_NOTHROW flags to internal.def
This patch adds missing ECF_NOTHROW flags to the vectorisable integer internal functions. I noticed it while doing some SVE work but don't have a testcase that's useful now. Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard 2017-08-17 Richard Sandiford gcc/ * internal-fn.def (CLRSB, CLZ, CTZ, FFS, PARITY, POPCOUNT): Add missing ECF_NOTHROW flags. Index: gcc/internal-fn.def === --- gcc/internal-fn.def 2017-08-10 14:36:08.046471664 +0100 +++ gcc/internal-fn.def 2017-08-17 09:05:10.128942687 +0100 @@ -135,12 +135,12 @@ DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONS DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary) /* Unary integer ops. */ -DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST, clrsb, unary) -DEF_INTERNAL_INT_FN (CLZ, ECF_CONST, clz, unary) -DEF_INTERNAL_INT_FN (CTZ, ECF_CONST, ctz, unary) -DEF_INTERNAL_INT_FN (FFS, ECF_CONST, ffs, unary) -DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary) -DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary) +DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary) +DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary) +DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary) +DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary) +DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary) +DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary) DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
Improve ECF_NOTHROW flags for direct internal functions
Internal functions that map directly to an optab can only throw an exception for -fnon-call-exceptions. This patch handles that in internal_fn_flags, in a similar way to ATTR_*NOTHROW in builtins.def. (Functions that don't throw even for flag_non_call_exceptions should be explicitly marked ECF_NOTHROW in internal-fn.def.) Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Richard 2017-08-17 Richard Sandiford gcc/ * internal-fn.h (internal_fn_flags): Just declare and move the actual implementation out-of-line to... * internal-fn.c (internal_fn_flags): ...here. Set ECF_NOTHROW for directly-mapped internal functions if !flag_non_call_exceptions. Index: gcc/internal-fn.h === --- gcc/internal-fn.h 2017-02-23 19:54:03.0 + +++ gcc/internal-fn.h 2017-08-17 09:05:37.459968561 +0100 @@ -107,15 +107,7 @@ internal_fn_name (enum internal_fn fn) return internal_fn_name_array[(int) fn]; } -/* Return the ECF_* flags for function FN. */ - -extern const int internal_fn_flags_array[]; - -static inline int -internal_fn_flags (enum internal_fn fn) -{ - return internal_fn_flags_array[(int) fn]; -} +extern int internal_fn_flags (enum internal_fn fn); /* Return fnspec for function FN. */ Index: gcc/internal-fn.c === --- gcc/internal-fn.c 2017-08-10 14:36:07.453493083 +0100 +++ gcc/internal-fn.c 2017-08-17 09:05:37.459968561 +0100 @@ -2814,3 +2814,18 @@ expand_PHI (internal_fn, gcall *) { gcc_unreachable (); } + +/* Return the ECF_* flags for function FN. */ + +int +internal_fn_flags (enum internal_fn fn) +{ + int flags = internal_fn_flags_array[(int) fn]; + /* Functions that map to optabs can only throw a catchable exception + when non-call exceptions are enabled. The non-call exceptions in + these cases will typically come from things like IEEE exceptions, + divide by zero errors and SEGVs. */ + if (direct_internal_fn_p (fn) && !flag_non_call_exceptions) +flags |= ECF_NOTHROW; + return flags; +}
Re: [PATCH] Factor out division by squares and remove division around comparisons (1/2)
Richard Biener wrote: > On Tue, Aug 15, 2017 at 4:11 PM, Wilco Dijkstra > wrote: > > Richard Biener wrote: >>> > We also change the association of >>> > >>> > x / (y * C) -> (x / C) / y >>> > >>> > If C is a constant. >>> >>> Why's that profitable? >> >> It enables (x * C1) / (y * C2) -> (x * C1/C2) / y for example. >> Also 1/y is now available to the reciprocal optimization, see >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026 for details. > > Sure, but on its own it's going to be slower. So this isn't the > correct way to enable those followup transforms. How can it be any slower? It's one division and one multiply in both cases. >>> > x / (- y) -> (-x) / y >>> >>> Why? (it's only one of the possible canonicalizations) >> >> Same here, y is now available for reciprocal optimization. The >> negate may now be optimized, for example (a * b) / -y -> (-a*b) / y >> will use a negated multiple on various targets. > > Fair enough. Though if it were x / -(a*b) you'd regress that case. Possibly. You might still be able to merge the negate if the result is used in an addition or multiply, which wouldn't be possible if it were hiding in a subexpression. Without global analysis it seems best to move constants/negates to the toplevel if they can't be trivially removed in a subexpression. Eg. -x / (a * b * -c). Wilco
Re: [PATCH] Factor out division by squares and remove division around comparisons (1/2)
On Thu, Aug 17, 2017 at 11:55 AM, Wilco Dijkstra wrote: > Richard Biener wrote: >> On Tue, Aug 15, 2017 at 4:11 PM, Wilco Dijkstra >> wrote: >> > Richard Biener wrote: > We also change the association of > > x / (y * C) -> (x / C) / y > > If C is a constant. Why's that profitable? >>> >>> It enables (x * C1) / (y * C2) -> (x * C1/C2) / y for example. >>> Also 1/y is now available to the reciprocal optimization, see >>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026 for details. >> >> Sure, but on its own it's going to be slower. So this isn't the >> correct way to enable those followup transforms. > > How can it be any slower? It's one division and one multiply in both cases. (x / C) / y is two divisions. If you restrict it to the case where we can transform this to (x * C') / y then it's indeed one. > x / (- y) -> (-x) / y Why? (it's only one of the possible canonicalizations) >>> >>> Same here, y is now available for reciprocal optimization. The >>> negate may now be optimized, for example (a * b) / -y -> (-a*b) / y >>> will use a negated multiple on various targets. >> >> Fair enough. Though if it were x / -(a*b) you'd regress that case. > > Possibly. You might still be able to merge the negate if the result is used > in an > addition or multiply, which wouldn't be possible if it were hiding in a > subexpression. > Without global analysis it seems best to move constants/negates to the > toplevel > if they can't be trivially removed in a subexpression. Eg. -x / (a * b * -c). Sure. So both patterns are canonicalization which is fine for match.pd. Those followup transforms should be done at a place that can look at more complicated patterns. We have the reassoc pass, then backprop (not exactly matching), and the recip pattern matching / cse pass. Richard. > Wilco
Re: Add missing ECF_NOTHROW flags to internal.def
On Thu, Aug 17, 2017 at 11:47 AM, Richard Sandiford wrote: > This patch adds missing ECF_NOTHROW flags to the vectorisable > integer internal functions. I noticed it while doing some SVE > work but don't have a testcase that's useful now. > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? ECF_LEAF is missing as well. Ok with adding them change. Richard. > Richard > > > 2017-08-17 Richard Sandiford > > gcc/ > * internal-fn.def (CLRSB, CLZ, CTZ, FFS, PARITY, POPCOUNT): Add > missing ECF_NOTHROW flags. > > Index: gcc/internal-fn.def > === > --- gcc/internal-fn.def 2017-08-10 14:36:08.046471664 +0100 > +++ gcc/internal-fn.def 2017-08-17 09:05:10.128942687 +0100 > @@ -135,12 +135,12 @@ DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONS > DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary) > > /* Unary integer ops. */ > -DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST, clrsb, unary) > -DEF_INTERNAL_INT_FN (CLZ, ECF_CONST, clz, unary) > -DEF_INTERNAL_INT_FN (CTZ, ECF_CONST, ctz, unary) > -DEF_INTERNAL_INT_FN (FFS, ECF_CONST, ffs, unary) > -DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary) > -DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary) > +DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary) > +DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary) > +DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary) > +DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary) > +DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary) > +DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary) > > DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) > DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
C PATCH to remove unused block of code
I've been itching to remove this code for some time now. The comment suggests that the code is actually unused, so I replaced the body of that "else if" with gcc_unreachable (); and ran regtest/bootstrap and nothing broke, so I propose to do away with it. Bootstrapped/regtested on x86_64-linux, ok for trunk? 2017-08-17 Marek Polacek * c-parser.c (c_parser_postfix_expression): Remove unused code. diff --git gcc/c/c-parser.c gcc/c/c-parser.c index 1402ba67204..8511a8b4fe7 100644 --- gcc/c/c-parser.c +++ gcc/c/c-parser.c @@ -7943,28 +7943,6 @@ c_parser_postfix_expression (c_parser *parser) set_c_expr_source_range (&expr, loc, close_loc); mark_exp_read (expr.value); } - else if (c_token_starts_typename (c_parser_peek_2nd_token (parser))) - { - /* A compound literal. ??? Can we actually get here rather -than going directly to -c_parser_postfix_expression_after_paren_type from -elsewhere? */ - location_t loc; - struct c_type_name *type_name; - c_parser_consume_token (parser); - loc = c_parser_peek_token (parser)->location; - type_name = c_parser_type_name (parser); - c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, -"expected %<)%>"); - if (type_name == NULL) - { - expr.set_error (); - } - else - expr = c_parser_postfix_expression_after_paren_type (parser, -type_name, -loc); - } else { /* A parenthesized expression. */ Marek
Re: Improve ECF_NOTHROW flags for direct internal functions
On Thu, Aug 17, 2017 at 11:49 AM, Richard Sandiford wrote: > Internal functions that map directly to an optab can only throw an > exception for -fnon-call-exceptions. This patch handles that in > internal_fn_flags, in a similar way to ATTR_*NOTHROW in builtins.def. > > (Functions that don't throw even for flag_non_call_exceptions should be > explicitly marked ECF_NOTHROW in internal-fn.def.) > > Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? Hmm. Note the outcome of flag_non_call_exceptions depends on the current function and thus IPA passes querying flags would need to push an appropriate function context. This means the function should get a struct function * argument and opt_for_fn (fn, flag_non_call_exceptions) should be used. It doesn't help very much that all callers don't have any such context either which means this "optimization" looks like in the wrong place :/ (the global value of flag_non_call_exceptions in the IPA case isn't necessarily conservative). So if you insist then add a comment and add a && cfun check so we're sure we are in non-IPA context (or in properly setup context). Richard. > Richard > > > 2017-08-17 Richard Sandiford > > gcc/ > * internal-fn.h (internal_fn_flags): Just declare and move > the actual implementation out-of-line to... > * internal-fn.c (internal_fn_flags): ...here. Set ECF_NOTHROW for > directly-mapped internal functions if !flag_non_call_exceptions. > > Index: gcc/internal-fn.h > === > --- gcc/internal-fn.h 2017-02-23 19:54:03.0 + > +++ gcc/internal-fn.h 2017-08-17 09:05:37.459968561 +0100 > @@ -107,15 +107,7 @@ internal_fn_name (enum internal_fn fn) >return internal_fn_name_array[(int) fn]; > } > > -/* Return the ECF_* flags for function FN. */ > - > -extern const int internal_fn_flags_array[]; > - > -static inline int > -internal_fn_flags (enum internal_fn fn) > -{ > - return internal_fn_flags_array[(int) fn]; > -} > +extern int internal_fn_flags (enum internal_fn fn); > > /* Return fnspec for function FN. */ > > Index: gcc/internal-fn.c > === > --- gcc/internal-fn.c 2017-08-10 14:36:07.453493083 +0100 > +++ gcc/internal-fn.c 2017-08-17 09:05:37.459968561 +0100 > @@ -2814,3 +2814,18 @@ expand_PHI (internal_fn, gcall *) > { > gcc_unreachable (); > } > + > +/* Return the ECF_* flags for function FN. */ > + > +int > +internal_fn_flags (enum internal_fn fn) > +{ > + int flags = internal_fn_flags_array[(int) fn]; > + /* Functions that map to optabs can only throw a catchable exception > + when non-call exceptions are enabled. The non-call exceptions in > + these cases will typically come from things like IEEE exceptions, > + divide by zero errors and SEGVs. */ > + if (direct_internal_fn_p (fn) && !flag_non_call_exceptions) > +flags |= ECF_NOTHROW; > + return flags; > +}
Re: Add missing ECF_NOTHROW flags to internal.def
Richard Biener writes: > On Thu, Aug 17, 2017 at 11:47 AM, Richard Sandiford > wrote: >> This patch adds missing ECF_NOTHROW flags to the vectorisable >> integer internal functions. I noticed it while doing some SVE >> work but don't have a testcase that's useful now. >> >> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? > > ECF_LEAF is missing as well. Ok with adding them change. That's added automatically for all directly-mapped functions by: #ifndef DEF_INTERNAL_OPTAB_FN #define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \ DEF_INTERNAL_FN (NAME, FLAGS | ECF_LEAF, NULL) #endif Thanks, Richard > > Richard. > >> Richard >> >> >> 2017-08-17 Richard Sandiford >> >> gcc/ >> * internal-fn.def (CLRSB, CLZ, CTZ, FFS, PARITY, POPCOUNT): Add >> missing ECF_NOTHROW flags. >> >> Index: gcc/internal-fn.def >> === >> --- gcc/internal-fn.def 2017-08-10 14:36:08.046471664 +0100 >> +++ gcc/internal-fn.def 2017-08-17 09:05:10.128942687 +0100 >> @@ -135,12 +135,12 @@ DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONS >> DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary) >> >> /* Unary integer ops. */ >> -DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST, clrsb, unary) >> -DEF_INTERNAL_INT_FN (CLZ, ECF_CONST, clz, unary) >> -DEF_INTERNAL_INT_FN (CTZ, ECF_CONST, ctz, unary) >> -DEF_INTERNAL_INT_FN (FFS, ECF_CONST, ffs, unary) >> -DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary) >> -DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary) >> +DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary) >> +DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary) >> +DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary) >> +DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary) >> +DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary) >> +DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary) >> >> DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL) >> DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
Re: PR81635: Use chrecs to help find related data refs
On Wed, Aug 16, 2017 at 6:50 PM, Richard Sandiford wrote: > "Bin.Cheng" writes: >> On Wed, Aug 16, 2017 at 5:00 PM, Richard Sandiford >> wrote: >>> "Bin.Cheng" writes: On Wed, Aug 16, 2017 at 2:38 PM, Richard Sandiford wrote: > The first loop in the testcase regressed after my recent changes to > dr_analyze_innermost. Previously we would treat "i" as an iv even > for bb analysis and end up with: > >DR_BASE_ADDRESS: p or q >DR_OFFSET: 0 >DR_INIT: 0 or 4 >DR_STEP: 16 > > We now always keep the step as 0 instead, so for an int "i" we'd have: > >DR_BASE_ADDRESS: p or q >DR_OFFSET: (intptr_t) i >DR_INIT: 0 or 4 >DR_STEP: 0 > > This is also what we'd like to have for the unsigned "i", but the > problem is that strip_constant_offset thinks that the "i + 1" in > "(intptr_t) (i + 1)" could wrap and so doesn't peel off the "+ 1". > The [i + 1] accesses therefore have a DR_OFFSET equal to the SSA > name that holds "(intptr_t) (i + 1)", meaning that the accesses no > longer seem to be related to the [i] ones. Didn't read the change in detail, so sorry if I mis-understood the issue. I made changes in scev to better fold type conversion by various sources of information, for example, vrp, niters, undefined overflow behavior etc. In theory these information should be available for other optimizers without querying scev. For the mentioned test, vrp should compute accurate range information for "i" so that we can fold (intptr_t) (i + 1) it without worrying overflow. Note we don't do it in generic folding because (intptr_t) (i) + 1 could be more expensive (especially in case of (T)(i + j)), or because the CST part is in bigger precision after conversion. But such folding is wanted in several places, e.g, IVOPTs. To provide such an interface, we changed tree-affine and made it do aggressive fold. I am curious if it's possible to use aff_tree to implement strip_constant_offset here since aggressive folding is wanted. After all, using additional chrec looks like a little heavy wrto the simple test. >>> >>> Yeah, using aff_tree does work here when the bounds are constant. >>> It doesn't look like it works for things like: >>> >>> double p[1000]; >>> double q[1000]; >>> >>> void >>> f4 (unsigned int n) >>> { >>> for (unsigned int i = 0; i < n; i += 4) >>> { >>> double a = q[i] + p[i]; >>> double b = q[i + 1] + p[i + 1]; >>> q[i] = a; >>> q[i + 1] = b; >>> } >>> } >>> >>> though, where the bounds on the global arrays guarantee that [i + 1] can't >>> overflow, even though "n" is unconstrained. The patch as posted handles >>> this case too. >> BTW is this a missed optimization in value range analysis? The range >> information for i should flow in a way like: array boundary -> niters >> -> scev/vrp. >> I think that's what niters/scev do in analysis. > > Yeah, maybe :-) It looks like the problem is that when SLP runs, > the previous VRP pass came before loop header copying, so the (single) > header has to cope with n == 0 case. Thus we get: Ah, there are several passes in between vrp and pass_ch, not sure if any such pass depends on vrp intensively. I would suggestion reorder the two passes, or standalone VRP interface updating information for loop region after header copied? This is a non-trivial issue that needs to be fixed. Niters analyzer rely on simplify_using_initial_conditions heavily to get the same information, which in my opinion should be provided by VRP. Though this won't be able to obsolete simplify_using_initial_conditions because VRP is weak in symbolic range... > > Visiting statement: > i_15 = ASSERT_EXPR ; > Intersecting > [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) > and > [0, 0] > to > [0, 0] EQUIVALENCES: { i_6 } (1 elements) > Intersecting > [0, 0] EQUIVALENCES: { i_6 } (1 elements) > and > [0, 1000] > to > [0, 0] EQUIVALENCES: { i_6 } (1 elements) > Found new range for i_15: [0, 0] > > Visiting statement: > _3 = i_15 + 1; > Match-and-simplified i_15 + 1 to 1 > Intersecting > [1, 1] > and > [0, +INF] > to > [1, 1] > Found new range for _3: [1, 1] > > (where _3 is the index we care about), followed by: > > Visiting statement: > i_15 = ASSERT_EXPR ; > Intersectings/aarch64-linux/trunk-orig/debug/gcc' > [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) > and > [0, 4] > to > [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) > Intersecting > [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) > and > [0, 1000] > to > [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) > Found new range for i_15: [0, n_9(D) + 4294967295] > > Vi
RE: [PATCH][GCC][PATCHv3] Improve fpclassify w.r.t IEEE like numbers in GIMPLE.
> -Original Message- > From: Richard Biener [mailto:rguent...@suse.de] > Sent: 08 June 2017 19:23 > To: Joseph Myers > Cc: Tamar Christina; Christophe Lyon; Markus Trippelsdorf; Jeff Law; GCC > Patches; Wilco Dijkstra; Michael Meissner; nd > Subject: Re: [PATCH][GCC][PATCHv3] Improve fpclassify w.r.t IEEE like > numbers in GIMPLE. > > On June 8, 2017 6:44:01 PM GMT+02:00, Joseph Myers > wrote: > >On Thu, 8 Jun 2017, Richard Biener wrote: > > > >> For a built-in this is generally valid. For plain isnan it depends > >on > >> what the standard says. > >> > >> You have to support taking the address of isnan anyway and thus > >> expanding to a library call in that case. Why doesn't that not work? > > > >In the case of isnan there is the Unix98 non-type-generic function, so > >it should definitely work to take the address of that function as > >declared in the system headers. > > > >For the DEF_GCC_BUILTIN type-generic functions there may not be any > >corresponding library function at all (as well as only being callable > >with the __builtin_* name). > > I haven't followed the patches in detail but I would suggest to move the > lowering somewhere to gimple-fold.c so that late discovered direct calls are > also lowered. I can't do it in fold as in the case where the input isn't a constant it can't be folder away and I would have to introduce new code. Because of how often folds are called it seems to be invalid at certain points to introduce new control flow. So while I have fixed the ICEs for PowerPC, AIX and the rest I'm still not sure what to do about the late expansion behaviour change. Originally the patch was in expand, which would have covered the late expansion case similar to what it's doing now in trunk. I was however asked to move this to a GIMPLE lowering pass as to give other optimizations a chance to do further optimizations on the generated code. This of course works fine for C since these math functions are a Macro in C but are functions in C++. C++ would then allow you to do stuff like take the address of the function so void foo(float a) { int (*xx)(...); xx = isnan; if (xx(a)) g++; } is perfectly legal. My current patch leaves "isnan" in as a call as by the time we're doing GIMPLE lowering the alias has not been resolved yet, whereas the version currently committed is able to expand it as it's late in expand. Curiously the behaviour is somewhat confusing. if xx is called with a non-constant it is expanded as you would expect .LFB0: .cfi_startproc fcvtd0, s0 fcmpd0, d0 bvs .L4 but when xx is called with a constant, say 0 it's not .LFB0: .cfi_startproc mov w0, 0 bl isnan cbz w0, .L1 because it infers the type of the 0 to be an integer and then doesn't recognize the call. using 0.0 works, but the behaviour is really counter intuitive. The question is what should I do now, clearly it would be useful to handle the late expansion as well, however I don't know what the best approach would be. I can either: 1) Add two new implementations, one for constant folding and one for expansion, but then one in expand would be quite similar to the one in the early lowering phase. The constant folding one could be very simple since it's a constant I can just call the buildins and evaluate the value completely. 2) Use the patch as is but make another one to allow the renaming to be applied quite early on. e.g while still in Tree or GIMPLE resolve int (*xx)(...); xx = isnan; if (xx(a)) to int (*xx)(...); xx = isnan; if (isnan(a)) This seems like it would be the best approach and the more useful one in general. Any thoughts? Thanks, Tamar > > Richard.
Re: Improve ECF_NOTHROW flags for direct internal functions
Richard Biener writes: > On Thu, Aug 17, 2017 at 11:49 AM, Richard Sandiford > wrote: >> Internal functions that map directly to an optab can only throw an >> exception for -fnon-call-exceptions. This patch handles that in >> internal_fn_flags, in a similar way to ATTR_*NOTHROW in builtins.def. >> >> (Functions that don't throw even for flag_non_call_exceptions should be >> explicitly marked ECF_NOTHROW in internal-fn.def.) >> >> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? > > Hmm. Note the outcome of flag_non_call_exceptions depends on the > current function and thus IPA passes querying flags would need to > push an appropriate function context. This means the function should > get a struct function * argument and opt_for_fn (fn, flag_non_call_exceptions) > should be used. It doesn't help very much that all callers don't have > any such context either which means this "optimization" looks like > in the wrong place :/ (the global value of flag_non_call_exceptions in > the IPA case isn't necessarily conservative). > > So if you insist then add a comment and add a && cfun check so > we're sure we are in non-IPA context (or in properly setup context). Bah. In that case, what should happen if a -fno-non-call-exceptions function is inlined into an -fnon-call-exceptions one? Should the call keep the NOTHROWness of the original function, or should it lose NOTHROWness (and thus gain an exception edge)? I guess the path of least resistance would be to add an extra check for this case in the places that need it, rather than relying solely on gimple_call_flags. Thanks, Richard > > Richard. > >> Richard >> >> >> 2017-08-17 Richard Sandiford >> >> gcc/ >> * internal-fn.h (internal_fn_flags): Just declare and move >> the actual implementation out-of-line to... >> * internal-fn.c (internal_fn_flags): ...here. Set ECF_NOTHROW for >> directly-mapped internal functions if !flag_non_call_exceptions. >> >> Index: gcc/internal-fn.h >> === >> --- gcc/internal-fn.h 2017-02-23 19:54:03.0 + >> +++ gcc/internal-fn.h 2017-08-17 09:05:37.459968561 +0100 >> @@ -107,15 +107,7 @@ internal_fn_name (enum internal_fn fn) >>return internal_fn_name_array[(int) fn]; >> } >> >> -/* Return the ECF_* flags for function FN. */ >> - >> -extern const int internal_fn_flags_array[]; >> - >> -static inline int >> -internal_fn_flags (enum internal_fn fn) >> -{ >> - return internal_fn_flags_array[(int) fn]; >> -} >> +extern int internal_fn_flags (enum internal_fn fn); >> >> /* Return fnspec for function FN. */ >> >> Index: gcc/internal-fn.c >> === >> --- gcc/internal-fn.c 2017-08-10 14:36:07.453493083 +0100 >> +++ gcc/internal-fn.c 2017-08-17 09:05:37.459968561 +0100 >> @@ -2814,3 +2814,18 @@ expand_PHI (internal_fn, gcall *) >> { >> gcc_unreachable (); >> } >> + >> +/* Return the ECF_* flags for function FN. */ >> + >> +int >> +internal_fn_flags (enum internal_fn fn) >> +{ >> + int flags = internal_fn_flags_array[(int) fn]; >> + /* Functions that map to optabs can only throw a catchable exception >> + when non-call exceptions are enabled. The non-call exceptions in >> + these cases will typically come from things like IEEE exceptions, >> + divide by zero errors and SEGVs. */ >> + if (direct_internal_fn_p (fn) && !flag_non_call_exceptions) >> +flags |= ECF_NOTHROW; >> + return flags; >> +}
Re: PR81635: Use chrecs to help find related data refs
"Bin.Cheng" writes: > On Wed, Aug 16, 2017 at 6:50 PM, Richard Sandiford > wrote: >> "Bin.Cheng" writes: >>> On Wed, Aug 16, 2017 at 5:00 PM, Richard Sandiford >>> wrote: "Bin.Cheng" writes: > On Wed, Aug 16, 2017 at 2:38 PM, Richard Sandiford > wrote: >> The first loop in the testcase regressed after my recent changes to >> dr_analyze_innermost. Previously we would treat "i" as an iv even >> for bb analysis and end up with: >> >>DR_BASE_ADDRESS: p or q >>DR_OFFSET: 0 >>DR_INIT: 0 or 4 >>DR_STEP: 16 >> >> We now always keep the step as 0 instead, so for an int "i" we'd have: >> >>DR_BASE_ADDRESS: p or q >>DR_OFFSET: (intptr_t) i >>DR_INIT: 0 or 4 >>DR_STEP: 0 >> >> This is also what we'd like to have for the unsigned "i", but the >> problem is that strip_constant_offset thinks that the "i + 1" in >> "(intptr_t) (i + 1)" could wrap and so doesn't peel off the "+ 1". >> The [i + 1] accesses therefore have a DR_OFFSET equal to the SSA >> name that holds "(intptr_t) (i + 1)", meaning that the accesses no >> longer seem to be related to the [i] ones. > > Didn't read the change in detail, so sorry if I mis-understood the issue. > I made changes in scev to better fold type conversion by various sources > of information, for example, vrp, niters, undefined overflow behavior etc. > In theory these information should be available for other > optimizers without > querying scev. For the mentioned test, vrp should compute accurate range > information for "i" so that we can fold (intptr_t) (i + 1) it without > worrying > overflow. Note we don't do it in generic folding because > (intptr_t) (i) + 1 > could be more expensive (especially in case of (T)(i + j)), or because the > CST part is in bigger precision after conversion. > But such folding is wanted in several places, e.g, IVOPTs. To provide > such > an interface, we changed tree-affine and made it do aggressive fold. I am > curious if it's possible to use aff_tree to implement > strip_constant_offset > here since aggressive folding is wanted. After all, using additional > chrec > looks like a little heavy wrto the simple test. Yeah, using aff_tree does work here when the bounds are constant. It doesn't look like it works for things like: double p[1000]; double q[1000]; void f4 (unsigned int n) { for (unsigned int i = 0; i < n; i += 4) { double a = q[i] + p[i]; double b = q[i + 1] + p[i + 1]; q[i] = a; q[i + 1] = b; } } though, where the bounds on the global arrays guarantee that [i + 1] can't overflow, even though "n" is unconstrained. The patch as posted handles this case too. >>> BTW is this a missed optimization in value range analysis? The range >>> information for i should flow in a way like: array boundary -> niters >>> -> scev/vrp. >>> I think that's what niters/scev do in analysis. >> >> Yeah, maybe :-) It looks like the problem is that when SLP runs, >> the previous VRP pass came before loop header copying, so the (single) >> header has to cope with n == 0 case. Thus we get: > Ah, there are several passes in between vrp and pass_ch, not sure if > any such pass depends on vrp intensively. I would suggestion reorder > the two passes, or standalone VRP interface updating information for > loop region after header copied? This is a non-trivial issue that > needs to be fixed. Niters analyzer rely on > simplify_using_initial_conditions heavily to get the same information, > which in my opinion should be provided by VRP. Though this won't be > able to obsolete simplify_using_initial_conditions because VRP is weak > in symbolic range... > >> >> Visiting statement: >> i_15 = ASSERT_EXPR ; >> Intersecting >> [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) >> and >> [0, 0] >> to >> [0, 0] EQUIVALENCES: { i_6 } (1 elements) >> Intersecting >> [0, 0] EQUIVALENCES: { i_6 } (1 elements) >> and >> [0, 1000] >> to >> [0, 0] EQUIVALENCES: { i_6 } (1 elements) >> Found new range for i_15: [0, 0] >> >> Visiting statement: >> _3 = i_15 + 1; >> Match-and-simplified i_15 + 1 to 1 >> Intersecting >> [1, 1] >> and >> [0, +INF] >> to >> [1, 1] >> Found new range for _3: [1, 1] >> >> (where _3 is the index we care about), followed by: >> >> Visiting statement: >> i_15 = ASSERT_EXPR ; >> Intersectings/aarch64-linux/trunk-orig/debug/gcc' >> [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) >> and >> [0, 4] >> to >> [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) >> Intersecting >> [0, n_9(D) + 4294967295] EQUIVALENCES: { i
[PATCH] [PR target/81861] Save target specific options after ix86_stack_protector_guard_reg was changed
Hi, as discussed in PR, currently we are missing some SSP related options when streaming target specific options in LTO mode. This leads to incorrect code and segfaults (e.g. in ASan's pr64820.c testcase). This patch fixes this by moving options saving machinery down to setting ix86_stack_protector_guard_reg thus all related options are propagated correctly. Tested on x86{_64}-unknown-linux-gnu and lto-bootstrapped, OK to apply? -Maxim gcc/ChangeLog: 2017-08-17 Maxim Ostapenko PR target/81861 * config/i386/i386.c (ix86_option_override_internal): Save target specific options after ix86_stack_protector_guard_reg was changed. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 3f85197..bd3260c 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -6651,12 +6651,6 @@ ix86_option_override_internal (bool main_args_p, gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0); - /* Save the initial options in case the user does function specific - options. */ - if (main_args_p) -target_option_default_node = target_option_current_node - = build_target_option_node (opts); - /* Handle stack protector */ if (!opts_set->x_ix86_stack_protector_guard) opts->x_ix86_stack_protector_guard @@ -6740,6 +6734,12 @@ ix86_option_override_internal (bool main_args_p, free (str); } + /* Save the initial options in case the user does function specific + options. */ + if (main_args_p) +target_option_default_node = target_option_current_node + = build_target_option_node (opts); + return true; }
Re: [PATCH] [PR target/81861] Save target specific options after ix86_stack_protector_guard_reg was changed
On Thu, Aug 17, 2017 at 1:37 PM, Maxim Ostapenko wrote: > Hi, > > as discussed in PR, currently we are missing some SSP related options when > streaming target specific options in LTO mode. This leads to incorrect code > and segfaults (e.g. in ASan's pr64820.c testcase). > This patch fixes this by moving options saving machinery down to setting > ix86_stack_protector_guard_reg thus all related options are propagated > correctly. > > Tested on x86{_64}-unknown-linux-gnu and lto-bootstrapped, OK to apply? OK. I'll take care to backport the patch to other release branches. Thanks, Uros.
[testsuite, committed] Require effective target nonlocal_goto for ipa/pr81696.c
Hi, this patch requires effective target nonlocal_goto for ipa/pr81696.c. Committed. Thanks, - Tom 2017-08-17 Tom de Vries * gcc.dg/ipa/pr81696.c: Require effective target nonlocal_goto. --- gcc/testsuite/gcc.dg/ipa/pr81696.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.dg/ipa/pr81696.c b/gcc/testsuite/gcc.dg/ipa/pr81696.c index 2d3d63f..628dcde 100644 --- a/gcc/testsuite/gcc.dg/ipa/pr81696.c +++ b/gcc/testsuite/gcc.dg/ipa/pr81696.c @@ -1,4 +1,5 @@ /* { dg-options "-O2 -fdump-ipa-icf-details" } */ +/* { dg-require-effective-target nonlocal_goto } */ int main (int argc, char **argv) -- 2.7.4
Re: [PATCH] Factor out division by squares and remove division around comparisons (1/2)
On Thu, 17 Aug 2017, Richard Biener wrote: > > Without global analysis it seems best to move constants/negates to the > > toplevel if they can't be trivially removed in a subexpression. Eg. -x > > / (a * b * -c). > > Sure. So both patterns are canonicalization which is fine for match.pd. > Those followup transforms should be done at a place that can look at > more complicated patterns. We have the reassoc pass, then backprop (not > exactly matching), and the recip pattern matching / cse pass. Watch out for -frounding-math issues, though. -a * b is always the same as a * -b, but not the same as -(a * b) with -frounding-math, and similarly, -frounding-math should prevent eliminating the negations from the -x / (a * b * -c) expression (whereas elimination from -x / -(a * b * c) would be fine). -- Joseph S. Myers jos...@codesourcery.com
Re: PR81635: Use chrecs to help find related data refs
On Thu, Aug 17, 2017 at 12:35 PM, Richard Sandiford wrote: > "Bin.Cheng" writes: >> On Wed, Aug 16, 2017 at 6:50 PM, Richard Sandiford >> wrote: >>> "Bin.Cheng" writes: On Wed, Aug 16, 2017 at 5:00 PM, Richard Sandiford wrote: > "Bin.Cheng" writes: >> On Wed, Aug 16, 2017 at 2:38 PM, Richard Sandiford >> wrote: >>> The first loop in the testcase regressed after my recent changes to >>> dr_analyze_innermost. Previously we would treat "i" as an iv even >>> for bb analysis and end up with: >>> >>>DR_BASE_ADDRESS: p or q >>>DR_OFFSET: 0 >>>DR_INIT: 0 or 4 >>>DR_STEP: 16 >>> >>> We now always keep the step as 0 instead, so for an int "i" we'd have: >>> >>>DR_BASE_ADDRESS: p or q >>>DR_OFFSET: (intptr_t) i >>>DR_INIT: 0 or 4 >>>DR_STEP: 0 >>> >>> This is also what we'd like to have for the unsigned "i", but the >>> problem is that strip_constant_offset thinks that the "i + 1" in >>> "(intptr_t) (i + 1)" could wrap and so doesn't peel off the "+ 1". >>> The [i + 1] accesses therefore have a DR_OFFSET equal to the SSA >>> name that holds "(intptr_t) (i + 1)", meaning that the accesses no >>> longer seem to be related to the [i] ones. >> >> Didn't read the change in detail, so sorry if I mis-understood the issue. >> I made changes in scev to better fold type conversion by various sources >> of information, for example, vrp, niters, undefined overflow behavior >> etc. >> In theory these information should be available for other >> optimizers without >> querying scev. For the mentioned test, vrp should compute accurate range >> information for "i" so that we can fold (intptr_t) (i + 1) it without >> worrying >> overflow. Note we don't do it in generic folding because >> (intptr_t) (i) + 1 >> could be more expensive (especially in case of (T)(i + j)), or because >> the >> CST part is in bigger precision after conversion. >> But such folding is wanted in several places, e.g, IVOPTs. To provide >> such >> an interface, we changed tree-affine and made it do aggressive fold. I >> am >> curious if it's possible to use aff_tree to implement >> strip_constant_offset >> here since aggressive folding is wanted. After all, using additional >> chrec >> looks like a little heavy wrto the simple test. > > Yeah, using aff_tree does work here when the bounds are constant. > It doesn't look like it works for things like: > > double p[1000]; > double q[1000]; > > void > f4 (unsigned int n) > { > for (unsigned int i = 0; i < n; i += 4) > { > double a = q[i] + p[i]; > double b = q[i + 1] + p[i + 1]; > q[i] = a; > q[i + 1] = b; > } > } > > though, where the bounds on the global arrays guarantee that [i + 1] can't > overflow, even though "n" is unconstrained. The patch as posted handles > this case too. BTW is this a missed optimization in value range analysis? The range information for i should flow in a way like: array boundary -> niters -> scev/vrp. I think that's what niters/scev do in analysis. >>> >>> Yeah, maybe :-) It looks like the problem is that when SLP runs, >>> the previous VRP pass came before loop header copying, so the (single) >>> header has to cope with n == 0 case. Thus we get: >> Ah, there are several passes in between vrp and pass_ch, not sure if >> any such pass depends on vrp intensively. I would suggestion reorder >> the two passes, or standalone VRP interface updating information for >> loop region after header copied? This is a non-trivial issue that >> needs to be fixed. Niters analyzer rely on >> simplify_using_initial_conditions heavily to get the same information, >> which in my opinion should be provided by VRP. Though this won't be >> able to obsolete simplify_using_initial_conditions because VRP is weak >> in symbolic range... >> >>> >>> Visiting statement: >>> i_15 = ASSERT_EXPR ; >>> Intersecting >>> [0, n_9(D) + 4294967295] EQUIVALENCES: { i_6 } (1 elements) >>> and >>> [0, 0] >>> to >>> [0, 0] EQUIVALENCES: { i_6 } (1 elements) >>> Intersecting >>> [0, 0] EQUIVALENCES: { i_6 } (1 elements) >>> and >>> [0, 1000] >>> to >>> [0, 0] EQUIVALENCES: { i_6 } (1 elements) >>> Found new range for i_15: [0, 0] >>> >>> Visiting statement: >>> _3 = i_15 + 1; >>> Match-and-simplified i_15 + 1 to 1 >>> Intersecting >>> [1, 1] >>> and >>> [0, +INF] >>> to >>> [1, 1] >>> Found new range for _3: [1, 1] >>> >>> (where _3 is the index we care about), followed by: >>> >>> Visiting statement: >>> i_15 = ASSERT_EXPR ; >>> Intersectings/aarch64-linux/trunk-orig/debug/gcc' >>> [0, n_9(D) + 4294
Re: [1/2] PR 78736: New warning -Wenum-conversion
On 8 August 2017 at 09:51, Prathamesh Kulkarni wrote: > On 1 August 2017 at 00:10, Prathamesh Kulkarni > wrote: >> On 11 July 2017 at 17:59, Prathamesh Kulkarni >> wrote: >>> On 13 June 2017 at 01:47, Joseph Myers wrote: This is OK with one fix: > +C ObjC Var(warn_enum_conversion) Init(0) Warning LangEnabledBy(C > Objc,Wall) I believe the LangEnabledBy arguments are case-sensitive, so you need to have ObjC not Objc there for it to work correctly. (*.opt parsing isn't very good at detecting typos and giving errors rather than silently ignoring things.) >>> Hi, >>> Sorry for the late response, I was on a vacation. >>> The attached patch is rebased and bootstrap+tested on >>> x86_64-unknown-linux-gnu. >>> I have modified it slightly to not warn for enums with different names >>> but having same value ranges. >>> For eg: >>> enum e1 { e1_1, e1_2 }; >>> enum e2 { e2_1, e2_2 }; >>> >>> enum e1 x = e2_1; >>> With this version, there would be no warning for the above assignment >>> since both e1 and e2 have >>> same value ranges. Is that OK ? >>> >>> The patch has following fallouts in the testsuite: >>> >>> a) libgomp: >>> I initially assume it was a false positive because I thought enum >>> gomp_schedule_type >>> and enum omp_sched_t have same value-ranges but it looks like omp_sched_t >>> has range [1, 4] while gomp_schedule_type has range [0, 4] with one >>> extra element. >>> Is the warning then correct for this case ? >>> >>> b) libgfortran: >>> i) Implicit conversion from unit_mode to file_mode >>> ii) Implicit conversion from unit_sign_s to unit_sign. >>> I suppose the warning is OK for these cases since unit_mode, file_mode >>> have different value-ranges and similarly for unit_sign_s, unit_sign ? >>> >>> Also I tested the warning by compiling the kernel for x86_64 with >>> allmodconifg (attached), and there >>> have been quite few instances of the warning (attached). I have been >>> through few cases which I don't think are false positives >>> but I wonder then whether we should relegate the warning to Wextra instead ? >> ping: https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00514.html > ping * 2 https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00514.html ping * 3 https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00514.html Thanks, Prathamesh > > Thanks, > Prathamesh >> >> Thanks, >> Prathamesh >>> >>> Thanks, >>> Prathamesh -- Joseph S. Myers jos...@codesourcery.com
Re: [RFC] propagate malloc attribute in ipa-pure-const pass
On 8 August 2017 at 09:50, Prathamesh Kulkarni wrote: > On 31 July 2017 at 23:53, Prathamesh Kulkarni > wrote: >> On 23 May 2017 at 19:10, Prathamesh Kulkarni >> wrote: >>> On 19 May 2017 at 19:02, Jan Hubicka wrote: > > * LTO and memory management > This is a general question about LTO and memory management. > IIUC the following sequence takes place during normal LTO: > LGEN: generate_summary, write_summary > WPA: read_summary, execute ipa passes, write_opt_summary > > So I assumed it was OK in LGEN to allocate return_callees_map in > generate_summary and free it in write_summary and during WPA, allocate > return_callees_map in read_summary and free it after execute (since > write_opt_summary does not require return_callees_map). > > However with fat LTO, it seems the sequence changes for LGEN with > execute phase takes place after write_summary. However since > return_callees_map is freed in pure_const_write_summary and > propagate_malloc() accesses it in execute stage, it results in > segmentation fault. > > To work around this, I am using the following hack in > pure_const_write_summary: > // FIXME: Do not free if -ffat-lto-objects is enabled. > if (!global_options.x_flag_fat_lto_objects) > free_return_callees_map (); > Is there a better approach for handling this ? I think most passes just do not free summaries with -flto. We probably want to fix it to make it possible to compile multiple units i.e. from plugin by adding release_summaries method... So I would say it is OK to do the same as others do and leak it with -flto. > diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c > index e457166ea39..724c26e03f6 100644 > --- a/gcc/ipa-pure-const.c > +++ b/gcc/ipa-pure-const.c > @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see > #include "tree-scalar-evolution.h" > #include "intl.h" > #include "opts.h" > +#include "ssa.h" > > /* Lattice values for const and pure functions. Everything starts out > being const, then may drop to pure and then neither depending on > @@ -69,6 +70,15 @@ enum pure_const_state_e > > const char *pure_const_names[3] = {"const", "pure", "neither"}; > > +enum malloc_state_e > +{ > + PURE_CONST_MALLOC_TOP, > + PURE_CONST_MALLOC, > + PURE_CONST_MALLOC_BOTTOM > +}; It took me a while to work out what PURE_CONST means here :) I would just call it something like STATE_MALLOC_TOP... or so. ipa_pure_const is outdated name from the time pass was doing only those two. > @@ -109,6 +121,10 @@ typedef struct funct_state_d * funct_state; > > static vec funct_state_vec; > > +/* A map from node to subset of callees. The subset contains those > callees > + * whose return-value is returned by the node. */ > +static hash_map< cgraph_node *, vec* > > *return_callees_map; > + Hehe, a special case of return jump function. We ought to support those more generally. How do you keep it up to date over callgraph changes? > @@ -921,6 +1055,23 @@ end: >if (TREE_NOTHROW (decl)) > l->can_throw = false; > > + if (ipa) > +{ > + vec v = vNULL; > + l->malloc_state = PURE_CONST_MALLOC_BOTTOM; > + if (DECL_IS_MALLOC (decl)) > + l->malloc_state = PURE_CONST_MALLOC; > + else if (malloc_candidate_p (DECL_STRUCT_FUNCTION (decl), v)) > + { > + l->malloc_state = PURE_CONST_MALLOC_TOP; > + vec *callees_p = new vec (vNULL); > + for (unsigned i = 0; i < v.length (); ++i) > + callees_p->safe_push (v[i]); > + return_callees_map->put (fn, callees_p); > + } > + v.release (); > +} > + I would do non-ipa variant, too. I think most attributes can be detected that way as well. The patch generally makes sense to me. It would be nice to make it easier to write such a basic propagators across callgraph (perhaps adding a template doing the basic propagation logic). Also I think you need to solve the problem with keeping your summaries up to date across callgraph node removal and duplications. >>> Thanks for the suggestions, I will try to address them in a follow-up patch. >>> IIUC, I would need to modify ipa-pure-const cgraph hooks - >>> add_new_function, remove_node_data, duplicate_node_data >>> to keep return_callees_map up-to-date across callgraph node insertions >>> and removal ? >>> >>> Also, if instead of having a separate data-structure like >>> return_callees_map, >>> should we rather have a flag within cgraph_edge, which marks that the >>> caller may return the value of the callee ? >> Hi, >> Sorry for the very late response. I have attached an updated v
[PATCH] Fix ancient wrong-code with ?: (PR middle-end/81814)
This PR is about wrong-code and has gone undetected for over 10 years (!). The issue is that e.g. the following (signed char) x == 0 ? (unsigned long long) x : 0 was wrongly folded to 0, because fold_cond_expr_with_comparison will fold A != 0 ? A : 0 to 0. But for x = 0x0100 this is wrong: (signed char) is 0, but (unsigned long long) x is not. The culprit is operand_equal_for_comparison_p which contains shorten_compare-like code which says that the above is safe to fold. The code harks back to 1992 so I thought it worth to just get rid of it. But I did some measurements and it turns out that substituting operand_equal_p for operand_equal_for_comparison_p prevents folding ~6 times in bootstrap. So I feel uneasy about removing the function completely. Instead, I propose to remove just the part that is causing trouble. (Maybe I should also delete the first call to operand_equal_p in operand_equal_for_comparison_p.) Bootstrapped/regtested on x86_64-linux, ok for trunk? What about 7? 2017-08-17 Marek Polacek PR middle-end/81814 * fold-const.c (operand_equal_for_comparison_p): Remove code that used to mimic what shorten_compare did. Change the return type to bool. (fold_cond_expr_with_comparison): Update call to operand_equal_for_comparison_p. (fold_ternary_loc): Likewise. * gcc.dg/torture/pr81814.c: New test. diff --git gcc/fold-const.c gcc/fold-const.c index 0a5b168c320..fef9b1a707a 100644 --- gcc/fold-const.c +++ gcc/fold-const.c @@ -113,7 +113,6 @@ static tree negate_expr (tree); static tree associate_trees (location_t, tree, tree, enum tree_code, tree); static enum comparison_code comparison_to_compcode (enum tree_code); static enum tree_code compcode_to_comparison (enum comparison_code); -static int operand_equal_for_comparison_p (tree, tree, tree); static int twoval_comparison_p (tree, tree *, tree *, int *); static tree eval_subst (location_t, tree, tree, tree, tree, tree); static tree optimize_bit_field_compare (location_t, enum tree_code, @@ -3365,60 +3364,27 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags) #undef OP_SAME_WITH_NULL } -/* Similar to operand_equal_p, but see if ARG0 might have been made by - shorten_compare from ARG1 when ARG1 was being compared with OTHER. +/* Similar to operand_equal_p, but strip nops first. */ - When in doubt, return 0. */ - -static int -operand_equal_for_comparison_p (tree arg0, tree arg1, tree other) +static bool +operand_equal_for_comparison_p (tree arg0, tree arg1) { - int unsignedp1, unsignedpo; - tree primarg0, primarg1, primother; - unsigned int correct_width; - if (operand_equal_p (arg0, arg1, 0)) -return 1; +return true; if (! INTEGRAL_TYPE_P (TREE_TYPE (arg0)) || ! INTEGRAL_TYPE_P (TREE_TYPE (arg1))) -return 0; +return false; /* Discard any conversions that don't change the modes of ARG0 and ARG1 and see if the inner values are the same. This removes any signedness comparison, which doesn't matter here. */ - primarg0 = arg0, primarg1 = arg1; - STRIP_NOPS (primarg0); - STRIP_NOPS (primarg1); - if (operand_equal_p (primarg0, primarg1, 0)) -return 1; - - /* Duplicate what shorten_compare does to ARG1 and see if that gives the - actual comparison operand, ARG0. - - First throw away any conversions to wider types - already present in the operands. */ - - primarg1 = get_narrower (arg1, &unsignedp1); - primother = get_narrower (other, &unsignedpo); - - correct_width = TYPE_PRECISION (TREE_TYPE (arg1)); - if (unsignedp1 == unsignedpo - && TYPE_PRECISION (TREE_TYPE (primarg1)) < correct_width - && TYPE_PRECISION (TREE_TYPE (primother)) < correct_width) -{ - tree type = TREE_TYPE (arg0); - - /* Make sure shorter operand is extended the right way -to match the longer operand. */ - primarg1 = fold_convert (signed_or_unsigned_type_for - (unsignedp1, TREE_TYPE (primarg1)), primarg1); - - if (operand_equal_p (arg0, fold_convert (type, primarg1), 0)) - return 1; -} + STRIP_NOPS (arg0); + STRIP_NOPS (arg1); + if (operand_equal_p (arg0, arg1, 0)) +return true; - return 0; + return false; } /* See if ARG is an expression that is either a comparison or is performing @@ -5300,7 +5266,7 @@ fold_cond_expr_with_comparison (location_t loc, tree type, expressions will be false, so all four give B. The min() and max() versions would give a NaN instead. */ if (!HONOR_SIGNED_ZEROS (element_mode (type)) - && operand_equal_for_comparison_p (arg01, arg2, arg00) + && operand_equal_for_comparison_p (arg01, arg2) /* Avoid these transformations if the COND_EXPR may be used as an lvalue in the C++ front-end. PR c++/19199. */ && (in_gimple_form @@ -11357,8 +11323,7 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
PING: [PATCH] Add -static-pie to GCC driver to create static PIE
On Wed, Aug 9, 2017 at 3:39 AM, H.J. Lu wrote: > On Tue, Aug 8, 2017 at 10:36 PM, Richard Biener > wrote: >> On August 9, 2017 12:18:41 AM GMT+02:00, "H.J. Lu" >> wrote: >>>This patch adds -static-pie to GCC driver to create static PIE. A >>>static >>>position independent executable (PIE) is similar to static executable, >>>but can be loaded at any address without a dynamic linker. All linker >>>input files must be compiled with -fpie or -fPIE and linker must >>>support >>>--no-dynamic-linker to avoid linking with dynamic linker. "-z text" is >>>also needed to prevent dynamic relocations in read-only segments. >>> >>>OK for trunk? >> >> What's wrong with -static -pie? >> > > -static -pie behaved differently depending on if --enable-default-pie > was used: > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81523 > > I just checked in a patch to make -static always override -pie. > Hi Joseph, Can you take a look at https://gcc.gnu.org/ml/gcc-patches/2017-08/msg00638.html Thanks. -- H.J.
[C++ PATCH] move template_info field
With yesterday's patch to move TYPE_BINFO onto TYPE_MAX_VALUES_RAW, I freed up TYPE_LANG_SLOT_1 for class types. This patch moves template_info from lang_type to that field for both RECORD_TYPESs and BOUND_TEMPLATE_TEMPLATE_PARM_TYPEs. now, you'll notice that TYPE_LANG_SLOT_1 was where ENUMERATION_TYPEs held their TEMPLATE_INFO already, so we now template_info in the same place. That will permit 1) some simplification of TYPE_TEMPLATE_INFO accessor. 2) (I suspect) BOUND_TEMPLATE_TEMPLATE_PARM_TYPEs no longer need TYPE_LANG_SPECIFIC 3) (I suspect) several checks of the form (TYPE_LANG_SPECIFIC (x) && TYPE_TEMPLATE_INFO (x)) can drop the T_L_S check. applied to trunk nathan -- Nathan Sidwell 2017-08-17 Nathan Sidwell * cp-tree.h (struct lang_type): Remove template_info field. (CLASSTYPE_TEMPLATE_INFO): Use TYPE_LANG_SLOT_1. (TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO): Likewise. Index: cp-tree.h === --- cp-tree.h (revision 251144) +++ cp-tree.h (working copy) @@ -471,9 +471,12 @@ extern GTY(()) tree cp_global_trees[CPTI At present, only the six low-order bits are used. TYPE_LANG_SLOT_1 + For a FUNCTION_TYPE or METHOD_TYPE, this is TYPE_RAISES_EXCEPTIONS. + For a POINTER_TYPE (to a METHOD_TYPE), this is TYPE_PTRMEMFUNC_TYPE. For an ENUMERAL_TYPE, this is ENUM_TEMPLATE_INFO. - For a FUNCTION_TYPE or METHOD_TYPE, this is TYPE_RAISES_EXCEPTIONS - For a POINTER_TYPE (to a METHOD_TYPE), this is TYPE_PTRMEMFUNC_TYPE + For a RECORD_TYPE or UNION_TYPE this is CLASSTYPE_TEMPLATE_INFO, + For a BOUND_TEMPLATE_TEMPLATE_PARM_TYPE this is also + TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO. BINFO_VIRTUALS For a binfo, this is a TREE_LIST. There is an entry for each @@ -2001,7 +2004,6 @@ struct GTY(()) lang_type { vec * GTY((reorder ("resort_type_method_vec"))) methods; tree key_method; tree decl_list; - tree template_info; tree befriending_classes; /* In a RECORD_TYPE, information specific to Objective-C++, such as a list of adopted protocols or a pointer to a corresponding @@ -3276,7 +3278,7 @@ extern void decl_shadowed_for_var_insert /* Template information for a RECORD_TYPE or UNION_TYPE. */ #define CLASSTYPE_TEMPLATE_INFO(NODE) \ - (LANG_TYPE_CLASS_CHECK (RECORD_OR_UNION_CHECK (NODE))->template_info) + (TYPE_LANG_SLOT_1 (RECORD_OR_UNION_CHECK (NODE))) /* Template information for an ENUMERAL_TYPE. Although an enumeration may not be a primary template, it may be declared within the scope of a @@ -3287,8 +3289,7 @@ extern void decl_shadowed_for_var_insert /* Template information for a template template parameter. */ #define TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO(NODE) \ - (LANG_TYPE_CLASS_CHECK (BOUND_TEMPLATE_TEMPLATE_PARM_TYPE_CHECK (NODE)) \ - ->template_info) + (TYPE_LANG_SLOT_1 (BOUND_TEMPLATE_TEMPLATE_PARM_TYPE_CHECK (NODE))) /* Template information for an ENUMERAL_, RECORD_, UNION_TYPE, or BOUND_TEMPLATE_TEMPLATE_PARM type. This ignores any alias
Re: [AArch64, PATCH] Improve Neon store of zero
On 16/08/17 16:19, Jackson Woodruff wrote: > Hi Richard, > > I have changed the condition as you suggest below. OK for trunk? > > Jackson. > I renamed the testcase to vect_str_zero.c, as that seems to more closely match the naming style, and checked this in. Thanks for the patch. R. > On 08/11/2017 02:56 PM, Richard Earnshaw (lists) wrote: > >> On 10/08/17 14:12, Jackson Woodruff wrote: >>> Hi all, >>> >>> This patch changes patterns in aarch64-simd.md to replace >>> >>> moviv0.4s, 0 >>> strq0, [x0, 16] >>> >>> With: >>> >>> stp xzr, xzr, [x0, 16] >>> >>> When we are storing zeros to vectors like this: >>> >>> void f(uint32x4_t *p) { >>>uint32x4_t x = { 0, 0, 0, 0}; >>>p[1] = x; >>> } >>> >>> Bootstrapped and regtested on aarch64 with no regressions. >>> OK for trunk? >>> >>> Jackson >>> >>> gcc/ >>> >>> 2017-08-09 Jackson Woodruff >>> >>> * aarch64-simd.md (mov): No longer force zero >>> immediate into register. >>> (*aarch64_simd_mov): Add new case for stp >>> using zero immediate. >>> >>> >>> gcc/testsuite >>> >>> 2017-08-09 Jackson Woodruff >>> >>> * gcc.target/aarch64/simd/neon_str_zero.c: New. >>> >>> >>> patchfile >>> >>> >>> diff --git a/gcc/config/aarch64/aarch64-simd.md >>> b/gcc/config/aarch64/aarch64-simd.md >>> index >>> 74de9b8c89dd5e4e3d87504594c969de0e0128ce..0149a742d34ae4fd5b3fd705b03c845f94aa1d59 >>> 100644 >>> --- a/gcc/config/aarch64/aarch64-simd.md >>> +++ b/gcc/config/aarch64/aarch64-simd.md >>> @@ -23,7 +23,10 @@ >>> (match_operand:VALL_F16 1 "general_operand" ""))] >>> "TARGET_SIMD" >>> " >>> -if (GET_CODE (operands[0]) == MEM) >>> +if (GET_CODE (operands[0]) == MEM >>> +&& !(aarch64_simd_imm_zero (operands[1], mode) >>> + && aarch64_legitimate_address_p (mode, operands[0], >>> + PARALLEL, 1))) >>> operands[1] = force_reg (mode, operands[1]); >>> " >>> ) >>> @@ -94,63 +97,70 @@ >>> (define_insn "*aarch64_simd_mov" >>> [(set (match_operand:VD 0 "nonimmediate_operand" >>> -"=w, m, w, ?r, ?w, ?r, w") >>> +"=w, m, m, w, ?r, ?w, ?r, w") >>> (match_operand:VD 1 "general_operand" >>> -"m, w, w, w, r, r, Dn"))] >>> +"m, Dz, w, w, w, r, r, Dn"))] >>> "TARGET_SIMD >>> - && (register_operand (operands[0], mode) >>> - || register_operand (operands[1], mode))" >>> + && ((register_operand (operands[0], mode) >>> + || register_operand (operands[1], mode)) >>> + || (memory_operand (operands[0], mode) >>> + && immediate_operand (operands[1], mode)))" >> Allowing any immediate here seems too lax - it allows any immediate >> value which then could cause reload operations to be inserted (that in >> turn might cause register pressure calculations to be incorrect). >> Wouldn't it be better to use something like aarch64_simd_reg_or_zero? >> Similarly below. >> >> R. >> >>> { >>> switch (which_alternative) >>>{ >>>case 0: return "ldr\\t%d0, %1"; >>> - case 1: return "str\\t%d1, %0"; >>> - case 2: return "mov\t%0., %1."; >>> - case 3: return "umov\t%0, %1.d[0]"; >>> - case 4: return "fmov\t%d0, %1"; >>> - case 5: return "mov\t%0, %1"; >>> - case 6: >>> + case 1: return "str\\txzr, %0"; >>> + case 2: return "str\\t%d1, %0"; >>> + case 3: return "mov\t%0., %1."; >>> + case 4: return "umov\t%0, %1.d[0]"; >>> + case 5: return "fmov\t%d0, %1"; >>> + case 6: return "mov\t%0, %1"; >>> + case 7: >>> return aarch64_output_simd_mov_immediate (operands[1], >>> mode, 64); >>>default: gcc_unreachable (); >>>} >>> } >>> - [(set_attr "type" "neon_load1_1reg, neon_store1_1reg,\ >>> + [(set_attr "type" "neon_load1_1reg, neon_stp, >>> neon_store1_1reg,\ >>>neon_logic, neon_to_gp, f_mcr,\ >>>mov_reg, neon_move")] >>> ) >>> (define_insn "*aarch64_simd_mov" >>> [(set (match_operand:VQ 0 "nonimmediate_operand" >>> -"=w, m, w, ?r, ?w, ?r, w") >>> +"=w, Ump, m, w, ?r, ?w, ?r, w") >>> (match_operand:VQ 1 "general_operand" >>> -"m, w, w, w, r, r, Dn"))] >>> +"m, Dz, w, w, w, r, r, Dn"))] >>> "TARGET_SIMD >>> - && (register_operand (operands[0], mode) >>> - || register_operand (operands[1], mode))" >>> + && ((register_operand (operands[0], mode) >>> +|| register_operand (operands[1], mode)) >>> + || (memory_operand (operands[0], mode) >>> + && immediate_operand (operands[1], mode)))" >>> { >>> switch (which_alternative) >>> { >>> case 0: >>> return "ldr\\t%q0, %1"; >>> case 1: >>> -return "str\\t%q1, %0"; >>> +return "stp\\txzr, xzr, %0"; >>> case 2: >>> -return "mov\t%0., %1."; >>> +return "str\\t%q1, %0"; >>> case 3: >>> +return "mov\t%0., %1."; >>> case 4: >>>
[PATCH] Fix PR81878
Currently non-bootstrap of --enable-languages=ada fails, the following fixes this. Bootstrapped and non-bootstrapped on x86_64-unknown-linux-gnu. Approved by Eric in the PR, applied. Richard. 2017-08-17 Richard Biener PR ada/81878 * Makefile.in (CXX_LFLAGS): Remove. (TOOLS_FLAGS_TO_PASS_NATIVE): Pass $(CXX) as CXX. (TOOLS_FLAGS_TO_PASS_RE): Likewise. Index: gnattools/Makefile.in === --- gnattools/Makefile.in (revision 251140) +++ gnattools/Makefile.in (working copy) @@ -69,16 +69,10 @@ INCLUDES_FOR_SUBDIR = -iquote . -iquote -iquote $(fsrcdir) -I$(ftop_srcdir)/include ADA_INCLUDES_FOR_SUBDIR = -I. -I$(fsrcdir)/ada -CXX_LFLAGS = \ - -B../../../$(target_noncanonical)/libstdc++-v3/src/.libs \ - -B../../../$(target_noncanonical)/libstdc++-v3/libsupc++/.libs \ - -L../../../$(target_noncanonical)/libstdc++-v3/src/.libs \ - -L../../../$(target_noncanonical)/libstdc++-v3/libsupc++/.libs - # Variables for gnattools, native TOOLS_FLAGS_TO_PASS_NATIVE= \ "CC=../../xgcc -B../../" \ - "CXX=../../xg++ -B../../ $(CXX_LFLAGS)" \ + "CXX=$(CXX)" \ "CFLAGS=$(CFLAGS) $(WARN_CFLAGS)" \ "LDFLAGS=$(LDFLAGS)" \ "ADAFLAGS=$(ADAFLAGS)" \ @@ -96,7 +90,7 @@ TOOLS_FLAGS_TO_PASS_NATIVE= \ # Variables for regnattools TOOLS_FLAGS_TO_PASS_RE= \ "CC=../../xgcc -B../../" \ - "CXX=../../xg++ -B../../ $(CXX_LFLAGS)" \ + "CXX=$(CXX)" \ "CFLAGS=$(CFLAGS)" \ "LDFLAGS=$(LDFLAGS)" \ "ADAFLAGS=$(ADAFLAGS)" \
[PATCH] Fix build of --enable-gather-detailed-mem-stats (PR bootstrap/81864).
Hello. As described in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81864#c5 we should not introduce a new static hash_table variable in order to be sure mem_alloc_descriptors are initialized earlier. Patch can bootstrap on ppc64le-redhat-linux and survives regression tests. Ready to be installed? Martin gcc/ChangeLog: 2017-08-17 Martin Liska PR bootstrap/81864 * tree-loop-distribution.c (ddrs_table): Change type to pointer type. (get_data_dependence): Use it as pointer type. (distribute_loop): Likewise. --- gcc/tree-loop-distribution.c | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index b1b293419b0..26b8b9a3751 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -157,8 +157,7 @@ static vec datarefs_vec; #define DR_INDEX(dr) ((uintptr_t) (dr)->aux) /* Hash table for data dependence relation in the loop to be distributed. */ -static hash_table ddrs_table (389); - +static hash_table *ddrs_table; /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ struct rdg_vertex @@ -1183,7 +1182,7 @@ get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b) <= rdg_vertex_for_stmt (rdg, DR_STMT (b))); ent.a = a; ent.b = b; - slot = ddrs_table.find_slot (&ent, INSERT); + slot = ddrs_table->find_slot (&ent, INSERT); if (*slot == NULL) { ddr = initialize_data_dependence_relation (a, b, loop_nest); @@ -2366,6 +2365,7 @@ static int distribute_loop (struct loop *loop, vec stmts, control_dependences *cd, int *nb_calls, bool *destroy_p) { + ddrs_table = new hash_table (389); struct graph *rdg; partition *partition; bool any_builtin; @@ -2377,6 +2377,7 @@ distribute_loop (struct loop *loop, vec stmts, if (!find_loop_nest (loop, &loop_nest)) { loop_nest.release (); + delete ddrs_table; return 0; } @@ -2391,6 +2392,7 @@ distribute_loop (struct loop *loop, vec stmts, loop_nest.release (); free_data_refs (datarefs_vec); + delete ddrs_table; return 0; } @@ -2404,6 +2406,7 @@ distribute_loop (struct loop *loop, vec stmts, free_rdg (rdg); loop_nest.release (); free_data_refs (datarefs_vec); + delete ddrs_table; return 0; } @@ -2542,13 +2545,13 @@ distribute_loop (struct loop *loop, vec stmts, ldist_done: loop_nest.release (); free_data_refs (datarefs_vec); - for (hash_table::iterator iter = ddrs_table.begin (); - iter != ddrs_table.end (); ++iter) + for (hash_table::iterator iter = ddrs_table->begin (); + iter != ddrs_table->end (); ++iter) { free_dependence_relation (*iter); *iter = NULL; } - ddrs_table.empty (); + delete ddrs_table; FOR_EACH_VEC_ELT (partitions, i, partition) partition_free (partition);
Re: RFC: [PATCH] Add warn_if_not_aligned attribute
On Sat, 8 Jul 2017, H.J. Lu wrote: > +@item -Wpacked-not-aligned @r{(C, C++, Objective-C and Objective-C++ only)} > +@opindex Wpacked-not-aligned > +@opindex Wno-packed-not-aligned > +Warn if a structure field with explicitly specified alignment in a > +packed struct or union is misaligned. For example, a warning will > +be issued on @code{struct S}, like, @code{warning: alignment 1 of > +'struct S' is less than 8}, in this code: Use @samp for warnings quoted in the manual, as previously discussed. OK with that change, in the absence of C++ maintainer objections within 48 hours. -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH v2] Simplify pow with constant
This patch simplifies pow (C, x) into exp (x * C1) if C > 0, C1 = log (C). Do this only for fast-math as accuracy is reduced. This is much faster since pow is more complex than exp - with current GLIBC the speedup is more than 7 times for this transformation. The worst-case ULP error of the transformation for powf (10.0, x) in SPEC was 2.5. If we allow use of exp10 in match.pd, the ULP error would be lower. OK for commit? ChangeLog: 2017-08-17 Wilco Dijkstra * match.pd: Add pow (C, x) simplification. -- diff --git a/gcc/match.pd b/gcc/match.pd index 0e36f46b914bc63c257cef47152ab1aa507963e5..a614917c8c9f24bba20c521e9b5f558be4886813 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3622,6 +3622,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (logs (pows @0 @1)) (mult @1 (logs @0 + /* pow(C,x) -> exp(log(C)*x) if C > 0. */ + (for pows (POW) + exps (EXP) + logs (LOG) + (simplify + (pows REAL_CST@0 @1) +(if (real_compare (GT_EXPR, TREE_REAL_CST_PTR (@0), &dconst0)) + (exps (mult (logs @0) @1) + (for sqrts (SQRT) cbrts (CBRT) pows (POW)
Re: C PATCH to remove unused block of code
On Thu, 17 Aug 2017, Marek Polacek wrote: > I've been itching to remove this code for some time now. The comment suggests > that the code is actually unused, so I replaced the body of that "else if" > with > gcc_unreachable (); and ran regtest/bootstrap and nothing broke, so I propose > to do away with it. > > Bootstrapped/regtested on x86_64-linux, ok for trunk? OK, with an appropriate change to the comment above the c_parser_postfix_expression function to say that compound literals are not handled here and callers have to call c_parser_postfix_expression_after_paren_type on encountering them. (I've reviewed all paths to c_parser_postfix_expression in the current parser and don't think any of them can send compound literals to it, because either they are parsing a specific more restricted syntax incompatible with compond literals (OMP cases), are parsing an expression that might be a cast expression so need to parse the (type) first to distinguish, or are parsing a context such as sizeof where a parenthesized type name is allowed as well as a postfix expression and again need to parse the (type) first to distinguish.) -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH] Fix ancient wrong-code with ?: (PR middle-end/81814)
On Thu, 17 Aug 2017, Marek Polacek wrote: > This PR is about wrong-code and has gone undetected for over 10 years (!). > The issue is that e.g. the following > > (signed char) x == 0 ? (unsigned long long) x : 0 > > was wrongly folded to 0, because fold_cond_expr_with_comparison will fold > A != 0 ? A : 0 to 0. But for x = 0x0100 this is wrong: (signed char) is > 0, > but (unsigned long long) x is not. The culprit is > operand_equal_for_comparison_p > which contains shorten_compare-like code which says that the above is safe to > fold. The code harks back to 1992 so I thought it worth to just get rid of > it. > > But I did some measurements and it turns out that substituting operand_equal_p > for operand_equal_for_comparison_p prevents folding ~6 times in bootstrap. > So I feel uneasy about removing the function completely. Instead, I propose to > remove just the part that is causing trouble. (Maybe I should also delete the > first call to operand_equal_p in operand_equal_for_comparison_p.) > > Bootstrapped/regtested on x86_64-linux, ok for trunk? What about 7? Ok for trunk. Do you have numbers for this patch variant as well? It seems that with some refactoring the remaining transforms should be easily expressible as match.pd patterns now. Richard. > 2017-08-17 Marek Polacek > > PR middle-end/81814 > * fold-const.c (operand_equal_for_comparison_p): Remove code that used > to mimic what shorten_compare did. Change the return type to bool. > (fold_cond_expr_with_comparison): Update call to > operand_equal_for_comparison_p. > (fold_ternary_loc): Likewise. > > * gcc.dg/torture/pr81814.c: New test. > > diff --git gcc/fold-const.c gcc/fold-const.c > index 0a5b168c320..fef9b1a707a 100644 > --- gcc/fold-const.c > +++ gcc/fold-const.c > @@ -113,7 +113,6 @@ static tree negate_expr (tree); > static tree associate_trees (location_t, tree, tree, enum tree_code, tree); > static enum comparison_code comparison_to_compcode (enum tree_code); > static enum tree_code compcode_to_comparison (enum comparison_code); > -static int operand_equal_for_comparison_p (tree, tree, tree); > static int twoval_comparison_p (tree, tree *, tree *, int *); > static tree eval_subst (location_t, tree, tree, tree, tree, tree); > static tree optimize_bit_field_compare (location_t, enum tree_code, > @@ -3365,60 +3364,27 @@ operand_equal_p (const_tree arg0, const_tree arg1, > unsigned int flags) > #undef OP_SAME_WITH_NULL > } > > -/* Similar to operand_equal_p, but see if ARG0 might have been made by > - shorten_compare from ARG1 when ARG1 was being compared with OTHER. > +/* Similar to operand_equal_p, but strip nops first. */ > > - When in doubt, return 0. */ > - > -static int > -operand_equal_for_comparison_p (tree arg0, tree arg1, tree other) > +static bool > +operand_equal_for_comparison_p (tree arg0, tree arg1) > { > - int unsignedp1, unsignedpo; > - tree primarg0, primarg1, primother; > - unsigned int correct_width; > - >if (operand_equal_p (arg0, arg1, 0)) > -return 1; > +return true; > >if (! INTEGRAL_TYPE_P (TREE_TYPE (arg0)) >|| ! INTEGRAL_TYPE_P (TREE_TYPE (arg1))) > -return 0; > +return false; > >/* Discard any conversions that don't change the modes of ARG0 and ARG1 > and see if the inner values are the same. This removes any > signedness comparison, which doesn't matter here. */ > - primarg0 = arg0, primarg1 = arg1; > - STRIP_NOPS (primarg0); > - STRIP_NOPS (primarg1); > - if (operand_equal_p (primarg0, primarg1, 0)) > -return 1; > - > - /* Duplicate what shorten_compare does to ARG1 and see if that gives the > - actual comparison operand, ARG0. > - > - First throw away any conversions to wider types > - already present in the operands. */ > - > - primarg1 = get_narrower (arg1, &unsignedp1); > - primother = get_narrower (other, &unsignedpo); > - > - correct_width = TYPE_PRECISION (TREE_TYPE (arg1)); > - if (unsignedp1 == unsignedpo > - && TYPE_PRECISION (TREE_TYPE (primarg1)) < correct_width > - && TYPE_PRECISION (TREE_TYPE (primother)) < correct_width) > -{ > - tree type = TREE_TYPE (arg0); > - > - /* Make sure shorter operand is extended the right way > - to match the longer operand. */ > - primarg1 = fold_convert (signed_or_unsigned_type_for > -(unsignedp1, TREE_TYPE (primarg1)), primarg1); > - > - if (operand_equal_p (arg0, fold_convert (type, primarg1), 0)) > - return 1; > -} > + STRIP_NOPS (arg0); > + STRIP_NOPS (arg1); > + if (operand_equal_p (arg0, arg1, 0)) > +return true; > > - return 0; > + return false; > } > > /* See if ARG is an expression that is either a comparison or is performing > @@ -5300,7 +5266,7 @@ fold_cond_expr_with_comparison (location_t loc, tree > type, > expressions will be false, so all four give B. The min() > and max()
[PATCH, rs6000] Add testcase coverage for vec_sums built-in
Hi, [Patch, rs6000] Add testcase coverage for vec_sums built-in Add some testcase coverage for the vec_sums() built-in. Tested across power platforms (p6 and newer). OK for trunk? Thanks, -Will [gcc/testsuite] 2017-08-17 Will Schmidt * gcc.target/powerpc/fold-vec-sums-int.c: New. diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-sums-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-sums-int.c new file mode 100644 index 000..6ef5f3b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-sums-int.c @@ -0,0 +1,16 @@ +/* Verify that overloaded built-ins for vec_sums with int + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include + +vector signed int +test_vec_sums (vector signed int vsi2, vector signed int vsi3) +{ + return vec_sums (vsi2, vsi3); +} + +/* { dg-final { scan-assembler-times "vsumsws" 1 } } */
[PATCH, rs6000] testcase coverage for vec_perm built-ins
Hi, [PATCH, rs6000] testcase coverage for vec_perm built-ins Add testcase coverage for the vec_ld intrinsic builtins. Tested across power platforms (p6 and newer). OK for trunk? Thanks, -Will [gcc/testsuite] 2017-08-17 Will Schmidt * gcc.target/powerpc/fold-vec-ld-char.c: New. * gcc.target/powerpc/fold-vec-ld-double.c: New. * gcc.target/powerpc/fold-vec-ld-float.c: New. * gcc.target/powerpc/fold-vec-ld-int.c: New. * gcc.target/powerpc/fold-vec-ld-longlong.c: New. * gcc.target/powerpc/fold-vec-ld-short.c: New. diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-char.c new file mode 100644 index 000..1192b75 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-char.c @@ -0,0 +1,44 @@ +/* Verify that overloaded built-ins for vec_ld* with char + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +/* The below contains vec_ld() calls with both variable and constant values. + * Only calls having constant values will be early-gimple folded. */ + +#include + +vector signed char +testld_sc_vsc (long long ll1, vector signed char vsc2) +{ + return vec_ld (ll1, &vsc2); +} + +vector signed char +testld_sc_sc (long long ll1, signed char sc) +{ + return vec_ld (ll1, &sc); +} + +vector unsigned char +testld_uc_vuc (long long ll1, vector unsigned char vuc2) +{ + return vec_ld (ll1, &vuc2); +} + +vector unsigned char +testld_uc_uc (long long ll1, unsigned char uc) +{ + return vec_ld (ll1, &uc); +} + +vector bool char +testld_bc_vbc (long long ll1, vector bool char vbc2) +{ + return vec_ld (ll1, &vbc2); +} + +/* { dg-final { scan-assembler-times {\mlvx\M} 5 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-double.c new file mode 100644 index 000..4fcdc89 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-double.c @@ -0,0 +1,16 @@ +/* Verify that overloaded built-ins for vec_ld with + double inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-mvsx -O2" } */ + +#include + +vector double +testld_ll_vd (long long ll1, vector double vd) +{ + return vec_ld (ll1, &vd); +} + +/* { dg-final { scan-assembler-times {\mlvx\M} 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-float.c new file mode 100644 index 000..91822cc --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-float.c @@ -0,0 +1,23 @@ +/* Verify that overloaded built-ins for vec_ld with float + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include + +vector float +testld_ll_vf (long long ll1, vector float vf2) +{ + return vec_ld (ll1, &vf2); +} + +vector float +testld_ll_f (long long ll1, float f2) +{ + return vec_ld (ll1, &f2); +} + +/* { dg-final { scan-assembler-times {\mlvx\M} 2 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-int.c new file mode 100644 index 000..adadb50 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-int.c @@ -0,0 +1,41 @@ +/* Verify that overloaded built-ins for vec_ld* with int + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include + +vector signed int +testld_vsi_vsi (long long ll1, vector signed int vsi2) +{ + return vec_ld (ll1, &vsi2); +} + +vector signed int +testld_vsi_si (long long ll1, signed int si) +{ + return vec_ld (ll1, &si); +} + +vector unsigned int +testld_vui_vui (long long ll1, vector unsigned int vui2) +{ + return vec_ld (ll1, &vui2); +} + +vector unsigned int +testld_vui_ui (long long ll1, unsigned int ui) +{ + return vec_ld (ll1, &ui); +} + +vector bool int +testld_vbi_vbi (long long ll1, vector bool int vbi2) +{ + return vec_ld (ll1, &vbi2); +} + +/* { dg-final { scan-assembler-times {\mlvx\M} 5 } } */ + diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-longlong.c new file mode 100644 index 000..b59bfb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-ld-longlong.c @@ -0,0 +1,28 @@ +/* Verify that overloaded built-ins for vec_ld* with long long + inputs produce the right results. */ + +/* { dg-do compile { target lp64 } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mvsx -mpower8-vector -O2" } */ + +#include + +vector signed long long +testld_vsl_vsl (long long ll1, vector signed long vsl2) +{ + return vec_ld (ll1, &vsl2); +} + +vector unsigned long l
[PATCH, rs6000] testcase coverage for vec_perm built-ins
Hi, [Patch, rs6000] testcase coverage for vec_perm built-ins Add some Testcase coverage for the vector permute intrinsics. Tested across power platforms. OK for trunk? Thanks, -Will [gcc/testsuite] 2017-08-17 Will Schmidt * gcc.target/powerpc/fold-vec-perm-char.c: New. * gcc.target/powerpc/fold-vec-perm-double.c: New. * gcc.target/powerpc/fold-vec-perm-float.c: New. * gcc.target/powerpc/fold-vec-perm-int.c: New. * gcc.target/powerpc/fold-vec-perm-longlong.c: New. * gcc.target/powerpc/fold-vec-perm-pixel.c: New. * gcc.target/powerpc/fold-vec-perm-short.c: New. diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-char.c new file mode 100644 index 000..6ea5c2e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-char.c @@ -0,0 +1,31 @@ +/* Verify that overloaded built-ins for vec_perm with char + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include + +vector bool char +testbc (vector bool char vbc2, vector bool char vbc3, + vector unsigned char vuc) +{ + return vec_perm (vbc2, vbc3, vuc); +} + +vector signed char +testsc (vector signed char vsc2, vector signed char vsc3, + vector unsigned char vuc) +{ + return vec_perm (vsc2, vsc3, vuc); +} + +vector unsigned char +testuc (vector unsigned char vuc2, vector unsigned char vuc3, + vector unsigned char vuc) +{ + return vec_perm (vuc2, vuc3, vuc); +} + +/* { dg-final { scan-assembler-times "vperm" 3 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-double.c new file mode 100644 index 000..1d6f811 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-double.c @@ -0,0 +1,16 @@ +/* Verify that overloaded built-ins for vec_perm with + double inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-mvsx -O2" } */ + +#include + +vector double +testd (vector double vd2, vector double vd3, vector unsigned char vuc) +{ + return vec_perm (vd2, vd3, vuc); +} + +/* { dg-final { scan-assembler-times "vperm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-float.c new file mode 100644 index 000..05b555c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-float.c @@ -0,0 +1,16 @@ +/* Verify that overloaded built-ins for vec_perm with float + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include + +vector float +testf (vector float vf2, vector float vf3, vector unsigned char vuc) +{ + return vec_perm (vf2, vf3, vuc); +} + +/* { dg-final { scan-assembler-times "vperm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-int.c new file mode 100644 index 000..b72bf8f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-int.c @@ -0,0 +1,31 @@ +/* Verify that overloaded built-ins for vec_perm with int + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-maltivec -O2" } */ + +#include + +vector bool int +testbi (vector bool int vbi2, vector bool int vbi3, + vector unsigned char vuc) +{ + return vec_perm (vbi2, vbi3, vuc); +} + +vector signed int +testsi (vector signed int vsi2, vector signed int vsi3, + vector unsigned char vuc) +{ + return vec_perm (vsi2, vsi3, vuc); +} + +vector unsigned int +testui (vector unsigned int vui2, vector unsigned int vui3, + vector unsigned char vuc) +{ + return vec_perm (vui2, vui3, vuc); +} + +/* { dg-final { scan-assembler-times "vperm" 3 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-longlong.c new file mode 100644 index 000..cc191b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-perm-longlong.c @@ -0,0 +1,31 @@ +/* Verify that overloaded built-ins for vec_perm with long long + inputs produce the right results. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mvsx -O2" } */ + +#include + +vector bool long long +testbl (vector bool long long vbl2, vector bool long long vbl3, + vector unsigned char vuc) +{ + return vec_perm (vbl2, vbl3, vuc); +} + +vector signed long long +testsl (vector signed long vsl2, vector signed long vsl3, + vector unsigned char vuc) +{ + return vec_perm (vsl2, vsl3, vuc); +} + +vector unsigned long long +testul (vector unsigned long long vul2, vector unsigned long long vul3, +
Re: [PATCH] Fix ancient wrong-code with ?: (PR middle-end/81814)
On Thu, Aug 17, 2017 at 04:17:54PM +0200, Richard Biener wrote: > On Thu, 17 Aug 2017, Marek Polacek wrote: > > > This PR is about wrong-code and has gone undetected for over 10 years (!). > > The issue is that e.g. the following > > > > (signed char) x == 0 ? (unsigned long long) x : 0 > > > > was wrongly folded to 0, because fold_cond_expr_with_comparison will fold > > A != 0 ? A : 0 to 0. But for x = 0x0100 this is wrong: (signed char) > > is 0, > > but (unsigned long long) x is not. The culprit is > > operand_equal_for_comparison_p > > which contains shorten_compare-like code which says that the above is safe > > to > > fold. The code harks back to 1992 so I thought it worth to just get rid of > > it. > > > > But I did some measurements and it turns out that substituting > > operand_equal_p > > for operand_equal_for_comparison_p prevents folding ~6 times in > > bootstrap. > > So I feel uneasy about removing the function completely. Instead, I propose > > to > > remove just the part that is causing trouble. (Maybe I should also delete > > the > > first call to operand_equal_p in operand_equal_for_comparison_p.) > > > > Bootstrapped/regtested on x86_64-linux, ok for trunk? What about 7? > > Ok for trunk. Do you have numbers for this patch variant as well? Thanks. Yeah, I've gathered some, too. This patch prevents calling fold_cond_expr_with_comparison that would end up with non-NULL_TREE result 8322 times (all Ada files), this is the 11325 if (COMPARISON_CLASS_P (arg0) 11326 && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), arg1) 11327 && !HONOR_SIGNED_ZEROS (element_mode (arg1))) case; plus 648 times in the 11334 if (COMPARISON_CLASS_P (arg0) 11335 && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), op2) 11336 && !HONOR_SIGNED_ZEROS (element_mode (op2))) case (and a lot of that is coming from libgfortran/generated/*.c and reload.c). > It seems that with some refactoring the remaining transforms should > be easily expressible as match.pd patterns now. That'd be great. Marek
Re: [PATCH] Fix ancient wrong-code with ?: (PR middle-end/81814)
On Thu, 17 Aug 2017, Marek Polacek wrote: > On Thu, Aug 17, 2017 at 04:17:54PM +0200, Richard Biener wrote: > > On Thu, 17 Aug 2017, Marek Polacek wrote: > > > > > This PR is about wrong-code and has gone undetected for over 10 years (!). > > > The issue is that e.g. the following > > > > > > (signed char) x == 0 ? (unsigned long long) x : 0 > > > > > > was wrongly folded to 0, because fold_cond_expr_with_comparison will fold > > > A != 0 ? A : 0 to 0. But for x = 0x0100 this is wrong: (signed char) > > > is 0, > > > but (unsigned long long) x is not. The culprit is > > > operand_equal_for_comparison_p > > > which contains shorten_compare-like code which says that the above is > > > safe to > > > fold. The code harks back to 1992 so I thought it worth to just get rid > > > of it. > > > > > > But I did some measurements and it turns out that substituting > > > operand_equal_p > > > for operand_equal_for_comparison_p prevents folding ~6 times in > > > bootstrap. > > > So I feel uneasy about removing the function completely. Instead, I > > > propose to > > > remove just the part that is causing trouble. (Maybe I should also > > > delete the > > > first call to operand_equal_p in operand_equal_for_comparison_p.) > > > > > > Bootstrapped/regtested on x86_64-linux, ok for trunk? What about 7? > > > > Ok for trunk. Do you have numbers for this patch variant as well? > > Thanks. Yeah, I've gathered some, too. This patch prevents calling > fold_cond_expr_with_comparison that would end up with non-NULL_TREE result > 8322 times (all Ada files), this is the > 11325 if (COMPARISON_CLASS_P (arg0) > 11326 && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), > arg1) > 11327 && !HONOR_SIGNED_ZEROS (element_mode (arg1))) > case; plus 648 times in the > 11334 if (COMPARISON_CLASS_P (arg0) > 11335 && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), > op2) > 11336 && !HONOR_SIGNED_ZEROS (element_mode (op2))) > case (and a lot of that is coming from libgfortran/generated/*.c and > reload.c). So you should be able to extract a C testcase? I suspect sth like long foo (long x, int y) { return y > x ? y : x; } to no longer be folded to return MAX_EXPR (x, (long) y). That would be a shame btw. Richard. > > > It seems that with some refactoring the remaining transforms should > > be easily expressible as match.pd patterns now. > > That'd be great. > > Marek > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: Improve ECF_NOTHROW flags for direct internal functions
On Thu, Aug 17, 2017 at 1:06 PM, Richard Sandiford wrote > Richard Biener writes: >> On Thu, Aug 17, 2017 at 11:49 AM, Richard Sandiford >> wrote: >>> Internal functions that map directly to an optab can only throw an >>> exception for -fnon-call-exceptions. This patch handles that in >>> internal_fn_flags, in a similar way to ATTR_*NOTHROW in builtins.def. >>> >>> (Functions that don't throw even for flag_non_call_exceptions should be >>> explicitly marked ECF_NOTHROW in internal-fn.def.) >>> >>> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install? >> >> Hmm. Note the outcome of flag_non_call_exceptions depends on the >> current function and thus IPA passes querying flags would need to >> push an appropriate function context. This means the function should >> get a struct function * argument and opt_for_fn (fn, >> flag_non_call_exceptions) >> should be used. It doesn't help very much that all callers don't have >> any such context either which means this "optimization" looks like >> in the wrong place :/ (the global value of flag_non_call_exceptions in >> the IPA case isn't necessarily conservative). >> >> So if you insist then add a comment and add a && cfun check so >> we're sure we are in non-IPA context (or in properly setup context). > > Bah. In that case, what should happen if a -fno-non-call-exceptions > function is inlined into an -fnon-call-exceptions one? Should the call > keep the NOTHROWness of the original function, or should it lose > NOTHROWness (and thus gain an exception edge)? nothrow-ness is tracked on the GIMPLE stmt via gimple_call_[set_]nothrow_p, GIMPLE shouldn't look at flag_non_call_exceptions, it is basically part of the IL. > I guess the path of least resistance would be to add an extra check > for this case in the places that need it, rather than relying solely > on gimple_call_flags. Well, gimple_call_flags works fine already (looking at the above in addition to internal_fn_flags). call_expr_flags looks like it might not. Richard. > > Thanks, > Richard > > >> >> Richard. >> >>> Richard >>> >>> >>> 2017-08-17 Richard Sandiford >>> >>> gcc/ >>> * internal-fn.h (internal_fn_flags): Just declare and move >>> the actual implementation out-of-line to... >>> * internal-fn.c (internal_fn_flags): ...here. Set ECF_NOTHROW for >>> directly-mapped internal functions if !flag_non_call_exceptions. >>> >>> Index: gcc/internal-fn.h >>> === >>> --- gcc/internal-fn.h 2017-02-23 19:54:03.0 + >>> +++ gcc/internal-fn.h 2017-08-17 09:05:37.459968561 +0100 >>> @@ -107,15 +107,7 @@ internal_fn_name (enum internal_fn fn) >>>return internal_fn_name_array[(int) fn]; >>> } >>> >>> -/* Return the ECF_* flags for function FN. */ >>> - >>> -extern const int internal_fn_flags_array[]; >>> - >>> -static inline int >>> -internal_fn_flags (enum internal_fn fn) >>> -{ >>> - return internal_fn_flags_array[(int) fn]; >>> -} >>> +extern int internal_fn_flags (enum internal_fn fn); >>> >>> /* Return fnspec for function FN. */ >>> >>> Index: gcc/internal-fn.c >>> === >>> --- gcc/internal-fn.c 2017-08-10 14:36:07.453493083 +0100 >>> +++ gcc/internal-fn.c 2017-08-17 09:05:37.459968561 +0100 >>> @@ -2814,3 +2814,18 @@ expand_PHI (internal_fn, gcall *) >>> { >>> gcc_unreachable (); >>> } >>> + >>> +/* Return the ECF_* flags for function FN. */ >>> + >>> +int >>> +internal_fn_flags (enum internal_fn fn) >>> +{ >>> + int flags = internal_fn_flags_array[(int) fn]; >>> + /* Functions that map to optabs can only throw a catchable exception >>> + when non-call exceptions are enabled. The non-call exceptions in >>> + these cases will typically come from things like IEEE exceptions, >>> + divide by zero errors and SEGVs. */ >>> + if (direct_internal_fn_p (fn) && !flag_non_call_exceptions) >>> +flags |= ECF_NOTHROW; >>> + return flags; >>> +}
Re: [PATCH] Fix build of --enable-gather-detailed-mem-stats (PR bootstrap/81864).
On Thu, Aug 17, 2017 at 3:43 PM, Martin Liška wrote: > Hello. > > As described in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81864#c5 we > should not > introduce a new static hash_table variable in order to be sure > mem_alloc_descriptors > are initialized earlier. > > > Patch can bootstrap on ppc64le-redhat-linux and survives regression tests. > > Ready to be installed? Ok. Richard. > Martin > > > gcc/ChangeLog: > > 2017-08-17 Martin Liska > > PR bootstrap/81864 > * tree-loop-distribution.c (ddrs_table): Change type to pointer > type. > (get_data_dependence): Use it as pointer type. > (distribute_loop): Likewise. > --- > gcc/tree-loop-distribution.c | 15 +-- > 1 file changed, 9 insertions(+), 6 deletions(-) > >
[PING] Re: [PATCH] C++: fix ordering of missing std #include suggestion (PR c++/81514)
Ping On Thu, 2017-07-27 at 17:36 -0400, David Malcolm wrote: > PR c++/81514 reports a problem where > g++.dg/lookup/missing-std-include-2.C > fails on Solaris, offering the suggestion: > > error: 'string' is not a member of 'std' > note: suggested alternative: 'sprintf' > > instead of the expected: > > error: 'string' is not a member of 'std' > note: 'std::string' is defined in header ''; did you forget > to '#include '? > > This is after a: > #include > > suggest_alternative_in_explicit_scope currently works in two phases: > > (a) it attempts to look for misspellings within the explicitly-given > namespace and suggests the best it finds > > (b) failing that, it then looks for well-known "std::" > names and suggests a missing header > > This now seems the wrong way round to me; if the user has > typed "std::string", a missing #include seems more helpful > as a suggestion than attempting to look for misspellings. > > This patch reverses the ordering of (a) and (b) above, so that > missing header hints for well-known std:: names are offered first, > only then falling back to misspelling hints. > > The problem doesn't show up on my x86_64-pc-linux-gnu box, as > the pertinent part of the #include appears to be > equivalent to: > > extern int sprintf (char *dst, const char *format, ...); > namespace std > { > using ::sprintf; > } > > The "std::sprintf" thus examined within consider_binding_level > is the same tree node as ::sprintf, and is rejected by: > > /* Skip anticipated decls of builtin functions. */ > if (TREE_CODE (d) == FUNCTION_DECL > && DECL_BUILT_IN (d) > && DECL_ANTICIPATED (d)) > continue; > > and so the name "sprintf" is never considered as a spell-correction > for std::"string". > > Hence we're not issuing spelling corrections for aliases > within a namespace for builtins from the global namespace; > these are pre-created by cxx_builtin_function, which has: > > 4397/* All builtins that don't begin with an '_' should > additionally > 4398 go in the 'std' namespace. */ > 4399if (name[0] != '_') > 4400 { > 4401tree decl2 = copy_node(decl); > 4402push_namespace (std_identifier); > 4403builtin_function_1 (decl2, std_node, false); > 4404pop_namespace (); > 4405 } > > I'm not sure why Solaris' decl of std::sprintf doesn't hit the > reject path above. > > I was able to reproduce the behavior seen on Solaris on my Fedora > box by using this: > > namespace std > { > extern int sprintf (char *dst, const char *format, ...); > } > > which isn't rejected by the "Skip anticipated decls of builtin > functions" test above, and hence sprintf is erroneously offered > as a suggestion. > > The patch reworks the test case to work in the above way, > to trigger the problem on Linux, and then fixes it by > changing the order that the suggestions are tried in > name-lookup.c. It introduces an "empty.h" since the testcase > is also to verify that we suggest a good location for new #include > directives relative to pre-existing #include directives. > > Successfully bootstrapped®rtested on x86_64-pc-linux-gnu. > > OK for trunk? > > gcc/cp/ChangeLog: > PR c++/81514 > * name-lookup.c (maybe_suggest_missing_header): Convert return > type from void to bool; return true iff a suggestion was > offered. > (suggest_alternative_in_explicit_scope): Move call to > maybe_suggest_missing_header to before use of best_match, and > return true if the former offers a suggestion. > > gcc/testsuite/ChangeLog: > PR c++/81514 > * g++.dg/lookup/empty.h: New file. > * g++.dg/lookup/missing-std-include-2.C: Replace include of > stdio.h with empty.h and a declaration of a "std::sprintf" not > based > on a built-in. > --- > gcc/cp/name-lookup.c | 39 +++- > -- > gcc/testsuite/g++.dg/lookup/empty.h| 1 + > .../g++.dg/lookup/missing-std-include-2.C | 11 -- > 3 files changed, 29 insertions(+), 22 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/lookup/empty.h > > diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c > index cd7428a..49c4dea 100644 > --- a/gcc/cp/name-lookup.c > +++ b/gcc/cp/name-lookup.c > @@ -4838,34 +4838,34 @@ get_std_name_hint (const char *name) >return NULL; > } > > -/* Subroutine of suggest_alternative_in_explicit_scope, for use when > we have no > - suggestions to offer. > - If SCOPE is the "std" namespace, then suggest pertinent header > - files for NAME. */ > +/* If SCOPE is the "std" namespace, then suggest pertinent header > + files for NAME at LOCATION. > + Return true iff a suggestion was offered. */ > > -static void > +static bool > maybe_suggest_missing_header (location_t location, tree name, tree > scope) > { >if (scope == NULL_TREE) > -return; > +return false; >
Re: [PATCH] Fix ancient wrong-code with ?: (PR middle-end/81814)
On Thu, Aug 17, 2017 at 04:36:02PM +0200, Richard Biener wrote: > On Thu, 17 Aug 2017, Marek Polacek wrote: > > > On Thu, Aug 17, 2017 at 04:17:54PM +0200, Richard Biener wrote: > > > On Thu, 17 Aug 2017, Marek Polacek wrote: > > > > > > > This PR is about wrong-code and has gone undetected for over 10 years > > > > (!). > > > > The issue is that e.g. the following > > > > > > > > (signed char) x == 0 ? (unsigned long long) x : 0 > > > > > > > > was wrongly folded to 0, because fold_cond_expr_with_comparison will > > > > fold > > > > A != 0 ? A : 0 to 0. But for x = 0x0100 this is wrong: (signed > > > > char) is 0, > > > > but (unsigned long long) x is not. The culprit is > > > > operand_equal_for_comparison_p > > > > which contains shorten_compare-like code which says that the above is > > > > safe to > > > > fold. The code harks back to 1992 so I thought it worth to just get > > > > rid of it. > > > > > > > > But I did some measurements and it turns out that substituting > > > > operand_equal_p > > > > for operand_equal_for_comparison_p prevents folding ~6 times in > > > > bootstrap. > > > > So I feel uneasy about removing the function completely. Instead, I > > > > propose to > > > > remove just the part that is causing trouble. (Maybe I should also > > > > delete the > > > > first call to operand_equal_p in operand_equal_for_comparison_p.) > > > > > > > > Bootstrapped/regtested on x86_64-linux, ok for trunk? What about 7? > > > > > > Ok for trunk. Do you have numbers for this patch variant as well? > > > > Thanks. Yeah, I've gathered some, too. This patch prevents calling > > fold_cond_expr_with_comparison that would end up with non-NULL_TREE result > > 8322 times (all Ada files), this is the > > 11325 if (COMPARISON_CLASS_P (arg0) > > 11326 && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), > > arg1) > > 11327 && !HONOR_SIGNED_ZEROS (element_mode (arg1))) > > case; plus 648 times in the > > 11334 if (COMPARISON_CLASS_P (arg0) > > 11335 && operand_equal_for_comparison_p (TREE_OPERAND (arg0, 0), > > op2) > > 11336 && !HONOR_SIGNED_ZEROS (element_mode (op2))) > > case (and a lot of that is coming from libgfortran/generated/*.c and > > reload.c). > > So you should be able to extract a C testcase? I suspect sth like > > long foo (long x, int y) > { > return y > x ? y : x; > } > > to no longer be folded to return MAX_EXPR (x, (long) y). This is still folded to return MAX_EXPR <(long int) y, x>; so that's fine. It's got to be something more complex that will not be handled now. I'll look tomorrow for a testcase. Marek
[PATCH] Harden variably_modified_type_p
It seems with Ada one can have cycles in types via pointer types and calling variably_modified_type_p on such type recurses indefinitely. The following is an attempt to fix that (albeit I'm not 100% sure there's no caller of the function using TREE_VISITED itself). Cures c38102a.adb with LTO early debug patches for me. Any comments? (no further testing sofar) Richard. Index: gcc/tree.c === --- gcc/tree.c (revision 251141) +++ gcc/tree.c (working copy) @@ -8597,8 +8601,16 @@ variably_modified_type_p (tree type, tre case POINTER_TYPE: case REFERENCE_TYPE: case VECTOR_TYPE: + /* Ada can have pointer types refering to themselves indirectly. */ + if (TREE_VISITED (type)) + return false; + TREE_VISITED (type) = true; if (variably_modified_type_p (TREE_TYPE (type), fn)) - return true; + { + TREE_VISITED (type) = false; + return true; + } + TREE_VISITED (type) = false; break; case FUNCTION_TYPE:
Re: RFC: [PATCH] Add warn_if_not_aligned attribute
On Thu, Aug 17, 2017 at 6:52 AM, Joseph Myers wrote: > On Sat, 8 Jul 2017, H.J. Lu wrote: > >> +@item -Wpacked-not-aligned @r{(C, C++, Objective-C and Objective-C++ only)} >> +@opindex Wpacked-not-aligned >> +@opindex Wno-packed-not-aligned >> +Warn if a structure field with explicitly specified alignment in a >> +packed struct or union is misaligned. For example, a warning will >> +be issued on @code{struct S}, like, @code{warning: alignment 1 of >> +'struct S' is less than 8}, in this code: > > Use @samp for warnings quoted in the manual, as previously discussed. > > OK with that change, in the absence of C++ maintainer objections within 48 > hours. > Here is the updated patch. I moved c++ changes to merge_decls, where alignment is merged, and check_bitfield_type_and_width, where bit-fields are checked. Tested on x86-64 and i686. -- H.J. From 441bd3479a83093ad46fbaa67d270d69808c05b2 Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Fri, 20 Apr 2012 13:49:05 -0700 Subject: [PATCH] Add warn_if_not_aligned attribute Add warn_if_not_aligned attribute as well as command line options: -Wif-not-aligned and -Wpacked-not-aligned. __attribute__((warn_if_not_aligned(N))) causes compiler to issue a warning if the field in a struct or union is not aligned to N: typedef unsigned long long __u64 __attribute__((aligned(4),warn_if_not_aligned(8))); struct foo { int i1; int i2; __u64 x; }; __u64 is aligned to 4 bytes. But inside struct foo, __u64 should be aligned at 8 bytes. It is used to define struct foo in such a way that struct foo has the same layout and x has the same alignment when __u64 is aligned at either 4 or 8 bytes. Since struct foo is normally aligned to 4 bytes, a warning will be issued: warning: alignment 4 of 'struct foo' is less than 8 Align struct foo to 8 bytes: struct foo { int i1; int i2; __u64 x; } __attribute__((aligned(8))); silences the warning. It also warns the field with misaligned offset: struct foo { int i1; int i2; int i3; __u64 x; } __attribute__((aligned(8))); warning: 'x' offset 12 in 'struct foo' isn't aligned to 8 This warning is controlled by -Wif-not-aligned and is enabled by default. When -Wpacked-not-aligned is used, the same warning is also issued for the field with explicitly specified alignment in a packed struct or union: struct __attribute__ ((aligned (8))) S8 { char a[8]; }; struct __attribute__ ((packed)) S { struct S8 s8; }; warning: alignment 1 of 'struct S' is less than 8 This warning is disabled by default and enabled by -Wall. gcc/ PR c/53037 * print-tree.c (print_node): Support DECL_WARN_IF_NOT_ALIGN and TYPE_WARN_IF_NOT_ALIGN. * stor-layout.c (do_type_align): Merge DECL_WARN_IF_NOT_ALIGN. (handle_warn_if_not_align): New. (place_union_field): Call handle_warn_if_not_align. (place_field): Call handle_warn_if_not_align. Copy TYPE_WARN_IF_NOT_ALIGN. (finish_builtin_struct): Copy TYPE_WARN_IF_NOT_ALIGN. (layout_type): Likewise. * tree-core.h (tree_type_common): Add warn_if_not_align. Set spare to 18. (tree_decl_common): Add warn_if_not_align. * tree.c (build_range_type_1): Copy TYPE_WARN_IF_NOT_ALIGN. * tree.h (TYPE_WARN_IF_NOT_ALIGN): New. (SET_TYPE_WARN_IF_NOT_ALIGN): Likewise. (DECL_WARN_IF_NOT_ALIGN): Likewise. (SET_DECL_WARN_IF_NOT_ALIGN): Likewise. * doc/extend.texi: Document warn_if_not_aligned attribute. * doc/invoke.texi: Document -Wif-not-aligned and -Wpacked-not-aligned. gcc/c-family/ PR c/53037 * c-attribs.c (handle_warn_if_not_aligned_attribute): New. (c_common_attribute_table): Add warn_if_not_aligned. (handle_aligned_attribute): Renamed to ... (common_handle_aligned_attribute): Remove argument, name, and add argument, warn_if_not_aligned. Handle warn_if_not_aligned. (handle_aligned_attribute): New. * c.opt: Add -Wif-not-aligned and -Wpacked-not-aligned. gcc/c/ PR c/53037 * c-decl.c (merge_decls): Also merge DECL_WARN_IF_NOT_ALIGN. (check_bitfield_type_and_width): Don't allow bit-field with warn_if_not_aligned type. gcc/cp/ PR c/53037 * decl.c (duplicate_decls): Also merge DECL_WARN_IF_NOT_ALIGN. * decl2.c (grokbitfield): Don't allow bit-field with warn_if_not_aligned type. gcc/testsuite/ PR c/53037 * c-c++-common/pr53037-5.c: New test. * g++.dg/pr53037-1.C: Likewise. * g++.dg/pr53037-2.C: Likewise. * g++.dg/pr53037-3.C: Likewise. * g++.dg/pr53037-4.C: Likewise. * gcc.dg/pr53037-1.c: Likewise. * gcc.dg/pr53037-2.c: Likewise. * gcc.dg/pr53037-3.c: Likewise. * gcc.dg/pr53037-4.c: Likewise. --- gcc/c-family/c-attribs.c | 71 +++ gcc/c-family/c.opt | 8 gcc/c/c-decl.c | 11 + gcc/cp/decl.c | 4 ++ gcc/cp/decl2.c | 7 +++ gcc/doc/extend.texi| 87 ++ gcc/doc/invoke.texi| 29 +++- gcc/print-tree.c | 9 ++-- gcc/s
Re: [PATCH v2] Simplify pow with constant
On Thu, 17 Aug 2017, Wilco Dijkstra wrote: > This patch simplifies pow (C, x) into exp (x * C1) if C > 0, C1 = log (C). Note this changes the outcome for C == +Inf, x == 0 (pow is specified to return 1.0 in that case, but x * C1 == NaN). There's another existing transform with the same issue, 'pow(expN(x), y) -> expN(x*y)', so this is not a new problem. The whole set of these match.pd transforms is guarded by flag_unsafe_math_optimizations, which is a bit strange, on the one hand it does not include -ffinite-math-only, but on the other hand it's defined broadly enough to imply that. (to be clear, I'm not objecting to the patch, just pointing out an existing inconsistency in case someone can offer clarifications) Alexander
[C++ PATCH] BOUND_TEMPLATE_TEMPLATE_PARM_TYPE
Now that template info is on TYPE_LANG_SLOT_1, B_T_T_P_Types don't need type_lang_specific. Excised thusly. nathan -- Nathan Sidwell 2017-08-17 Nathan Sidwell * lex.c (maybe_add_lang_type_raw): BOUND_TEMPLATE_TEMPLATE_PARMs don't need lang_type. (cxx_make_type): Use maybe_add_lang_type_raw return value. * mangle.c (CLASSTYPE_TEMPLATE_ID_P): Don't rely on TYPE_LANG_SPECIFIC. Index: lex.c === --- lex.c (revision 251152) +++ lex.c (working copy) @@ -809,8 +809,7 @@ copy_type (tree type MEM_STAT_DECL) static bool maybe_add_lang_type_raw (tree t) { - if (!(RECORD_OR_UNION_CODE_P (TREE_CODE (t)) - || TREE_CODE (t) == BOUND_TEMPLATE_TEMPLATE_PARM)) + if (!RECORD_OR_UNION_CODE_P (TREE_CODE (t))) return false; TYPE_LANG_SPECIFIC (t) @@ -831,12 +830,10 @@ cxx_make_type (enum tree_code code) { tree t = make_node (code); - maybe_add_lang_type_raw (t); - - /* Set up some flags that give proper default behavior. */ - if (RECORD_OR_UNION_CODE_P (code)) + if (maybe_add_lang_type_raw (t)) { - struct c_fileinfo *finfo = \ + /* Set up some flags that give proper default behavior. */ + struct c_fileinfo *finfo = get_fileinfo (LOCATION_FILE (input_location)); SET_CLASSTYPE_INTERFACE_UNKNOWN_X (t, finfo->interface_unknown); CLASSTYPE_INTERFACE_ONLY (t) = finfo->interface_only; Index: mangle.c === --- mangle.c (revision 251152) +++ mangle.c (working copy) @@ -81,10 +81,10 @@ along with GCC; see the file COPYING3. instantiated outside of the template, and A is the type used without parameters inside the template. */ #define CLASSTYPE_TEMPLATE_ID_P(NODE) \ - (TYPE_LANG_SPECIFIC (NODE) != NULL \ - && (TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM \ - || (CLASSTYPE_TEMPLATE_INFO (NODE) != NULL \ - && (PRIMARY_TEMPLATE_P (CLASSTYPE_TI_TEMPLATE (NODE)) + (TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM \ + || (CLASS_TYPE_P (NODE) \ + && CLASSTYPE_TEMPLATE_INFO (NODE) != NULL \ + && PRIMARY_TEMPLATE_P (CLASSTYPE_TI_TEMPLATE (NODE /* For deciding whether to set G.need_abi_warning, we need to consider both warn_abi_version and flag_abi_compat_version. */
Re: [PATCH 1/3] improve detection of attribute conflicts (PR 81544)
Attached is an updated patch with just the minor editorial tweaks for the issues pointed out by Marek (stray comments and spaces), and with the C++ and libstdc++ bits removed and posted separately. I also added some text to the manual to clarify the const/pure effects. I thought quite a bit more about the const/pure attributes we discussed and tried the approach of warning only on a const declaration after one with attribute pure has been used, but otherwise allowing and silently ignoring pure after const. In the end I decided against it, for a few reasons (most of which I already mentioned but just to summarize). First, there is the risk that someone will write code based on the pure declaration even if there's no intervening call to the function between it and the const one. Code tends to be sloppy, and it's also not uncommon to declare the same function more than once, for whatever reason. (The ssa-ccp-2.c test is an example of the latter.) Second, there are cases of attribute conflicts that GCC already points out that are much more benign in their effects (the ones I know about are always_inline/noinline and cold/hot). In light of the risk above it seems only helpful to include const/pure in the same set. Third, I couldn't find another pair of attributes that GCC would deliberately handle this way (silently accept both but prefer one over the other), and introducing a special case just for these two seemed like a wart. Finally, compiling Binutils, GDB, Glkibc, and the Linux kernel with the enhanced warning didn't turn up any code that does this sort of thing, either intentionally or otherwise. With that, I've left the handling unchanged. I do still have the question whether diagnosing attribute conflicts under -Wattributes is right. The manual implies that -Wattributes is for attributes in the wrong places or on the wrong entities, and that -Wignored-attributes should be expected instead when GCC decides to drop one for some reason. It is a little unfortunate that many -Wattributes warnings print just "attribute ignored" (and have done so for years). I think they should all be enhanced to also print why the attribute is ignored (e.g., "'packed' attribute on function declarations ignored/not valid/not supported" or something like that). Those that ignore attributes that would otherwise be valid e.g., because they conflict with other specifiers of the same attribute but with a different operand might then be candidate for changing to -Wignored-attributes. Thanks Martin PR c/81544 - attribute noreturn and warn_unused_result on the same function accepted gcc/c/ChangeLog: PR c/81544 * c-decl.c (c_decl_attributes): Look up existing declaration and pass it to decl_attributes. gcc/c-family/ChangeLog: PR c/81544 * c-attribs.c (attr_aligned_exclusions): New array. (attr_alloc_exclusions, attr_cold_hot_exclusions): Same. (attr_common_exclusions, attr_const_pure_exclusions): Same. (attr_gnu_inline_exclusions, attr_inline_exclusions): Same. (attr_noreturn_exclusions, attr_returns_twice_exclusions): Same. (attr_warn_unused_result_exclusions): Same. (handle_hot_attribute, handle_cold_attribute): Simplify. (handle_const_attribute): Warn on function returning void. (handle_pure_attribute): Same. * c-warn.c (diagnose_mismatched_attributes): Simplify. gcc/testsuite/ChangeLog: PR c/81544 * c-c++-common/Wattributes-2.c: New test. * c-c++-common/Wattributes.c: New test. * c-c++-common/attributes-3.c: Adjust. * gcc.dg/attr-noinline.c: Adjust. * gcc.dg/pr44964.c: Same. * gcc.dg/torture/pr42363.c: Same. * gcc.dg/tree-ssa/ssa-ccp-2.c: Same. gcc/ChangeLog: PR c/81544 * attribs.c (empty_attribute_table): Initialize new member of struct attribute_spec. (decl_attributes): Add argument. Handle mutually exclusive combinations of attributes. * attribs.h (decl_attributes): Add default argument. * tree-core.h (attribute_spec::exclusions, exclude): New type and member. * doc/extend.texi (Common Function Attributes): Update const and pure. diff --git a/gcc/attribs.c b/gcc/attribs.c index 05fa8ef..8be5d9d 100644 --- a/gcc/attribs.c +++ b/gcc/attribs.c @@ -94,7 +94,7 @@ static bool attributes_initialized = false; static const struct attribute_spec empty_attribute_table[] = { - { NULL, 0, 0, false, false, false, NULL, false } + { NULL, 0, 0, false, false, false, NULL, false, NULL } }; /* Return base name of the attribute. Ie '__attr__' is turned into 'attr'. @@ -343,6 +343,97 @@ get_attribute_namespace (const_tree attr) return get_identifier ("gnu"); } +/* Check LAST_DECL and NODE of the same symbol for attributes that are + recorded in EXCL to be mutually exclusive with ATTRNAME, diagnose + them, and return true if any have been found. NODE can be a DECL + or a TYPE. */ + +static bool +diag_attr_exclusions (tree last_decl, tree node, tree attrname, + const attribute_spec *spec) +{ + const attribute_spec::exclusions *excl = spec->exclude; + + tree_cod
Re: [PATCH] Harden variably_modified_type_p
> It seems with Ada one can have cycles in types via pointer types > and calling variably_modified_type_p on such type recurses indefinitely. > The following is an attempt to fix that (albeit I'm not 100% sure > there's no caller of the function using TREE_VISITED itself). > > Cures c38102a.adb with LTO early debug patches for me. > > Any comments? (no further testing sofar) See walk_type_fields for a different approach to the same isse. -- Eric Botcazou
Re: [PATCH] use strnlen in pretty printer for "%.*s" (PR 81859)
On 08/16/2017 02:55 PM, David Malcolm wrote: On Wed, 2017-08-16 at 10:12 -0600, Martin Sebor wrote: PR c/81859 - [8 Regression] valgrind error from warn_about_normalization gcc/ChangeLog: PR c/81859 * pretty-print.c (pp_format): Use strnlen in %.*s to avoid reading past the end of an array. (test_pp_format): Add test cases. Index: gcc/pretty-print.c === --- gcc/pretty-print.c (revision 251100) +++ gcc/pretty-print.c (working copy) @@ -668,15 +668,11 @@ pp_format (pretty_printer *pp, text_info *text) s = va_arg (*text->args_ptr, const char *); - /* Negative precision is treated as if it were omitted. */ - if (n < 0) - n = INT_MAX; + /* Append the lesser of precision and strlen (s) characters + from the array (which need not be a nul-terminated string). + Negative precision is treated as if it were omitted. */ + size_t len = n < 0 ? strlen (s) : strnlen (s, n); - /* Append the lesser of precision and strlen (s) characters. */ - size_t len = strlen (s); - if ((unsigned) n < len) - len = n; - pp_append_text (pp, s, s + len); } break; @@ -1438,6 +1434,13 @@ test_pp_format () ASSERT_PP_FORMAT_2 ("A 12345678", "%c %x", 'A', 0x12345678); ASSERT_PP_FORMAT_2 ("hello world 12345678", "%s %x", "hello world", 0x12345678); + + /* Not nul-terminated. */ + char arr[5] = { '1', '2', '3', '4', '5' }; + ASSERT_PP_FORMAT_2 ("123", "%.*s", 3, arr); + ASSERT_PP_FORMAT_2 ("1234", "%.*s", -1, "1234"); + ASSERT_PP_FORMAT_2 ("12345", "%.*s", 7, "12345"); + The other examples in this selftest append a trailing argument with a known bit pattern (0x12345678), to ensure that we're consuming arguments correctly. Please can you do the same for these tests. Sure. I committed the updated fix in r251157. Martin
[PATCH] correct documentation of attribute ifunc (PR 81882)
The attached patch adjusts the documentation of attribute ifunc to avoid warnings (in C) and errors (in C++) due to the resolver returning a pointer to a function of incompatible type. Martin PR c/81882 - attribute ifunc documentation uses invalid code gcc/ChangeLog: PR c/81882 * doc/extend.texi (attribute ifunc): Avoid relying on ill-formed code (in C++) or code that triggers warnings. Index: gcc/doc/extend.texi === --- gcc/doc/extend.texi (revision 251156) +++ gcc/doc/extend.texi (working copy) @@ -2783,21 +2783,24 @@ The @code{ifunc} attribute is used to mark a funct function using the STT_GNU_IFUNC symbol type extension to the ELF standard. This allows the resolution of the symbol value to be determined dynamically at load time, and an optimized version of the -routine can be selected for the particular processor or other system +routine to be selected for the particular processor or other system characteristics determined then. To use this attribute, first define the implementation functions available, and a resolver function that returns a pointer to the selected implementation function. The implementation functions' declarations must match the API of the -function being implemented, the resolver's declaration is be a -function returning pointer to void function returning void: +function being implemented. The resolver should be declared to +be a function returning a pointer to a function taking no arguments +and returning a pointer to a function of the same type as the +implementation. For example: @smallexample void *my_memcpy (void *dst, const void *src, size_t len) @{ @dots{} + return dst; @} -static void (*resolve_memcpy (void)) (void) +static void* (*resolve_memcpy (void))(void *, const void *, size_t) @{ return my_memcpy; // we'll just always select this routine @}
Re: [PATCH, rs6000] Fix PR target/72804: Poor code gen with -mvsx-timode
On 8/16/17 5:56 PM, Peter Bergner wrote: > On 8/16/17 5:30 PM, Segher Boessenkool wrote: > I'll make the above changes and commit after another quick test cycle. Testing the changes came up clean, so I committed it. Thanks. Peter
Re: [PATCH, rs6000] Remove TARGET_VSX_TIMODE and -mno-vsx-timode usage
On 8/16/17 5:49 PM, Segher Boessenkool wrote: >> - (TI "TARGET_VSX_TIMODE")]) >> + (TI "TARGET_VSX")]) > > You can completely remove the condition here as far as I see. [snip] > Okay for trunk with that simplification. Thanks! Good catch. I doubled checked your suggested changes and agree with all of them. I re-did bootstrap and regtesting of the changes and they came up clean, so I committed it along with the extra simple cleanup to FMOVE128_GPR that we discussed offline. Thanks. Peter
[C++ PATCH] TYPE_TEMPLATE_INFO
This patch concludes with simplifying TYPE_TEMPLATE_INFO. It was too tricky, given the time available, to make TYPE_TEMPLATE_INFO require a templatable _TYPE node, so it remains returning NULL_TREE if you give it a random _TYPE node. That'd be nice to clean up though. I can't recall the underlying reason why TEMPLATE_TEMPLATE_PARM was forked to add BOUND_TEMPLATE_TEMPLATE_PARM. Naively it seems these are distinguished by the absence or presence of TEMPLATE_INFO. But again, too fiddly to examine right now. nathan -- Nathan Sidwell 2017-08-17 Nathan Sidwell * cp-tree.def (TEMPLATE_TEMPLATE_PARM): Remove stale comment. * cp-tree.h (ENUM_TEMPLATE_INFO): Delete. (TYPE_TEMPLATE_INFO): Simplify. (SET_TYPE_TEMPLATE_INFO): Simplify. Index: cp-tree.def === --- cp-tree.def (revision 251152) +++ cp-tree.def (working copy) @@ -163,8 +163,7 @@ DEFTREECODE (TEMPLATE_PARM_INDEX, "templ TEMPLATE_PARM_INDEX. It is used without template arguments like TT in C, - TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO is NULL_TREE - and TYPE_NAME is a TEMPLATE_DECL. */ + TYPE_NAME is a TEMPLATE_DECL. */ DEFTREECODE (TEMPLATE_TEMPLATE_PARM, "template_template_parm", tcc_type, 0) /* The ordering of the following codes is optimized for the checking Index: cp-tree.h === --- cp-tree.h (revision 251152) +++ cp-tree.h (working copy) @@ -473,10 +473,8 @@ extern GTY(()) tree cp_global_trees[CPTI TYPE_LANG_SLOT_1 For a FUNCTION_TYPE or METHOD_TYPE, this is TYPE_RAISES_EXCEPTIONS. For a POINTER_TYPE (to a METHOD_TYPE), this is TYPE_PTRMEMFUNC_TYPE. - For an ENUMERAL_TYPE, this is ENUM_TEMPLATE_INFO. - For a RECORD_TYPE or UNION_TYPE this is CLASSTYPE_TEMPLATE_INFO, - For a BOUND_TEMPLATE_TEMPLATE_PARM_TYPE this is also - TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO. + For an ENUMERAL_TYPE, BOUND_TEMPLATE_TEMPLATE_PARM_TYPE, + RECORD_TYPE or UNION_TYPE this is TYPE_TEMPLATE_INFO, BINFO_VIRTUALS For a binfo, this is a TREE_LIST. There is an entry for each @@ -3280,28 +3278,20 @@ extern void decl_shadowed_for_var_insert #define CLASSTYPE_TEMPLATE_INFO(NODE) \ (TYPE_LANG_SLOT_1 (RECORD_OR_UNION_CHECK (NODE))) -/* Template information for an ENUMERAL_TYPE. Although an enumeration may - not be a primary template, it may be declared within the scope of a - primary template and the enumeration constants may depend on - non-type template parameters. */ -#define ENUM_TEMPLATE_INFO(NODE) \ - (TYPE_LANG_SLOT_1 (ENUMERAL_TYPE_CHECK (NODE))) - /* Template information for a template template parameter. */ #define TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO(NODE) \ (TYPE_LANG_SLOT_1 (BOUND_TEMPLATE_TEMPLATE_PARM_TYPE_CHECK (NODE))) /* Template information for an ENUMERAL_, RECORD_, UNION_TYPE, or BOUND_TEMPLATE_TEMPLATE_PARM type. This ignores any alias - templateness of NODE. */ + templateness of NODE. It'd be nice if this could unconditionally + access the slot, rather than return NULL if given a + non-templatable type. */ #define TYPE_TEMPLATE_INFO(NODE) \ (TREE_CODE (NODE) == ENUMERAL_TYPE \ - ? ENUM_TEMPLATE_INFO (NODE) \ - : (TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM \ - ? TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO (NODE) \ - : (CLASS_TYPE_P (NODE) \ - ? CLASSTYPE_TEMPLATE_INFO (NODE)\ - : NULL_TREE))) + || TREE_CODE (NODE) == BOUND_TEMPLATE_TEMPLATE_PARM \ + || RECORD_OR_UNION_TYPE_P (NODE) \ + ? TYPE_LANG_SLOT_1 (NODE) : NULL_TREE) /* Template information (if any) for an alias type. */ #define TYPE_ALIAS_TEMPLATE_INFO(NODE) \ @@ -3321,10 +3311,9 @@ extern void decl_shadowed_for_var_insert UNION_TYPE to VAL. */ #define SET_TYPE_TEMPLATE_INFO(NODE, VAL)\ (TREE_CODE (NODE) == ENUMERAL_TYPE \ - ? (ENUM_TEMPLATE_INFO (NODE) = (VAL))\ - : ((CLASS_TYPE_P (NODE) && !TYPE_ALIAS_P (NODE)) \ - ? (CLASSTYPE_TEMPLATE_INFO (NODE) = (VAL)) \ - : (DECL_TEMPLATE_INFO (TYPE_NAME (NODE)) = (VAL + || (CLASS_TYPE_P (NODE) && !TYPE_ALIAS_P (NODE)) \ + ? (TYPE_LANG_SLOT_1 (NODE) = (VAL))\ + : (DECL_TEMPLATE_INFO (TYPE_NAME (NODE)) = (VAL))) #define TI_TEMPLATE(NODE) TREE_TYPE (TEMPLATE_INFO_CHECK (NODE)) #define TI_ARGS(NODE) TREE_CHAIN (TEMPLATE_INFO_CHECK (NODE))
Re: [PATCH] correct documentation of attribute ifunc (PR 81882)
On Thu, 17 Aug 2017, Martin Sebor wrote: > returns a pointer to the selected implementation function. The > implementation functions' declarations must match the API of the > -function being implemented, the resolver's declaration is be a > -function returning pointer to void function returning void: > +function being implemented. The resolver should be declared to > +be a function returning a pointer to a function taking no arguments > +and returning a pointer to a function of the same type as the > +implementation. For example: The new wording is wrong (extra level of pointer-to-function). I suggest removing this part altogether, it's not useful at all, anyone writing the resolver can deduce what the return type should be based on the fact that a pointer to the implementation is returned. (fwiw a simple 'void *' as return type would work in practice too) Alexander
Re: [PATCH] correct documentation of attribute ifunc (PR 81882)
On 08/17/2017 01:26 PM, Alexander Monakov wrote: On Thu, 17 Aug 2017, Martin Sebor wrote: returns a pointer to the selected implementation function. The implementation functions' declarations must match the API of the -function being implemented, the resolver's declaration is be a -function returning pointer to void function returning void: +function being implemented. The resolver should be declared to +be a function returning a pointer to a function taking no arguments +and returning a pointer to a function of the same type as the +implementation. For example: The new wording is wrong (extra level of pointer-to-function). You're right, that's a silly typo. Thanks for catching it! Here's what I meant to write: The resolver should be declared to be a function taking no arguments and returning a pointer to a function of the same type as the implementation. Attached is an updated patch with the corrected wording. I suggest removing this part altogether, it's not useful at all, anyone writing the resolver can deduce what the return type should be based on the fact that a pointer to the implementation is returned. I suppose that would work too. If there's preference for this approach I'm happy to remove the last sentence. (fwiw a simple 'void *' as return type would work in practice too) Well, yes, it would work, but only about as well as the current wording and example do. I.e., it triggers (pedantic) warnings in C and errors in C++. To get around the errors, the usual approach (despite what the manual says about returning a pointer to a function) is to return void* and cast the address of the function to it. It would be helpful if users could avoid the cast while at the same time having incompatibilities detected for them. (FWIW, I noticed this problem while testing an improvement to have GCC do just that: detect invalid conversions in these kinds of attributes (alias and ifunc) under bug 81854.) Martin PR c/81882 - attribute ifunc documentation uses invalid code gcc/ChangeLog: PR c/81882 * doc/extend.texi (attribute ifunc): Avoid relying on ill-formed code (in C++) or code that triggers warnings. Index: gcc/doc/extend.texi === --- gcc/doc/extend.texi (revision 251156) +++ gcc/doc/extend.texi (working copy) @@ -2783,21 +2783,23 @@ The @code{ifunc} attribute is used to mark a funct function using the STT_GNU_IFUNC symbol type extension to the ELF standard. This allows the resolution of the symbol value to be determined dynamically at load time, and an optimized version of the -routine can be selected for the particular processor or other system +routine to be selected for the particular processor or other system characteristics determined then. To use this attribute, first define the implementation functions available, and a resolver function that returns a pointer to the selected implementation function. The implementation functions' declarations must match the API of the -function being implemented, the resolver's declaration is be a -function returning pointer to void function returning void: +function being implemented. The resolver should be declared to +be a function taking no arguments and returning a pointer to +a function of the same type as the implementation. For example: @smallexample void *my_memcpy (void *dst, const void *src, size_t len) @{ @dots{} + return dst; @} -static void (*resolve_memcpy (void)) (void) +static void* (*resolve_memcpy (void))(void *, const void *, size_t) @{ return my_memcpy; // we'll just always select this routine @}
[PING from 2013][PATCH] fixincludes: handle symlinks with multiple slashes
Looks like the following patch falled through the cracks https://gcc.gnu.org/ml/gcc-patches/2012-12/msg01397.html https://bugs.gentoo.org/show_bug.cgi?id=434180#c16 Thanks! -- Sergei pgpUpEUwrmHQi.pgp Description: Цифровая подпись OpenPGP
Re: [C++ PATCH] TYPE_TEMPLATE_INFO
On Thu, Aug 17, 2017 at 12:00 PM, Nathan Sidwell wrote: > This patch concludes with simplifying TYPE_TEMPLATE_INFO. It was too > tricky, given the time available, to make TYPE_TEMPLATE_INFO require a > templatable _TYPE node, so it remains returning NULL_TREE if you give it a > random _TYPE node. That'd be nice to clean up though. > > I can't recall the underlying reason why TEMPLATE_TEMPLATE_PARM was forked > to add BOUND_TEMPLATE_TEMPLATE_PARM. Naively it seems these are > distinguished by the absence or presence of TEMPLATE_INFO. But again, too > fiddly to examine right now. I don't remember either, but they are different things: a TEMPLATE_TEMPLATE_PARM is a template, a BOUND_TEMPLATE_TEMPLATE_PARM is a type. So I'd be reluctant to try to use the same tree code for both. Jason
Re: RFC: [PATCH] Add warn_if_not_aligned attribute
On Thu, Aug 17, 2017 at 7:56 AM, H.J. Lu wrote: > On Thu, Aug 17, 2017 at 6:52 AM, Joseph Myers wrote: >> On Sat, 8 Jul 2017, H.J. Lu wrote: >> >>> +@item -Wpacked-not-aligned @r{(C, C++, Objective-C and Objective-C++ only)} >>> +@opindex Wpacked-not-aligned >>> +@opindex Wno-packed-not-aligned >>> +Warn if a structure field with explicitly specified alignment in a >>> +packed struct or union is misaligned. For example, a warning will >>> +be issued on @code{struct S}, like, @code{warning: alignment 1 of >>> +'struct S' is less than 8}, in this code: >> >> Use @samp for warnings quoted in the manual, as previously discussed. >> >> OK with that change, in the absence of C++ maintainer objections within 48 >> hours. >> > > Here is the updated patch. I moved c++ changes to merge_decls, where > alignment is merged, and check_bitfield_type_and_width, where bit-fields > are checked. OK. Jason
Re: [PING] Re: [PATCH] C++: fix ordering of missing std #include suggestion (PR c++/81514)
OK. On Thu, Aug 17, 2017 at 7:50 AM, David Malcolm wrote: > Ping > > On Thu, 2017-07-27 at 17:36 -0400, David Malcolm wrote: >> PR c++/81514 reports a problem where >> g++.dg/lookup/missing-std-include-2.C >> fails on Solaris, offering the suggestion: >> >> error: 'string' is not a member of 'std' >> note: suggested alternative: 'sprintf' >> >> instead of the expected: >> >> error: 'string' is not a member of 'std' >> note: 'std::string' is defined in header ''; did you forget >> to '#include '? >> >> This is after a: >> #include >> >> suggest_alternative_in_explicit_scope currently works in two phases: >> >> (a) it attempts to look for misspellings within the explicitly-given >> namespace and suggests the best it finds >> >> (b) failing that, it then looks for well-known "std::" >> names and suggests a missing header >> >> This now seems the wrong way round to me; if the user has >> typed "std::string", a missing #include seems more helpful >> as a suggestion than attempting to look for misspellings. >> >> This patch reverses the ordering of (a) and (b) above, so that >> missing header hints for well-known std:: names are offered first, >> only then falling back to misspelling hints. >> >> The problem doesn't show up on my x86_64-pc-linux-gnu box, as >> the pertinent part of the #include appears to be >> equivalent to: >> >> extern int sprintf (char *dst, const char *format, ...); >> namespace std >> { >> using ::sprintf; >> } >> >> The "std::sprintf" thus examined within consider_binding_level >> is the same tree node as ::sprintf, and is rejected by: >> >> /* Skip anticipated decls of builtin functions. */ >> if (TREE_CODE (d) == FUNCTION_DECL >> && DECL_BUILT_IN (d) >> && DECL_ANTICIPATED (d)) >> continue; >> >> and so the name "sprintf" is never considered as a spell-correction >> for std::"string". >> >> Hence we're not issuing spelling corrections for aliases >> within a namespace for builtins from the global namespace; >> these are pre-created by cxx_builtin_function, which has: >> >> 4397/* All builtins that don't begin with an '_' should >> additionally >> 4398 go in the 'std' namespace. */ >> 4399if (name[0] != '_') >> 4400 { >> 4401tree decl2 = copy_node(decl); >> 4402push_namespace (std_identifier); >> 4403builtin_function_1 (decl2, std_node, false); >> 4404pop_namespace (); >> 4405 } >> >> I'm not sure why Solaris' decl of std::sprintf doesn't hit the >> reject path above. >> >> I was able to reproduce the behavior seen on Solaris on my Fedora >> box by using this: >> >> namespace std >> { >> extern int sprintf (char *dst, const char *format, ...); >> } >> >> which isn't rejected by the "Skip anticipated decls of builtin >> functions" test above, and hence sprintf is erroneously offered >> as a suggestion. >> >> The patch reworks the test case to work in the above way, >> to trigger the problem on Linux, and then fixes it by >> changing the order that the suggestions are tried in >> name-lookup.c. It introduces an "empty.h" since the testcase >> is also to verify that we suggest a good location for new #include >> directives relative to pre-existing #include directives. >> >> Successfully bootstrapped®rtested on x86_64-pc-linux-gnu. >> >> OK for trunk? >> >> gcc/cp/ChangeLog: >> PR c++/81514 >> * name-lookup.c (maybe_suggest_missing_header): Convert return >> type from void to bool; return true iff a suggestion was >> offered. >> (suggest_alternative_in_explicit_scope): Move call to >> maybe_suggest_missing_header to before use of best_match, and >> return true if the former offers a suggestion. >> >> gcc/testsuite/ChangeLog: >> PR c++/81514 >> * g++.dg/lookup/empty.h: New file. >> * g++.dg/lookup/missing-std-include-2.C: Replace include of >> stdio.h with empty.h and a declaration of a "std::sprintf" not >> based >> on a built-in. >> --- >> gcc/cp/name-lookup.c | 39 +++- >> -- >> gcc/testsuite/g++.dg/lookup/empty.h| 1 + >> .../g++.dg/lookup/missing-std-include-2.C | 11 -- >> 3 files changed, 29 insertions(+), 22 deletions(-) >> create mode 100644 gcc/testsuite/g++.dg/lookup/empty.h >> >> diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c >> index cd7428a..49c4dea 100644 >> --- a/gcc/cp/name-lookup.c >> +++ b/gcc/cp/name-lookup.c >> @@ -4838,34 +4838,34 @@ get_std_name_hint (const char *name) >>return NULL; >> } >> >> -/* Subroutine of suggest_alternative_in_explicit_scope, for use when >> we have no >> - suggestions to offer. >> - If SCOPE is the "std" namespace, then suggest pertinent header >> - files for NAME. */ >> +/* If SCOPE is the "std" namespace, then suggest pertinent header >> + files for NAME at LOCATION. >> + Return true iff a suggestion was offered. */ >> >> -static
[PATCH], Delete some PowerPC debug switches
This patch deletes some of the debug switches that I've added over the years, and now don't use any more. I did bootstrap builds and make check runs on a little endian power8 system. There were no regressions. Can I check this into the trunk? 2017-08-17 Michael Meissner * config/rs6000/rs6000-cpus.def (-mvsx-scalar-float): Delete undocumented debugging options. (-mvsx-scalar-double): Likewise. (-mallow-df-permute): Likewise. (-mvectorize-builtins): Likewise. * config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_builtin_vectorized_function): Likewise. (rs6000_builtin_md_vectorized_function): Likewise. (rs6000_opt_vars): Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797 Index: gcc/config/rs6000/rs6000.opt === --- gcc/config/rs6000/rs6000.opt(revision 251170) +++ gcc/config/rs6000/rs6000.opt(working copy) @@ -196,13 +196,13 @@ mvsx Target Report Mask(VSX) Var(rs6000_isa_flags) Use vector/scalar (VSX) instructions. +; This option existed in the past, but now is always on. mvsx-scalar-float -Target Undocumented Report Var(TARGET_VSX_SCALAR_FLOAT) Init(1) -; If -mpower8-vector, use VSX arithmetic instructions for SFmode (on by default) +Target RejectNegative Undocumented Ignore +; This option existed in the past, but now is always on. mvsx-scalar-double -Target Undocumented Report Var(TARGET_VSX_SCALAR_DOUBLE) Init(1) -; If -mvsx, use VSX arithmetic instructions for DFmode (on by default) +Target RejectNegative Undocumented Ignore mvsx-align-128 Target Undocumented Report Var(TARGET_VSX_ALIGN_128) Save @@ -216,9 +216,9 @@ mefficient-unaligned-vsx Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags) ; Consider unaligned VSX vector and fp accesses to be efficient +; This option existed in the past, but it hadn't been used in awhile mallow-df-permute -Target Undocumented Var(TARGET_ALLOW_DF_PERMUTE) Save -; Allow permutation of DF/DI vectors +Target RejectNegative Undocumented Ignore msched-groups Target Undocumented Report Var(TARGET_SCHED_GROUPS) Init(-1) Save @@ -232,9 +232,9 @@ malign-branch-targets Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1) Save ; Explicitly set rs6000_align_branch_targets +; This option existed in the past, but now is always on. mvectorize-builtins -Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1) Save -; Explicitly control whether we vectorize the builtins or not. +Target RejectNegative Undocumented Ignore mno-update Target Report RejectNegative Mask(NO_UPDATE) Var(rs6000_isa_flags) Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 251170) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -3128,14 +3128,14 @@ rs6000_init_hard_regno_mode_ok (bool glo /* DFmode, see if we want to use the VSX unit. Memory is handled differently, so don't set rs6000_vector_mem. */ - if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE) + if (TARGET_VSX) { rs6000_vector_unit[DFmode] = VECTOR_VSX; rs6000_vector_align[DFmode] = 64; } /* SFmode, see if we want to use the VSX unit. */ - if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT) + if (TARGET_P8_VECTOR) { rs6000_vector_unit[SFmode] = VECTOR_VSX; rs6000_vector_align[SFmode] = 32; @@ -5909,8 +5909,7 @@ rs6000_builtin_vectorized_function (unsi GET_MODE_NAME (TYPE_MODE (type_in))); if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE - || !TARGET_VECTORIZE_BUILTINS) + || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; out_mode = TYPE_MODE (TREE_TYPE (type_out)); @@ -6041,8 +6040,7 @@ rs6000_builtin_md_vectorized_function (t GET_MODE_NAME (TYPE_MODE (type_in))); if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE - || !TARGET_VECTORIZE_BUILTINS) + || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; out_mode = TYPE_MODE (TREE_TYPE (type_out)); @@ -36253,9 +36251,6 @@ static struct rs6000_opt_var const rs600 { "allow-movmisalign", offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN), offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), }, - { "allow-df-permute", -offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE), -offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), }, { "sched-groups", offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS), offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), }, @@ -36265,9 +36260,6 @@ static struct rs6000_opt_var const rs600 { "align-branch-targets", offsetof (stru
[PATCH, rs6000] Fix PR target/80210: ICE in extract_insn
PR target/80210 exposes a problem in rs6000_set_current_function() where is fails to correctly clear the rs6000_previous_fndecl cache correctly. With the test case, we notice that rs6000_previous_fndecl is set (when it shouldn't be) and we end up restoring options from it. In this case, we end up disabling HW sqrt (because of the pragma) when we earlier decided we could generate HW sqrts which leads to an ICE. The current code in rs6000_set_current_function() is kind of a rats nest, so I threw it out and rewrote it, modeling it after how S390 and i386 handle it, which correctly clears the *_previous_fndecl caches. It also makes the code much more readable in my view. This passed bootstrap and regtesting with no regressions and it fixes the ICE. Ok for trunk? This is also broken in GCC 7, GCC 6 and GCC 5. Ok for those after this has been on trunk for a little while and assuming testing passes? Peter gcc/ * config/rs6000/rs6000.c (rs6000_activate_target_options): New function. (rs6000_set_current_function): Rewrite function to use it. gcc/testsuite/ * gcc.target/powerpc/pr80210.c: New test. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 251171) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -36664,23 +36664,30 @@ rs6000_pragma_target_parse (tree args, t /* Remember the last target of rs6000_set_current_function. */ static GTY(()) tree rs6000_previous_fndecl; +/* Restore target's globals from NEW_TREE and invalidate the + rs6000_previous_fndecl cache. */ + +static void +rs6000_activate_target_options (tree new_tree) +{ + cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); + if (TREE_TARGET_GLOBALS (new_tree)) +restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); + else if (new_tree == target_option_default_node) +restore_target_globals (&default_target_globals); + else +TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); + rs6000_previous_fndecl = NULL_TREE; +} + /* Establish appropriate back-end context for processing the function FNDECL. The argument might be NULL to indicate processing at top level, outside of any function scope. */ static void rs6000_set_current_function (tree fndecl) { - tree old_tree = (rs6000_previous_fndecl - ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl) - : NULL_TREE); - - tree new_tree = (fndecl - ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl) - : NULL_TREE); - if (TARGET_DEBUG_TARGET) { - bool print_final = false; fprintf (stderr, "\n rs6000_set_current_function"); if (fndecl) @@ -36693,58 +36700,60 @@ rs6000_set_current_function (tree fndecl fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl); fprintf (stderr, "\n"); +} + + /* Only change the context if the function changes. This hook is called + several times in the course of compiling a function, and we don't want to + slow things down too much or call target_reinit when it isn't safe. */ + if (fndecl == rs6000_previous_fndecl) +return; + + tree old_tree; + if (rs6000_previous_fndecl == NULL_TREE) +old_tree = target_option_current_node; + else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)) +old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl); + else +old_tree = target_option_default_node; + + tree new_tree; + if (fndecl == NULL_TREE) +{ + if (old_tree != target_option_current_node) + new_tree = target_option_current_node; + else + new_tree = NULL_TREE; +} + else +{ + new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); + if (new_tree == NULL_TREE) + new_tree = target_option_default_node; +} + + if (TARGET_DEBUG_TARGET) +{ if (new_tree) { fprintf (stderr, "\nnew fndecl target specific options:\n"); debug_tree (new_tree); - print_final = true; } if (old_tree) { fprintf (stderr, "\nold fndecl target specific options:\n"); debug_tree (old_tree); - print_final = true; } - if (print_final) + if (old_tree != NULL_TREE || new_tree != NULL_TREE) fprintf (stderr, "\n"); } - /* Only change the context if the function changes. This hook is called - several times in the course of compiling a function, and we don't want to - slow things down too much or call target_reinit when it isn't safe. */ - if (fndecl && fndecl != rs6000_previous_fndecl) -{ - rs6000_previous_fndecl = fndecl; - if (old_tree == new_tree) - ; - - else if (new_tree && new_tree != target_option_default_node) - { - cl_target_option_restore (&global_options, -
Re: [PATCH, rs6000] 2/3 Add x86 SSE intrinsics to GCC PPC64LE taget
On Thu, 2017-08-17 at 00:28 -0500, Segher Boessenkool wrote: > Hi! > > On Wed, Aug 16, 2017 at 03:35:40PM -0500, Steven Munroe wrote: > > +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > +_mm_add_ss (__m128 __A, __m128 __B) > > +{ > > +#ifdef _ARCH_PWR7 > > + __m128 a, b, c; > > + static const __vector unsigned int mask = {0x, 0, 0, 0}; > > + /* PowerISA VSX does not allow partial (for just lower double) > > + * results. So to insure we don't generate spurious exceptions > > + * (from the upper double values) we splat the lower double > > + * before we to the operation. */ > > No leading stars in comments please. Fixed > > > + a = vec_splat (__A, 0); > > + b = vec_splat (__B, 0); > > + c = a + b; > > + /* Then we merge the lower float result with the original upper > > + * float elements from __A. */ > > + return (vec_sel (__A, c, mask)); > > +#else > > + __A[0] = __A[0] + __B[0]; > > + return (__A); > > +#endif > > +} > > It would be nice if we could just write the #else version and get the > more optimised code, but I guess we get something horrible going through > memory, instead? > No, even with GCC8-trunk this field access is going through storage. The generated code for splat, op, select is shorter even when you include loading the constant. vector <-> scalar float is just nasty! > > +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > +_mm_rcp_ps (__m128 __A) > > +{ > > + __v4sf result; > > + > > + __asm__( > > + "xvresp %x0,%x1;\n" > > + : "=v" (result) > > + : "v" (__A) > > + : ); > > + > > + return (result); > > +} > > There is a builtin for this (__builtin_vec_re). Yes, not sure how I missed that. Fixed. > > > +/* Convert the lower SPFP value to a 32-bit integer according to the > > current > > + rounding mode. */ > > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > +_mm_cvtss_si32 (__m128 __A) > > +{ > > + __m64 res = 0; > > +#ifdef _ARCH_PWR8 > > + __m128 vtmp; > > + __asm__( > > + "xxsldwi %x1,%x2,%x2,3;\n" > > + "xscvspdp %x1,%x1;\n" > > + "fctiw %1,%1;\n" > > + "mfvsrd %0,%x1;\n" > > + : "=r" (res), > > + "=&wi" (vtmp) > > + : "wa" (__A) > > + : ); > > +#endif > > + return (res); > > +} > > Maybe it could do something better than return the wrong answer for non-p8? Ok this gets tricky. Before _ARCH_PWR8 the vector to scalar transfer would go through storage. But that is not the worst of it. The semantic of cvtss requires rint or llrint. But __builtin_rint will generate a call to libm unless we assert -ffast-math. And we don't have builtins to generate fctiw/fctid directly. So I will add the #else using __builtin_rint if that libm dependency is ok (this will pop in the DG test for older machines. > > > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > > +#ifdef __LITTLE_ENDIAN__ > > + return result[1]; > > +#elif __BIG_ENDIAN__ > > + return result [0]; > > Remove the extra space here? > > > +_mm_max_pi16 (__m64 __A, __m64 __B) > > > + res.as_short[0] = (m1.as_short[0] > m2.as_short[0])? m1.as_short[0]: > > m2.as_short[0]; > > + res.as_short[1] = (m1.as_short[1] > m2.as_short[1])? m1.as_short[1]: > > m2.as_short[1]; > > + res.as_short[2] = (m1.as_short[2] > m2.as_short[2])? m1.as_short[2]: > > m2.as_short[2]; > > + res.as_short[3] = (m1.as_short[3] > m2.as_short[3])? m1.as_short[3]: > > m2.as_short[3]; > > Space before ? and : . done > > > +_mm_min_pi16 (__m64 __A, __m64 __B) > > In this function, too. > > > +extern __inline int __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > +_m_pmovmskb (__m64 __A) > > +{ > > + return _mm_movemask_pi8 (__A); > > +} > > +/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values > > + in B and produce the high 16 bits of the 32-bit results. */ > > +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > Newline before the comment? done > > > +_mm_sad_pu8 (__m64 __A, __m64 __B) > > > + /* Sum four groups of bytes into integers. */ > > + vsum = (__vector signed int) vec_sum4s (vabsdiff, zero); > > + /* sum across four integers with integer result. */ > > + vsum = vec_sums (vsum, (__vector signed int) zero); > > + /* the sum is in the right most 32-bits of the vector result. > > + Transfer the a GPR and truncate to 16 bits. */ > > That last line has an indentation problem. Sentences should start with > a capital, in those last two comments. > Fixed > > +/* Stores the data in A to the address P without polluting the caches. */ > > +extern __inline void __attribute__((__gnu_inline__, __always_inline__, > > __artificial__)) > > +_mm_stream_pi (__m64 *__P, __m64 __A) > > +{ > > + /* Use the data cache block touch for store transien
[PATCH] detect incompatible aliases (PR c/81854)
Joseph, while looking into implementing enhancement your request pr81824 I noticed that GCC silently accepts incompatible alias declarations (pr81854) so as sort of a proof-concept for the former I enhanced the checking already done for other kinds of incompatibilities to also detect those mentioned in the latter bug. Attached is this patch, tested on x85_64-linux. Jonathan, the patch requires suppressing the warning in libstdc++ compatibility symbol definitions in compatibility.cc. I couldn't find a way to do it without the suppression but I'd be happy to try again if you have an idea for how. As an example, the patch lets GCC detect mistakes like: size_t __attribute__ ((ifunc ("bar_resolver"))) bar (void*, const void*, size_t); void* fast_bar (void *d, const void *s, size_t n) { ... } void* slow_bar (void *d, const void *s, size_t n) { ... } void* bar_resolver (void) { return fast ? &fast_bar : &slow_bar; } By complaining that the ifunc resolver should return a function pointer it makes the programmer change the declaration of the resolver to one of: __typeof__ (bar)* bar_resolver (void) { ... } or __typeof__ (fast_bar)* bar_resolver (void) { ... } which then triggers either -Wincompatible-pointer-types or -Wattributes, respectively. (I used the latter warning in my patch but maybe the former would be more appropriate). Martin PS A documentation-only patch to update the description of attribute ifunc was posted separately here: https://gcc.gnu.org/ml/gcc-patches/2017-08/msg01095.html PPS To make use of this checking (and compile without the new warnings) Glibc needs the following patch: diff --git a/include/libc-symbols.h b/include/libc-symbols.h index fe3ab81..5413e56 100644 --- a/include/libc-symbols.h +++ b/include/libc-symbols.h @@ -790,7 +790,8 @@ for linking") /* Helper / base macros for indirect function symbols. */ #define __ifunc_resolver(type_name, name, expr, arg, init, classifier) \ - classifier inhibit_stack_protector void *name##_ifunc (arg) \ + classifier inhibit_stack_protector \ + __typeof (type_name) *name##_ifunc (arg) \ {\ init (); \ __typeof (type_name) *res = expr; \ PR c/81854 - weak alias of an incompatible symbol accepted gcc/ChangeLog: PR c/81854 * cgraphunit.c (handle_alias_pairs): Reject aliases between functions of incompatible types. gcc/testsuite/ChangeLog: PR c/81854 * gcc.dg/pr81854.c: New test. * g++.dg/ext/attr-ifunc-5.C: New test. * g++.dg/ext/attr-ifunc-1.C: Adjust. * g++.dg/ext/attr-ifunc-2.C: Same. * g++.dg/ext/attr-ifunc-3.C: Same. * g++.dg/ext/attr-ifunc-4.C: Same. * g++.old-deja/g++.abi/vtable2.C: Same. * gcc.dg/attr-ifunc-1.c: Same. libstdc++-v3/ChangeLog: PR c/81854 * src/c++98/compatibility.cc: Suppress -Wattributes. diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index e8cc765..8d09a79 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -1352,6 +1352,63 @@ handle_alias_pairs (void) if (TREE_CODE (p->decl) == FUNCTION_DECL && target_node && is_a (target_node)) { + tree t1 = TREE_TYPE (p->decl); + tree t2 = TREE_TYPE (target_node->decl); + + if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (p->decl))) + { + t2 = TREE_TYPE (t2); + if (POINTER_TYPE_P (t2)) + { + t2 = TREE_TYPE (t2); + if (!FUNC_OR_METHOD_TYPE_P (t2)) + { + if (warning_at (DECL_SOURCE_LOCATION (p->decl), + OPT_Wattributes, + "%q+D % resolver should return " + "a function pointer", + p->decl)) + inform (DECL_SOURCE_LOCATION (target_node->decl), +"resolver declaration here"); + + t2 = NULL_TREE; + } + } + else + { + /* Deal with static member function pointers. */ + if (TREE_CODE (t2) == RECORD_TYPE + && TYPE_FIELDS (t2) + && TREE_CODE (TREE_TYPE (TYPE_FIELDS (t2))) == POINTER_TYPE + && (TREE_CODE (TREE_TYPE (TREE_TYPE (TYPE_FIELDS (t2 + == METHOD_TYPE)) + t2 = TREE_TYPE (TREE_TYPE (TYPE_FIELDS (t2))); + else + { + error ("%q+D % resolver must return a function " + "pointer", + p->decl); + inform (DECL_SOURCE_LOCATION (target_node->decl), + "resolver declaration here"); + + t2 = NULL_TREE; + } + } + } + + if (t2 + && (!FUNC_OR_METHOD_TYPE_P (t2) + || (prototype_p (t1) + && prototype_p (t2) + && !types_compatible_p (t1, t2 + { + if (warning_at (DECL_SOURCE_LOCATION (p->decl), OPT_Wattributes, + "%q+D alias between functions of incompatible " + "types %qT and %qT", p->decl, t1, t2)) + inform (DECL_SOURCE_LOCATION (target_node->decl), + "aliased declaration here"); + } + cgrap
libgo patch committed: Pass -funwind-tables when compiling C code
This patch to the go tool in libgo changes it to use -funwind-tables when compiling C code to support the cgo tool. Using -funwind-tables is necessary to permit Go code to correctly throw a panic through C code. This hasn't been necessary in the past as -funwind-tables is the default on x86. However, it is not the default for PPC AIX, among other systems. Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed to mainline. Ian Index: gcc/go/gofrontend/MERGE === --- gcc/go/gofrontend/MERGE (revision 251133) +++ gcc/go/gofrontend/MERGE (working copy) @@ -1,4 +1,4 @@ -152164a7249ecc5c2bfd4a091450dc7c2855f609 +9ff49c64ea6dbb5e08d1fa859b99b06049413279 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. Index: libgo/go/cmd/go/build.go === --- libgo/go/cmd/go/build.go(revision 250873) +++ libgo/go/cmd/go/build.go(working copy) @@ -3268,6 +3268,12 @@ func (b *builder) ccompilerCmd(envvar, d a = append(a, "-fno-common") } + // gccgo uses the language-independent exception mechanism to + // handle panics, so it always needs unwind tables. + if _, ok := buildToolchain.(gccgoToolchain); ok { + a = append(a, "-funwind-tables") + } + return a }
Re: [PATCH] correct documentation of attribute ifunc (PR 81882)
On Aug 17 2017, Martin Sebor wrote: > -static void (*resolve_memcpy (void)) (void) > +static void* (*resolve_memcpy (void))(void *, const void *, size_t) Please use consistent spacing. Andreas. -- Andreas Schwab, sch...@linux-m68k.org GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5 "And now for something completely different."