[committed] Add var in push_fields_onto_fieldstack
Hi, this patch adds a new variable in push_fields_onto_fieldstack, improving readability. Bootstrapped and reg-tested on x86_64. Committed to trunk as obvious. Thanks, - Tom Add var in push_fields_onto_fieldstack 2015-10-27 Tom de Vries * tree-ssa-structalias.c (push_fields_onto_fieldstack): Add and use var field_type. --- gcc/tree-ssa-structalias.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 8d86dcb..9632ce1 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -5313,13 +5313,14 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, { bool push = false; HOST_WIDE_INT foff = bitpos_of_field (field); + tree field_type = TREE_TYPE (field); if (!var_can_have_subvars (field) - || TREE_CODE (TREE_TYPE (field)) == QUAL_UNION_TYPE - || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE) + || TREE_CODE (field_type) == QUAL_UNION_TYPE + || TREE_CODE (field_type) == UNION_TYPE) push = true; else if (!push_fields_onto_fieldstack - (TREE_TYPE (field), fieldstack, offset + foff) + (field_type, fieldstack, offset + foff) && (DECL_SIZE (field) && !integer_zerop (DECL_SIZE (field /* Empty structures may have actual size, like in C++. So @@ -5372,8 +5373,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, e.may_have_pointers = true; e.only_restrict_pointers = (!has_unknown_size - && POINTER_TYPE_P (TREE_TYPE (field)) - && TYPE_RESTRICT (TREE_TYPE (field))); + && POINTER_TYPE_P (field_type) + && TYPE_RESTRICT (field_type)); fieldstack->safe_push (e); } } -- 1.9.1
[PATCH, PR67742] Handle restrict struct fields recursively
[ was: Re: [PATCH, 1/2] Add handle_param parameter to create_variable_info_for_1 ] On 26/10/15 17:23, Tom de Vries wrote: Why does create_variable_info_for_1 only handle the single-field case? That is, I expected this to be handled by c_v_r_f_1 fully. Yep, that's the goal of PR67742. I've written a patch in an earlier version of the patch series that implements that, I'm currently porting that patch to this patch series. I'll post asap. This is the follow-up patch that handles restrict parameters recursively, also in struct fields. Bootstrapped and reg-tested on x86_64. OK for trunk? Thanks, - Tom Handle restrict struct fields recursively 2015-10-26 Tom de Vries * tree-ssa-structalias.c (struct fieldoff): Add restrict_var field. (push_fields_onto_fieldstack): Add and handle handle_param parameter. (create_variable_info_for_1): Add extra arg to call to push_fields_onto_fieldstack. Add type_contains_placeholder_p test. Handle restrict pointer fields. (intra_create_variable_infos): Call create_variable_info_for_1 with handle_param == true. Call make_restrict_var_constraints. * gcc.dg/tree-ssa/restrict-8.c: New test. --- gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 +++ gcc/tree-ssa-structalias.c | 47 +- 2 files changed, 56 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c new file mode 100644 index 000..b0ab164 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ + +struct s +{ + int *__restrict__ *__restrict__ pp; +}; + +int +f (struct s s, int *b) +{ + *b = 1; + **s.pp = 2; + return *b; +} + +/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */ diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 344c3b2..2031175 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -399,6 +399,7 @@ new_var_info (tree t, const char *name) return ret; } +static varinfo_t create_variable_info_for_1 (tree, const char *, bool); /* A map mapping call statements to per-stmt variables for uses and clobbers specific to the call. */ @@ -5196,6 +5197,8 @@ struct fieldoff unsigned may_have_pointers : 1; unsigned only_restrict_pointers : 1; + + varinfo_t restrict_var; }; typedef struct fieldoff fieldoff_s; @@ -5290,11 +5293,12 @@ field_must_have_pointers (tree t) OFFSET is used to keep track of the offset in this entire structure, rather than just the immediately containing structure. Returns false if the caller is supposed to handle the field we - recursed for. */ + recursed for. If HANDLE_PARAM is set, we're handling part of a function + parameter. */ static bool push_fields_onto_fieldstack (tree type, vec *fieldstack, - HOST_WIDE_INT offset) + HOST_WIDE_INT offset, bool handle_param) { tree field; bool empty_p = true; @@ -5320,7 +5324,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, || TREE_CODE (field_type) == UNION_TYPE) push = true; else if (!push_fields_onto_fieldstack - (field_type, fieldstack, offset + foff) + (field_type, fieldstack, offset + foff, handle_param) && (DECL_SIZE (field) && !integer_zerop (DECL_SIZE (field /* Empty structures may have actual size, like in C++. So @@ -5341,7 +5345,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, if (!pair && offset + foff != 0) { - fieldoff_s e = {0, offset + foff, false, false, false, false}; + fieldoff_s e = {0, offset + foff, false, false, false, false, +NULL}; pair = fieldstack->safe_push (e); } @@ -5375,6 +5380,19 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, = (!has_unknown_size && POINTER_TYPE_P (field_type) && TYPE_RESTRICT (field_type)); + if (handle_param + && e.only_restrict_pointers + && !type_contains_placeholder_p (TREE_TYPE (field_type))) + { + varinfo_t rvi; + tree heapvar = build_fake_var_decl (TREE_TYPE (field_type)); + DECL_EXTERNAL (heapvar) = 1; + rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", + true); + rvi->is_restrict_var = 1; + insert_vi_for_tree (heapvar, rvi); + e.restrict_var = rvi; + } fieldstack->safe_push (e); } } @@ -5679,7 +5697,7 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param) bool notokay = false; unsigned int i; - push_fields_onto_fieldstack (decl_type, &fieldstack, 0); + push_fields_onto_fieldstack (decl_type, &fieldstack, 0, handle_param); for (i = 0; !notokay && fieldstack.iterate (i, &fo); i++) if (fo->has_unknown_size @@ -5722,6 +5740,7 @@ create_variable_info_for_1 (tree decl, const char *
Re: [OpenACC 5/11] C++ FE changes
On Mon, Oct 26, 2015 at 03:35:20PM -0700, Cesar Philippidis wrote: > I used that generic message for all of those clauses except for _GANG, > _WORKER and _VECTOR. The gang clause, at the very least, needed it to > disambiguate the static and num arguments. If you want I can handle > _WORKER and _VECTOR with the generic message. I only included it because > those arguments are optional, whereas they are mandatory for the other > clauses. > > Is this patch OK for trunk? Ok. Jakub
Re: [OpenACC 1/11] UNIQUE internal function
On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote: > Richard, Jakub, > this updates patch 1 to use the target-insns.def mechanism of detecting > conditionally-implemented instructions. Otherwise it's the same as > yesterday's patch. To recap: > > 1) Moved the subcodes to an enumeration in internal-fn.h > > 2) Remove ECF_LEAF > > 3) Added check in initialize_ctrl_altering > > 4) tracer code now (continues) to only look in last stmt of block > > I looked at fnsplit and do not believe I need changes there. That's > changing things like: > if (cheap test) > do cheap thing > else > do complex thing > > to break out the else part into a separate function. That's fine -- it'll > copy the whole CFG of interest. The question is if some UNIQUE call could be ever considered as part of the cheap test or do cheap thing. If not, everything is fine of course for fnsplit. > ok? Ok for me, but please wait for Richi's ack too. Jakub
Re: [OpenACC 7/11] execution model
On Mon, Oct 26, 2015 at 04:11:20PM -0700, Nathan Sidwell wrote: > Jakub, Richard, > This is the updated version of patch 7, using target-insns.def for the new > insns. Otherwise same as yesterday's, which had the following changes: > > The significant change is that now the head/tail unique markers are > threaded on a data dependency variable. I'd not noticed its lack being a > problem, but this is certainly more robust in showing the ordering > dependency between calls. The dependency var is the 2nd parameter, and all > others are simply shifted along by one. > > At RTL generation time the date dependency is exposed to the RTL expander, > which in the PTX case simply does a src->dst move, which will eventually be > deleted as unnecessary. > > ok? LGTM, though could I ask you to try to try to move the struct oacc_collapse expand_oacc_collapse_init expand_oacc_collapse_vars expand_oacc_for additions somewhere else (e.g. in between expand_omp_taskreg and expand_omp_for_init_counts), because it seems patch just got too confused and gave up, so most of expand_omp_for which I assume is unchanged except for > + else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) > +{ > + gcc_assert (!inner_stmt); > + expand_oacc_for (region, &fd); > +} addition is considered to be deleted in one place and added into another one; if patch does this, I'd be afraid svn blame or git blame would do so too, and thus lose history for expand_omp_for. If moving it around doesn't help, no big deal, but if it helps, it would be appreciated. > (is_oacc_parallel, is_oaccc_kernels): New. One too many Cs. Jakub
Re: Drop types_compatible_p from operand_equal_p
On Mon, 26 Oct 2015, Jan Hubicka wrote: > > On Sat, 24 Oct 2015, Jan Hubicka wrote: > > > > > Hi, as discussed earlier, the types_compatible_p in operand_equal_p > > > seems redundant. (it is callers work to figure out how much of type > > > matching it wants. If not, we probably want to treat most of other > > > references and casts simlar way). > > > > > > Bootstrapped/regtested x86_64-linux. OK? > > > > Ok. > > > > Btw, you need to audit tree hashing for required changes with respect > > to your ones to operand_equal_p. operand_equal_p is the equality > > function for it. > > Yep, I am aware that tree hasing must match. I think my changes are safe so > far: > - ctors are already hashed resonably > - types are not hashed so the changes strenghtening OEP_ADDRESS_OF are safe > - OEP_ADDRESS_OF (so far) still boils down to syntactic matching. > > Looking at this I noticed that simple_cst_equal in tree.c seems to reimplement > OEP_CONSTANT part of operand_equal_p and it seems to have bugs - i.e. not > comparing index for CONSTRUCTOR ELTs. simple_cst_equal is also quite incomplete and I'd rather have it die... > Also I was always bit unsure how the path through operand_equal_p that allows > different tree codes to match (stripping MEM_REF) and use of STRIP_NOPS > combine > with add_expr that doesn't do these. Do we have some mechanism that will > prevent this from corrupting hashtables? We should ensure that equal trees get equal hash codes (heh, probably easy to add a wrapper around operand_equal_p double-checking that). Richard. > Honza > > > > Thanks, > > Richard. > > > > > * fold-const.c (operand_equal_p): Drop types_compatible_p when > > > comparing references. > > > > > > Index: fold-const.c > > > === > > > --- fold-const.c (revision 229278) > > > +++ fold-const.c (working copy) > > > @@ -2982,9 +2982,6 @@ operand_equal_p (const_tree arg0, const_ > > > TYPE_SIZE (TREE_TYPE (arg1)), > > > flags))) > > > return 0; > > > - /* Verify that access happens in similar types. */ > > > - if (!types_compatible_p (TREE_TYPE (arg0), TREE_TYPE (arg1))) > > > - return 0; > > > /* Verify that accesses are TBAA compatible. */ > > > if (flag_strict_aliasing > > > && (!alias_ptr_types_compatible_p > > > > > > > > > > -- > > Richard Biener > > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB > > 21284 (AG Nuernberg) > > -- Richard Biener SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
Re: [PR c/64765, c/64880] Support OpenACC Combined Directives in C, C++
Hi! On Thu, 8 Oct 2015 18:39:25 +0200, I wrote: > Some bits extracted out of gomp-4_0-branch, and some other bits > rewritten; here is a patch to support OpenACC Combined Directives in C, > C++. (The Fortran front end already does support these.) > > As far as I know, Jakub is not available at this time, so maybe the C > (Joseph) and C++ (Jason, Nathan) front end maintainers could please > review this, instead of him? (The front end changes as well as the few > other cleanup changes should all be straight forward.) OK for trunk once > bootstrap tested? Given the approval by Joseph and Nathan (thanks!), I have now committed this, in r229404: commit 2c4c872529d6e1051fb9cdd641ad5cc9fbc0e2cb Author: tschwinge Date: Tue Oct 27 08:39:15 2015 + [PR c/64765, c/64880] Support OpenACC Combined Directives in C, C++ gcc/c-family/ PR c/64765 PR c/64880 * c-common.h (c_oacc_split_loop_clauses): Declare function. * c-omp.c (c_oacc_split_loop_clauses): New function. gcc/c/ PR c/64765 PR c/64880 * c-parser.c (c_parser_oacc_loop): Add mask, cclauses formal parameters, and handle these. Adjust all users. (c_parser_oacc_kernels, c_parser_oacc_parallel): Merge functions into... (c_parser_oacc_kernels_parallel): ... this new function. Adjust all users. * c-tree.h (c_finish_oacc_parallel, c_finish_oacc_kernels): Don't declare functions. (c_finish_omp_construct): Declare function. * c-typeck.c (c_finish_oacc_parallel, c_finish_oacc_kernels): Merge functions into... (c_finish_omp_construct): ... this new function. gcc/cp/ PR c/64765 PR c/64880 * cp-tree.h (finish_oacc_kernels, finish_oacc_parallel): Don't declare functions. (finish_omp_construct): Declare function. * parser.c (cp_parser_oacc_loop): Add p_name, mask, cclauses formal parameters, and handle these. Adjust all users. (cp_parser_oacc_kernels, cp_parser_oacc_parallel): Merge functions into... (cp_parser_oacc_kernels_parallel): ... this new function. Adjust all users. * semantics.c (finish_oacc_kernels, finish_oacc_parallel): Merge functions into... (finish_omp_construct): ... this new function. gcc/ * tree.h (OACC_PARALLEL_BODY, OACC_PARALLEL_CLAUSES) (OACC_KERNELS_BODY, OACC_KERNELS_CLAUSES, OACC_KERNELS_COMBINED) (OACC_PARALLEL_COMBINED): Don't define macros. Adjust all users. gcc/testsuite/ PR c/64765 PR c/64880 * c-c++-common/goacc/loop-1.c: Don't skip for C++. Don't prune sorry message. (PR64765): New function. * gfortran.dg/goacc/coarray_2.f90: XFAIL. * gfortran.dg/goacc/combined_loop.f90: Extend. Don't prune sorry message. * gfortran.dg/goacc/cray.f95: Refine prune directive. * gfortran.dg/goacc/parameter.f95: Likewise. libgomp/ * testsuite/libgomp.oacc-c-c++-common/combdir-1.c: New file. * testsuite/libgomp.oacc-fortran/combdir-1.f90: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@229404 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 6 + gcc/c-family/ChangeLog | 9 ++ gcc/c-family/c-common.h| 1 + gcc/c-family/c-omp.c | 40 +- gcc/c/ChangeLog| 19 +++ gcc/c/c-parser.c | 148 +--- gcc/c/c-tree.h | 3 +- gcc/c/c-typeck.c | 36 ++--- gcc/cp/ChangeLog | 18 +++ gcc/cp/cp-tree.h | 3 +- gcc/cp/parser.c| 149 - gcc/cp/semantics.c | 54 +++- gcc/fortran/trans-openmp.c | 4 - gcc/gimplify.c | 16 +-- gcc/testsuite/ChangeLog| 13 ++ gcc/testsuite/c-c++-common/goacc/loop-1.c | 10 +- gcc/testsuite/gfortran.dg/goacc/coarray_2.f90 | 1 + gcc/testsuite/gfortran.dg/goacc/combined_loop.f90 | 9 +- gcc/testsuite/gfortran.dg/goacc/cray.f95 | 2 +- gcc/testsuite/gfortran.dg/goacc/parameter.f95 | 2 +- gcc/tree-pretty-print.c| 11 +- gcc/tree.def | 8 +- gcc/tree.h | 20 --- libgomp/ChangeLog | 5 + .../libgomp.oacc-c-c++-common/combdir-1.c | 52 +++ .../testsuite/libgomp.oacc-fortran/combdir-1.f90 | 37 + 26 files changed, 407 insertions(+), 269 deletions(-) diff --gi
[PATCH] Remove push/pop_cfun around cgraph::release_function_body
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2015-10-27 Richard Biener * cfg.c (free_edge): Add function argument and use it instead of cfun. (clear_edges): Likewise. * cfg.h (clear_edges): Adjust prototype. * cfgexpand.c (pass_expand::execute): Adjust. * cfgloop.c (release_recorded_exits): Add function argument and use it instead of cfun. * cfgloop.h (release_recorded_exits): Adjust prototype. (loops_state_satisfies_p): Add overload with function argument. (loops_state_set): Likewise. (loops_state_clear): Likewise. (struct loop_iterator): Add function argument to constructor and iterator and use it instead of cfun. (FOR_EACH_LOOP_FN): New macro. (loop_optimizer_finalize): Add overload with function argument. * loop-init.c (loop_optimizer_init): Adjust. (fix_loop_structure): Likewise. (loop_optimizer_finaliz): Add function argument and use it instead of cfun. * tree-cfg.c (delete_tree_cfg_annotations): Likewise. * tree-cfg.h (delete_tree_cfg_annotations): Adjust prototype. * cgraph.c (release_function_body): Do not push/pop cfun. * final.c (rest_of_clean_state): Adjust. * graphite.c (graphite_finalize): Likewise. * tree-ssa-copy.c (fini_copy_prop): Likewise. * tree-ssa-dce.c (perform_tree_ssa_dce): Likewise. * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Likewise. (tree_unroll_loops_completely): Likewise. (pass_complete_unrolli::execute): Likewise. * tree-ssa-loop-niter.c (free_numbers_of_iterations_estimates): Add function argument and use it instead of cfun. * tree-ssa-loop-niter.h (free_numbers_of_iterations_estimates): Adjust prototype. * tree-ssa-loop.c (tree_ssa_loop_done): Adjust. * tree-ssa.c (delete_tree_ssa): Add function argument and use it instead of cfun. * tree-ssa.h (delete_tree_ssa): Adjust prototype. * tree-ssanames.c (fini_ssanames): Add function argument and use it instead of cfun. * tree-ssanames.c (fini_ssanames): Adjust prototype. * tree-vrp.c (execute_vrp): Adjust. * value-prof.c (free_histograms): Add function argument and use it instead of cfun. * value-prof.h (free_histograms): Adjust prototype. Index: trunk/gcc/cfg.c === --- trunk.orig/gcc/cfg.c2015-10-26 15:27:42.150921714 +0100 +++ trunk/gcc/cfg.c 2015-10-26 15:32:23.679086458 +0100 @@ -88,35 +88,35 @@ init_flow (struct function *the_fun) without actually removing it from the pred/succ arrays. */ static void -free_edge (edge e) +free_edge (function *fn, edge e) { - n_edges_for_fn (cfun)--; + n_edges_for_fn (fn)--; ggc_free (e); } /* Free the memory associated with the edge structures. */ void -clear_edges (void) +clear_edges (struct function *fn) { basic_block bb; edge e; edge_iterator ei; - FOR_EACH_BB_FN (bb, cfun) + FOR_EACH_BB_FN (bb, fn) { FOR_EACH_EDGE (e, ei, bb->succs) - free_edge (e); + free_edge (fn, e); vec_safe_truncate (bb->succs, 0); vec_safe_truncate (bb->preds, 0); } - FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) -free_edge (e); - vec_safe_truncate (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, 0); - vec_safe_truncate (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs, 0); + FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (fn)->succs) +free_edge (fn, e); + vec_safe_truncate (EXIT_BLOCK_PTR_FOR_FN (fn)->preds, 0); + vec_safe_truncate (ENTRY_BLOCK_PTR_FOR_FN (fn)->succs, 0); - gcc_assert (!n_edges_for_fn (cfun)); + gcc_assert (!n_edges_for_fn (fn)); } /* Allocate memory for basic_block. */ @@ -350,7 +350,7 @@ remove_edge_raw (edge e) disconnect_src (e); disconnect_dest (e); - free_edge (e); + free_edge (cfun, e); } /* Redirect an edge's successor from one block to another. */ Index: trunk/gcc/cfg.h === --- trunk.orig/gcc/cfg.h2015-10-26 15:27:42.150921714 +0100 +++ trunk/gcc/cfg.h 2015-10-26 15:30:14.536634739 +0100 @@ -74,8 +74,8 @@ struct GTY(()) control_flow_graph { }; -extern void init_flow (struct function *); -extern void clear_edges (void); +extern void init_flow (function *); +extern void clear_edges (function *); extern basic_block alloc_block (void); extern void link_block (basic_block, basic_block); extern void unlink_block (basic_block); Index: trunk/gcc/cfgexpand.c === --- trunk.orig/gcc/cfgexpand.c 2015-10-26 15:27:42.150921714 +0100 +++ trunk/gcc/cfgexpand.c 2015-10-26 15:30:14.538634762 +0100 @@ -6307,7 +6307,7 @@ pass_expand::execute (function *fun) /* Free stuff we
Re: [PR 67794] Also remap SSA_NAMEs defined in ASMs in IPA-SRA
On 9 October 2015 at 09:46, Richard Biener wrote: > On Thu, 8 Oct 2015, Martin Jambor wrote: > >> Hi, >> >> the following fixes PR 67794 by properly remapping SSA_NAMEs which are >> based on PARM_DECLs which are about to be removed as unnecessary. And >> by "properly" I mean also when they are defined by a GIMPL_ASM >> statement. In fact, it switches to using an iterator over definitions >> to make sure it always handles everything... well,except for PHIs >> which are still handled specially because, from a quick glance over >> their source, it seemed to me that the iterator does not support them. >> >> Bootstrapped and tested on x86_64-linux. OK for trunk? >> The issue is most probably latent on a number of old branches, do we >> want to backport the patch to any of them? >> >> Thanks, >> >> Martin >> >> >> 2015-10-08 Martin Jambor >> >> tree-optimization/67794 >> * tree-sra.c (replace_removed_params_ssa_names): Do not distinguish >> between types of state,ents but accept original definitions as a >> parameter. >> (ipa_sra_modify_function_body): Use FOR_EACH_SSA_DEF_OPERAND to >> iterate over definitions. >> >> testsuite/ >> * gcc.dg/ipa/ipa-sra-10.c: Nw test. >> * gcc.dg/torture/pr67794.c: Likewise. >> >> diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c >> b/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c >> new file mode 100644 >> index 000..24b64d1 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c >> @@ -0,0 +1,34 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O2 -fipa-sra -fdump-tree-eipa_sra-details" } */ >> + >> +extern void consume (int); >> +extern int glob, glob1, glob2; >> +extern int get (void); >> + >> + >> +static void __attribute__ ((noinline)) >> +foo (int a) >> +{ >> + a = glob; >> + consume (a); >> + a = get (); >> + consume (a); >> + __asm__ volatile("" : : ""(a)); >> + consume (a); >> + >> + if (glob1) >> +a = glob1; >> + else >> +a = glob2; >> + consume (a); >> +} >> + >> +int >> +bar (int a) >> +{ >> + foo (a); >> + glob = a; >> + return 0; >> +} >> + >> +/* { dg-final { scan-tree-dump-times "replacing an SSA name of a removed >> param" 4 "eipa_sra" } } */ >> diff --git a/gcc/testsuite/gcc.dg/torture/pr67794.c >> b/gcc/testsuite/gcc.dg/torture/pr67794.c >> new file mode 100644 >> index 000..5489e56 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/torture/pr67794.c >> @@ -0,0 +1,15 @@ >> +/* { dg-do compile } */ >> + >> +int *b; >> +static void fn1(int *best, int *dmin) { >> + int a[64]; >> + dmin = a; >> + __asm__ volatile("" : "+&r"(dmin) : ""(best)); >> +} >> + >> +__attribute__((always_inline)) static inline void fn2(int *best) { >> fn1(best, b); } >> + >> +void fn3(void) { >> + int c[1]; >> + fn2(c); >> +} >> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c >> index 4327990..f2a4e72 100644 >> --- a/gcc/tree-sra.c >> +++ b/gcc/tree-sra.c >> @@ -4612,61 +4612,45 @@ get_adjustment_for_base (ipa_parm_adjustment_vec >> adjustments, tree base) >>return NULL; >> } >> >> -/* If the statement STMT defines an SSA_NAME of a parameter which is to be >> - removed because its value is not used, replace the SSA_NAME with a one >> - relating to a created VAR_DECL together all of its uses and return true. >> - ADJUSTMENTS is a pointer to an adjustments vector. */ >> +/* If OLD_NAME, which is being defined by statement STMT, is an SSA_NAME of >> a >> + parameter which is to be removed because its value is not used, create a >> new >> + SSA_NAME relating to a replacement VAR_DECL, replace all uses of the >> + original with it and return it. If there is no need to re-map, return >> true. >> + ADJUSTMENTS is a pointer to a vector of IPA-SRA adjustments. */ > > The docs still say we return a bool > > Otherwise the patch looks ok to me. I think we want to backport it > to branche(s) after a while. > Hi Martin, After your backport in the gcc-5 branch, I see build failures: /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c: In function ‘tree_node* replace_removed_params_ssa_names(tree_node*, gimple_statement_base**, ipa_parm_adjustment_vec)’: /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4609: error: cannot convert ‘gimple_statement_base**’ to ‘gimple_statement_base*’ for argument ‘2’ to ‘tree_node* make_ssa_name(tree_node*, gimple_statement_base*)’ /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c: In function ‘bool ipa_sra_modify_function_body(ipa_parm_adjustment_vec)’: /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4703: error: cannot convert ‘gphi*’ to ‘gimple_statement_base**’ for argument ‘2’ to ‘tree_node* replace_removed_params_ssa_names(tree_node*, gimple_statement_base**, ipa_parm_adjustment_vec)’ /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4772: error: cannot convert ‘gimple_statement_base*’ to ‘gimple_statement_base**’ for argument ‘2’ to
[RFC, PATCH v2] Disable -fprofile-use related optimizations if corresponding .gcda file not found.
Hi! As was pointed out in previous thread (https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00723.html), sometimes PGO-built binaries can actually introduce performance regressions. We could identify affected object files and disable PGO for them by simply removing corresponding .gcda file. My previous patch was incomplete and had two major drawbacks: * It disabled unrelated options (e.g. -frename-registers) and PGO-related options, set up by user explicitly, if corresponding .gcda file is not found. * As Markus pointed out, in many cases we actually don't want to disable PGO-related options even if .gcda file is missing. This patch tries to solve these issues in the following way: * Introduce new -fprofile-use-partial switch. * If -fprofile-use-partial is ON, try to find corresponding .gcda file in the very early stage during compiler invoking (actually, when common command line options are parsed). If .gcda file exists, enable PGO-related optimizations and emit warning otherwise. I believe this should not break existing code. Regtested and bootstrapped on x86_64-unknown-linux-gnu. Does the patch look sensible? Thanks, -Maxim gcc/ChangeLog: 2015-10-27 Maxim Ostapenko * common.opt (profile_file_name): New variable. (fprofile-use-partial): Likewise. (fprofile-use-partial=): Likewise. * opts.c: Include gcov-io.h. (common_handle_option): Defer enabling PGO-related optimizations until we know if corresponding .gcda file exists. (maybe_setup_aux_base_name): New function. (setup_coverage_filename): Likewise. (enable_fdo_optimizations): Move up in source file. (finish_options): Call maybe_setup_aux_base_name and setup coverage filename. Enable PGO-related optimizations if corresponding .gcda file exists if -fprofile-use-partial is used. If -fprofile-use is used, enable PGO-related optimizations without any other conditions. * coverage.c (coverage_init): Adjust to use profile_file_name. diff --git a/gcc/common.opt b/gcc/common.opt index 12ca0d6..fb04201 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -198,6 +198,9 @@ const char *main_input_basename Variable int main_input_baselength +Variable +char *profile_file_name + ; Which options have been printed by --help. Variable char *help_printed @@ -1861,6 +1864,14 @@ fprofile-use= Common Joined RejectNegative Enable common options for performing profile feedback directed optimizations, and set -fprofile-dir=. +fprofile-use-partial +Common Var(flag_profile_use_partial) +Enable common options for performing profile feedback directed optimizations. + +fprofile-use-partial= +Common Joined RejectNegative +Enable common options for performing profile feedback directed optimizations, and set -fprofile-dir=. + fprofile-values Common Report Var(flag_profile_values) Insert code to profile values of expressions. diff --git a/gcc/coverage.c b/gcc/coverage.c index 4e08e5f..461dd48 100644 --- a/gcc/coverage.c +++ b/gcc/coverage.c @@ -1184,7 +1184,7 @@ void coverage_init (const char *filename) { int len = strlen (filename); - int prefix_len = 0; + da_file_name = profile_file_name; /* Since coverage_init is invoked very early, before the pass manager, we need to set up the dumping explicitly. This is @@ -1193,24 +1193,6 @@ coverage_init (const char *filename) g->get_passes ()->get_pass_profile ()->static_pass_number; g->get_dumps ()->dump_start (profile_pass_num, NULL); - if (!profile_data_prefix && !IS_ABSOLUTE_PATH (filename)) -profile_data_prefix = getpwd (); - - if (profile_data_prefix) -prefix_len = strlen (profile_data_prefix); - - /* Name of da file. */ - da_file_name = XNEWVEC (char, len + strlen (GCOV_DATA_SUFFIX) - + prefix_len + 2); - - if (profile_data_prefix) -{ - memcpy (da_file_name, profile_data_prefix, prefix_len); - da_file_name[prefix_len++] = '/'; -} - memcpy (da_file_name + prefix_len, filename, len); - strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX); - bbg_file_stamp = local_tick; if (flag_auto_profile) diff --git a/gcc/opts.c b/gcc/opts.c index 9a3fbb3..2e0cfa4 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see #include "opts-diagnostic.h" #include "insn-attr-common.h" #include "common/common-target.h" +#include "gcov-io.h" static void set_Wstrict_aliasing (struct gcc_options *opts, int onoff); @@ -660,6 +661,97 @@ default_options_optimization (struct gcc_options *opts, lang_mask, handlers, loc, dc); } +/* Enable FDO-related flags. */ + +static void +enable_fdo_optimizations (struct gcc_options *opts, + struct gcc_options *opts_set, + int value) +{ + if (!opts_set->x_flag_branch_probabilities) +opts->x_flag_branch_probabilities = value; + if (!opts_set->x_flag_profile_values) +opts->x_flag_profile_values = value; + if (!opts_set->x_flag_unroll_loops) +opts->x_flag_unroll_loops = value; + if (!opts_set->x_flag_peel_loops)
Re: [testsuite, committed, PR68063] Add missing private clause in libgomp.c++/member-2.C
Hi Jakub! On Fri, 23 Oct 2015 12:48:57 +0200, Tom de Vries wrote: > this patch adds a missing private clause in libgomp.c++/member-2.C (as > you suggested in the PR). > > This allows the test to succeed consistently. I'm seeing occasional (very rarely) failure of libgomp.c++/member-1.C (without anything in libgomp.log, unfortunately). I have not made an attempt to properly understand what it's doing, but this: [...] 150#pragma omp parallel private (f) 151 { 152f = false; 153 #pragma omp single 154 #pragma omp taskloop lastprivate (a, t, b, n) 155for (int i = 0; i < 30; i++) 156 { 157int q = omp_get_thread_num (); 158if (f && (a != 7 * q || b != 8 * q || r != 9 * q || t != 10 * q)) 159 __builtin_abort (); 160take (a, r, t, b); 161A::a = 7 * q; 162A::b = 8 * q; 163R::r = 9 * q; [...] ... looks a bit as if it might need to get the same patch applied that Tom has applied to libgomp.c++/member-2.C: > --- a/libgomp/testsuite/libgomp.c++/member-2.C > +++ b/libgomp/testsuite/libgomp.c++/member-2.C > @@ -154,7 +154,7 @@ A::m1 () > { >f = false; > #pragma omp single > -#pragma omp taskloop lastprivate (a, T::t, b, n) > +#pragma omp taskloop lastprivate (a, T::t, b, n) private (R::r) >for (int i = 0; i < 30; i++) > { > int q = omp_get_thread_num (); Could you please review that, and while at it, could there possibly be any other races of r or R::r, or other objects? Grüße Thomas signature.asc Description: PGP signature
Fold comparisons between sqrt and zero
Richard Biener writes: > On Mon, Oct 26, 2015 at 10:36 AM, Richard Sandiford > wrote: >> An upcoming patch adds a fold from hypot(x,x) to fabs(x)*sqrt(2). >> This is unusual in that it could trigger in the gimplifier but would >> require new SSA names to be created. This patch makes sure that we >> don't try to create new SSA names when we're not yet in SSA form. >> >> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi. >> OK to install? > > Please use > > if (gimple_in_ssa_p (cfun)) > res = make_ssa_name (type); > else > res = create_tmp_reg (type); > > Ok with that change. Thanks, committed in that form. I retested the series on top of it and the create_tmp_reg causes the later signbit patch (not yet committed) to regress on: signbit(sqrt(x)) which is always 0 for -ffast-math. The signbit fold first converts it to: sqrt(x) < 0 and whether we realise that this is false depends on a race between two folders: the sqrt comparison folder, which wants to convert it to: x < 0*0 and the generic tree_expr_nonnegative_p rule for ... < 0, which would give the hoped-for 0. The sqrt code already handles comparisons with negative values specially, so this patch simply extends that idea to comparisons with zero. I don't really like patches like this since they'll probably help no real code, but still... Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi. OK to install? Thanks, Richard gcc/ * match.pd: Handle sqrt(x) cmp 0 specially. gcc/testsuite/ * gcc.dg/torture/builtin-sqrt-cmp-1.c: New test. diff --git a/gcc/match.pd b/gcc/match.pd index 26491d2..b8e6b46 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -1973,6 +1973,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) { constant_boolean_node (true, type); }) /* sqrt(x) > y is the same as x >= 0, if y is negative. */ (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))) + (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst0)) + (switch + /* sqrt(x) < 0 is always false. */ + (if (cmp == LT_EXPR) + { constant_boolean_node (false, type); }) + /* sqrt(x) >= 0 is always true if we don't care about NaNs. */ + (if (cmp == GE_EXPR && !HONOR_NANS (@0)) + { constant_boolean_node (true, type); }) + /* sqrt(x) <= 0 -> x == 0. */ + (if (cmp == LE_EXPR) + (eq @0 @1)) + /* Otherwise sqrt(x) cmp 0 -> x cmp 0. Here cmp can be >=, >, + == or !=. In the last case: + + (sqrt(x) != 0) == (NaN != 0) == true == (x != 0) + + if x is negative or NaN. Due to -funsafe-math-optimizations, + the results for other x follow from natural arithmetic. */ + (cmp @0 @1))) (if (cmp == GT_EXPR || cmp == GE_EXPR) (with { diff --git a/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c new file mode 100644 index 000..3f4a708 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c @@ -0,0 +1,53 @@ +/* { dg-do link } */ +/* { dg-options "-ffast-math" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ + +extern double sqrt (double); +extern float sqrtf (float); +extern long double sqrtl (long double); + +/* All references to link_error should go away at compile-time. */ +extern void link_error (void); + +#define TEST_ONE(SUFFIX, TYPE) \ + void __attribute__ ((noinline, noclone)) \ + test##SUFFIX (TYPE f, int *res) \ + {\ +TYPE sqrt_res = sqrt##SUFFIX (f); \ +res[0] = sqrt_res < 0; \ +if (res[0])\ + link_error (); \ +res[1] = sqrt_res <= 0;\ +if (res[1] != (f == 0))\ + link_error (); \ +res[2] = (sqrt_res == 0); \ +if (res[2] != (f == 0))\ + link_error (); \ +res[3] = (sqrt_res != 0); \ +if (res[3] != (f != 0))\ + link_error (); \ +res[4] = (sqrt_res > 0); \ +if (res[4] != (f > 0)) \ + link_error (); \ +res[5] = (sqrt_res >= 0); \ +if (!res[5]) \ + link_error (); \ + } + +volatile float f; +volatile double d; +volatile long double ld; + +TEST_ONE (f, float) +TEST_ONE (, double) +TEST_ONE (l, long double) + +int +main () +{ + int res[6]; + testf (f, res); + test (d, res); + testl (ld, res); + return 0; +}
Re: [testsuite, committed, PR68063] Add missing private clause in libgomp.c++/member-2.C
On Tue, Oct 27, 2015 at 10:15:11AM +0100, Thomas Schwinge wrote: > ... looks a bit as if it might need to get the same patch applied that > Tom has applied to libgomp.c++/member-2.C: You're right, member-2.C is a templatized version of member-1.C, a change to add private (R::r) to that corresponding member-1.C taskloop directive is preapproved. The other member-*.C testcases don't have this issue. Jakub
[Patch, Contrib] Download ISL 0.15 by download_prerequisites
Hi all, recently, support for ISL 0.15 was added to GCC and also ftp://gcc.gnu.org/pub/gcc/infrastructure/ now contains ISL 0.15. Hence, there is no reason not to download the newest version by download_prerequisites. OK for the trunk? (One could also add it to GCC 5 as the ISL 0.15 patches landed there as well on 2015-10-12.) Side remark: I think one could could consider to also put newer versions of the other prerequisites on the FTP server, which currently has quite old versions: - GMP: 4.3.2 of January 2010 - current is 6.0.0a of March 2014 (6.1RC is Oct 2015) - MPFR: 2.4.2 of mid 2009 - current is 3.1.3 of June 2015 (the web page has additionally 3 post-relase bug-fix patches) - MPC: 0.8.1 of end of 2009 - current is 1.0.2 of February 2015 Cheers, Tobias contrib/ * download_prerequisites: Download ISL 0.15. diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites index 6940330..a685a1d 100755 --- a/contrib/download_prerequisites +++ b/contrib/download_prerequisites @@ -48,7 +48,7 @@ ln -sf $MPC mpc || exit 1 # Necessary to build GCC with the Graphite loop optimizations. if [ "$GRAPHITE_LOOP_OPT" = "yes" ] ; then - ISL=isl-0.14 + ISL=isl-0.15 wget ftp://gcc.gnu.org/pub/gcc/infrastructure/$ISL.tar.bz2 || exit 1 tar xjf $ISL.tar.bz2 || exit 1
Re: [testsuite, committed, PR68063] Add missing private clause in libgomp.c++/member-2.C
Hi! On Tue, 27 Oct 2015 11:08:15 +0100, Jakub Jelinek wrote: > On Tue, Oct 27, 2015 at 10:15:11AM +0100, Thomas Schwinge wrote: > > ... looks a bit as if it might need to get the same patch applied that > > Tom has applied to libgomp.c++/member-2.C: > > You're right, member-2.C is a templatized version of member-1.C, > a change to add private (R::r) to that corresponding member-1.C taskloop > directive is preapproved. Tested, and committed in r229411: commit 540c8b2a12162472a4f644b7b533041a813a0332 Author: tschwinge Date: Tue Oct 27 10:32:32 2015 + [PR testsuite/68063] Add missing private clause in libgomp.c++/member-1.C PR testsuite/68063 * testsuite/libgomp.c++/member-1.C (A::m1): Add missing private clause. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@229411 138bc75d-0d04-0410-961f-82ee72b054a4 --- libgomp/ChangeLog| 5 + libgomp/testsuite/libgomp.c++/member-1.C | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git libgomp/ChangeLog libgomp/ChangeLog index ca34af8..0194503 100644 --- libgomp/ChangeLog +++ libgomp/ChangeLog @@ -1,3 +1,8 @@ +2015-10-27 Thomas Schwinge + + PR testsuite/68063 + * testsuite/libgomp.c++/member-1.C (A::m1): Add missing private clause. + 2015-10-27 James Norris * testsuite/libgomp.oacc-c-c++-common/combdir-1.c: New file. diff --git libgomp/testsuite/libgomp.c++/member-1.C libgomp/testsuite/libgomp.c++/member-1.C index d2d0c5b..c7c1ba4 100644 --- libgomp/testsuite/libgomp.c++/member-1.C +++ libgomp/testsuite/libgomp.c++/member-1.C @@ -151,7 +151,7 @@ A::m1 () { f = false; #pragma omp single -#pragma omp taskloop lastprivate (a, t, b, n) +#pragma omp taskloop lastprivate (a, t, b, n) private (R::r) for (int i = 0; i < 30; i++) { int q = omp_get_thread_num (); Grüße Thomas signature.asc Description: PGP signature
Re: [PATCH][AArch64] Enable autoprefetcher modelling in the scheduler
On Thu, Oct 22, 2015 at 05:05:26PM +0100, Kyrill Tkachov wrote: > Hi all, > > This patch enables the autoprefetcher heuristic for scheduling in AArch64. > It is enabled for the Cortex-A53, Cortex-A57 cores and is off for the other > cores, > leaving their behaviour unchanged. > > When enabled, the scheduler will try to sort groups of loads or stores in > order of the offset from a common base register. > > From what I understand of the relevant scheduling hooks, there are > essentially three levels of this: > 1) Don't use the autoprefetcher heuristic > 2) Use it to order loads/stores but allow other scheduling heuristics to > reorder them again to maximise multi-issue opportunities > 3) Use it to order loads/stores and keep that order, even if it can harm > multi-issue opportunities. > > With this patch I get a 0.4% improvement in SPECINT 2006 and 1.7% improvement > in SPECFP 2006 on a Cortex-A57 as well as improvements in various streaming > workloads. > > On Cortex-A53 I see improvements to various streaming workloads and there's > no regressions or improvements on SPEC2000. > > Bootstrapped and tested on aarch64-none-linux-gnu. > > Ok for trunk? OK. Thanks, James > 2015-10-22 Kyrylo Tkachov > > * config/aarch64/aarch64-protos.h > (struct tune_params): Add autoprefetcher_model field. > * config/aarch64/aarch64.c: Include params.h > (generic_tunings): Specify autoprefetcher_model value. > (cortexa53_tunings): Likewise. > (cortexa57_tunings): Likewise. > (cortexa72_tunings): Likewise. > (thunderx_tunings): Likewise. > (xgene1_tunings): Likewise. > (aarch64_first_cycle_multipass_dfa_lookahead_guard): New function. > (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD): Define. > (aarch64_override_options_internal): Set > PARAM_SCHED_AUTOPREF_QUEUE_DEPTH param.
Re: [PATCH] Use subreg_regno instead of subreg_regno_offset
On 10/26/2015 11:46 PM, Anatoliy Sokolov wrote: This patch change code 'REGNO (subreg) + subreg_regno_offset (...)' with subreg_regno (subreg). The patch has whitespace damage that makes it difficult to apply. Please use text/plain attachments. Index: gcc/reg-stack.c === --- gcc/reg-stack.c(revision 229083) +++ gcc/reg-stack.c(working copy) @@ -416,11 +416,7 @@ rtx subreg; if (STACK_REG_P (subreg = SUBREG_REG (*pat))) { - int regno_off = subreg_regno_offset (REGNO (subreg), - GET_MODE (subreg), - SUBREG_BYTE (*pat), - GET_MODE (*pat)); - *pat = FP_MODE_REG (REGNO (subreg) + regno_off, + *pat = FP_MODE_REG (subreg_regno (subreg), GET_MODE (subreg)); return pat; Isn't this wrong? subreg_regno wants to be called with a SUBREG, but here we already had subreg = SUBREG_REG (*pat). @@ -5522,12 +5516,7 @@ op0 = SUBREG_REG (op0); code0 = GET_CODE (op0); if (code0 == REG && REGNO (op0) < FIRST_PSEUDO_REGISTER) - op0 = gen_rtx_REG (word_mode, - (REGNO (op0) + - subreg_regno_offset (REGNO (SUBREG_REG (orig_op0)), - GET_MODE (SUBREG_REG (orig_op0)), - SUBREG_BYTE (orig_op0), - GET_MODE (orig_op0; + op0 = gen_rtx_REG (word_mode, subreg_regno (op0)); } Same problem as in the reg-stack code? The existing code was using orig_op0 to get the subreg, you've changed it to use op0 which is already the SUBREG_REG. With an x86 test you're not exercising reload, and even on other targets this is not a frequently used path. I've stopped reviewing here, I think this is a good example of the kind of cleanup patch we _shouldn't_ be doing. We've proved it's risky, and unless these cleanup patches were a preparation for functional changes, we should just leave the code alone IMO. Bernd
Re: [AArch64][PATCH 1/7] Add support for ARMv8.1 Adv.SIMD,instructions.
On Fri, Oct 23, 2015 at 01:16:25PM +0100, Matthew Wahab wrote: > The ARMv8.1 architecture extension adds two Adv.SIMD instructions, > sqrdmlah and sqrdmlsh. This patch series adds the instructions to the > AArch64 backend together with the ACLE feature macro and NEON intrinsics > to make use of them. The instructions are enabled when -march=armv8.1-a > is selected. > > To support execution tests for the instructions, code is also added to > the testsuite to check the target capabilities and to specify required > compiler options. > > This patch adds target feature macros for the instructions. Subsequent > patches: > - add the instructions to the aarch64-simd patterns, > - add GCC builtins to generate the instructions, > - add the ACLE feature macro __ARM_FEATURE_QRDMX, > - add support for ARMv8.1-A Adv.SIMD tests to the dejagnu support code, > - add NEON intrinsics for the basic form of the instructions. > - add NEON intrinsics for the *_lane forms of the instructions. > > Tested the series for aarch64-none-linux-gnu with native bootstrap and > make check on an ARMv8 architecture. Also tested aarch64-none-elf with > cross-compiled check-gcc on an ARMv8.1 emulator. > > Ok for trunk? > Matthew OK. Thanks, James > > gcc/ > 2015-10-23 Matthew Wahab > > * config/aarch64/aarch64.h (AARCH64_ISA_RDMA): New. > (TARGET_SIMD_RDMA): New. >
Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.
On 26/10/15 13:24, Bernd Schmidt wrote: On 10/26/2015 02:17 PM, Teresa Johnson wrote: On Mon, Oct 26, 2015 at 2:00 AM, Renlin Li wrote: * lib/target-supports.exp (check_effective_target_freorder): Add -fprofile-use flag. Hmmm, the testcases themselves which use this predicate only use -freorder-and-partition, so maybe this requires more thought. Yes. In all of the related testcases, only -freorder-and-partition flag is provided explicitly. How about creating a new dg-add-options for freorder? proc add_options_for_freorder { flags } { return "$flags -freorder-blocks-and-partition -fprofile-use" } proc check_effective_target_freorder {} { return [check_no_compiler_messages freorder object { void foo (void) { } } [add_options_for_freorder ""]] } And in all test case which requires freorder support, "{ dg-add-options freorder }" is used to add necessary compiler flags? Regards, Renlin Bernd
Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.
On 10/27/2015 11:54 AM, Renlin Li wrote: Yes. In all of the related testcases, only -freorder-and-partition flag is provided explicitly. How about creating a new dg-add-options for freorder? proc add_options_for_freorder { flags } { return "$flags -freorder-blocks-and-partition -fprofile-use" } proc check_effective_target_freorder {} { return [check_no_compiler_messages freorder object { void foo (void) { } } [add_options_for_freorder ""]] } I don't know that tcl syntax but apparently this is done already for a number of other cases. So, probably the right thing to do. You might want to coordinate with Maxim ostapenko who's currently working on another -fprofile-use patch. Hopefully not one that disables -freorder-blocks-and-partition if it can't find .gcda files. Bernd
Re: scheduling conditional branches after stores
On 10/09/2015 11:36 PM, Mike Stump wrote: So, I keep on seeing inaccurate schedule time on the conditional branches after a store, and tracked it down to this type of solution. On my machine, I can run these two in the same cycle, but with a REG_DEP_OUTPUT dependency it was moving the branch to the next cycle. I kinda wanted a control dependency of some sort, but those seem only to be used for predicated instructions, which doesn’t apply to the case at hand. For the instructions, from my view, there are no data dependencies at all. What dependencies exists is the dependency that one cannot interchange: mem[$r1] = 1 if ($r2) jump L5 This last case being the one that I’m interested in. So, the question is, is the below patch the right way to fix this? I'd recommend trying to fix this with the adjust_cost hook instead. Bernd
Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof
On 10/26/2015 01:27 PM, Richard Biener wrote: On Mon, Oct 26, 2015 at 1:01 PM, Jakub Jelinek wrote: On Mon, Oct 26, 2015 at 12:57:53PM +0100, Bernd Schmidt wrote: On 10/26/2015 12:47 PM, Jakub Jelinek wrote: Because the amount of code that uses this (including GCC itself) is just too huge, so everywhere in the middle-end we also special case last array members of structs. While C99+ has flexible array members, e.g. C++ does not, so users are left with using struct { ... type fld[1]; }; Yes, and that case is documented. However, the issue is arrays declared with a larger size than 1 or 0 - is there really code using them as flexible array members? I believe so, though don't have pointers to that right now. But vaguely remember we saw various cases of using 2 or other values too. Yes, char[4] is quite common (basically making sure there is no appearant padding behind the array due to alignment - just in case compilers might be clever because of that). Ugh, how ugly. Can we at least agree not to allow multi-dimensional arrays with a size larger than one to be considered flexible? Bernd
Re: [AArch64][PATCH 2/7] Add sqrdmah, sqrdmsh instructions.
On Fri, Oct 23, 2015 at 01:19:20PM +0100, Matthew Wahab wrote: > The ARMv8.1 architecture extension adds two Adv.SIMD instructions, > sqrdmlah and sqrdmlsh. This patch adds the instructions to the > aarch64-simd patterns, making them conditional on the TARGET_SIMD_RDMA > feature macro introduced in the previous patch. > > The instructions patterns are defined using unspec expressions, so that > they are only generated through builtins added by this patch series. To > simplify the definition, iterators SQRDMLAH and rdma_as are added, to > iterate over the add (sqrdmlah) and subtract (sqrdmlsh) forms of the > instructions. > > Tested the series for aarch64-none-linux-gnu with native bootstrap and > make check on an ARMv8 architecture. Also tested aarch64-none-elf with > cross-compiled check-gcc on an ARMv8.1 emulator. > > Ok for trunk? > Matthew OK with the name of the iterator fixed to something more clear as to what you are iterating over. > gcc/ > 2015-10-23 Matthew Wahab > > * config/aarch64/aarch64-simd.md > (aarch64_sqmovun): Fix some white-space. > (aarch64_qmovun): Likewise. > (aarch64_sqrdmlh): New. > (aarch64_sqrdmlh_lane): New. > (aarch64_sqrdmlh_laneq): New. > * config/aarch64/iterators.md (UNSPEC_SQRDMLAH): New. > (UNSPEC_SQRDMLSH): New. > (SQRDMLAH): New. > (rdma_as): New. > > diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md > index 964f8f1..409ba7b 100644 > --- a/gcc/config/aarch64/iterators.md > +++ b/gcc/config/aarch64/iterators.md > @@ -303,6 +303,8 @@ > UNSPEC_PMULL2 ; Used in aarch64-simd.md. > UNSPEC_REV_REGLIST ; Used in aarch64-simd.md. > UNSPEC_VEC_SHR ; Used in aarch64-simd.md. > +UNSPEC_SQRDMLAH ; Used in aarch64-simd.md. > +UNSPEC_SQRDMLSH ; Used in aarch64-simd.md. > ]) > > ;; --- > @@ -932,6 +934,8 @@ > UNSPEC_SQSHRN UNSPEC_UQSHRN > UNSPEC_SQRSHRN UNSPEC_UQRSHRN]) > > +(define_int_iterator SQRDMLAH [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH]) > + This name does not make it clear that you will iterate over an "A" and an "S" form. I'd like to see a clearer naming choice, RDMAS? SQRDMLHADDSUB? etc. Thanks, James
Re: [AArch64][PATCH 3/7] Add builtins for ARMv8.1 Adv.SIMD,instructions.
On Fri, Oct 23, 2015 at 01:20:55PM +0100, Matthew Wahab wrote: > The ARMv8.1 architecture extension adds two Adv.SIMD instructions, > sqrdmlah and sqrdmlsh. This patch adds the GCC builtins to generate the new > instructions which are needed for the NEON intrinsics added later in > this series. > > Tested the series for aarch64-none-linux-gnu with native bootstrap and > make check on an ARMv8 architecture. Also tested aarch64-none-elf with > cross-compiled check-gcc on an ARMv8.1 emulator. > > Ok for trunk? > Matthew OK. Thanks, James > > gcc/ > 2015-10-23 Matthew Wahab > > * config/aarch64/aarch64-simd-builtins.def > (sqrdmlah, sqrdmlsh): New. > (sqrdmlah_lane, sqrdmlsh_lane): New. > (sqrdmlah_laneq, sqrdmlsh_laneq): New. >
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On 10/19/2015 09:55 PM, H.J. Lu wrote: * calls.c (prepare_call_address): Don't handle -fno-plt here. Is any other target using -fno-plt? If not, and if that's really just a revert I'll approve it on the condition that the x86 maintainers are happy with the rest of the changes. Your patch is word-wrapped. Bernd
Re: Fold comparisons between sqrt and zero
On Tue, Oct 27, 2015 at 10:21 AM, Richard Sandiford wrote: > Richard Biener writes: >> On Mon, Oct 26, 2015 at 10:36 AM, Richard Sandiford >> wrote: >>> An upcoming patch adds a fold from hypot(x,x) to fabs(x)*sqrt(2). >>> This is unusual in that it could trigger in the gimplifier but would >>> require new SSA names to be created. This patch makes sure that we >>> don't try to create new SSA names when we're not yet in SSA form. >>> >>> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi. >>> OK to install? >> >> Please use >> >> if (gimple_in_ssa_p (cfun)) >> res = make_ssa_name (type); >> else >> res = create_tmp_reg (type); >> >> Ok with that change. > > Thanks, committed in that form. I retested the series on top of it and > the create_tmp_reg causes the later signbit patch (not yet committed) > to regress on: > > signbit(sqrt(x)) > > which is always 0 for -ffast-math. The signbit fold first converts it to: > > sqrt(x) < 0 > > and whether we realise that this is false depends on a race between two > folders: the sqrt comparison folder, which wants to convert it to: > > x < 0*0 > > and the generic tree_expr_nonnegative_p rule for ... < 0, which would > give the hoped-for 0. > > The sqrt code already handles comparisons with negative values specially, > so this patch simply extends that idea to comparisons with zero. > > I don't really like patches like this since they'll probably help no > real code, but still... > > Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi. > OK to install? Ok. Thanks, Richard. > Thanks, > Richard > > > gcc/ > * match.pd: Handle sqrt(x) cmp 0 specially. > > gcc/testsuite/ > * gcc.dg/torture/builtin-sqrt-cmp-1.c: New test. > > diff --git a/gcc/match.pd b/gcc/match.pd > index 26491d2..b8e6b46 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -1973,6 +1973,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > { constant_boolean_node (true, type); }) > /* sqrt(x) > y is the same as x >= 0, if y is negative. */ > (ge @0 { build_real (TREE_TYPE (@0), dconst0); }))) > + (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst0)) > + (switch > + /* sqrt(x) < 0 is always false. */ > + (if (cmp == LT_EXPR) > + { constant_boolean_node (false, type); }) > + /* sqrt(x) >= 0 is always true if we don't care about NaNs. */ > + (if (cmp == GE_EXPR && !HONOR_NANS (@0)) > + { constant_boolean_node (true, type); }) > + /* sqrt(x) <= 0 -> x == 0. */ > + (if (cmp == LE_EXPR) > + (eq @0 @1)) > + /* Otherwise sqrt(x) cmp 0 -> x cmp 0. Here cmp can be >=, >, > + == or !=. In the last case: > + > + (sqrt(x) != 0) == (NaN != 0) == true == (x != 0) > + > + if x is negative or NaN. Due to -funsafe-math-optimizations, > + the results for other x follow from natural arithmetic. */ > + (cmp @0 @1))) > (if (cmp == GT_EXPR || cmp == GE_EXPR) >(with > { > diff --git a/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c > b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c > new file mode 100644 > index 000..3f4a708 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c > @@ -0,0 +1,53 @@ > +/* { dg-do link } */ > +/* { dg-options "-ffast-math" } */ > +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ > + > +extern double sqrt (double); > +extern float sqrtf (float); > +extern long double sqrtl (long double); > + > +/* All references to link_error should go away at compile-time. */ > +extern void link_error (void); > + > +#define TEST_ONE(SUFFIX, TYPE) \ > + void __attribute__ ((noinline, noclone)) \ > + test##SUFFIX (TYPE f, int *res) \ > + {\ > +TYPE sqrt_res = sqrt##SUFFIX (f); \ > +res[0] = sqrt_res < 0; \ > +if (res[0])\ > + link_error (); \ > +res[1] = sqrt_res <= 0;\ > +if (res[1] != (f == 0))\ > + link_error (); \ > +res[2] = (sqrt_res == 0); \ > +if (res[2] != (f == 0))\ > + link_error (); \ > +res[3] = (sqrt_res != 0); \ > +if (res[3] != (f != 0))\ > + link_error (); \ > +res[4] = (sqrt_res > 0); \ > +if (res[4] != (f > 0)) \ > + link_error (); \ > +res[5] = (sqrt_res >= 0); \ > +if (!res[5]) \ > + link_error (); \ > + } > + > +volatile float f; > +volatile double d; > +volatile long double ld; > + > +TEST_ONE (f, float) > +TEST_ONE (, double) > +TEST_ONE (l, long double) > + > +int >
Re: [AArch64][PATCH 4/7] Add ACLE feature macro for ARMv8.1,Adv.SIMD instructions.
On Fri, Oct 23, 2015 at 01:22:16PM +0100, Matthew Wahab wrote: > The ARMv8.1 architecture extension adds two Adv.SIMD instructions, > sqrdmlah and sqrdmlsh. This patch adds the feature macro > __ARM_FEATURE_QRDMX to indicate the presence of these instructions, > generating it when the feature is available, as it is when > -march=armv8.1-a is selected. > > Tested the series for aarch64-none-linux-gnu with native bootstrap and > make check on an ARMv8 architecture. Also tested aarch64-none-elf with > cross-compiled check-gcc on an ARMv8.1 emulator. > > Ok for trunk? > Matthew I don't see this macro documented in the versions of ACLE available from the ARM documentation sites, and googling doesn't show anything other than your patches. You don't explicitly mention anywhere in cover text for this series where these new features are (or will be?) documented. Could you please write a more complete description of where these new macros and intrinsics come from and what they are intended to do? I would not like to accept them without some confidence that these names have been finalized, and I am nervous about having the best description of the behaviour of them be the GCC source code. Richard, Marcus? Thanks, James > > gcc/ > 2015-10-23 Matthew Wahab > > * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add > ARM_FEATURE_QRDMX. >
Re: [Patch] Avoid is_simple_use bug in vectorizable_live_operation
On Mon, Oct 26, 2015 at 6:15 PM, Alan Hayward wrote: > > > On 26/10/2015 13:35, "Richard Biener" wrote: > >>On Mon, Oct 26, 2015 at 1:33 PM, Alan Hayward >>wrote: >>> There is a potential bug in vectorizable_live_operation. >>> >>> Consider the case where the first op for stmt is valid, but the second >>>is >>> null. >>> The first time through the for () loop, it will call out to >>> vect_is_simple_use () which will set dt. >>> The second time, because op is null, vect_is_simple_use () will not be >>> called. >>> However, dt is still set to a valid value, therefore the loop will >>> continue. >>> >>> Note this is different from the case where the first op is null, which >>> will cause the loop not call vect_is_simple_use () and then return >>>false. >>> >>> It is possible that this was intentional, however that is not clear from >>> the code. >>> >>> The fix is to simply ensure dt is initialized to a default value on each >>> iteration. >> >>I think the patch is a strict improvement, thus OK. Still a NULL operand >>is not possible in GIMPLE so the op && check is not necessary. The way >>it iterates over all stmt uses is a bit scary anyway. As it is ok with >>all invariants it should probably simply do sth like >> >> FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) >> if (!vect_is_simple_use (op, )) >> >>and be done with that. Unvisited uses can only be constants (ok). >> >>Care to rework the funtion like that if you are here? >> > > Ok, I’ve updated as requested. Ok. Please remove code = gimple_assign_rhs_code (stmt); op_type = TREE_CODE_LENGTH (code); rhs_class = get_gimple_rhs_class (code); gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op); gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op); and associated variables as I belive otherwise a --disable-checking build will fail with set-but-not-used warnings. And the asserts are quite pointless given the now sanitized loop. Thanks, Richard. > > Cheers, > Alan. >
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt wrote: > On 10/19/2015 09:55 PM, H.J. Lu wrote: >> >> * calls.c (prepare_call_address): Don't handle -fno-plt here. > > > Is any other target using -fno-plt? If not, and if that's really just a aarch64 is the only target which checks -fno-plt from commit log below. I CCed the code author. aarch64 may suffer from the same issue. > revert I'll approve it on the condition that the x86 maintainers are happy > with the rest of the changes. Uros, can you take a look? > Your patch is word-wrapped. Sorry for that. Here is the link for the patch: https://gcc.gnu.org/git/?p=gcc.git;a=patch;h=a116a4af073bb99c3117dae38f02d528815bc431 Thanks. -- H.J. --- Author: jiwang Date: Thu Aug 6 16:02:16 2015 + [AArch64] Tighten direct call pattern for sibcall to repair -fno-plt 2015-08-06 Jiong Wang gcc/ * config/aarch64/constraints.md (Usf): Add the test of aarch64_is_noplt_call_p. gcc/testsuite/ * gcc.target/aarch64/noplt_3.c: New testcase. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226682 138bc75d-0d04-0410-961f-82ee72b054a4 commit 2bcb7473b37f9aa76e530f0a2007893489f61586 Author: jiwang Date: Thu Aug 6 15:57:36 2015 + [AArch64] Tighten direct call pattern to repair -fno-plt 2015-08-06 Jiong Wang gcc/ * config/aarch64/aarch64-protos.h (aarch64_is_noplt_call_p): New declaration. * config/aarch64/aarch64.c (aarch64_is_noplt_call_p): New function. * config/aarch64/aarch64.md (call_value_symbol): Check noplt scenarios. (call_symbol): Likewise. gcc/testsuite/ * gcc.target/aarch64/noplt_1.c: New testcase. * gcc.target/aarch64/noplt_2.c: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226681 138bc75d-0d04-0410-961f-82ee72b054a4
Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.
On 27/10/15 14:06, Bernd Schmidt wrote: On 10/27/2015 11:54 AM, Renlin Li wrote: Yes. In all of the related testcases, only -freorder-and-partition flag is provided explicitly. How about creating a new dg-add-options for freorder? proc add_options_for_freorder { flags } { return "$flags -freorder-blocks-and-partition -fprofile-use" } proc check_effective_target_freorder {} { return [check_no_compiler_messages freorder object { void foo (void) { } } [add_options_for_freorder ""]] } I don't know that tcl syntax but apparently this is done already for a number of other cases. So, probably the right thing to do. You might want to coordinate with Maxim ostapenko who's currently working on another -fprofile-use patch. Hopefully not one that disables -freorder-blocks-and-partition if it can't find .gcda files. AFAU, adding -fprofile-use would lead to enabling a bunch of PGO-related optimizations wherever you have .gcda file or not. So, you can end up with a completely different optimization pipeline for your tests if you enable -fprofile-use for them. Is it desirable? Anyway, disabling any compile options provided by user explicitly sounds like a bad idea for me, so disabling -freorder-blocks-and-partition if it can't find .gcda file seems to be not acceptable. -Maxim Bernd
Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.
On 10/27/2015 12:38 PM, Maxim Ostapenko wrote: Anyway, disabling any compile options provided by user explicitly sounds like a bad idea for me, so disabling -freorder-blocks-and-partition if it can't find .gcda file seems to be not acceptable. The current situation is that -fr-b-a-p is disabled if -fprofile-use is not also present, because it would have no effect in this case. We're looking at fixing -fr-b-a-p testcases to pass -fprofile-use; ideally that will stay working with your patch (I haven't checked). Bernd
[Ada] Pragma SPARK_Mode and expression functions
This patch ensures that an internally generated subprogram body that completes an expression function inherits the SPARK_Mode from the expression function spec. -- Source -- -- expr_funcs.ads package Expr_Funcs with SPARK_Mode is function F1 return Boolean is (True); -- F1 spec has mode ON function F2 return Boolean;-- F2 spec has mode ON function F3 return Boolean;-- F3 spec has mode ON private pragma SPARK_Mode (Off); -- function F1 return Boolean is -- F1 body has mode ON -- begin --return True; -- end F1; function F2 return Boolean is (True); -- F2 body has mode OFF function F3 return Boolean is (True) -- F3 body attempts mdoe ON with SPARK_Mode; -- Error end Expr_Funcs; -- Compilation and output -- $ gcc -c expr_funcs.ads expr_funcs.ads:16:11: cannot change SPARK_Mode from Off to On expr_funcs.ads:16:11: SPARK_Mode was set to Off at line 7 Tested on x86_64-pc-linux-gnu, committed on trunk 2015-10-27 Hristian Kirtchev * inline.adb (Is_Expression_Function): Removed. * sem_ch6.adb (Analyze_Subprogram_Body_Helper): An internally generated subprogram body that completes an expression function inherits the SPARK_Mode from the spec. * sem_res.adb (Resolve_Call): Update all calls to Is_Expression_Function. * sem_util.ads, sem_util.adb (Is_Expression_Function): Reimplemented. (Is_Expression_Function_Or_Completion): New routine. Index: inline.adb === --- inline.adb (revision 229413) +++ inline.adb (working copy) @@ -1357,10 +1357,6 @@ -- Returns True if subprogram Id is defined in the visible part of a -- package specification. - function Is_Expression_Function (Id : Entity_Id) return Boolean; - -- Returns True if subprogram Id was defined originally as an expression - -- function. - --- -- Has_Formal_With_Discriminant_Dependent_Fields -- --- @@ -1472,20 +1468,6 @@ and then List_Containing (Decl) = Visible_Declarations (P); end In_Package_Visible_Spec; - - -- Is_Expression_Function -- - - - function Is_Expression_Function (Id : Entity_Id) return Boolean is - Decl : Node_Id := Parent (Parent (Id)); - begin - if Nkind (Parent (Id)) = N_Defining_Program_Unit_Name then -Decl := Parent (Decl); - end if; - - return Nkind (Original_Node (Decl)) = N_Expression_Function; - end Is_Expression_Function; - -- Is_Unit_Subprogram -- Index: sem_util.adb === --- sem_util.adb(revision 229413) +++ sem_util.adb(working copy) @@ -5081,7 +5081,6 @@ (Is_Concurrent_Type (Scope (Discriminal_Link (E))) or else Is_Concurrent_Record_Type (Scope (Discriminal_Link (E); - end Denotes_Discriminant; - @@ -11677,26 +11676,46 @@ function Is_Expression_Function (Subp : Entity_Id) return Boolean is - Decl : Node_Id; - begin - if Ekind (Subp) /= E_Function then + if Ekind_In (Subp, E_Function, E_Subprogram_Body) then + return + Nkind (Original_Node (Unit_Declaration_Node (Subp))) = + N_Expression_Function; + else return False; + end if; + end Is_Expression_Function; + -- + -- Is_Expression_Function_Or_Completion -- + -- + + function Is_Expression_Function_Or_Completion + (Subp : Entity_Id) return Boolean + is + Subp_Decl : Node_Id; + + begin + if Ekind (Subp) = E_Function then + Subp_Decl := Unit_Declaration_Node (Subp); + + -- The function declaration is either an expression function or is + -- completed by an expression function body. + + return + Is_Expression_Function (Subp) + or else (Nkind (Subp_Decl) = N_Subprogram_Declaration + and then Present (Corresponding_Body (Subp_Decl)) + and then Is_Expression_Function + (Corresponding_Body (Subp_Decl))); + + elsif Ekind (Subp) = E_Subprogram_Body then + return Is_Expression_Function (Subp); + else - Decl := Unit_Declaration_Node (Subp); - return Nkind (Decl) = N_Subprogram_Declaration - and then - (Nkind (Original_Node (Decl)
[Ada] Delete response file even when link failed
When a response file was created and the link failed, the response file was not deleted. It is deleted now. Tested on x86_64-pc-linux-gnu, committed on trunk 2015-10-27 Vincent Celier * gnatlink.adb: Always delete the response file, even when the invocation of gcc to link failed. Index: gnatlink.adb === --- gnatlink.adb(revision 229413) +++ gnatlink.adb(working copy) @@ -1859,6 +1859,10 @@ -- been compiled. if Opt.CodePeer_Mode then + if Tname_FD /= Invalid_FD then + Delete (Tname); + end if; + return; end if; @@ -2052,16 +2056,14 @@ System.OS_Lib.Spawn (Linker_Path.all, Args, Success); - if Success then + -- Delete the temporary file used in conjunction with linking if one + -- was created. See Process_Bind_File for details. --- Delete the temporary file used in conjunction with linking --- if one was created. See Process_Bind_File for details. + if Tname_FD /= Invalid_FD then +Delete (Tname); + end if; -if Tname_FD /= Invalid_FD then - Delete (Tname); -end if; - - else + if not Success then Error_Msg ("error when calling " & Linker_Path.all); Exit_Program (E_Fatal); end if;
Re: [PATCH, GCC 5 branch] Fix compile time regression caused by fix to PR64111
On Mon, Oct 26, 2015 at 6:14 PM, Caroline Tice wrote: > Here is my promised backport to the GCC 5 branch, for the patch below > that went into ToT last week. As with the previous patch, I've > verified that it fixes the problem, bootstraps and has no new > regression test failures. Is this ok to commit to the gcc-5-branch? Ok. Thanks, Richard. > -- Caroline Tice > cmt...@google.com > > > On Fri, Oct 23, 2015 at 3:22 PM, Caroline Tice wrote: >> This patch fixes a compile-time regression that was originally >> introduced by the fix >> for PR64111, in GCC 4.9.3.One of our user's encountered this problem >> with a >> particular file, where the compile time (on arm) went from 20 seconds >> to 150 seconds. >> >> The fix in this patch was suggested by Richard Biener, who wrote the >> original fix for >> PR64111. I have verified that this patch fixes the compile time >> regression; I have bootstrapped >> the compiler with this patch; and I have run the regression testsuite >> (no regressions). >> Is this ok to commit to ToT? (I am also working on backports for >> gcc-5_branch and gcc-4_9-branch). >> >> -- Caroline Tice >> cmt...@google.com >> > gcc/ChangeLog: > > 2015-10-26 Caroline Tice > > (from Richard Biener) > * tree.c (int_cst_hasher::hash): Replace XOR with more efficient > call to iterative_hash_host_wide_int.
Re: [PATCH, GCC 4.9 branch] Fix compile time regression caused by fix to PR64111
On Tue, Oct 27, 2015 at 4:36 AM, Caroline Tice wrote: > Here is my promised backport to the GCC 4.9 branch, for the patch below > that went into ToT last week. As with the previous patch, I've > verified that it fixes the problem, bootstraps and has no new > regression test failures. Is this ok to commit to the gcc-4_9-branch? Ok. Thanks, Richard. > -- Caroline Tice > cmt...@google.com > > gcc/ChangeLog: > > 2015-10-26 Caroline Tice > > (from Richard Biener) > * tree.c (int_cst_hash_hash): Replace XORs with more efficient > calls to iterative_hash_host_wide_int.
[Ada] Legality checks on initialization of limited objects
This patch corrects an omission on the legality check for allocators with a qualified expression when the expression is of a limited type. The check must be performed after the expression is fully resolved, to handle properly complex overloading cases such as indexings of parameterless functions that return arrays. ACATS test B750a04. Tested on x86_64-pc-linux-gnu, committed on trunk 2015-10-27 Ed Schonberg * sem_ch4.adb (Analyze_Allocator): Do not perform legality check on allocators for limited objects in a qualified expression, because expression has not been resolved. * sem_res.adb (Resolve_Allocator): Perform check on legality of limited objects after resolution. Add sem_ch3.adb to context. Index: sem_res.adb === --- sem_res.adb (revision 229421) +++ sem_res.adb (working copy) @@ -57,6 +57,7 @@ with Sem_Attr; use Sem_Attr; with Sem_Cat; use Sem_Cat; with Sem_Ch4; use Sem_Ch4; +with Sem_Ch3; use Sem_Ch3; with Sem_Ch6; use Sem_Ch6; with Sem_Ch8; use Sem_Ch8; with Sem_Ch13; use Sem_Ch13; @@ -4680,6 +4681,22 @@ Check_Non_Static_Context (Expression (E)); Check_Unset_Reference (Expression (E)); + -- Allocators generated by the build-in-place expansion mechanism + -- are explicitly marked as coming from source but do not need to be + -- checked for limited initialization. To exclude this case, ensure + -- that the parent of the allocator is a source node. + + if Is_Limited_Type (Etype (E)) + and then Comes_From_Source (N) + and then Comes_From_Source (Parent (N)) + and then not In_Instance_Body + then +if not OK_For_Limited_Init (Etype (E), Expression (E)) then + Error_Msg_N ("initialization not allowed for limited types", N); + Explain_Limited_Type (Etype (E), N); +end if; + end if; + -- A qualified expression requires an exact match of the type. -- Class-wide matching is not allowed. Index: sem_ch4.adb === --- sem_ch4.adb (revision 229413) +++ sem_ch4.adb (working copy) @@ -549,22 +549,6 @@ Type_Id := Etype (E); Set_Directly_Designated_Type (Acc_Type, Type_Id); - -- Allocators generated by the build-in-place expansion mechanism - -- are explicitly marked as coming from source but do not need to be - -- checked for limited initialization. To exclude this case, ensure - -- that the parent of the allocator is a source node. - - if Is_Limited_Type (Type_Id) - and then Comes_From_Source (N) - and then Comes_From_Source (Parent (N)) - and then not In_Instance_Body - then -if not OK_For_Limited_Init (Type_Id, Expression (E)) then - Error_Msg_N ("initialization not allowed for limited types", N); - Explain_Limited_Type (Type_Id, N); -end if; - end if; - -- A qualified expression requires an exact match of the type, -- class-wide matching is not allowed.
[Ada] Spurious error on legal use of abstract state constituent
This patch modifies the analysis of pragma Refined_Global to treat objects and states as constituents only when their encapsulating state appears in pragma Global. -- Source -- -- pack.ads package Pack with Abstract_State => (State1, State2), Initializes=> (Var, State1, State2) is Var : Integer := 0; procedure Initialize_State with Global => (Output => State1), Depends => (State1 => null); end Pack; -- pack.adb package body Pack with Refined_State => (State1 => (A, B), State2 => (Inner.Inner_Var, Inner.Inner_State)) is A, B : Integer; package Inner with Abstract_State => Inner_State, Initializes=> (Inner_State, Inner_Var => Var) is Inner_Var : Integer; procedure Initialize_Inner with Global => (Output => (Inner_State, Inner_Var), Input => Var), Depends => (Inner_State => null, Inner_Var => Var); end Inner; procedure Initialize_State is separate with Refined_Global => (Output => (A, B)), Refined_Depends => ((A, B) => null); procedure Double_B is separate with Global => (In_Out => B), Depends => (B => B); package body Inner is separate; begin Initialize_State; Double_B; end Pack; -- pack-double_b.adb separate (Pack) procedure Double_B is begin B := B * 2; end Double_B; -- pack-initialize_state.adb separate (Pack) procedure Initialize_State is begin A := 0; B := 0; end Initialize_State; -- pack-inner.adb separate (Pack) package body Inner with Refined_State => (Inner_State => Inner_Body_Var) is Inner_Body_Var : Integer; procedure Initialize_Inner with Refined_Global => (Output => (Inner_Body_Var, Inner_Var), Input => Var), Refined_Depends => (Inner_Body_Var => null, Inner_Var => Var) is begin Inner_Body_Var := 0; Inner_Var := Var; end Initialize_Inner; begin Initialize_Inner; end Inner; - -- Compilation -- - $ gcc -c pack.adb Tested on x86_64-pc-linux-gnu, committed on trunk 2015-10-27 Hristian Kirtchev * sem_prag.adb (Analyze_Refined_Global_In_Decl_Part): Add variable States. (Check_Refined_Global_Item): An object or state acts as a constituent only when the corresponding encapsulating state appears in pragma Global. (Collect_Global_Item): Add a state with non-null visible refinement to list States. Index: sem_prag.adb === --- sem_prag.adb(revision 229414) +++ sem_prag.adb(working copy) @@ -527,7 +527,7 @@ --E_Constant - "constant" --E_Discriminant - "discriminant" --E_Generic_In_Out_Parameter - "generic parameter" - --E_Generic_Out_Parameter- "generic parameter" + --E_Generic_In_Parameter - "generic parameter" --E_In_Parameter - "parameter" --E_In_Out_Parameter - "parameter" --E_Loop_Parameter - "loop parameter" @@ -24057,6 +24057,9 @@ Spec_Id : Entity_Id; -- The entity of the subprogram subject to pragma Refined_Global + States : Elist_Id := No_Elist; + -- A list of all states with visible refinement found in pragma Global + procedure Check_In_Out_States; -- Determine whether the corresponding Global pragma mentions In_Out -- states with visible refinement and if so, ensure that one of the @@ -24566,11 +24569,14 @@ begin -- When the state or object acts as a constituent of another -- state with a visible refinement, collect it for the state --- completeness checks performed later on. +-- completeness checks performed later on. Note that the item +-- acts as a constituent only when the encapsulating state is +-- present in pragma Global. if Ekind_In (Item_Id, E_Abstract_State, E_Constant, E_Variable) and then Present (Encapsulating_State (Item_Id)) and then Has_Visible_Refinement (Encapsulating_State (Item_Id)) + and then Contains (States, Encapsulating_State (Item_Id)) then if Global_Mode = Name_Input then Append_New_Elmt (Item_Id, In_Constits); @@ -24715,6 +24721,8 @@ Has_Null_State := True; elsif Has_Non_Null_Refinement (Item_Id) then + Append_New_Elmt (Item_Id, States); + if Item_Mode = Name_Input then Has_In_State := True; elsif Item_Mode = Name_In_Out then
Re: [PATCH] [AArch64] Distinct costs for sign and zero extension
On Tue, Oct 20, 2015 at 07:52:45AM +0800, Andrew Pinski wrote: > On Tue, Oct 20, 2015 at 7:10 AM, Evandro Menezes > wrote: > > Some micro-architectures may favor one of sign or zero extension over the > > other in the base plus extended register offset addressing mode. > > Yes I was going to create the same patch as ThunderX is one of those > micro-architectures. OK (Though the patch would have looked better had all the in-tree examples not be no-op splits!). I've committed this as r229431 on your behalf. Please provide full ChangeLog entries with the patch submission, I derived the Name/Email fields and committed this: 2015-10-27 Evandro Menezes * config/aarch64/aarch64-protos.h (cpu_addrcost_table): Split member for register extension into sign and zero register extension. * config/aarch64/aarch64.c (generic_addrcost_table): Infer values for sign and zero register extension. (cortexa57_addrcost_table): Likewise. (xgene1_addrcost_table): Likewise. Thanks, James > > > > This patch separates the member "register_extend" of the structure > > "cpu_addrcost_table" into two, one for sign and the other zero extension. > > > > Please, commit if it's alright. > > > > Thank you, > > > > -- > > Evandro Menezes > > >
Re: [PATCH] Adjust some patterns wrt :c
On Mon, 26 Oct 2015, Marc Glisse wrote: > On Mon, 26 Oct 2015, Richard Biener wrote: > > > @@ -435,7 +435,7 @@ (define_operator_list RINT BUILT_IN_RINT > > > > /* Fold (A & ~B) - (A & B) into (A ^ B) - B. */ > > (simplify > > - (minus (bit_and:cs @0 (bit_not @1)) (bit_and:s @0 @1)) > > + (minus (bit_and:cs @0 (bit_not @1)) (bit_and:cs @0 @1)) > > (minus (bit_xor @0 @1) @1)) > > Sorry, I should have listed them all, but the same applies to > /* Fold (A & B) - (A & ~B) into B - (A ^ B). */ > a few lines below. Heh. I wonder if we can teach genmatch to auto-annotate commutative operands (detecting the cases where it doesn't get us anything but redundant patterns). Or implement a warning that catches them at least. Anyway, fixed in my local tree, will go out with my next match.pd modification. Richard.
Re: [PATCH][ARM] PR target/67929 Tighten vfp3_const_double_for_bits checks
On Wed, Oct 14, 2015 at 1:23 PM, Kyrill Tkachov wrote: > Hi all, > > This patch fixes the referenced PR by rewriting the > vfp3_const_double_for_bits function in arm.c > The function is supposed to accept positive CONST_DOUBLE rtxes whose value > is an exact power of 2 > and whose log2 is between 1 and 32. That is values like 2.0, 4.0, 8.9, 16.0 > etc... > > The current implementation seems to have been written under the assumption > that exact_real_truncate returns > false if the input value is not an exact integer, whereas in fact > exact_real_truncate returns false if the > truncation operation was not exact, which are different things. This would > lead the function to accept any > CONST_DOUBLE that can truncate to a power of 2, such as 4.9, 16.2 etc. > > In any case, I've rewritten this function and used the real_isinteger > predicate to check if the real value > is an exact integer. > > The testcase demonstrates the kind of wrong code that this patch addresses. > > This bug appears on GCC 5 and 4.9 as well, but due to the recent > introduction of CONST_DOUBLE_REAL_VALUE > this patch doesn't apply on those branches. I will soon post the > backportable variant. > > Bootstrapped and tested on arm-none-linux-gnueabihf. > > Ok for trunk? Thanks, this is OK for trunk and all release branches. Ramana > > Thanks, > Kyrill > > 2015-10-12 Kyrylo Tkachov > > PR target/67929 > * config/arm/arm.c (vfp3_const_double_for_bits): Rewrite. > * config/arm/constraints.md (Dp): Update callsite. > * config/arm/predicates.md (const_double_vcvt_power_of_two): Likewise. > > 2015-10-12 Kyrylo Tkachov > > PR target/67929 > * gcc.target/arm/pr67929_1.c: New test.
Re: [PATCH, PR67742] Handle restrict struct fields recursively
On 27/10/15 08:25, Tom de Vries wrote: @@ -5968,7 +5990,16 @@ intra_create_variable_infos (struct function *fn) for (; p; p = vi_next (p)) { if (p->only_restrict_pointers) - make_constraint_from_global_restrict (p, "PARM_RESTRICT"); + { + varinfo_t vi = lookup_restrict_pointed_var (p); + if (vi != NULL) + { + make_constraint_from (p, vi->id); + make_restrict_var_constraints (vi); + } + else + make_constraint_from_global_restrict (p, "PARM_RESTRICT"); + } else if (p->may_have_pointers) make_constraint_from (p, nonlocal_id); } -- 1.9.1 Thinking it over a bit more, I realized the constraint handling started to be messy. I've reworked the patch series to simplify that first. 1 Simplify constraint handling 2 Rename make_restrict_var_constraints to make_param_constraints 3 Add recursion to make_param_constraints 4 Add handle_param parameter to create_variable_info_for_1 5 Handle recursive restrict pointer in create_variable_info_for_1 6 Handle restrict struct fields recursively Currently doing bootstrap and regtest on x86_64. I'll repost the patch series in reply to this message. Thanks, - Tom
Re: [PR 67794] Also remap SSA_NAMEs defined in ASMs in IPA-SRA
On Tue, Oct 27, 2015 at 09:56:48AM +0100, Christophe Lyon wrote: > Hi Martin, > > After your backport in the gcc-5 branch, I see build failures: > /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c: > In function ‘tree_node* replace_removed_params_ssa_names(tree_node*, > gimple_statement_base**, ipa_parm_adjustment_vec)’: > /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4609: > error: cannot convert ‘gimple_statement_base**’ to > ‘gimple_statement_base*’ for argument ‘2’ to ‘tree_node* > make_ssa_name(tree_node*, gimple_statement_base*)’ > /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c: > In function ‘bool > ipa_sra_modify_function_body(ipa_parm_adjustment_vec)’: > /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4703: > error: cannot convert ‘gphi*’ to ‘gimple_statement_base**’ for > argument ‘2’ to ‘tree_node* > replace_removed_params_ssa_names(tree_node*, gimple_statement_base**, > ipa_parm_adjustment_vec)’ > /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4772: > error: cannot convert ‘gimple_statement_base*’ to > ‘gimple_statement_base**’ for argument ‘2’ to ‘tree_node* > replace_removed_params_ssa_names(tree_node*, gimple_statement_base**, > ipa_parm_adjustment_vec)’ > make[2]: *** [tree-sra.o] Error 1 > > I see this on aarch64* and arm* targets. > > Can you fix this? Oops, I must have mistakenly committed the trunk version to the branch. I have just fixed the problem by committing the following (after checking that tree-sra.c now matches the one I have tested on the branch). Sorry and thanks for reporting, Martin 2015-10-27 Martin Jambor * tree-sra.c (replace_removed_params_ssa_names): Change type of parameter stmt to gimple. Index: gcc/tree-sra.c === --- gcc/tree-sra.c (revision 229434) +++ gcc/tree-sra.c (working copy) @@ -4587,7 +4587,7 @@ get_adjustment_for_base (ipa_parm_adjust ADJUSTMENTS is a pointer to a vector of IPA-SRA adjustments. */ static tree -replace_removed_params_ssa_names (tree old_name, gimple *stmt, +replace_removed_params_ssa_names (tree old_name, gimple stmt, ipa_parm_adjustment_vec adjustments) { struct ipa_parm_adjustment *adj;
Re: [PATCH] Pass manager: add support for termination of pass list
On 10/26/2015 02:48 PM, Richard Biener wrote: > On Thu, Oct 22, 2015 at 1:02 PM, Martin Liška wrote: >> On 10/21/2015 04:06 PM, Richard Biener wrote: >>> On Wed, Oct 21, 2015 at 1:24 PM, Martin Liška wrote: On 10/21/2015 11:59 AM, Richard Biener wrote: > On Wed, Oct 21, 2015 at 11:19 AM, Martin Liška wrote: >> On 10/20/2015 03:39 PM, Richard Biener wrote: >>> On Tue, Oct 20, 2015 at 3:00 PM, Martin Liška wrote: Hello. As part of upcoming merge of HSA branch, we would like to have possibility to terminate pass manager after execution of the HSA generation pass. The HSA back-end is implemented as a tree pass that directly emits HSAIL from gimple tree representation. The pass operates on clones created by HSA IPA pass and the pass manager should stop execution of further RTL passes. Suggested patch survives bootstrap and regression tests on x86_64-linux-pc. What do you think about it? >>> >>> Are you sure it works this way? >>> >>> Btw, you will miss executing of all the cleanup passes that will >>> eventually free memory >>> associated with the function. So I'd rather support a >>> TODO_discard_function which >>> should basically release the body from the cgraph. >> >> Hi. >> >> Agree with you that I should execute all TODOs, which can be easily done. >> However, if I just try to introduce the suggested TODO and handle it >> properly >> by calling cgraph_node::release_body, then for instance fn->gimple_df, >> fn->cfg are >> released and I hit ICEs on many places. >> >> Stopping the pass manager looks necessary, or do I miss something? > > "Stopping the pass manager" is necessary after TODO_discard_function, yes. > But that may be simply done via a has_body () check then? Thanks, there's second version of the patch. I'm going to start regression tests. >>> >>> As release_body () will free cfun you should pop_cfun () before >>> calling it (and thus >> >> Well, release_function_body calls both push & pop, so I think calling pop >> before cgraph_node::release_body is not necessary. > > (ugh). > >> If tried to call pop_cfun before cgraph_node::release_body, I have cfun still >> pointing to the same (function *) (which is gcc_freed, but cfun != NULL). > > Hmm, I meant to call pop_cfun then after it (unless you want to fix the above, > none of the freeing functions should techincally need 'cfun', just add > 'fn' parameters ...). > > I expected pop_cfun to eventually set cfun to NULL if it popped the > "last" cfun. Why > doesn't it do that? > >>> drop its modification). Also TODO_discard_functiuon should be only set for >>> local passes thus you should probably add a gcc_assert (cfun). >>> I'd move its handling earlier, definitely before the ggc_collect, eventually >>> before the pass_fini_dump_file () (do we want a last dump of the >>> function or not?). >> >> Fully agree, moved here. >> >>> >>> @@ -2397,6 +2410,10 @@ execute_pass_list_1 (opt_pass *pass) >>> { >>>gcc_assert (pass->type == GIMPLE_PASS >>> || pass->type == RTL_PASS); >>> + >>> + >>> + if (!gimple_has_body_p (current_function_decl)) >>> + return; >>> >>> too much vertical space. With popping cfun before releasing the body the >>> check >>> might just become if (!cfun) and >> >> As mentioned above, as release body is symmetric (calling push & pop), the >> suggested >> guard will not work. > > I suggest to fix it. If it calls push/pop it should leave with the > original cfun, popping again > should make it NULL? > >>> >>> @@ -2409,7 +2426,7 @@ execute_pass_list (function *fn, opt_pass *pass) >>> { >>>push_cfun (fn); >>>execute_pass_list_1 (pass); >>> - if (fn->cfg) >>> + if (gimple_has_body_p (current_function_decl) && fn->cfg) >>> { >>>free_dominance_info (CDI_DOMINATORS); >>>free_dominance_info (CDI_POST_DOMINATORS); >>> >>> here you'd need to guard the pop_cfun call on cfun != NULL. IMHO it's >>> better >>> to not let cfun point to deallocated data. >> >> As I've read the code, since we gcc_free 'struct function', one can just >> rely on >> gimple_has_body_p (current_function_decl) as it correctly realizes that >> DECL_STRUCT_FUNCTION (current_function_decl) == NULL. > > I'm sure that with some GC checking ggc_free makes them #deadbeef or so: > > void > ggc_free (void *p) > { > ... > #ifdef ENABLE_GC_CHECKING > /* Poison the data, to indicate the data is garbage. */ > VALGRIND_DISCARD (VALGRIND_MAKE_MEM_UNDEFINED (p, size)); > memset (p, 0xa5, size); > #endif > > so I don't think that's a good thing to rely on ;) > > Richard. Hi Richi, fully agree that relying of 0xdeadbeaf is not good. I'm sending quite simple patch v4 where I enable popping up the stack to eventually set cfun
Re: [PATCH] tree-scalar-evolution.c: Handle LSHIFT by constant
--in-reply-to On 26/10/15 08:58, Richard Biener wrote: > > On Fri, Oct 23, 2015 at 5:15 PM, Alan Lawrence wrote: >> + chrec2 = fold_build2 (LSHIFT_EXPR, TREE_TYPE (rhs1), >> + build_int_cst (TREE_TYPE (rhs1), 1), > > 'type' instead of TREE_TYPE (rhs1) I presume you mean the first of the two (allowing removal of the chrec_convert), and that I keep the second TREE_TYPE (rhs1), consistent with your previous observation that I should do the multiply in the type of rhs1. (This appears correct looking at e.g. gcc.target/i386/avx2-vpsllwi-2.c) Hence, I've committed the attached as r229437. Thanks, Alan --- gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c | 33 gcc/tree-scalar-evolution.c | 14 ++ 2 files changed, 47 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c new file mode 100644 index 000..b1ce2ec --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c @@ -0,0 +1,33 @@ +/* PR tree-optimization/65963. */ +#include "tree-vect.h" + +#define N 512 + +int in[2*N], out[N]; + +__attribute__ ((noinline)) void +loop (void) +{ + for (int i = 0; i < N; i++) +out[i] = in[i << 1] + 7; +} + +int +main (int argc, char **argv) +{ + check_vect (); + for (int i = 0; i < 2*N; i++) +{ + in[i] = i; + __asm__ volatile ("" : : : "memory"); +} + loop (); + __asm__ volatile ("" : : : "memory"); + for (int i = 0; i < N; i++) +{ + if (out[i] != i*2 + 7) + abort (); +} + return 0; +} +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { vect_strided2 } } } } */ diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c index 0753bf3..8e95ddd 100644 --- a/gcc/tree-scalar-evolution.c +++ b/gcc/tree-scalar-evolution.c @@ -1840,6 +1840,20 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt, res = chrec_fold_multiply (type, chrec1, chrec2); break; +case LSHIFT_EXPR: + /* Handle A<
[PATCH, 1/6] Simplify constraint handling
On 27/10/15 13:24, Tom de Vries wrote: Thinking it over a bit more, I realized the constraint handling started to be messy. I've reworked the patch series to simplify that first. 1Simplify constraint handling 2Rename make_restrict_var_constraints to make_param_constraints 3Add recursion to make_param_constraints 4Add handle_param parameter to create_variable_info_for_1 5Handle recursive restrict pointer in create_variable_info_for_1 6Handle restrict struct fields recursively Currently doing bootstrap and regtest on x86_64. I'll repost the patch series in reply to this message. This patch gets rid of this bit of code in intra_create_variable_infos: ... if (restrict_pointer_p) make_constraint_from_global_restrict (p, "PARM_RESTRICT"); else .. I already proposed to remove it here ( https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02426.html ) but there is a problem with that approach: It can happen that restrict_pointer_p is true, but p->only_restrict_pointers is false. This happens with fipa-pta, when create_function_info_for created a varinfo for the parameter before intra_create_variable_infos was called. The patch handles that case now by setting p->only_restrict_pointers. Thanks, - Tom Simplify constraint handling 2015-10-27 Tom de Vries * tree-ssa-structalias.c (intra_create_variable_infos): Simplify constraint handling. --- gcc/tree-ssa-structalias.c | 17 +++-- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 5e070bc..4610914 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -5884,6 +5884,8 @@ intra_create_variable_infos (struct function *fn) p = create_variable_info_for_1 (t, alias_get_name (t)); insert_vi_for_tree (t, p); } + else if (restrict_pointer_p) + p->only_restrict_pointers = 1; /* For restrict qualified pointers build a representative for the pointed-to object. Note that this ends up handling @@ -5902,17 +5904,12 @@ intra_create_variable_infos (struct function *fn) continue; } - if (restrict_pointer_p) - make_constraint_from_global_restrict (p, "PARM_RESTRICT"); - else + for (; p; p = vi_next (p)) { - for (; p; p = vi_next (p)) - { - if (p->only_restrict_pointers) - make_constraint_from_global_restrict (p, "PARM_RESTRICT"); - else if (p->may_have_pointers) - make_constraint_from (p, nonlocal_id); - } + if (p->only_restrict_pointers) + make_constraint_from_global_restrict (p, "PARM_RESTRICT"); + else if (p->may_have_pointers) + make_constraint_from (p, nonlocal_id); } } -- 1.9.1
[PATCH, 2/6] Rename make_restrict_var_constraints to make_param_constraints
On 27/10/15 13:24, Tom de Vries wrote: Thinking it over a bit more, I realized the constraint handling started to be messy. I've reworked the patch series to simplify that first. 1Simplify constraint handling 2Rename make_restrict_var_constraints to make_param_constraints 3Add recursion to make_param_constraints 4Add handle_param parameter to create_variable_info_for_1 5Handle recursive restrict pointer in create_variable_info_for_1 6Handle restrict struct fields recursively Currently doing bootstrap and regtest on x86_64. I'll repost the patch series in reply to this message. This no-functional-changes patch: - moves the one constraint handling loop left in intra_create_variable_infos to make_restrict_var_constraints - renames make_restrict_var_constraints to make_param_constraints - adds a parameter toplevel to make_param_constraints to distinguish between the two calling contexts Thanks, - Tom Rename make_restrict_var_constraints to make_param_constraints 2015-10-27 Tom de Vries * tree-ssa-structalias.c (make_restrict_var_constraints): Rename to ... (make_param_constraints): ... this. Add toplevel parameter. (intra_create_variable_infos): Use make_param_constraints. --- gcc/tree-ssa-structalias.c | 29 + 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 4610914..e88fbf0 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -5845,18 +5845,29 @@ debug_solution_for_var (unsigned int var) dump_solution_for_var (stderr, var); } -/* Register the constraints for restrict var VI. */ +/* Register the constraints for VI. If TOPLEVEL then VI is a function + parameter, otherwise VI is part of a function parameter. */ static void -make_restrict_var_constraints (varinfo_t vi) +make_param_constraints (varinfo_t vi, bool toplevel) { for (; vi; vi = vi_next (vi)) if (vi->may_have_pointers) { if (vi->only_restrict_pointers) - make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT"); + { + if (toplevel) + make_constraint_from_global_restrict (vi, "PARM_RESTRICT"); + else + make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT"); + } else - make_copy_constraint (vi, nonlocal_id); + { + if (toplevel) + make_constraint_from (vi, nonlocal_id); + else + make_copy_constraint (vi, nonlocal_id); + } } } @@ -5900,17 +5911,11 @@ intra_create_variable_infos (struct function *fn) vi->is_restrict_var = 1; insert_vi_for_tree (heapvar, vi); make_constraint_from (p, vi->id); - make_restrict_var_constraints (vi); + make_param_constraints (vi, false); continue; } - for (; p; p = vi_next (p)) - { - if (p->only_restrict_pointers) - make_constraint_from_global_restrict (p, "PARM_RESTRICT"); - else if (p->may_have_pointers) - make_constraint_from (p, nonlocal_id); - } + make_param_constraints (p, true); } /* Add a constraint for a result decl that is passed by reference. */ -- 1.9.1
[PATCH] Fix PR68067
The following patch adjusts negate_expr_p to account for the fact that we can't generally change a - (b - c) to (c - b) + a because -INF - 0 is ok while 0 - -INF not. Similarly for a - (b + c). While creating testcases I noticed that MULT_EXPR handling is bogus as well as with -INF/2 * 2 neither operand can be negated safely. I believe the division case is also still wrong but I refrained from touching it with this patch. Bootstrapped and tested on x86_64-unknown-linux-gnu. Richard. 2015-10-27 Richard Biener PR middle-end/68067 * fold-const.c (negate_expr_p): We cannot negate plus or minus if overflow is not wrapping. Likewise multiplication unless one operand is constant and not power of two. (fold_negate_expr): Adjust accordingly. * gcc.dg/torture/pr68067-1.c: New testcase. * gcc.dg/torture/pr68067-2.c: Likewise. Index: gcc/fold-const.c === --- gcc/fold-const.c(revision 229404) +++ gcc/fold-const.c(working copy) @@ -443,7 +443,9 @@ negate_expr_p (tree t) case PLUS_EXPR: if (HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)) - || HONOR_SIGNED_ZEROS (element_mode (type))) + || HONOR_SIGNED_ZEROS (element_mode (type)) + || (INTEGRAL_TYPE_P (type) + && ! TYPE_OVERFLOW_WRAPS (type))) return false; /* -(A + B) -> (-B) - A. */ if (negate_expr_p (TREE_OPERAND (t, 1)) @@ -457,12 +459,23 @@ negate_expr_p (tree t) /* We can't turn -(A-B) into B-A when we honor signed zeros. */ return !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type)) && !HONOR_SIGNED_ZEROS (element_mode (type)) +&& (! INTEGRAL_TYPE_P (type) +|| TYPE_OVERFLOW_WRAPS (type)) && reorder_operands_p (TREE_OPERAND (t, 0), TREE_OPERAND (t, 1)); case MULT_EXPR: - if (TYPE_UNSIGNED (TREE_TYPE (t))) -break; + if (TYPE_UNSIGNED (type)) + break; + /* INT_MIN/n * n doesn't overflow while negating one operand it does + if n is a power of two. */ + if (INTEGRAL_TYPE_P (TREE_TYPE (t)) + && ! TYPE_OVERFLOW_WRAPS (TREE_TYPE (t)) + && ! ((TREE_CODE (TREE_OPERAND (t, 0)) == INTEGER_CST +&& ! integer_pow2p (TREE_OPERAND (t, 0))) + || (TREE_CODE (TREE_OPERAND (t, 1)) == INTEGER_CST + && ! integer_pow2p (TREE_OPERAND (t, 1) + break; /* Fall through. */ Index: gcc/testsuite/gcc.dg/torture/pr68067-1.c === --- gcc/testsuite/gcc.dg/torture/pr68067-1.c(revision 0) +++ gcc/testsuite/gcc.dg/torture/pr68067-1.c(working copy) @@ -0,0 +1,12 @@ +/* { dg-do run } */ + +int main() +{ + int a = -1; + static int b = -2147483647 - 1; + static int c = 0; + int t = a - (b - c); + if (t != 2147483647) +__builtin_abort(); + return 0; +} Index: gcc/testsuite/gcc.dg/torture/pr68067-2.c === --- gcc/testsuite/gcc.dg/torture/pr68067-2.c(revision 0) +++ gcc/testsuite/gcc.dg/torture/pr68067-2.c(working copy) @@ -0,0 +1,13 @@ +/* { dg-do run } */ + +int main() +{ + int a = -1; + static int b = -2147483647 - 1; + static int c = 0; + int t = a - (b + c*-2); + if (t != 2147483647) +__builtin_abort(); + return 0; +} +
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On Tue, Oct 27, 2015 at 12:37 PM, H.J. Lu wrote: > On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt wrote: >> On 10/19/2015 09:55 PM, H.J. Lu wrote: >>> >>> * calls.c (prepare_call_address): Don't handle -fno-plt here. >> >> >> Is any other target using -fno-plt? If not, and if that's really just a > > aarch64 is the only target which checks -fno-plt from commit log below. > I CCed the code author. aarch64 may suffer from the same issue. > >> revert I'll approve it on the condition that the x86 maintainers are happy >> with the rest of the changes. > > Uros, can you take a look? I can only say that I don't oppose the changes. The details of linking are out of my expertise, but OTOH, you probably know these details better than I. So, I'll rubberstamp the patch with OK, assuming you will fix eventual fallout. Thanks, Uros.
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On 27/10/15 11:37, H.J. Lu wrote: On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt wrote: On 10/19/2015 09:55 PM, H.J. Lu wrote: * calls.c (prepare_call_address): Don't handle -fno-plt here. Is any other target using -fno-plt? If not, and if that's really just a aarch64 is the only target which checks -fno-plt from commit log below. I CCed the code author. aarch64 may suffer from the same issue. H.J, Thanks for the info. After a quick reading of the PR at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67215, I found the problem description is very x86 specific, I still don't understand what's wrong if we do the following transformation when -fno-plt specified on x86-74, thus I am not sure whether there is problem exist on aarch64. Can you please give more explanation? callproc2@PLT || V movqproc2@GOTPCREL(%rip), %rax call*%rax
[PATCH, 3/6] Add recursion to make_param_constraints
On 27/10/15 13:24, Tom de Vries wrote: Thinking it over a bit more, I realized the constraint handling started to be messy. I've reworked the patch series to simplify that first. 1Simplify constraint handling 2Rename make_restrict_var_constraints to make_param_constraints 3Add recursion to make_param_constraints 4Add handle_param parameter to create_variable_info_for_1 5Handle recursive restrict pointer in create_variable_info_for_1 6Handle restrict struct fields recursively Currently doing bootstrap and regtest on x86_64. I'll repost the patch series in reply to this message. This patch: - registers the connection between a restrict pointer var and a restrict var in a new hash_map restrict_pointed_var. - move the restrict pointer constraint handling from intra_create_variable_infos to make_param_constraints The result of this and the two preceding patches is that the constraint handling for params in intra_create_variable_infos is reduced to a single call to make_param_constraints. Thanks, - Tom Add recursion to make_param_constraints 2015-10-27 Tom de Vries * tree-ssa-structalias.c (restrict_pointed_var): New static var. (insert_restrict_pointed_var, lookup_restrict_pointed_var): New function. (make_param_constraints): Handle case that lookup_restrict_pointed_var (vi) != NULL. (intra_create_variable_infos): Call insert_restrict_pointed_var. Simplify constraint handling. Delete restrict_pointed_var. --- gcc/tree-ssa-structalias.c | 48 ++ 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index e88fbf0..93bc325 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -5607,6 +5607,39 @@ check_for_overlaps (vec fieldstack) return false; } +/* Map from restrict pointer variable info to restrict var variable info. */ + +static hash_map *restrict_pointed_var = NULL; + +/* Insert VI2 as the restrict var for VI in the restrict_pointed_var map. */ + +static void +insert_restrict_pointed_var (varinfo_t vi, varinfo_t vi2) +{ + if (restrict_pointed_var == NULL) + restrict_pointed_var = new hash_map; + + bool mapped = restrict_pointed_var->put (vi, vi2); + gcc_assert (!mapped); +} + +/* Find the restrict var for restrict pointer VI in the restrict_pointed_var + map. If VI does not exist in the map, return NULL, otherwise, return the + varinfo we found. */ + +static varinfo_t +lookup_restrict_pointed_var (varinfo_t vi) +{ + if (restrict_pointed_var == NULL) +return NULL; + varinfo_t *slot = restrict_pointed_var->get (vi); + if (slot == NULL) +return NULL; + + return *slot; +} + + /* Create a varinfo structure for NAME and DECL, and add it to VARMAP. This will also create any varinfo structures necessary for fields of DECL. */ @@ -5856,7 +5889,13 @@ make_param_constraints (varinfo_t vi, bool toplevel) { if (vi->only_restrict_pointers) { - if (toplevel) + varinfo_t rvi = lookup_restrict_pointed_var (vi); + if (rvi != NULL) + { + make_constraint_from (vi, rvi->id); + make_param_constraints (rvi, false); + } + else if (toplevel) make_constraint_from_global_restrict (vi, "PARM_RESTRICT"); else make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT"); @@ -5910,14 +5949,15 @@ intra_create_variable_infos (struct function *fn) vi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS"); vi->is_restrict_var = 1; insert_vi_for_tree (heapvar, vi); - make_constraint_from (p, vi->id); - make_param_constraints (vi, false); - continue; + insert_restrict_pointed_var (p, vi); } make_param_constraints (p, true); } + delete restrict_pointed_var; + restrict_pointed_var = NULL; + /* Add a constraint for a result decl that is passed by reference. */ if (DECL_RESULT (fn->decl) && DECL_BY_REFERENCE (DECL_RESULT (fn->decl))) -- 1.9.1
[PATCH, 4/6] Add handle_param parameter to create_variable_info_for_1
On 27/10/15 13:24, Tom de Vries wrote: it over a bit more, I realized the constraint handling started to be messy. I've reworked the patch series to simplify that first. 1Simplify constraint handling 2Rename make_restrict_var_constraints to make_param_constraints 3Add recursion to make_param_constraints 4Add handle_param parameter to create_variable_info_for_1 5Handle recursive restrict pointer in create_variable_info_for_1 6Handle restrict struct fields recursively Currently doing bootstrap and regtest on x86_64. I'll repost the patch series in reply to this message. A repost of the patch at https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02776.html . I've changed the structure a bit such that it's clear that the remaining 'build_fake_var_decl' bit is in 'the lookup_vi_for_tree (t) != NULL' branch. Thanks, - Tom Add handle_param parameter to create_variable_info_for_1 2015-10-26 Tom de Vries * tree-ssa-structalias.c (create_variable_info_for_1): Add and handle handle_param parameter. (create_variable_info_for): Call create_variable_info_for_1 with extra arg. (intra_create_variable_infos): Same. Handle case that lookup_restrict_pointed_var (p) is not NULL. --- gcc/tree-ssa-structalias.c | 53 -- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index 93bc325..a639944 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -5642,10 +5642,10 @@ lookup_restrict_pointed_var (varinfo_t vi) /* Create a varinfo structure for NAME and DECL, and add it to VARMAP. This will also create any varinfo structures necessary for fields - of DECL. */ + of DECL. DECL is a function parameter if HANDLE_PARAM is set. */ static varinfo_t -create_variable_info_for_1 (tree decl, const char *name) +create_variable_info_for_1 (tree decl, const char *name, bool handle_param) { varinfo_t vi, newvi; tree decl_type = TREE_TYPE (decl); @@ -5721,6 +5721,18 @@ create_variable_info_for_1 (tree decl, const char *name) if (POINTER_TYPE_P (TREE_TYPE (decl)) && TYPE_RESTRICT (TREE_TYPE (decl))) vi->only_restrict_pointers = 1; + if (vi->only_restrict_pointers + && !type_contains_placeholder_p (TREE_TYPE (TREE_TYPE (decl))) + && handle_param) + { + varinfo_t rvi; + tree heapvar = build_fake_var_decl (TREE_TYPE (decl_type)); + DECL_EXTERNAL (heapvar) = 1; + rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", false); + rvi->is_restrict_var = 1; + insert_vi_for_tree (heapvar, rvi); + insert_restrict_pointed_var (vi, rvi); + } fieldstack.release (); return vi; } @@ -5772,7 +5784,7 @@ create_variable_info_for_1 (tree decl, const char *name) static unsigned int create_variable_info_for (tree decl, const char *name) { - varinfo_t vi = create_variable_info_for_1 (decl, name); + varinfo_t vi = create_variable_info_for_1 (decl, name, false); unsigned int id = vi->id; insert_vi_for_tree (decl, vi); @@ -5925,31 +5937,30 @@ intra_create_variable_infos (struct function *fn) { bool restrict_pointer_p = (POINTER_TYPE_P (TREE_TYPE (t)) && TYPE_RESTRICT (TREE_TYPE (t))); - bool recursive_restrict_p - = (restrict_pointer_p - && !type_contains_placeholder_p (TREE_TYPE (TREE_TYPE (t; varinfo_t p = lookup_vi_for_tree (t); if (p == NULL) { - p = create_variable_info_for_1 (t, alias_get_name (t)); + p = create_variable_info_for_1 (t, alias_get_name (t), true); insert_vi_for_tree (t, p); } else if (restrict_pointer_p) - p->only_restrict_pointers = 1; - - /* For restrict qualified pointers build a representative for - the pointed-to object. Note that this ends up handling - out-of-bound references conservatively by aggregating them - in the first/last subfield of the object. */ - if (recursive_restrict_p) { - varinfo_t vi; - tree heapvar = build_fake_var_decl (TREE_TYPE (TREE_TYPE (t))); - DECL_EXTERNAL (heapvar) = 1; - vi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS"); - vi->is_restrict_var = 1; - insert_vi_for_tree (heapvar, vi); - insert_restrict_pointed_var (p, vi); + p->only_restrict_pointers = 1; + + /* For restrict qualified pointers build a representative for + the pointed-to object. Note that this ends up handling + out-of-bound references conservatively by aggregating them + in the first/last subfield of the object. */ + if (!type_contains_placeholder_p (TREE_TYPE (TREE_TYPE (t + { + tree heapvar = build_fake_var_decl (TREE_TYPE (TREE_TYPE (t))); + DECL_EXTERNAL (heapvar) = 1; + varinfo_t vi + = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", false); + vi->is_restrict_var = 1; + insert_vi_for_tree (heapvar, vi); + insert_restrict_pointed_var (p, vi); + } } make_pa
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On Tue, Oct 27, 2015 at 5:52 AM, Jiong Wang wrote: > > > On 27/10/15 11:37, H.J. Lu wrote: >> >> On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt >> wrote: >>> >>> On 10/19/2015 09:55 PM, H.J. Lu wrote: * calls.c (prepare_call_address): Don't handle -fno-plt here. >>> >>> >>> Is any other target using -fno-plt? If not, and if that's really just a >> >> aarch64 is the only target which checks -fno-plt from commit log below. >> I CCed the code author. aarch64 may suffer from the same issue. > > > H.J, > > Thanks for the info. > > After a quick reading of the PR at > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67215, > I found the problem description is very x86 specific, I still don't > understand what's > wrong if we do the following transformation when -fno-plt specified on > x86-74, thus I > am not sure whether there is problem exist on aarch64. Can you please give > more explanation? > > callproc2@PLT > > || > V > > movqproc2@GOTPCREL(%rip), %rax > call*%rax call *proc2@GOTPCREL(%rip) doesn't use a register and saves one instruction. -- H.J.
[PATCH, 5/6] Handle recursive restrict pointer in create_variable_info_for_1
On 27/10/15 13:24, Tom de Vries wrote: Thinking it over a bit more, I realized the constraint handling started to be messy. I've reworked the patch series to simplify that first. 1Simplify constraint handling 2Rename make_restrict_var_constraints to make_param_constraints 3Add recursion to make_param_constraints 4Add handle_param parameter to create_variable_info_for_1 5Handle recursive restrict pointer in create_variable_info_for_1 6Handle restrict struct fields recursively Currently doing bootstrap and regtest on x86_64. I'll repost the patch series in reply to this message. A repost of the patch at https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02779.html . With the constraint handling dealt with earlier, it has become rather minimal. Thanks, - Tom Handle recursive restrict pointer in create_variable_info_for_1 2015-10-26 Tom de Vries * tree-ssa-structalias.c (create_variable_info_for_1): Enable recursive handling of restrict pointers. * gcc.dg/tree-ssa/restrict-7.c: New test. --- gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 gcc/tree-ssa-structalias.c | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c new file mode 100644 index 000..f7a68c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ + +int +f (int *__restrict__ *__restrict__ *__restrict__ a, int *b) +{ + *b = 1; + ***a = 2; + return *b; +} + +/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */ diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index a639944..a297a36 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -5728,7 +5728,7 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param) varinfo_t rvi; tree heapvar = build_fake_var_decl (TREE_TYPE (decl_type)); DECL_EXTERNAL (heapvar) = 1; - rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", false); + rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", true); rvi->is_restrict_var = 1; insert_vi_for_tree (heapvar, rvi); insert_restrict_pointed_var (vi, rvi); -- 1.9.1
[PATCH, 6/6] Handle restrict struct fields recursively
On 27/10/15 13:24, Tom de Vries wrote: Thinking it over a bit more, I realized the constraint handling started to be messy. I've reworked the patch series to simplify that first. 1Simplify constraint handling 2Rename make_restrict_var_constraints to make_param_constraints 3Add recursion to make_param_constraints 4Add handle_param parameter to create_variable_info_for_1 5Handle recursive restrict pointer in create_variable_info_for_1 6Handle restrict struct fields recursively Currently doing bootstrap and regtest on x86_64. I'll repost the patch series in reply to this message. A repost of the patch at https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02838.html . Thanks, - Tom Handle restrict struct fields recursively 2015-10-26 Tom de Vries * tree-ssa-structalias.c (struct fieldoff): Add restrict_var field. (push_fields_onto_fieldstack): Add and handle handle_param parameter. (create_variable_info_for_1): Add extra arg to call to push_fields_onto_fieldstack. Handle restrict pointer fields. * gcc.dg/tree-ssa/restrict-8.c: New test. --- gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 gcc/tree-ssa-structalias.c | 32 +- 2 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c new file mode 100644 index 000..b0ab164 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ + +struct s +{ + int *__restrict__ *__restrict__ pp; +}; + +int +f (struct s s, int *b) +{ + *b = 1; + **s.pp = 2; + return *b; +} + +/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */ diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c index a297a36..fac1739 100644 --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -399,6 +399,7 @@ new_var_info (tree t, const char *name) return ret; } +static varinfo_t create_variable_info_for_1 (tree, const char *, bool); /* A map mapping call statements to per-stmt variables for uses and clobbers specific to the call. */ @@ -5196,6 +5197,8 @@ struct fieldoff unsigned may_have_pointers : 1; unsigned only_restrict_pointers : 1; + + varinfo_t restrict_var; }; typedef struct fieldoff fieldoff_s; @@ -5290,11 +5293,12 @@ field_must_have_pointers (tree t) OFFSET is used to keep track of the offset in this entire structure, rather than just the immediately containing structure. Returns false if the caller is supposed to handle the field we - recursed for. */ + recursed for. If HANDLE_PARAM is set, we're handling part of a function + parameter. */ static bool push_fields_onto_fieldstack (tree type, vec *fieldstack, - HOST_WIDE_INT offset) + HOST_WIDE_INT offset, bool handle_param) { tree field; bool empty_p = true; @@ -5320,7 +5324,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, || TREE_CODE (field_type) == UNION_TYPE) push = true; else if (!push_fields_onto_fieldstack - (field_type, fieldstack, offset + foff) + (field_type, fieldstack, offset + foff, handle_param) && (DECL_SIZE (field) && !integer_zerop (DECL_SIZE (field /* Empty structures may have actual size, like in C++. So @@ -5341,7 +5345,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, if (!pair && offset + foff != 0) { - fieldoff_s e = {0, offset + foff, false, false, false, false}; + fieldoff_s e = {0, offset + foff, false, false, false, false, +NULL}; pair = fieldstack->safe_push (e); } @@ -5375,6 +5380,19 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack, = (!has_unknown_size && POINTER_TYPE_P (field_type) && TYPE_RESTRICT (field_type)); + if (handle_param + && e.only_restrict_pointers + && !type_contains_placeholder_p (TREE_TYPE (field_type))) + { + varinfo_t rvi; + tree heapvar = build_fake_var_decl (TREE_TYPE (field_type)); + DECL_EXTERNAL (heapvar) = 1; + rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", + true); + rvi->is_restrict_var = 1; + insert_vi_for_tree (heapvar, rvi); + e.restrict_var = rvi; + } fieldstack->safe_push (e); } } @@ -5679,7 +5697,7 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param) bool notokay = false; unsigned int i; - push_fields_onto_fieldstack (decl_type, &fieldstack, 0); + push_fields_onto_fieldstack (decl_type, &fieldstack, 0, handle_param); for (i = 0; !notokay && fieldstack.iterate (i, &fo); i++) if (fo->has_unknown_size @@ -5770,6 +5788,10 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param)
[PATCH] Add -fchecking
This adds -fchecking as a way to enable internal consistency checks even in release builds (or disable checking with -fno-checking - up to a certain extent - with checking enabled). Bootstrap & regtest running on x86_64-unknown-linux-gnu. Richard. 2015-10-27 Richard Biener * common.opt (fchecking): New flag controlling flag_checking. * passes.c (execute_function_todo): Guard checking code with flag_checking instead of ENABLE_CHECKING. (execute_todo): Likewise. (execute_one_pass): Likewise. (verify_curr_properties): Always compile. * timevar.c: Include options.h. (timer::print): Adjust output. * doc/invoke.texi (fchecking): Document. Index: gcc/common.opt === *** gcc/common.opt (revision 229404) --- gcc/common.opt (working copy) *** int optimize_fast *** 46,56 Variable bool in_lto_p = false - ; Enable additional checks of internal state consistency, which may slow - ; the compiler down. - Variable - bool flag_checking = CHECKING_P - ; 0 means straightforward implementation of complex divide acceptable. ; 1 means wide ranges of inputs must work for complex divide. ; 2 means C99-like requirements for complex multiply and divide. --- 46,51 *** fcheck-new *** 1002,1007 --- 997,1006 Common Var(flag_check_new) Check the return value of new in C++. + fchecking + Common Var(flag_checking) Init(CHECKING_P) + Perform internal consistency checkings. + fcombine-stack-adjustments Common Report Var(flag_combine_stack_adjustments) Optimization Looks for opportunities to reduce stack adjustments and stack references. Index: gcc/passes.c === *** gcc/passes.c(revision 229404) --- gcc/passes.c(working copy) *** execute_function_todo (function *fn, voi *** 1952,1960 gcc_assert (dom_info_state (fn, CDI_POST_DOMINATORS) == DOM_NONE); /* If we've seen errors do not bother running any verifiers. */ ! if (!seen_error ()) { - #if defined ENABLE_CHECKING dom_state pre_verify_state = dom_info_state (fn, CDI_DOMINATORS); dom_state pre_verify_pstate = dom_info_state (fn, CDI_POST_DOMINATORS); --- 1952,1960 gcc_assert (dom_info_state (fn, CDI_POST_DOMINATORS) == DOM_NONE); /* If we've seen errors do not bother running any verifiers. */ ! if (!seen_error () ! && flag_checking) { dom_state pre_verify_state = dom_info_state (fn, CDI_DOMINATORS); dom_state pre_verify_pstate = dom_info_state (fn, CDI_POST_DOMINATORS); *** execute_function_todo (function *fn, voi *** 1988,1994 /* Make sure verifiers don't change dominator state. */ gcc_assert (dom_info_state (fn, CDI_DOMINATORS) == pre_verify_state); gcc_assert (dom_info_state (fn, CDI_POST_DOMINATORS) == pre_verify_pstate); - #endif } fn->last_verified = flags & TODO_verify_all; --- 1991,1996 *** execute_function_todo (function *fn, voi *** 2008,2018 static void execute_todo (unsigned int flags) { ! #if defined ENABLE_CHECKING ! if (cfun && need_ssa_update_p (cfun)) gcc_assert (flags & TODO_update_ssa_any); - #endif timevar_push (TV_TODO); --- 2010,2019 static void execute_todo (unsigned int flags) { ! if (flag_checking ! && cfun && need_ssa_update_p (cfun)) gcc_assert (flags & TODO_update_ssa_any); timevar_push (TV_TODO); *** clear_last_verified (function *fn, void *** 2076,2089 /* Helper function. Verify that the properties has been turn into the properties expected by the pass. */ - #ifdef ENABLE_CHECKING static void verify_curr_properties (function *fn, void *data) { unsigned int props = (size_t)data; gcc_assert ((fn->curr_properties & props) == props); } - #endif /* Initialize pass dump file. */ /* This is non-static so that the plugins can use it. */ --- 2077,2088 *** execute_one_pass (opt_pass *pass) *** 2331,2344 /* Run pre-pass verification. */ execute_todo (pass->todo_flags_start); ! #ifdef ENABLE_CHECKING ! do_per_function (verify_curr_properties, ! (void *)(size_t)pass->properties_required); ! #endif /* If a timevar is present, start it. */ if (pass->tv_id != TV_NONE) ! timevar_push (pass->tv_id); /* Do it! */ todo_after = pass->execute (cfun); --- 2330,2342 /* Run pre-pass verification. */ execute_todo (pass->todo_flags_start); ! if (flag_checking) ! do_per_function (verify_curr_properties, !(void *)(size_t)pass->properties_required); /* If a timevar is present, start it. */ if (pass->tv_id != TV_NONE) ! timevar_push (pass->tv_id);
Re: [Patch] Avoid is_simple_use bug in vectorizable_live_operation
On 27/10/2015 11:36, "Richard Biener" wrote: >On Mon, Oct 26, 2015 at 6:15 PM, Alan Hayward >wrote: >> >> >> On 26/10/2015 13:35, "Richard Biener" >>wrote: >> >>>On Mon, Oct 26, 2015 at 1:33 PM, Alan Hayward >>>wrote: There is a potential bug in vectorizable_live_operation. Consider the case where the first op for stmt is valid, but the second is null. The first time through the for () loop, it will call out to vect_is_simple_use () which will set dt. The second time, because op is null, vect_is_simple_use () will not be called. However, dt is still set to a valid value, therefore the loop will continue. Note this is different from the case where the first op is null, which will cause the loop not call vect_is_simple_use () and then return false. It is possible that this was intentional, however that is not clear from the code. The fix is to simply ensure dt is initialized to a default value on each iteration. >>> >>>I think the patch is a strict improvement, thus OK. Still a NULL >>>operand >>>is not possible in GIMPLE so the op && check is not necessary. The way >>>it iterates over all stmt uses is a bit scary anyway. As it is ok with >>>all invariants it should probably simply do sth like >>> >>> FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) >>> if (!vect_is_simple_use (op, )) >>> >>>and be done with that. Unvisited uses can only be constants (ok). >>> >>>Care to rework the funtion like that if you are here? >>> >> >> Ok, I’ve updated as requested. > >Ok. Please remove > > code = gimple_assign_rhs_code (stmt); > op_type = TREE_CODE_LENGTH (code); > rhs_class = get_gimple_rhs_class (code); > gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op); > gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op); > >and associated variables as I belive otherwise a --disable-checking build >will fail with set-but-not-used warnings. And the asserts are quite >pointless >given the now sanitized loop. > I did consider removing those asserts, but didn’t want to remove any important checks. Didn’t think about the disable-checking build. Anyway, new patch attached. Cheers, Alan. avoid_issimple3.patch Description: Binary data
Re: [mask conversion, patch 1/2] Add pattern for mask conversions
On Mon, Oct 19, 2015 at 2:22 PM, Ilya Enkovich wrote: > Hi, > > This patch adds a vectorization pattern which detects cases where mask > conversion is needed and adds it. It is done for all statements which may > consume mask. Some additional changes were made to support MASK_LOAD with > pattern and allow scalar mode for vectype of pattern stmt. It is applied on > top of all other boolean vector series. Does it look OK? Ick - more patterns. I was really hoping we could get away without patterns for bools :/ Ok. Thanks, Richard. > Thanks, > Ilya > -- > gcc/ > > 2015-10-19 Ilya Enkovich > > * optabs.c (expand_binop_directly): Allow scalar mode for > vec_pack_trunc_optab. > * tree-vect-loop.c (vect_determine_vectorization_factor): Skip > boolean vector producers from pattern sequence when computing VF. > * tree-vect-patterns.c (vect_vect_recog_func_ptrs) Add > vect_recog_mask_conversion_pattern. > (search_type_for_mask): Choose the smallest > type if different size types are mixed. > (build_mask_conversion): New. > (vect_recog_mask_conversion_pattern): New. > (vect_pattern_recog_1): Allow scalar mode for boolean vectype. > * tree-vect-stmts.c (vectorizable_mask_load_store): Support masked > load with pattern. > (vectorizable_conversion): Support boolean vectors. > (free_stmt_vec_info): Allow patterns for statements with no lhs. > * tree-vectorizer.h (NUM_PATTERNS): Increase to 14. > > > diff --git a/gcc/optabs.c b/gcc/optabs.c > index 83f4be3..8d61d33 100644 > --- a/gcc/optabs.c > +++ b/gcc/optabs.c > @@ -1055,7 +1055,8 @@ expand_binop_directly (machine_mode mode, optab > binoptab, >/* The mode of the result is different then the mode of the > arguments. */ >tmp_mode = insn_data[(int) icode].operand[0].mode; > - if (GET_MODE_NUNITS (tmp_mode) != 2 * GET_MODE_NUNITS (mode)) > + if (VECTOR_MODE_P (mode) > + && GET_MODE_NUNITS (tmp_mode) != 2 * GET_MODE_NUNITS (mode)) > { > delete_insns_since (last); > return NULL_RTX; > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 14804b3..e388533 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -497,6 +497,17 @@ vect_determine_vectorization_factor (loop_vec_info > loop_vinfo) > } > } > > + /* Boolean vectors don't affect VF. */ > + if (VECTOR_BOOLEAN_TYPE_P (vectype)) > + { > + if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) > + { > + pattern_def_seq = NULL; > + gsi_next (&si); > + } > + continue; > + } > + > /* The vectorization factor is according to the smallest > scalar type (or the largest vector size, but we only > support one vector size per loop). */ > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c > index a737129..34b1ea6 100644 > --- a/gcc/tree-vect-patterns.c > +++ b/gcc/tree-vect-patterns.c > @@ -76,6 +76,7 @@ static gimple *vect_recog_mult_pattern (vec *, > static gimple *vect_recog_mixed_size_cond_pattern (vec *, > tree *, tree *); > static gimple *vect_recog_bool_pattern (vec *, tree *, tree *); > +static gimple *vect_recog_mask_conversion_pattern (vec *, tree *, > tree *); > static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { > vect_recog_widen_mult_pattern, > vect_recog_widen_sum_pattern, > @@ -89,7 +90,8 @@ static vect_recog_func_ptr > vect_vect_recog_func_ptrs[NUM_PATTERNS] = { > vect_recog_divmod_pattern, > vect_recog_mult_pattern, > vect_recog_mixed_size_cond_pattern, > - vect_recog_bool_pattern}; > + vect_recog_bool_pattern, > + vect_recog_mask_conversion_pattern}; > > static inline void > append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt) > @@ -3180,7 +3182,7 @@ search_type_for_mask (tree var, vec_info *vinfo) >enum vect_def_type dt; >tree rhs1; >enum tree_code rhs_code; > - tree res = NULL; > + tree res = NULL, res2; > >if (TREE_CODE (var) != SSA_NAME) > return NULL; > @@ -3213,13 +3215,26 @@ search_type_for_mask (tree var, vec_info *vinfo) > case BIT_AND_EXPR: > case BIT_IOR_EXPR: > case BIT_XOR_EXPR: > - if (!(res = search_type_for_mask (rhs1, vinfo))) > - res = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo); > + res = search_type_for_mask (rhs1, vinfo); > + res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo); > + if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2))) > + res = res2; >break; > > default: >if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) > { > + tree comp_vectype, mask_type; > + >
Re: [OpenACC 1/11] UNIQUE internal function
On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek wrote: > On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote: >> Richard, Jakub, >> this updates patch 1 to use the target-insns.def mechanism of detecting >> conditionally-implemented instructions. Otherwise it's the same as >> yesterday's patch. To recap: >> >> 1) Moved the subcodes to an enumeration in internal-fn.h >> >> 2) Remove ECF_LEAF >> >> 3) Added check in initialize_ctrl_altering >> >> 4) tracer code now (continues) to only look in last stmt of block >> >> I looked at fnsplit and do not believe I need changes there. That's >> changing things like: >> if (cheap test) >> do cheap thing >> else >> do complex thing >> >> to break out the else part into a separate function. That's fine -- it'll >> copy the whole CFG of interest. > > The question is if some UNIQUE call could be ever considered as part of the > cheap test or do cheap thing. If not, everything is fine of course for > fnsplit. > >> ok? > > Ok for me, but please wait for Richi's ack too. + /* An IFN_UNIQUE call must be duplicated as part of its group, +or not at all. */ + if (is_gimple_call (g) && gimple_call_internal_p (g) + && gimple_call_internal_unique_p (g)) &&s always to the next line Otherwise looks ok to me now. Thanks, Richard. > Jakub
[PATCH] Fix PR68104
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied. Richard. 2015-10-27 Richard Biener PR tree-optimization/68104 * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Move strided access check ... (vect_compute_data_refs_alignment): ... here. * gcc.dg/torture/pr68104.c: New testcase. Index: gcc/tree-vect-data-refs.c === *** gcc/tree-vect-data-refs.c (revision 229404) --- gcc/tree-vect-data-refs.c (working copy) *** vect_compute_data_ref_alignment (struct *** 629,640 /* Initialize misalignment to unknown. */ SET_DR_MISALIGNMENT (dr, -1); - /* Strided accesses perform only component accesses, misalignment information - is irrelevant for them. */ - if (STMT_VINFO_STRIDED_P (stmt_info) - && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) - return true; - if (tree_fits_shwi_p (DR_STEP (dr))) misalign = DR_INIT (dr); aligned_to = DR_ALIGNED_TO (dr); --- 651,656 *** vect_compute_data_refs_alignment (vec_in *** 794,811 unsigned int i; FOR_EACH_VEC_ELT (datarefs, i, dr) ! if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) ! && !vect_compute_data_ref_alignment (dr)) ! { ! if (is_a (vinfo)) ! { ! /* Mark unsupported statement as unvectorizable. */ ! STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false; ! continue; ! } ! else ! return false; ! } return true; } --- 810,836 unsigned int i; FOR_EACH_VEC_ELT (datarefs, i, dr) ! { ! stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr)); ! if (STMT_VINFO_VECTORIZABLE (stmt_info) ! && !vect_compute_data_ref_alignment (dr)) ! { ! /* Strided accesses perform only component accesses, misalignment !information is irrelevant for them. */ ! if (STMT_VINFO_STRIDED_P (stmt_info) ! && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) ! continue; ! ! if (is_a (vinfo)) ! { ! /* Mark unsupported statement as unvectorizable. */ ! STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false; ! continue; ! } ! else ! return false; ! } ! } return true; } Index: gcc/testsuite/gcc.dg/torture/pr68104.c === *** gcc/testsuite/gcc.dg/torture/pr68104.c (revision 0) --- gcc/testsuite/gcc.dg/torture/pr68104.c (working copy) *** *** 0 --- 1,22 + /* { dg-do compile } */ + + typedef struct + { + char vl; + char weight; + } ib_vl_arb_element_t; + typedef struct { ib_vl_arb_element_t vl_entry[32]; } ib_vl_arb_table_t; + typedef enum { IB_SUCCESS } ib_api_status_t; + int a, b, d; + char c; + void fn1(); + ib_api_status_t fn2() + { + int e = b; + ib_vl_arb_table_t f; + if (e) + for (a = 0; a < d; a++) + f.vl_entry[a].vl &= c; + fn1(f); + return IB_SUCCESS; + }
Re: [Patch] Avoid is_simple_use bug in vectorizable_live_operation
On Tue, Oct 27, 2015 at 2:35 PM, Alan Hayward wrote: > > > On 27/10/2015 11:36, "Richard Biener" wrote: > >>On Mon, Oct 26, 2015 at 6:15 PM, Alan Hayward >>wrote: >>> >>> >>> On 26/10/2015 13:35, "Richard Biener" >>>wrote: >>> On Mon, Oct 26, 2015 at 1:33 PM, Alan Hayward wrote: > There is a potential bug in vectorizable_live_operation. > > Consider the case where the first op for stmt is valid, but the second >is > null. > The first time through the for () loop, it will call out to > vect_is_simple_use () which will set dt. > The second time, because op is null, vect_is_simple_use () will not be > called. > However, dt is still set to a valid value, therefore the loop will > continue. > > Note this is different from the case where the first op is null, which > will cause the loop not call vect_is_simple_use () and then return >false. > > It is possible that this was intentional, however that is not clear >from > the code. > > The fix is to simply ensure dt is initialized to a default value on >each > iteration. I think the patch is a strict improvement, thus OK. Still a NULL operand is not possible in GIMPLE so the op && check is not necessary. The way it iterates over all stmt uses is a bit scary anyway. As it is ok with all invariants it should probably simply do sth like FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE) if (!vect_is_simple_use (op, )) and be done with that. Unvisited uses can only be constants (ok). Care to rework the funtion like that if you are here? >>> >>> Ok, I’ve updated as requested. >> >>Ok. Please remove >> >> code = gimple_assign_rhs_code (stmt); >> op_type = TREE_CODE_LENGTH (code); >> rhs_class = get_gimple_rhs_class (code); >> gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op); >> gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op); >> >>and associated variables as I belive otherwise a --disable-checking build >>will fail with set-but-not-used warnings. And the asserts are quite >>pointless >>given the now sanitized loop. >> > > I did consider removing those asserts, but didn’t want to remove any > important checks. Didn’t think about the disable-checking build. > Anyway, new patch attached. Ok. Richard. > > Cheers, > Alan. >
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On 27/10/15 13:06, H.J. Lu wrote: On Tue, Oct 27, 2015 at 5:52 AM, Jiong Wang wrote: On 27/10/15 11:37, H.J. Lu wrote: On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt wrote: On 10/19/2015 09:55 PM, H.J. Lu wrote: * calls.c (prepare_call_address): Don't handle -fno-plt here. Is any other target using -fno-plt? If not, and if that's really just a aarch64 is the only target which checks -fno-plt from commit log below. I CCed the code author. aarch64 may suffer from the same issue. H.J, Thanks for the info. After a quick reading of the PR at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67215, I found the problem description is very x86 specific, I still don't understand what's wrong if we do the following transformation when -fno-plt specified on x86-74, thus I am not sure whether there is problem exist on aarch64. Can you please give more explanation? callproc2@PLT || V movqproc2@GOTPCREL(%rip), %rax call*%rax call *proc2@GOTPCREL(%rip) doesn't use a register and saves one instruction. OK, then it's fairly x86-64 specific optimization, because we can't do "call *mem" in aarch64 and some other targets.
[PATCH][ARM] Fix costing of vmul+vcvt combine pattern
Hi all, This patch allows us to handle the *combine_vcvtf2i pattern in rtx costs by properly identifying it as a toint coversion. Before this I saw a pattern like: (set (reg/i:SI 0 r0) (fix:SI (fix:SF (mult:SF (reg:SF 16 s0 [ a ]) (const_double:SF 3.2e+1 [0x0.8p+6]) being assigned a cost of 40 because the costs blindly recursed into the operands. With this patch for -mcpu=cortex-a57 I see it being assigned a cost of 4. Bootstrapped and tested on arm-none-linux-gnueabihf. Ok for trunk? Thanks, Kyrill 2015-10-27 Kyrylo Tkachov * config/arm/arm.c (arm_new_rtx_costs, FIX case): Handle combine_vcvtf2i pattern. commit 1e040710d1022ce816eac9b4f6065bc7aa2be9cf Author: Kyrylo Tkachov Date: Wed Oct 14 11:26:07 2015 +0100 [ARM] Fix costing of vmul+vcvt combine pattern diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index b37b507..33ad433 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -11064,6 +11064,23 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, case UNSIGNED_FIX: if (TARGET_HARD_FLOAT) { + /* The *combine_vcvtf2i reduces a vmul+vcvt into + a vcvt fixed-point conversion. */ + if (code == FIX && mode == SImode + && GET_CODE (XEXP (x, 0)) == FIX + && GET_MODE (XEXP (x, 0)) == SFmode + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1)) + > 0) + { + if (speed_p) + *cost += extra_cost->fp[0].toint; + + *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, + code, 0, speed_p); + return true; + } + if (GET_MODE_CLASS (mode) == MODE_INT) { mode = GET_MODE (XEXP (x, 0));
Re: [PATCH v4] SH FDPIC backend support
On Mon, 2015-10-26 at 22:47 -0400, Rich Felker wrote: > On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote: > > On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote: > > > Here's my updated version of the FDPIC patch with all requested > > > changes made and Changelog added. I've included all the original > > > authors. This is my first time writing such an extensive > > > Changelog > > > entry so please let me know if there are things I got wrong. > > > > I took the liberty and fixed some minor formatting trivia and > > extracted > > functions sh_emit_storesi and sh_emit_storehi which are used in > > sh_trampoline_init to effectively memcpy code into the trampoline > > area. Can you please check it? If it's OK I'll commit the > > attached > > patch to trunk. > > Is there anything in particular you'd like me to check? It builds > fine > for fdpic target, successfully compiles musl libc.so, and busybox > runs > with the resulting libc.so. I did a quick visual inspection of the > diff between my version and yours too and didn't see anything that > looked suspicious to me. Thanks. I have committed it as r229438 after a sanity check with "make all" on sh-elf. The way libcalls are now emitted is a bit unhandy. If more special-ABI libcalls are to be added in the future, they all have to do the jsr vs. bsrf handling (some potential candidates for new libcalls are optimized soft FP routines). Then we still have PR 65374 and PR 54019. In the future maybe we should come up with something that allows emitting libcalls in a more transparent way... Cheers, Oleg
Re: [OpenACC 7/11] execution model
On 10/27/15 01:18, Jakub Jelinek wrote: LGTM, though could I ask you to try to try to move the struct oacc_collapse expand_oacc_collapse_init expand_oacc_collapse_vars expand_oacc_for additions somewhere else (e.g. in between expand_omp_taskreg and expand_omp_for_init_counts), ok, I wasn't sure of the best placement. because it seems patch just got too confused and gave up, so most of expand_omp_for which I assume is unchanged except for + else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) +{ + gcc_assert (!inner_stmt); + expand_oacc_for (region, &fd); +} addition is considered to be deleted in one place and added into another one; if patch does this, I'd be afraid svn blame or git blame would do so too, and thus lose history for expand_omp_for. If moving it around doesn't help, no big deal, but if it helps, it would be appreciated. yeah, I noticed diff got confused. (I'm not sure the above suggestion will resolve it, but we can give it a go. nathan
Re: [OpenACC 1/11] UNIQUE internal function
On 10/27/15 06:45, Richard Biener wrote: On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek wrote: Ok for me, but please wait for Richi's ack too. + /* An IFN_UNIQUE call must be duplicated as part of its group, +or not at all. */ + if (is_gimple_call (g) && gimple_call_internal_p (g) + && gimple_call_internal_unique_p (g)) &&s always to the next line oh, did not know that. Otherwise looks ok to me now. Great thanks! nathan
Re: [OpenACC 1/11] UNIQUE internal function
On Tue, Oct 27, 2015 at 07:03:40AM -0700, Nathan Sidwell wrote: > On 10/27/15 06:45, Richard Biener wrote: > >On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek wrote: > > >>Ok for me, but please wait for Richi's ack too. > > > >+ /* An IFN_UNIQUE call must be duplicated as part of its group, > >+or not at all. */ > >+ if (is_gimple_call (g) && gimple_call_internal_p (g) > >+ && gimple_call_internal_unique_p (g)) > > > >&&s always to the next line > > oh, did not know that. I believe the general rule is if all the conditions are short enough that everything fits on a single line, you can write it as if (a && b && c && d) but as soon as you need to wrap, it should be one && per line, so if (a && b && c && d) style in that case rather than if (a && b && c && d) But, lots of code doesn't do it this way. Jakub
Re: [PATCH, VECTOR ABI] Add __attribute__((__simd__)) to GCC.
Hello Joseph, On 23 Oct 14:16, Joseph Myers wrote: > On Fri, 23 Oct 2015, Kirill Yukhin wrote: > > > > You need to update this patch to take account of Marek's fix for bug > > > 67964 > > > (it was because I was suspicious of the "continue;" in this patch > > > accepting invalid syntax that I found that bug), retest and resubmit. > > I've rebased the patch on top of current trunk. > > This isn't taking proper account of Marek's fix. > > > @@ -3993,6 +4001,12 @@ c_parser_attributes (c_parser *parser) > > break; > > continue; > > } > > + if (is_attribute_p ("simd", attr_name)) > > + { > > + parser->simd_attr_present = 1; > > + c_parser_consume_token (parser); > > + continue; > > + } > > Any such continue needs first to break if the next token isn't a comma; > otherwise you accept bad syntax with no comma between successive > attributes. I've understood your point. Fixed both C/C++. I've additionally fixed Cilk case in C++ by analogy w/ Marek's fix. Boostrapped. Regtesting is in progress. Is it ok for trunk if pass? gcc/ * cp/parser.h (cp_parser): Add simd_attr_present. * cp/parser.c (cp_parser_late_return_type_opt): Handle simd_attr_present, require comman in __vector__ attribute. (cp_parser_gnu_attribute_list): Ditto. * c/c-parser.c (c_parser): Add simd_attr_present flag. (c_parser_declaration_or_fndef): Call c_parser_declaration_or_fndef if simd_attr_present is set. (c_finish_omp_declare_simd): Handle simd_attr_present. * doc/extend.texi (simd): Document new attribute. * omp-low.c (pass_omp_simd_clone::gate): If target allows - call without additional conditions. gcc/testsuite/ * c-c++-common/attr-simd.c: New test. * c-c++-common/attr-simd-2.c: Ditto. * c-c++-common/attr-simd-3.c: Ditto. -- Thanks, K index c8c6a2d..b026f72 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -224,6 +224,9 @@ struct GTY(()) c_parser { /* Buffer to hold all the tokens from parsing the vector attribute for the SIMD-enabled functions (formerly known as elemental functions). */ vec *cilk_simd_fn_tokens; + + /* Designates if "simd" attribute is specified in decl. */ + BOOL_BITFIELD simd_attr_present : 1; }; @@ -1701,7 +1704,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, if (declarator == NULL) { if (omp_declare_simd_clauses.exists () - || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)) + || !vec_safe_is_empty (parser->cilk_simd_fn_tokens) + || parser->simd_attr_present) c_finish_omp_declare_simd (parser, NULL_TREE, NULL_TREE, omp_declare_simd_clauses); c_parser_skip_to_end_of_block_or_statement (parser); @@ -1797,7 +1801,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, if (!d) d = error_mark_node; if (omp_declare_simd_clauses.exists () - || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)) + || !vec_safe_is_empty (parser->cilk_simd_fn_tokens) + || parser->simd_attr_present) c_finish_omp_declare_simd (parser, d, NULL_TREE, omp_declare_simd_clauses); } @@ -1810,7 +1815,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, if (!d) d = error_mark_node; if (omp_declare_simd_clauses.exists () - || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)) + || !vec_safe_is_empty (parser->cilk_simd_fn_tokens) + || parser->simd_attr_present) c_finish_omp_declare_simd (parser, d, NULL_TREE, omp_declare_simd_clauses); start_init (d, asm_name, global_bindings_p ()); @@ -1839,7 +1845,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, chainon (postfix_attrs, all_prefix_attrs)); if (omp_declare_simd_clauses.exists () - || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)) + || !vec_safe_is_empty (parser->cilk_simd_fn_tokens) + || parser->simd_attr_present) { tree parms = NULL_TREE; if (d && TREE_CODE (d) == FUNCTION_DECL) @@ -1968,7 +1975,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, true, false, NULL, vNULL); store_parm_decls (); if (omp_declare_simd_clauses.exists () - || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)) + || !vec_safe_is_empty (parser->cilk_sim
Re: Partial fix for LTO bootstrap with Ada
[ cc gcc-patches ] On 23/10/15 18:40, Eric Botcazou wrote: --- a/gcc/tree-ssa-structalias.c +++ b/gcc/tree-ssa-structalias.c @@ -5887,7 +5887,10 @@ intra_create_variable_infos (struct function *fn) if (POINTER_TYPE_P (TREE_TYPE (t)) && TYPE_RESTRICT (TREE_TYPE (t))) - make_constraint_from_global_restrict (p, "PARM_RESTRICT"); + { + gcc_unreachable (); + make_constraint_from_global_restrict (p, "PARM_RESTRICT"); + } else { for (; p; p = vi_next (p)) ... to detect the 'type_contains_placeholder_p' case, but the assert did not trigger. Is it possible to add an ada testcase that triggers the 'type_contains_placeholder_p' case? Well, LTO testcases are painful to create and it's 3 years later... In any case, since: 2015-10-02 Eric Botcazou * gcc-interface/ada-tree.h (DECL_RESTRICTED_ALIASING_P): New flag. * gcc-interface/decl.c (gnat_to_gnu_param): For parameters passed by reference but whose type isn't by-ref and whose mechanism hasn't been forced to by-ref, set the DECL_RESTRICTED_ALIASING_P flag directly on them instead of changing their type. * gcc-interface/trans.c (scan_rhs_r): New helper function. (independent_iterations_p): New predicate. (Loop_Statement_to_gnu): For a loop with an iteration scheme, set an ivdep pragma if the iterations are independent. the Ada compiler doesn't use restrict-qualified pointer types anymore so you can back out my change from 2012 if need be. Sofar I didn't need to revert this change. But, are you saying here it's dead code? In that case, it might make sense to revert it anyway. Thanks, - Tom
Re: [OpenACC 1/11] UNIQUE internal function
On 10/27/15 01:03, Jakub Jelinek wrote: On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote: to break out the else part into a separate function. That's fine -- it'll copy the whole CFG of interest. The question is if some UNIQUE call could be ever considered as part of the cheap test or do cheap thing. If not, everything is fine of course for fnsplit. It doesn't matter (although I doubt the CFG it's attached to will be considered cheap) for how I'm using it. We never generate a CFG where part of the UNIQUE sequence will be in the cheap thing block and another part not in the cheap thing block. nathan
Re: [PATCH, VECTOR ABI] Add __attribute__((__simd__)) to GCC.
On Tue, Oct 27, 2015 at 05:06:58PM +0300, Kirill Yukhin wrote: > Boostrapped. Regtesting is in progress. Is it ok for trunk if pass? > > gcc/ > * cp/parser.h (cp_parser): Add simd_attr_present. > * cp/parser.c (cp_parser_late_return_type_opt): Handle > simd_attr_present, > require comman in __vector__ attribute. > (cp_parser_gnu_attribute_list): Ditto. > * c/c-parser.c (c_parser): Add simd_attr_present flag. > (c_parser_declaration_or_fndef): Call c_parser_declaration_or_fndef > if simd_attr_present is set. gcc/cp/ and gcc/c/ have their own ChangeLog files, therefore cp/ or c/ prefixes should also never appear in the ChangeLog entries. > (c_finish_omp_declare_simd): Handle simd_attr_present. > * doc/extend.texi (simd): Document new attribute. > * omp-low.c (pass_omp_simd_clone::gate): If target allows - call > without additional conditions. > gcc/testsuite/ > * c-c++-common/attr-simd.c: New test. > * c-c++-common/attr-simd-2.c: Ditto. > * c-c++-common/attr-simd-3.c: Ditto. > - error ("%<#pragma omp declare simd%> cannot be used in the same " > + error ("%<#pragma omp declare simd%> or __simd__ attribute cannot be > used in the same " I'd write % instead of __simd__. __simd__ is just one of the possible spellings of the attribute... >"function marked as a Cilk Plus SIMD-enabled function"); >vec_free (parser->cilk_simd_fn_tokens); >return; > @@ -15423,7 +15441,7 @@ c_finish_omp_declare_simd (c_parser *parser, tree > fndecl, tree parms, >unsigned int tokens_avail = parser->tokens_avail; >gcc_assert (parser->tokens == &parser->tokens_buf[0]); >bool is_cilkplus_cilk_simd_fn = false; > - > + >if (flag_cilkplus && !vec_safe_is_empty (parser->cilk_simd_fn_tokens)) If you are changing this, please remove all trailing whitespace from the empty line. > + if (parser->simd_attr_present > + && is_cilkplus_cilk_simd_fn) This could fit on one line if (parser->simd_attr_present && is_cilkplus_cilk_simd_fn) just fine. > + error ("SIMD-enabled function attributes" > + "are allowed when attribute __simd__ is specified"); See earlier. > > + /* Attach `omp declare simd’ attribute if __simd__ is specified AND no > OpenMP clauses > + present in decl. */ > + if (parser->simd_attr_present > + && parser->tokens_avail == 0) See earlier. > @@ -19363,6 +19365,18 @@ cp_parser_late_return_type_opt (cp_parser* parser, > cp_declarator *declarator, >= cp_parser_late_parsing_omp_declare_simd (parser, >declarator->std_attributes); > > + if (parser->simd_attr_present > + && !declare_simd_p) Ditto. > +{ > + if (cilk_simd_fn_vector_p) > + error ("__simd__ attribute cannot be used in the same function" > +" marked as a Cilk Plus SIMD-enabled function"); Ditto. > diff --git a/gcc/omp-low.c b/gcc/omp-low.c > index ad7c017..232dc5c 100644 > --- a/gcc/omp-low.c > +++ b/gcc/omp-low.c > @@ -17412,10 +17412,7 @@ public: > bool > pass_omp_simd_clone::gate (function *) > { > - return ((flag_openmp || flag_openmp_simd > -|| flag_cilkplus > -|| (in_lto_p && !flag_wpa)) > - && (targetm.simd_clone.compute_vecsize_and_simdlen != NULL)); > + return targetm.simd_clone.compute_vecsize_and_simdlen != NULL; > } > > } // anon namespace I wonder what the compile time effect this will have. have (alternative is of course > diff --git a/gcc/testsuite/c-c++-common/attr-simd-2.c > b/gcc/testsuite/c-c++-common/attr-simd-2.c > new file mode 100644 > index 000..e9afc11 > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/attr-simd-2.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile } */ > +/* { dg-options "-fdump-tree-optimized -fopenmp-simd" } */ > + > +#pragma omp declare simd > +__attribute__((__simd__)) > +static int simd_attr (void) > +{ > + return 0; > +} > + > +/* { dg-final { scan-tree-dump "omp declare simd" "optimized" } } */ You should also test other spellings of the attribute... Jakub
[PATCH] Fix PR66313
When factoring a*b + a*c to (b + c)*a we have to guard against the case of a == 0 as after the factoring b + c might overflow in that case. Fixed by doing the addition in an unsigned type if required. Bootstrap / regtest pending on x86_64-unknown-linux-gnu. Richard. 2015-10-27 Richard Biener PR middle-end/66313 * fold-const.c (fold_plusminus_mult_expr): If the factored factor may be zero use a wrapping type for the inner operation. * c-c++-common/ubsan/pr66313.c: New testcase. Index: gcc/fold-const.c === *** gcc/fold-const.c(revision 229404) --- gcc/fold-const.c(working copy) *** fold_plusminus_mult_expr (location_t loc *** 6976,6989 } } ! if (same) return fold_build2_loc (loc, MULT_EXPR, type, fold_build2_loc (loc, code, type, fold_convert_loc (loc, type, alt0), fold_convert_loc (loc, type, alt1)), fold_convert_loc (loc, type, same)); ! return NULL_TREE; } /* Subroutine of native_encode_expr. Encode the INTEGER_CST --- 6989,7016 } } ! if (!same) ! return NULL_TREE; ! ! if (! INTEGRAL_TYPE_P (type) ! || TYPE_OVERFLOW_WRAPS (type) ! || TREE_CODE (same) == INTEGER_CST) return fold_build2_loc (loc, MULT_EXPR, type, fold_build2_loc (loc, code, type, fold_convert_loc (loc, type, alt0), fold_convert_loc (loc, type, alt1)), fold_convert_loc (loc, type, same)); ! /* Same may be zero and thus the operation 'code' may overflow. Perform ! the addition in an unsigned type. */ ! tree utype = unsigned_type_for ( type); ! return fold_build2_loc (loc, MULT_EXPR, type, ! fold_convert_loc ! (loc, type, !fold_build2_loc (loc, code, utype, ! fold_convert_loc (loc, utype, alt0), ! fold_convert_loc (loc, utype, alt1))), ! fold_convert_loc (loc, type, same)); } /* Subroutine of native_encode_expr. Encode the INTEGER_CST Index: gcc/testsuite/c-c++-common/ubsan/pr66313.c === *** gcc/testsuite/c-c++-common/ubsan/pr66313.c (revision 0) --- gcc/testsuite/c-c++-common/ubsan/pr66313.c (working copy) *** *** 0 --- 1,12 + /* { dg-do run } */ + /* { dg-options "-fsanitize=undefined -fsanitize-undefined-trap-on-error" } */ + + int __attribute__((noinline,noclone)) + f(int a, int b, int c) + { + return a * b + a * c; + } + int main() + { + return f(0, __INT_MAX__, __INT_MAX__); + }
Re: [PATCH] Fix PR68067
On Tue, 27 Oct 2015, Richard Biener wrote: > > The following patch adjusts negate_expr_p to account for the fact > that we can't generally change a - (b - c) to (c - b) + a because > -INF - 0 is ok while 0 - -INF not. Similarly for a - (b + c). > While creating testcases I noticed that MULT_EXPR handling is bogus > as well as with -INF/2 * 2 neither operand can be negated safely. > > I believe the division case is also still wrong but I refrained > from touching it with this patch. Here is the division part. Bootstrap / regtest running on x86_64-unknown-linux-gnu. A related testcase went in with r202204. Richard. 2015-10-27 Richard Biener * fold-const.c (negate_expr_p): Adjust the division case to properly avoid introducing undefined overflow. (fold_negate_expr): Likewise. Index: gcc/fold-const.c === --- gcc/fold-const.c(revision 229404) +++ gcc/fold-const.c(working copy) @@ -475,29 +488,23 @@ negate_expr_p (tree t) case TRUNC_DIV_EXPR: case ROUND_DIV_EXPR: case EXACT_DIV_EXPR: - /* In general we can't negate A / B, because if A is INT_MIN and -B is 1, we may turn this into INT_MIN / -1 which is undefined -and actually traps on some architectures. But if overflow is -undefined, we can negate, because - (INT_MIN / 1) is an -overflow. */ if (INTEGRAL_TYPE_P (TREE_TYPE (t))) { - if (!TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (t))) - break; - /* If overflow is undefined then we have to be careful because -we ask whether it's ok to associate the negate with the -division which is not ok for example for --((a - b) / c) where (-(a - b)) / c may invoke undefined -overflow because of negating INT_MIN. So do not use -negate_expr_p here but open-code the two important cases. */ - if (TREE_CODE (TREE_OPERAND (t, 0)) == NEGATE_EXPR - || (TREE_CODE (TREE_OPERAND (t, 0)) == INTEGER_CST - && may_negate_without_overflow_p (TREE_OPERAND (t, 0 + if (negate_expr_p (TREE_OPERAND (t, 0))) return true; + + /* In general we can't negate B in A / B, because if A is INT_MIN and +B is 1, we may turn this into INT_MIN / -1 which is undefined +and actually traps on some architectures. */ + if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (t)) + || (TREE_CODE (TREE_OPERAND (t, 1)) == INTEGER_CST + && ! integer_onep (TREE_OPERAND (t, 1 + return negate_expr_p (TREE_OPERAND (t, 1)); } - else if (negate_expr_p (TREE_OPERAND (t, 0))) - return true; - return negate_expr_p (TREE_OPERAND (t, 1)); + else + return (negate_expr_p (TREE_OPERAND (t, 0)) + || negate_expr_p (TREE_OPERAND (t, 1))); + break; case NOP_EXPR: /* Negate -((double)float) as (double)(-float). */ @@ -667,40 +681,22 @@ fold_negate_expr (location_t loc, tree t case TRUNC_DIV_EXPR: case ROUND_DIV_EXPR: case EXACT_DIV_EXPR: - /* In general we can't negate A / B, because if A is INT_MIN and + /* In general we can't negate B in A / B, because if A is INT_MIN and B is 1, we may turn this into INT_MIN / -1 which is undefined -and actually traps on some architectures. But if overflow is -undefined, we can negate, because - (INT_MIN / 1) is an -overflow. */ - if (!INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_UNDEFINED (type)) -{ - const char * const warnmsg = G_("assuming signed overflow does not " - "occur when negating a division"); - tem = TREE_OPERAND (t, 1); - if (negate_expr_p (tem)) - { - if (INTEGRAL_TYPE_P (type) - && (TREE_CODE (tem) != INTEGER_CST - || integer_onep (tem))) - fold_overflow_warning (warnmsg, WARN_STRICT_OVERFLOW_MISC); - return fold_build2_loc (loc, TREE_CODE (t), type, - TREE_OPERAND (t, 0), negate_expr (tem)); - } - /* If overflow is undefined then we have to be careful because -we ask whether it's ok to associate the negate with the -division which is not ok for example for --((a - b) / c) where (-(a - b)) / c may invoke undefined -overflow because of negating INT_MIN. So do not use -negate_expr_p here but open-code the two important cases. */ - tem = TREE_OPERAND (t, 0); - if ((INTEGRAL_TYPE_P (type) - && (TREE_CODE (tem) == NEGATE_EXPR - || (TREE_CODE (tem) == INTEGER_CST - && may_negate_without_overflow_p (tem - || !INTEGRAL_TYPE_P (type)) - return fold_build2_loc (loc, TREE_CODE (t), type
[PATCH] Fix PR65962 fallout
This fixes another fallout of the PR65962 fix, gcc.dg/vect/vect-62.c failing for targets that use constant pool entries for the array initializer. On x86_64 we fail to vectorize the 2nd loop in the testcase because PRE makes the latch block non-empty (and adds a loop carried dependency). The patch fixes the code that is supposed to inhibit PRE from doing this. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. Richard. 2015-10-27 Richard Biener PR tree-optimization/65962 * tree-ssa-pre.c (eliminate_dom_walker::before_dom_children): Avoid creating loop carried dependences also for outer loops of the loop a use to replace is in. * gcc.dg/vect/vect-62.c: Adjust. Index: gcc/tree-ssa-pre.c === *** gcc/tree-ssa-pre.c (revision 229404) --- gcc/tree-ssa-pre.c (working copy) *** eliminate_dom_walker::before_dom_childre *** 4082,4089 gimple *def_stmt = SSA_NAME_DEF_STMT (sprime); basic_block def_bb = gimple_bb (def_stmt); if (gimple_code (def_stmt) == GIMPLE_PHI ! && b->loop_father->header == def_bb) { ssa_op_iter iter; tree op; bool found = false; --- 4090,4098 gimple *def_stmt = SSA_NAME_DEF_STMT (sprime); basic_block def_bb = gimple_bb (def_stmt); if (gimple_code (def_stmt) == GIMPLE_PHI ! && def_bb->loop_father->header == def_bb) { + loop_p loop = def_bb->loop_father; ssa_op_iter iter; tree op; bool found = false; *** eliminate_dom_walker::before_dom_childre *** 4092,4100 affine_iv iv; def_bb = gimple_bb (SSA_NAME_DEF_STMT (op)); if (def_bb ! && flow_bb_inside_loop_p (b->loop_father, def_bb) ! && simple_iv (b->loop_father, ! b->loop_father, op, &iv, true)) { found = true; break; --- 4101,4108 affine_iv iv; def_bb = gimple_bb (SSA_NAME_DEF_STMT (op)); if (def_bb ! && flow_bb_inside_loop_p (loop, def_bb) ! && simple_iv (loop, loop, op, &iv, true)) { found = true; break; *** eliminate_dom_walker::before_dom_childre *** 4110,4116 print_generic_expr (dump_file, sprime, 0); fprintf (dump_file, " which would add a loop" " carried dependence to loop %d\n", ! b->loop_father->num); } /* Don't keep sprime available. */ sprime = NULL_TREE; --- 4118,4124 print_generic_expr (dump_file, sprime, 0); fprintf (dump_file, " which would add a loop" " carried dependence to loop %d\n", ! loop->num); } /* Don't keep sprime available. */ sprime = NULL_TREE; Index: gcc/testsuite/gcc.dg/vect/vect-62.c === *** gcc/testsuite/gcc.dg/vect/vect-62.c (revision 229404) --- gcc/testsuite/gcc.dg/vect/vect-62.c (working copy) *** int main1 () *** 33,41 } /* Multidimensional array. Aligned. The "inner" dimensions ! are invariant in the inner loop. Vectorizable, but the ! vectorizer detects that everything is invariant and that ! the loop is better left untouched. (it should be optimized away). */ for (i = 0; i < N; i++) { for (j = 0; j < N; j++) --- 33,40 } /* Multidimensional array. Aligned. The "inner" dimensions ! are invariant in the inner loop. The outer loop is ! vectorizable after invariant/store motion. */ for (i = 0; i < N; i++) { for (j = 0; j < N; j++) *** int main (void) *** 65,69 return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ --- 64,68 return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
> > OK, then it's fairly x86-64 specific optimization, because we can't do "call > *mem" in > aarch64 and some other targets. It is a fairly x86_64 specific optimization and doesn't apply to AArch64. The question really is what impact does removing the generic code handling have on aarch64 - is it a no-op or not for the existing -fno-plt implementation in the AArch64 backend ? The only case that is of interest is the bit below in calls.c and it looks like that may well be redundant with the logic in the backend already, but I have not done the full analysis to convince myself that the code in the backend is sufficient. - && (!flag_plt - || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type))) - && !targetm.binds_local_p (fndecl_or_type)) regards Ramana
[PATCH][RTL-ifcvt] PR rtl-optimization/67749: Do not emit separate SET insn in IF-ELSE case
Hi all, This patch fixes the gcc.dg/ifcvt-2.c test for x86_64 where we were failing to if-convert. This was because in my patch at https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=228194 which tried to emit a SET to move the source of insn_a or insn_b (that came from the test block) into a temporary. A SET however, is not always enough. For example, on x86_64 in order for the resulting insn to be recognised it frequently needs to be in PARALLEL with a (clobber (reg:CC FLAGS_REG)). This leads to the insn not being recognised. So this patch removes that SET and instead generates a couple of temporary pseudos that gets passed on a bit later to the code that loads the operands into registers when they're not general_operand. This way we just modify the existing (recognisable) sets, allowing us to if-convert the testcase. Bootstrapped and tested on x86_64, arm, aarch64. Ok for trunk? Thanks, Kyrill 2015-10-27 Kyrylo Tkachov PR rtl-optimization/67749 * ifcvt.c (noce_try_cmove_arith): Do not emit move in IF-ELSE case before emitting the two blocks. Instead modify the register in the corresponding final insn of the basic block. commit d58740af39ceaf2d654cf3feff97b39fb6da3e82 Author: Kyrylo Tkachov Date: Tue Sep 29 13:44:21 2015 +0100 [RTL-ifcvt] PR rtl-optimization/67749 diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c index 8846e69..7c6e7ca 100644 --- a/gcc/ifcvt.c +++ b/gcc/ifcvt.c @@ -2135,38 +2135,29 @@ noce_try_cmove_arith (struct noce_if_info *if_info) emit might clobber the register used by B or A, so move it to a pseudo first. */ + rtx tmp_a = NULL_RTX; + rtx tmp_b = NULL_RTX; + if (b_simple || !else_bb) -{ - rtx tmp_b = gen_reg_rtx (x_mode); - /* Perform the simplest kind of set. The register allocator - should remove it if it's not actually needed. If this set is not - a valid insn (can happen on the is_mem path) then end_ifcvt_sequence - will cancel the whole sequence. Don't try any of the fallback paths - from noce_emit_move_insn since we want this to be the simplest kind - of move. */ - emit_insn (gen_rtx_SET (tmp_b, b)); - b = tmp_b; -} +tmp_b = gen_reg_rtx (x_mode); if (a_simple || !then_bb) -{ - rtx tmp_a = gen_reg_rtx (x_mode); - emit_insn (gen_rtx_SET (tmp_a, a)); - a = tmp_a; -} +tmp_a = gen_reg_rtx (x_mode); orig_a = a; orig_b = b; rtx emit_a = NULL_RTX; rtx emit_b = NULL_RTX; - + rtx_insn *tmp_insn = NULL; + bool modified_in_a = false; + bool modified_in_b = false; /* If either operand is complex, load it into a register first. The best way to do this is to copy the original insn. In this way we preserve any clobbers etc that the insn may have had. This is of course not possible in the IS_MEM case. */ - if (! general_operand (a, GET_MODE (a))) + if (! general_operand (a, GET_MODE (a)) || tmp_a) { if (is_mem) @@ -2174,36 +2165,51 @@ noce_try_cmove_arith (struct noce_if_info *if_info) rtx reg = gen_reg_rtx (GET_MODE (a)); emit_a = gen_rtx_SET (reg, a); } - else if (! insn_a) - goto end_seq_and_fail; else { - a = gen_reg_rtx (GET_MODE (a)); - rtx_insn *copy_of_a = as_a (copy_rtx (insn_a)); - rtx set = single_set (copy_of_a); - SET_DEST (set) = a; + if (insn_a) + { + a = tmp_a ? tmp_a : gen_reg_rtx (GET_MODE (a)); + + rtx_insn *copy_of_a = as_a (copy_rtx (insn_a)); + rtx set = single_set (copy_of_a); + SET_DEST (set) = a; - emit_a = PATTERN (copy_of_a); + emit_a = PATTERN (copy_of_a); + } + else + { + rtx tmp_reg = tmp_a ? tmp_a : gen_reg_rtx (GET_MODE (a)); + emit_a = gen_rtx_SET (tmp_reg, a); + a = tmp_reg; + } } } - if (! general_operand (b, GET_MODE (b))) + if (! general_operand (b, GET_MODE (b)) || tmp_b) { if (is_mem) { rtx reg = gen_reg_rtx (GET_MODE (b)); emit_b = gen_rtx_SET (reg, b); } - else if (! insn_b) - goto end_seq_and_fail; else { - b = gen_reg_rtx (GET_MODE (b)); - rtx_insn *copy_of_b = as_a (copy_rtx (insn_b)); - rtx set = single_set (copy_of_b); + if (insn_b) + { + b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b)); + rtx_insn *copy_of_b = as_a (copy_rtx (insn_b)); + rtx set = single_set (copy_of_b); - SET_DEST (set) = b; - emit_b = PATTERN (copy_of_b); + SET_DEST (set) = b; + emit_b = PATTERN (copy_of_b); + } + else + { + rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b)); + emit_b = gen_rtx_SET (tmp_reg, b); + b = tmp_reg; + } } } @@ -2211,16 +2217,35 @@ noce_try_cmove_arith (struct noce_if_info *if_info) swap insn that sets up A with the one that sets up B. If even that doesn't help, punt. */ -if (emit_a && modified_in_p (orig_b, emit_a)) - { - if (modified_in_p (orig_a, emit_b)) - goto end_seq_and
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan wrote: > >> >> OK, then it's fairly x86-64 specific optimization, because we can't do "call >> *mem" in >> aarch64 and some other targets. > > It is a fairly x86_64 specific optimization and doesn't apply to AArch64. > > The question really is what impact does removing the generic code handling > have on aarch64 - is it a no-op or not for the existing -fno-plt > implementation in the AArch64 backend ? The only case that is of interest is > the bit below in calls.c and it looks like that may well be redundant with > the logic in the backend already, but I have not done the full analysis to > convince myself that the code in the backend is sufficient. > > - && (!flag_plt > - || lookup_attribute ("noplt", DECL_ATTRIBUTES > (fndecl_or_type))) > - && !targetm.binds_local_p (fndecl_or_type)) > -fno-plt is a backend specific optimization and should be handled in backend. -- H.J.
Re: [PATCH] Pass manager: add support for termination of pass list
On Tue, Oct 27, 2015 at 1:36 PM, Martin Liška wrote: > On 10/26/2015 02:48 PM, Richard Biener wrote: >> On Thu, Oct 22, 2015 at 1:02 PM, Martin Liška wrote: >>> On 10/21/2015 04:06 PM, Richard Biener wrote: On Wed, Oct 21, 2015 at 1:24 PM, Martin Liška wrote: > On 10/21/2015 11:59 AM, Richard Biener wrote: >> On Wed, Oct 21, 2015 at 11:19 AM, Martin Liška wrote: >>> On 10/20/2015 03:39 PM, Richard Biener wrote: On Tue, Oct 20, 2015 at 3:00 PM, Martin Liška wrote: > Hello. > > As part of upcoming merge of HSA branch, we would like to have > possibility to terminate > pass manager after execution of the HSA generation pass. The HSA > back-end is implemented > as a tree pass that directly emits HSAIL from gimple tree > representation. The pass operates > on clones created by HSA IPA pass and the pass manager should stop > execution of further > RTL passes. > > Suggested patch survives bootstrap and regression tests on > x86_64-linux-pc. > > What do you think about it? Are you sure it works this way? Btw, you will miss executing of all the cleanup passes that will eventually free memory associated with the function. So I'd rather support a TODO_discard_function which should basically release the body from the cgraph. >>> >>> Hi. >>> >>> Agree with you that I should execute all TODOs, which can be easily >>> done. >>> However, if I just try to introduce the suggested TODO and handle it >>> properly >>> by calling cgraph_node::release_body, then for instance fn->gimple_df, >>> fn->cfg are >>> released and I hit ICEs on many places. >>> >>> Stopping the pass manager looks necessary, or do I miss something? >> >> "Stopping the pass manager" is necessary after TODO_discard_function, >> yes. >> But that may be simply done via a has_body () check then? > > Thanks, there's second version of the patch. I'm going to start > regression tests. As release_body () will free cfun you should pop_cfun () before calling it (and thus >>> >>> Well, release_function_body calls both push & pop, so I think calling pop >>> before cgraph_node::release_body is not necessary. >> >> (ugh). >> >>> If tried to call pop_cfun before cgraph_node::release_body, I have cfun >>> still >>> pointing to the same (function *) (which is gcc_freed, but cfun != NULL). >> >> Hmm, I meant to call pop_cfun then after it (unless you want to fix the >> above, >> none of the freeing functions should techincally need 'cfun', just add >> 'fn' parameters ...). >> >> I expected pop_cfun to eventually set cfun to NULL if it popped the >> "last" cfun. Why >> doesn't it do that? >> drop its modification). Also TODO_discard_functiuon should be only set for local passes thus you should probably add a gcc_assert (cfun). I'd move its handling earlier, definitely before the ggc_collect, eventually before the pass_fini_dump_file () (do we want a last dump of the function or not?). >>> >>> Fully agree, moved here. >>> @@ -2397,6 +2410,10 @@ execute_pass_list_1 (opt_pass *pass) { gcc_assert (pass->type == GIMPLE_PASS || pass->type == RTL_PASS); + + + if (!gimple_has_body_p (current_function_decl)) + return; too much vertical space. With popping cfun before releasing the body the check might just become if (!cfun) and >>> >>> As mentioned above, as release body is symmetric (calling push & pop), the >>> suggested >>> guard will not work. >> >> I suggest to fix it. If it calls push/pop it should leave with the >> original cfun, popping again >> should make it NULL? >> @@ -2409,7 +2426,7 @@ execute_pass_list (function *fn, opt_pass *pass) { push_cfun (fn); execute_pass_list_1 (pass); - if (fn->cfg) + if (gimple_has_body_p (current_function_decl) && fn->cfg) { free_dominance_info (CDI_DOMINATORS); free_dominance_info (CDI_POST_DOMINATORS); here you'd need to guard the pop_cfun call on cfun != NULL. IMHO it's better to not let cfun point to deallocated data. >>> >>> As I've read the code, since we gcc_free 'struct function', one can just >>> rely on >>> gimple_has_body_p (current_function_decl) as it correctly realizes that >>> DECL_STRUCT_FUNCTION (current_function_decl) == NULL. >> >> I'm sure that with some GC checking ggc_free makes them #deadbeef or so: >> >> void >> ggc_free (void *p) >> { >> ... >> #ifdef ENABLE_GC_CHECKING >> /* Poison the data, to indicate the data is garbage. */ >> VALGRIND_DISCARD (VALGRIND_MAKE_MEM_UNDEFINED (p, size)); >> memset (p, 0xa5, size); >> #endif >> >> so I don't t
[PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++ (was: [gomp4] OpenACC cache directive maintenance)
Hi! On Wed, 05 Nov 2014 17:44:58 +0100, I wrote: > On Wed, 05 Nov 2014 17:36:46 +0100, I wrote: > > In r217146, I applied the following to gomp-4_0-branch: > > > > [OpenACC cache directive maintenance in C/C++] > I also tried to make this work for Fortran, but didn't manage to (in > a reasonable amount of time, which has not been a lot that I allocated) > ;-) -- would you please have a look at this (but it's not urgent). > > [WIP patch] That never got resolved, so I've now done it myself, directly for trunk. OK to commit? commit 1e0a6332eb8b713afaeb43b554d4aae501a78bf9 Author: Thomas Schwinge Date: Tue Oct 27 12:20:12 2015 +0100 [PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++ gcc/fortran/ PR fortran/63865 * openmp.c (resolve_oacc_cache): Remove function. (gfc_match_oacc_cache): Enable array sections. (resolve_omp_clauses, gfc_resolve_oacc_directive): Change accordingly. * trans-openmp.c (gfc_trans_omp_clauses): Likewise. gcc/testsuite/ PR fortran/63865 * gfortran.dg/goacc/coarray.f95: Expect the OpenACC cache directive to work. * gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/cache-1.f95: Likewise, and extend testing. * gfortran.dg/goacc/cray.f95: Likewise. * gfortran.dg/goacc/parameter.f95: Likewise. --- gcc/fortran/openmp.c | 16 gcc/fortran/trans-openmp.c| 22 -- gcc/testsuite/gfortran.dg/goacc/cache-1.f95 | 9 +++-- gcc/testsuite/gfortran.dg/goacc/coarray.f95 | 3 ++- gcc/testsuite/gfortran.dg/goacc/cray.f95 | 4 +--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95| 1 - gcc/testsuite/gfortran.dg/goacc/parameter.f95 | 3 +-- 7 files changed, 31 insertions(+), 27 deletions(-) diff --git gcc/fortran/openmp.c gcc/fortran/openmp.c index 3c12d8e..6c78c97 100644 --- gcc/fortran/openmp.c +++ gcc/fortran/openmp.c @@ -1387,7 +1387,8 @@ gfc_match_oacc_cache (void) { gfc_omp_clauses *c = gfc_get_omp_clauses (); match m = gfc_match_omp_variable_list (" (", -&c->lists[OMP_LIST_CACHE], true); +&c->lists[OMP_LIST_CACHE], true, +NULL, NULL, true); if (m != MATCH_YES) { gfc_free_omp_clauses(c); @@ -3107,6 +3108,7 @@ resolve_omp_clauses (gfc_code *code, locus *where, case OMP_LIST_MAP: case OMP_LIST_TO: case OMP_LIST_FROM: + case OMP_LIST_CACHE: for (; n != NULL; n = n->next) { if (n->expr) @@ -3380,7 +3382,6 @@ resolve_omp_clauses (gfc_code *code, locus *where, n->sym->name, name, where); /* FALLTHRU */ case OMP_LIST_DEVICE_RESIDENT: - case OMP_LIST_CACHE: check_symbol_not_pointer (n->sym, *where, name); check_array_not_assumed (n->sym, *where, name); break; @@ -4597,13 +4598,6 @@ resolve_oacc_loop (gfc_code *code) } -static void -resolve_oacc_cache (gfc_code *code ATTRIBUTE_UNUSED) -{ - sorry ("Sorry, !$ACC cache unimplemented yet"); -} - - void gfc_resolve_oacc_declare (gfc_namespace *ns) { @@ -4657,6 +4651,7 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace *ns ATTRIBUTE_UNUSED) case EXEC_OACC_ENTER_DATA: case EXEC_OACC_EXIT_DATA: case EXEC_OACC_WAIT: +case EXEC_OACC_CACHE: resolve_omp_clauses (code, &code->loc, code->ext.omp_clauses, NULL, true); break; @@ -4665,9 +4660,6 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace *ns ATTRIBUTE_UNUSED) case EXEC_OACC_LOOP: resolve_oacc_loop (code); break; -case EXEC_OACC_CACHE: - resolve_oacc_cache (code); - break; default: break; } diff --git gcc/fortran/trans-openmp.c gcc/fortran/trans-openmp.c index def8afb..3be9f51 100644 --- gcc/fortran/trans-openmp.c +++ gcc/fortran/trans-openmp.c @@ -1778,9 +1778,6 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, case OMP_LIST_DEVICE_RESIDENT: clause_code = OMP_CLAUSE_DEVICE_RESIDENT; goto add_clause; - case OMP_LIST_CACHE: - clause_code = OMP_CLAUSE__CACHE_; - goto add_clause; add_clause: omp_clauses @@ -2159,14 +2156,27 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, break; case OMP_LIST_TO: case OMP_LIST_FROM: + case OMP_LIST_CACHE: for (; n != NULL; n = n->next) { if (!n->sym->attr.referenced) continue; - tree node = build_omp_clause (input_location, - list == OMP_LIST_TO -
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On 27/10/15 14:50, H.J. Lu wrote: On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan wrote: OK, then it's fairly x86-64 specific optimization, because we can't do "call *mem" in aarch64 and some other targets. It is a fairly x86_64 specific optimization and doesn't apply to AArch64. The question really is what impact does removing the generic code handling have on aarch64 - is it a no-op or not for the existing -fno-plt implementation in the AArch64 backend ? The only case that is of interest is the bit below in calls.c and it looks like that may well be redundant with the logic in the backend already, but I have not done the full analysis to convince myself that the code in the backend is sufficient. - && (!flag_plt - || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type))) - && !targetm.binds_local_p (fndecl_or_type)) -fno-plt is a backend specific optimization and should be handled in backend. The removing of those generic code has broken aarch64. Actually those code in calls.c shouldn't prevent such "call *mem" opportunity on x86-64 because the combine pass should combine "load reg, symbol + call reg" back into "call *mem" on x86-64 as there is related define_insn. the testcases in PR67215 and included in your patch, all of which are loops, failed because either RTL PRE or loop pass will hoist address calculation pattern as invariant out of loop into another basic block different with the call_insn. while combine pass only work within basic block scope, thus we have missed such combine opportunity on x86-64. I am not sure anyone has done experiment before on extend combine pass to larger scope.
Re: [PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++ (was: [gomp4] OpenACC cache directive maintenance)
On Tue, Oct 27, 2015 at 04:19:49PM +0100, Thomas Schwinge wrote: > On Wed, 05 Nov 2014 17:44:58 +0100, I wrote: > > On Wed, 05 Nov 2014 17:36:46 +0100, I wrote: > > > In r217146, I applied the following to gomp-4_0-branch: > > > > > > [OpenACC cache directive maintenance in C/C++] > > > I also tried to make this work for Fortran, but didn't manage to (in > > a reasonable amount of time, which has not been a lot that I allocated) > > ;-) -- would you please have a look at this (but it's not urgent). > > > > [WIP patch] > > That never got resolved, so I've now done it myself, directly for trunk. > OK to commit? Ok. Jakub
Re: [PATCH] Pass manager: add support for termination of pass list
On 10/27/2015 03:49 PM, Richard Biener wrote: > On Tue, Oct 27, 2015 at 1:36 PM, Martin Liška wrote: >> On 10/26/2015 02:48 PM, Richard Biener wrote: >>> On Thu, Oct 22, 2015 at 1:02 PM, Martin Liška wrote: On 10/21/2015 04:06 PM, Richard Biener wrote: > On Wed, Oct 21, 2015 at 1:24 PM, Martin Liška wrote: >> On 10/21/2015 11:59 AM, Richard Biener wrote: >>> On Wed, Oct 21, 2015 at 11:19 AM, Martin Liška wrote: On 10/20/2015 03:39 PM, Richard Biener wrote: > On Tue, Oct 20, 2015 at 3:00 PM, Martin Liška wrote: >> Hello. >> >> As part of upcoming merge of HSA branch, we would like to have >> possibility to terminate >> pass manager after execution of the HSA generation pass. The HSA >> back-end is implemented >> as a tree pass that directly emits HSAIL from gimple tree >> representation. The pass operates >> on clones created by HSA IPA pass and the pass manager should stop >> execution of further >> RTL passes. >> >> Suggested patch survives bootstrap and regression tests on >> x86_64-linux-pc. >> >> What do you think about it? > > Are you sure it works this way? > > Btw, you will miss executing of all the cleanup passes that will > eventually free memory > associated with the function. So I'd rather support a > TODO_discard_function which > should basically release the body from the cgraph. Hi. Agree with you that I should execute all TODOs, which can be easily done. However, if I just try to introduce the suggested TODO and handle it properly by calling cgraph_node::release_body, then for instance fn->gimple_df, fn->cfg are released and I hit ICEs on many places. Stopping the pass manager looks necessary, or do I miss something? >>> >>> "Stopping the pass manager" is necessary after TODO_discard_function, >>> yes. >>> But that may be simply done via a has_body () check then? >> >> Thanks, there's second version of the patch. I'm going to start >> regression tests. > > As release_body () will free cfun you should pop_cfun () before > calling it (and thus Well, release_function_body calls both push & pop, so I think calling pop before cgraph_node::release_body is not necessary. >>> >>> (ugh). >>> If tried to call pop_cfun before cgraph_node::release_body, I have cfun still pointing to the same (function *) (which is gcc_freed, but cfun != NULL). >>> >>> Hmm, I meant to call pop_cfun then after it (unless you want to fix the >>> above, >>> none of the freeing functions should techincally need 'cfun', just add >>> 'fn' parameters ...). >>> >>> I expected pop_cfun to eventually set cfun to NULL if it popped the >>> "last" cfun. Why >>> doesn't it do that? >>> > drop its modification). Also TODO_discard_functiuon should be only set > for > local passes thus you should probably add a gcc_assert (cfun). > I'd move its handling earlier, definitely before the ggc_collect, > eventually > before the pass_fini_dump_file () (do we want a last dump of the > function or not?). Fully agree, moved here. > > @@ -2397,6 +2410,10 @@ execute_pass_list_1 (opt_pass *pass) > { >gcc_assert (pass->type == GIMPLE_PASS > || pass->type == RTL_PASS); > + > + > + if (!gimple_has_body_p (current_function_decl)) > + return; > > too much vertical space. With popping cfun before releasing the body the > check > might just become if (!cfun) and As mentioned above, as release body is symmetric (calling push & pop), the suggested guard will not work. >>> >>> I suggest to fix it. If it calls push/pop it should leave with the >>> original cfun, popping again >>> should make it NULL? >>> > > @@ -2409,7 +2426,7 @@ execute_pass_list (function *fn, opt_pass *pass) > { >push_cfun (fn); >execute_pass_list_1 (pass); > - if (fn->cfg) > + if (gimple_has_body_p (current_function_decl) && fn->cfg) > { >free_dominance_info (CDI_DOMINATORS); >free_dominance_info (CDI_POST_DOMINATORS); > > here you'd need to guard the pop_cfun call on cfun != NULL. IMHO it's > better > to not let cfun point to deallocated data. As I've read the code, since we gcc_free 'struct function', one can just rely on gimple_has_body_p (current_function_decl) as it correctly realizes that DECL_STRUCT_FUNCTION (current_function_decl) == NULL. >>> >>> I'm sure that with some GC checking ggc_free makes them #deadbeef or so: >>> >>> void >>> ggc_free (void *p) >>> { >>> ... >>> #ifdef ENABLE_GC_CHECK
Re: [AArch64][dejagnu][PATCH 5/7] Dejagnu support for ARMv8.1 Adv.SIMD.
On 24/10/15 08:16, Bernhard Reutner-Fischer wrote: On October 23, 2015 2:24:26 PM GMT+02:00, Matthew Wahab wrote: The ARMv8.1 architecture extension adds two Adv.SIMD instructions,. This patch adds support in Dejagnu for ARMv8.1 Adv.SIMD specifiers and checks. The new test options are - { dg-add-options arm_v8_1a_neon }: Add compiler options needed to enable ARMv8.1 Adv.SIMD. - { dg-require-effective-target arm_v8_1a_neon_hw }: Require a target capable of executing ARMv8.1 Adv.SIMD instructions. Please error with something more meaningful than FOO, !__ARM_FEATURE_QRDMX comes to mind. TIA, I've reworked the patch so that the error is "__ARM_FEATURE_QRDMX not defined" and also strengthened the check_effective_target tests. Retested for aarch64-none-elf with cross-compiled check-gcc on an ARMv8.1 emulator. Also tested with a version of the compiler that doesn't define the ACLE feature macro. Matthew gcc/testsuite 2015-10-27 Matthew Wahab * lib/target-supports.exp (add_options_for_arm_v8_1a_neon): New. (check_effective_target_arm_arch_FUNC_ok) (add_options_for_arm_arch_FUNC) (check_effective_target_arm_arch_FUNC_multilib): Add "armv8.1-a" to the list to be generated. (check_effective_target_arm_v8_1a_neon_ok_nocache): New. (check_effective_target_arm_v8_1a_neon_ok): New. (check_effective_target_arm_v8_1a_neon_hw): New. >From b12969882298cb79737e882c48398c58a45161b9 Mon Sep 17 00:00:00 2001 From: Matthew Wahab Date: Mon, 26 Oct 2015 14:58:36 + Subject: [PATCH 5/7] [Testsuite] Add dejagnu options for armv8.1 neon Change-Id: Ib58b8c4930ad3971af3ea682eda043e14cd2e8b3 --- gcc/testsuite/lib/target-supports.exp | 56 ++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 4d5b0a3d..0fb679d 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -2700,6 +2700,16 @@ proc add_options_for_arm_v8_neon { flags } { return "$flags $et_arm_v8_neon_flags -march=armv8-a" } +# Add the options needed for ARMv8.1 Adv.SIMD. + +proc add_options_for_arm_v8_1a_neon { flags } { +if { [istarget aarch64*-*-*] } { + return "$flags -march=armv8.1-a" +} else { + return "$flags" +} +} + proc add_options_for_arm_crc { flags } { if { ! [check_effective_target_arm_crc_ok] } { return "$flags" @@ -2984,7 +2994,8 @@ foreach { armfunc armflag armdef } { v4 "-march=armv4 -marm" __ARM_ARCH_4__ v7r "-march=armv7-r" __ARM_ARCH_7R__ v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__ v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__ - v8a "-march=armv8-a" __ARM_ARCH_8A__ } { + v8a "-march=armv8-a" __ARM_ARCH_8A__ + v8_1a "-march=armv8.1a" __ARM_ARCH_8A__ } { eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { proc check_effective_target_arm_arch_FUNC_ok { } { if { [ string match "*-marm*" "FLAG" ] && @@ -3141,6 +3152,25 @@ proc check_effective_target_arm_neonv2_hw { } { } [add_options_for_arm_neonv2 ""]] } +# Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0 +# otherwise. The test is valid for AArch64. + +proc check_effective_target_arm_v8_1a_neon_ok_nocache { } { +if { ![istarget aarch64*-*-*] } { + return 0 +} +return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly { + #if !defined (__ARM_FEATURE_QRDMX) + #error "__ARM_FEATURE_QRDMX not defined" + #endif +} [add_options_for_arm_v8_1a_neon ""]] +} + +proc check_effective_target_arm_v8_1a_neon_ok { } { +return [check_cached_effective_target arm_v8_1a_neon_ok \ + check_effective_target_arm_v8_1a_neon_ok_nocache] +} + # Return 1 if the target supports executing ARMv8 NEON instructions, 0 # otherwise. @@ -3159,6 +3189,30 @@ proc check_effective_target_arm_v8_neon_hw { } { } [add_options_for_arm_v8_neon ""]] } +# Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0 +# otherwise. The test is valid for AArch64. + +proc check_effective_target_arm_v8_1a_neon_hw { } { +if { ![check_effective_target_arm_v8_1a_neon_ok] } { + return 0; +} +return [check_runtime_nocache arm_v8_1a_neon_hw_available { + int + main (void) + { + long long a = 0, b = 1; + long long result = 0; + + asm ("sqrdmlah %s0,%s1,%s2" + : "=w"(result) + : "w"(a), "w"(b) + : /* No clobbers. */); + + return result; + } +} [add_options_for_arm_v8_1a_neon ""]] +} + # Return 1 if this is a ARM target with NEON enabled. proc check_effective_target_arm_neon { } { -- 2.1.4
Re: Move some comparison simplifications to match.pd
Hi Marc, On 30/08/15 08:57, Marc Glisse wrote: Hello, just trying to shrink fold-const.c a bit more. initializer_zerop is close to what I was looking for with zerop, but I wasn't sure if it would be safe (it accepts some CONSTRUCTOR and STRING_CST). At some point I tried using sign_bit_p, but using the return of that function in the simplification confused the machinery too much. I added an "overload" of element_precision like the one in element_mode, for convenience. Bootstrap+testsuite on ppc64le-redhat-linux. 2015-08-31 Marc Glisse gcc/ * tree.h (zerop): New function. * tree.c (zerop): Likewise. (element_precision): Handle expressions. * match.pd (define_predicates): Add zerop. (x <= +Inf): Fix comment. (abs (x) == 0, A & C == C, A & C != 0): Converted from ... * fold-const.c (fold_binary_loc): ... here. Remove. gcc/testsuite/ * gcc.dg/tree-ssa/cmp-1.c: New file. +/* If we have (A & C) != 0 where C is the sign bit of A, convert + this into A < 0. Similarly for (A & C) == 0 into A >= 0. */ +(for cmp (eq ne) + ncmp (ge lt) + (simplify + (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && (TYPE_PRECISION (TREE_TYPE (@0)) + == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0 + && element_precision (@2) >= element_precision (@0) + && wi::only_sign_bit_p (@1, element_precision (@0))) + (with { tree stype = signed_type_for (TREE_TYPE (@0)); } +(ncmp (convert:stype @0) { build_zero_cst (stype); }) + With this patch and this pattern pattern in particular I've seen some code quality regressions on aarch64. I'm still trying to reduce a testcase to demonstrate the issue but it seems to involve intorucing extra conversions from unsigned to signed values. If I gate this pattern on !TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve. Any thoughts? Thanks, Kyrill
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On Tue, Oct 27, 2015 at 8:26 AM, Jiong Wang wrote: > > > On 27/10/15 14:50, H.J. Lu wrote: >> >> On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan >> wrote: OK, then it's fairly x86-64 specific optimization, because we can't do "call *mem" in aarch64 and some other targets. >>> >>> It is a fairly x86_64 specific optimization and doesn't apply to AArch64. >>> >>> The question really is what impact does removing the generic code >>> handling have on aarch64 - is it a no-op or not for the existing -fno-plt >>> implementation in the AArch64 backend ? The only case that is of interest is >>> the bit below in calls.c and it looks like that may well be redundant with >>> the logic in the backend already, but I have not done the full analysis to >>> convince myself that the code in the backend is sufficient. >>> >>> - && (!flag_plt >>> - || lookup_attribute ("noplt", DECL_ATTRIBUTES >>> (fndecl_or_type))) >>> - && !targetm.binds_local_p (fndecl_or_type)) >>> >> -fno-plt is a backend specific optimization and should be handled >> in backend. >> > > The removing of those generic code has broken aarch64. > Can you handle -fno-plt in aarch64 call expander? -- H.J.
Re: more accurate error messages omp in fortran
(was "Re: more accurate omp in fortran" Ping. Cesar On 10/22/2015 08:21 AM, Cesar Philippidis wrote: > Currently, for certain omp and oacc errors the fortran will inaccurately > report exactly where in the omp/acc construct the error has occurred. E.g. > >!$acc parallel copy (i) copy (i) copy (j) >1 > Error: Symbol ‘i’ present on multiple clauses at (1) > > instead of > >!$acc parallel copy (i) copy (i) copy (j) > 1 > Error: Symbol ‘i’ present on multiple clauses at (1) > > The problem here is how the front end uses the locus for the construct > and not the individual clause. As a result that diagnostic pointer > points to the end of the construct. > > This patch teaches gfc_resolve_omp_clauses how to use the locus of each > individual clause instead of the construct when reporting errors > involving OMP_LIST_ clauses (which are typically clauses involving > variables). It's still not perfect, but it does improve the quality of > the error reporting a little. In particular, in openacc, other compilers > are somewhat lenient in allowing variables to appear in multiple > clauses, e.g. copyin (foo) copyout (foo), but this is clearly forbidden > by the spec. I received some bug reports complaining that gfortran's > errors aren't accurate. > > I've also split off the check for variables appearing in multiple > clauses into a separate function. It's a little overkill for trunk right > now, but it is used quite a bit in gomp4 for oacc declare. > > I've tested these changes on x86_64. Is this ok for trunk? > > Cesar > >
Re: [PATCH] Add -fchecking
On 10/27/2015 04:17 PM, Richard Biener wrote: > > This adds -fchecking as a way to enable internal consistency checks > even in release builds (or disable checking with -fno-checking - up to > a certain extent - with checking enabled). I remember that Jakub proposed to use __builtin_expect with flag_checking. I wonder, if it is possible to implement without hacking AWK scripts just for this particular case? For example, to define flag_checking to something like #define flag_checking __builtin_expect (flag_checking_val, CHECKING_P) (provided that flag_checking_val is the actual value got from command-line options). -- Regards, Mikhail Maltsev
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On 27/10/15 14:50, H.J. Lu wrote: > On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan > wrote: >> >>> >>> OK, then it's fairly x86-64 specific optimization, because we can't do >>> "call *mem" in >>> aarch64 and some other targets. >> >> It is a fairly x86_64 specific optimization and doesn't apply to AArch64. >> >> The question really is what impact does removing the generic code handling >> have on aarch64 - is it a no-op or not for the existing -fno-plt >> implementation in the AArch64 backend ? The only case that is of interest is >> the bit below in calls.c and it looks like that may well be redundant with >> the logic in the backend already, but I have not done the full analysis to >> convince myself that the code in the backend is sufficient. >> >> - && (!flag_plt >> - || lookup_attribute ("noplt", DECL_ATTRIBUTES >> (fndecl_or_type))) >> - && !targetm.binds_local_p (fndecl_or_type)) >> > > -fno-plt is a backend specific optimization and should be handled > in backend. > HJ, Thanks for committing the change even when we were discussing the change - As I suspected the handling in the backend isn't sufficient, the call expanders need to handle this case in the AArch64 backend. Minimally tested - Ok if no regressions on aarch64-none-elf? regards Ramana * config/aarch64/aarch64.md (call, call_value): Handle noplt.
Re: Re: [Bulk] [OpenACC 0/7] host_data construct
On 10/26/2015 11:34 AM, Jakub Jelinek wrote: > On Fri, Oct 23, 2015 at 10:51:42AM -0500, James Norris wrote: >> @@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, >> bool declare_simd) >> case OMP_CLAUSE_GANG: >> case OMP_CLAUSE_WORKER: >> case OMP_CLAUSE_VECTOR: >> +case OMP_CLAUSE_USE_DEVICE: >>pc = &OMP_CLAUSE_CHAIN (c); >>continue; >> > > Are there any restrictions on whether you can specify the same var multiple > times in use_device clause? > #pragma acc host_data use_device (x) use_device (x) use_device (y, y, y) > ? > If not, have you verified that the gimplifier doesn't ICE on it? Generally > it doesn't like the same var being mentioned multiple times. > If yes, you can use e.g. the generic_head bitmap for that and in any case, > cover that with sufficient testsuite coverage. Generally variables cannot appear in multiple clauses. I'll add more testing for this. >> diff --git a/gcc/gimplify.c b/gcc/gimplify.c >> index ab9e540..0c32219 100644 >> --- a/gcc/gimplify.c >> +++ b/gcc/gimplify.c >> @@ -93,6 +93,8 @@ enum gimplify_omp_var_data >> >>GOVD_MAP_0LEN_ARRAY = 32768, >> >> + GOVD_USE_DEVICE = 65536, >> + >>GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE >> | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR >> | GOVD_LOCAL) >> @@ -116,7 +118,9 @@ enum omp_region_type >>ORT_COMBINED_TARGET = 33, >>/* Dummy OpenMP region, used to disable expansion of >> DECL_VALUE_EXPRs in taskloop pre body. */ >> - ORT_NONE = 64 >> + ORT_NONE = 64, >> + /* An OpenACC host-data region. */ >> + ORT_HOST_DATA = 128 > > I'd prefer ORT_NONE to be the last one, can you just renumber it and put > ORT_HOST_DATA before it? OK. >> +static tree >> +gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees, >> + void *data ATTRIBUTE_UNUSED) >> +{ > > Your use_device sounds very similar to use_device_ptr clause in OpenMP, > which is allowed on #pragma omp target data construct and is implemented > quite a bit differently from this; it is unclear if the OpenACC standard > requires this kind of implementation, or you just chose to implement it this > way. In particular, the GOMP_target_data call puts the variables mentioned > in the use_device_ptr clauses into the mapping structures (similarly how > map clause appears) and the corresponding vars are privatized within the > target data region (which is a host region, basically a fancy { } braces), > where the private variables contain the offloading device's pointers. Is this a new OpenMP 4.5 feature? I'll take a closer look and see if they are similar enough. I also noticed that OpenMP 4.5 has something similar to OpenACC's enter/exit data construct now. >> + splay_tree_node n = NULL; >> + location_t loc = EXPR_LOCATION (*tp); >> + >> + switch (TREE_CODE (*tp)) >> +{ >> +case ADDR_EXPR: >> + { >> +tree decl = TREE_OPERAND (*tp, 0); >> + >> +switch (TREE_CODE (decl)) >> + { >> + case ARRAY_REF: >> + case ARRAY_RANGE_REF: >> + case COMPONENT_REF: >> + case VIEW_CONVERT_EXPR: >> + case REALPART_EXPR: >> + case IMAGPART_EXPR: >> +if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL) >> + n = splay_tree_lookup (gimplify_omp_ctxp->variables, >> + (splay_tree_key) TREE_OPERAND (decl, 0)); >> +break; > > I must say this looks really strange, you throw away all the offsets > embedded in the component codes (fixed or variable). > Where comes the above list? What about other components (say bit field refs, > etc.)? I'm not sure. This is one of those things where multiple developers worked on it, and the history got lost. I'll investigate it. >> +case VAR_DECL: > > What is so special about VAR_DECLs? Shouldn't PARM_DECLs / RESULT_DECLs > be treated the same way? >> --- a/libgomp/libgomp.map >> +++ b/libgomp/libgomp.map >> @@ -378,6 +378,7 @@ GOACC_2.0 { >> GOACC_wait; >> GOACC_get_thread_num; >> GOACC_get_num_threads; >> +GOACC_deviceptr; >> }; >> >> GOACC_2.0.1 { > > You shouldn't be adding new symbols into a symbol version that appeared in a > compiler that shipped already (GCC 5 already had GOACC_2.0 symbols). > So it should go into GOACC_2.0.1. OK. >> diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c >> index af067d6..497ab92 100644 >> --- a/libgomp/oacc-mem.c >> +++ b/libgomp/oacc-mem.c >> @@ -204,6 +204,38 @@ acc_deviceptr (void *h) >>return d; >> } >> >> +/* This function is used as a helper in generated code to implement pointer >> + lookup in host_data regions. Unlike acc_deviceptr, it returns its >> argument >> + unchanged on a shared-memory system (e.g. the host). */ >> + >> +void * >> +GOACC_deviceptr (void *h) >> +{ >> + splay_tree_key n; >> + void *d; >> + void *offset; >> + >> + goacc_lazy_initialize (); >> + >> + st
Re: Move some comparison simplifications to match.pd
On Tue, 27 Oct 2015, Kyrill Tkachov wrote: Hi Marc, On 30/08/15 08:57, Marc Glisse wrote: Hello, just trying to shrink fold-const.c a bit more. initializer_zerop is close to what I was looking for with zerop, but I wasn't sure if it would be safe (it accepts some CONSTRUCTOR and STRING_CST). At some point I tried using sign_bit_p, but using the return of that function in the simplification confused the machinery too much. I added an "overload" of element_precision like the one in element_mode, for convenience. Bootstrap+testsuite on ppc64le-redhat-linux. 2015-08-31 Marc Glisse gcc/ * tree.h (zerop): New function. * tree.c (zerop): Likewise. (element_precision): Handle expressions. * match.pd (define_predicates): Add zerop. (x <= +Inf): Fix comment. (abs (x) == 0, A & C == C, A & C != 0): Converted from ... * fold-const.c (fold_binary_loc): ... here. Remove. gcc/testsuite/ * gcc.dg/tree-ssa/cmp-1.c: New file. +/* If we have (A & C) != 0 where C is the sign bit of A, convert + this into A < 0. Similarly for (A & C) == 0 into A >= 0. */ +(for cmp (eq ne) + ncmp (ge lt) + (simplify + (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && (TYPE_PRECISION (TREE_TYPE (@0)) + == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0 + && element_precision (@2) >= element_precision (@0) + && wi::only_sign_bit_p (@1, element_precision (@0))) This condition is a bit strict when @0 is signed, for an int32_t i, (i & (5LL << 42)) != 0 would work as well thanks to sign extension. + (with { tree stype = signed_type_for (TREE_TYPE (@0)); } +(ncmp (convert:stype @0) { build_zero_cst (stype); }) + With this patch and this pattern pattern in particular I've seen some code quality regressions on aarch64. I'm still trying to reduce a testcase to demonstrate the issue but it seems to involve intorucing extra conversions from unsigned to signed values. If I gate this pattern on !TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve. Any thoughts? Hmm, my first thoughts would be: * conversion from unsigned to signed is a NOP, * if a & signbit != 0 is faster to compute than a < 0, that's how a < 0 should be expanded by the target, * so the problem is probably something else, maybe the bit_and combined better with another operation, or the cast obfuscates things enough to confuse a later pass. Or maybe something related to @2. An example would help understand what we are talking about... -- Marc Glisse
Re: Move some comparison simplifications to match.pd
On 27/10/15 15:57, Marc Glisse wrote: On Tue, 27 Oct 2015, Kyrill Tkachov wrote: Hi Marc, On 30/08/15 08:57, Marc Glisse wrote: Hello, just trying to shrink fold-const.c a bit more. initializer_zerop is close to what I was looking for with zerop, but I wasn't sure if it would be safe (it accepts some CONSTRUCTOR and STRING_CST). At some point I tried using sign_bit_p, but using the return of that function in the simplification confused the machinery too much. I added an "overload" of element_precision like the one in element_mode, for convenience. Bootstrap+testsuite on ppc64le-redhat-linux. 2015-08-31 Marc Glisse gcc/ * tree.h (zerop): New function. * tree.c (zerop): Likewise. (element_precision): Handle expressions. * match.pd (define_predicates): Add zerop. (x <= +Inf): Fix comment. (abs (x) == 0, A & C == C, A & C != 0): Converted from ... * fold-const.c (fold_binary_loc): ... here. Remove. gcc/testsuite/ * gcc.dg/tree-ssa/cmp-1.c: New file. +/* If we have (A & C) != 0 where C is the sign bit of A, convert + this into A < 0. Similarly for (A & C) == 0 into A >= 0. */ +(for cmp (eq ne) + ncmp (ge lt) + (simplify + (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && (TYPE_PRECISION (TREE_TYPE (@0)) + == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0 + && element_precision (@2) >= element_precision (@0) + && wi::only_sign_bit_p (@1, element_precision (@0))) This condition is a bit strict when @0 is signed, for an int32_t i, (i & (5LL << 42)) != 0 would work as well thanks to sign extension. + (with { tree stype = signed_type_for (TREE_TYPE (@0)); } +(ncmp (convert:stype @0) { build_zero_cst (stype); }) + With this patch and this pattern pattern in particular I've seen some code quality regressions on aarch64. I'm still trying to reduce a testcase to demonstrate the issue but it seems to involve intorucing extra conversions from unsigned to signed values. If I gate this pattern on !TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve. Any thoughts? Hmm, my first thoughts would be: * conversion from unsigned to signed is a NOP, * if a & signbit != 0 is faster to compute than a < 0, that's how a < 0 should be expanded by the target, * so the problem is probably something else, maybe the bit_and combined better with another operation, or the cast obfuscates things enough to confuse a later pass. Or maybe something related to @2. Thanks, So here the types are shorts and unsigned shorts. On aarch64 these are HImode values and there's no direct arithmetic operations on them, so they have to be extended to SImode and truncated back. An example would help understand what we are talking about... Working on it. I'm trying to introduce gcc_unreachable calls into the compiler when the bad situation happens and reducing the original file, but I think I'm not capturing the conditions that trigger this behaviour exactly right :( Kyrill
Re: [AArch64][PATCH 2/7] Add sqrdmah, sqrdmsh instructions.
On 27/10/15 11:18, James Greenhalgh wrote: ;; --- @@ -932,6 +934,8 @@ UNSPEC_SQSHRN UNSPEC_UQSHRN UNSPEC_SQRSHRN UNSPEC_UQRSHRN]) +(define_int_iterator SQRDMLAH [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH]) + This name does not make it clear that you will iterate over an "A" and an "S" form. I'd like to see a clearer naming choice, RDMAS? SQRDMLHADDSUB? etc. SQRDMLHADDSUB is a little difficult to read. How about SQRDMLH_AS, to keep the link to the instruction? Matthew
Re: [AArch64][PATCH 2/7] Add sqrdmah, sqrdmsh instructions.
On Tue, Oct 27, 2015 at 04:11:07PM +, Matthew Wahab wrote: > On 27/10/15 11:18, James Greenhalgh wrote: > > >> ;; --- > >>@@ -932,6 +934,8 @@ > >> UNSPEC_SQSHRN UNSPEC_UQSHRN > >> UNSPEC_SQRSHRN UNSPEC_UQRSHRN]) > >> > >>+(define_int_iterator SQRDMLAH [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH]) > >>+ > > > >This name does not make it clear that you will iterate over an "A" and an > >"S" form. I'd like to see a clearer naming choice, RDMAS? SQRDMLHADDSUB? etc. > > SQRDMLHADDSUB is a little difficult to read. How about SQRDMLH_AS, > to keep the link to the instruction? Sounds good to me. Thanks, James
Re: Move some comparison simplifications to match.pd
On Tue, 27 Oct 2015, Kyrill Tkachov wrote: So here the types are shorts and unsigned shorts. On aarch64 these are HImode values and there's no direct arithmetic operations on them, so they have to be extended to SImode and truncated back. Ah ok, that makes sense. I expect it is in the case where @0 is shorter than a word and @2 is longer than @0. I wouldn't expect the signedness to matter that much, but maybe one of sign/zero-extension simplifies better than the other. This problem would happen because we are still missing a pass that handles type promotion. It means we could consider gating the transformation by the existence of lt in the mode of @0, but I don't see any query of optabs in match.pd yet... Still, a testcase would be necessary to go with whatever patch, and I may be completely on the wrong track. -- Marc Glisse
[PATCH, committed] PR fortran/68108 -- Check for valid array ref.
I've committed the attached patch after testing on x86_64-*-freebsd. A regression was introduced by my fix for PR fortran/67805, which failed to check for a valid array ref. Note, Mikael approved the patch in the PR audit trail. 2015-10-27 Steven G. Kargl PR fortran/68108 * decl.c (char_len_param_value): Check for REF_ARRAY. 2015-10-27 Steven G. Kargl PR fortran/68108 * gfortran.dg/pr67805_2.f90: New test. -- Steve Index: gcc/fortran/decl.c === --- gcc/fortran/decl.c (revision 229445) +++ gcc/fortran/decl.c (working copy) @@ -748,13 +748,15 @@ char_len_param_value (gfc_expr **expr, b /* This catches the invalid code "[character(m(2:3)) :: 'x', 'y']", which causes an ICE if gfc_reduce_init_expr() is called. */ - if (e->ref && e->ref->u.ar.type == AR_UNKNOWN + if (e->ref && e->ref->type == REF_ARRAY + && e->ref->u.ar.type == AR_UNKNOWN && e->ref->u.ar.dimen_type[0] == DIMEN_RANGE) goto syntax; gfc_reduce_init_expr (e); - if ((e->ref && e->ref->u.ar.type != AR_ELEMENT) + if ((e->ref && e->ref->type == REF_ARRAY + && e->ref->u.ar.type != AR_ELEMENT) || (!e->ref && e->expr_type == EXPR_ARRAY)) { gfc_free_expr (e); Index: gcc/testsuite/gfortran.dg/pr67805_2.f90 === --- gcc/testsuite/gfortran.dg/pr67805_2.f90 (revision 0) +++ gcc/testsuite/gfortran.dg/pr67805_2.f90 (working copy) @@ -0,0 +1,22 @@ +! { dg-do compile } +! PR fortran/68108 +! Code contributed by Juergen Reuter (juergen.reuter at desy dot de) +! Test fix for regression caused by PR fortran/67805. +module lexers + implicit none + type :: template_t + character(256) :: charset1 + integer :: len1 + end type template_t + +contains + + subroutine match_quoted (tt, s, n) +type(template_t), intent(in) :: tt +character(*), intent(in) :: s +integer, intent(out) :: n +character(tt%len1) :: ch1 +ch1 = tt%charset1 + end subroutine match_quoted + +end module lexers
Re: [PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++
Hi! On Tue, 27 Oct 2015 16:26:54 +0100, Jakub Jelinek wrote: > On Tue, Oct 27, 2015 at 04:19:49PM +0100, Thomas Schwinge wrote: > > On Wed, 05 Nov 2014 17:44:58 +0100, I wrote: > > > On Wed, 05 Nov 2014 17:36:46 +0100, I wrote: > > > > In r217146, I applied the following to gomp-4_0-branch: > > > > > > > > [OpenACC cache directive maintenance in C/C++] > > > > > I also tried to make this work for Fortran, but didn't manage to (in > > > a reasonable amount of time, which has not been a lot that I allocated) > > > ;-) -- would you please have a look at this (but it's not urgent). > > > > > > [WIP patch] > > > > That never got resolved, so I've now done it myself, directly for trunk. > > OK to commit? > > Ok. Thanks for the speedy review, committed in r229448: commit 09382f4e948e567fd47a518eeb5484d848898753 Author: tschwinge Date: Tue Oct 27 16:54:52 2015 + [PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++ gcc/fortran/ PR fortran/63865 * openmp.c (resolve_oacc_cache): Remove function. (gfc_match_oacc_cache): Enable array sections. (resolve_omp_clauses, gfc_resolve_oacc_directive): Change accordingly. * trans-openmp.c (gfc_trans_omp_clauses): Likewise. gcc/testsuite/ PR fortran/63865 * gfortran.dg/goacc/coarray.f95: Expect the OpenACC cache directive to work. * gfortran.dg/goacc/loop-1.f95: Likewise. * gfortran.dg/goacc/cache-1.f95: Likewise, and extend testing. * gfortran.dg/goacc/cray.f95: Likewise. * gfortran.dg/goacc/parameter.f95: Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@229448 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/fortran/ChangeLog | 10 ++ gcc/fortran/openmp.c | 16 gcc/fortran/trans-openmp.c| 22 -- gcc/testsuite/ChangeLog | 11 +++ gcc/testsuite/gfortran.dg/goacc/cache-1.f95 | 9 +++-- gcc/testsuite/gfortran.dg/goacc/coarray.f95 | 3 ++- gcc/testsuite/gfortran.dg/goacc/cray.f95 | 4 +--- gcc/testsuite/gfortran.dg/goacc/loop-1.f95| 1 - gcc/testsuite/gfortran.dg/goacc/parameter.f95 | 3 +-- 9 files changed, 52 insertions(+), 27 deletions(-) diff --git gcc/fortran/ChangeLog gcc/fortran/ChangeLog index 37956ce..02564ce 100644 --- gcc/fortran/ChangeLog +++ gcc/fortran/ChangeLog @@ -1,3 +1,13 @@ +2015-10-27 Thomas Schwinge + James Norris + + PR fortran/63865 + * openmp.c (resolve_oacc_cache): Remove function. + (gfc_match_oacc_cache): Enable array sections. + (resolve_omp_clauses, gfc_resolve_oacc_directive): Change + accordingly. + * trans-openmp.c (gfc_trans_omp_clauses): Likewise. + 2015-10-27 Steven G. Kargl PR fortran/68108 diff --git gcc/fortran/openmp.c gcc/fortran/openmp.c index 3c12d8e..6c78c97 100644 --- gcc/fortran/openmp.c +++ gcc/fortran/openmp.c @@ -1387,7 +1387,8 @@ gfc_match_oacc_cache (void) { gfc_omp_clauses *c = gfc_get_omp_clauses (); match m = gfc_match_omp_variable_list (" (", -&c->lists[OMP_LIST_CACHE], true); +&c->lists[OMP_LIST_CACHE], true, +NULL, NULL, true); if (m != MATCH_YES) { gfc_free_omp_clauses(c); @@ -3107,6 +3108,7 @@ resolve_omp_clauses (gfc_code *code, locus *where, case OMP_LIST_MAP: case OMP_LIST_TO: case OMP_LIST_FROM: + case OMP_LIST_CACHE: for (; n != NULL; n = n->next) { if (n->expr) @@ -3380,7 +3382,6 @@ resolve_omp_clauses (gfc_code *code, locus *where, n->sym->name, name, where); /* FALLTHRU */ case OMP_LIST_DEVICE_RESIDENT: - case OMP_LIST_CACHE: check_symbol_not_pointer (n->sym, *where, name); check_array_not_assumed (n->sym, *where, name); break; @@ -4597,13 +4598,6 @@ resolve_oacc_loop (gfc_code *code) } -static void -resolve_oacc_cache (gfc_code *code ATTRIBUTE_UNUSED) -{ - sorry ("Sorry, !$ACC cache unimplemented yet"); -} - - void gfc_resolve_oacc_declare (gfc_namespace *ns) { @@ -4657,6 +4651,7 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace *ns ATTRIBUTE_UNUSED) case EXEC_OACC_ENTER_DATA: case EXEC_OACC_EXIT_DATA: case EXEC_OACC_WAIT: +case EXEC_OACC_CACHE: resolve_omp_clauses (code, &code->loc, code->ext.omp_clauses, NULL, true); break; @@ -4665,9 +4660,6 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace *ns ATTRIBUTE_UNUSED) case EXEC_OACC_LOOP: resolve_oacc_loop (code); break; -case EXEC_OACC_CACHE: - resolve_oacc_cache (code); -
Re: [PING][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m
Ping. BR, Andre On 13/10/15 18:01, Andre Vieira wrote: This patch ports the aeabi_idiv routine from Linaro Cortex-Strings (https://git.linaro.org/toolchain/cortex-strings.git), which was contributed by ARM under Free BSD license. The new aeabi_idiv routine is used to replace the one in libgcc/config/arm/lib1funcs.S. This replacement happens within the Thumb1 wrapper. The new routine is under LGPLv3 license. The main advantage of this version is that it can improve the performance of the aeabi_idiv function for Thumb1. This solution will also increase the code size. So it will only be used if __OPTIMIZE_SIZE__ is not defined. Make check passed for armv6-m. libgcc/ChangeLog: 2015-08-10 Hale Wang Andre Vieira * config/arm/lib1funcs.S: Add new wrapper.
Re: [PATCH] Fix PR66313
On Tue, 27 Oct 2015, Richard Biener wrote: > When factoring a*b + a*c to (b + c)*a we have to guard against the > case of a == 0 as after the factoring b + c might overflow in that > case. Fixed by doing the addition in an unsigned type if required. The same applies to a == -1 (consider b and c equal to -(INT_MIN/2), when a*b + a*c is INT_MIN but b+c overflows). In that case, if you avoid b+c overflowing using an unsigned type, but convert back to signed for the multiplication, you get a spurious overflow from the multiplication instead. -- Joseph S. Myers jos...@codesourcery.com
Re: [PATCH] PR/67682, break SLP groups up if only some elements match
On 26/10/15 15:04, Richard Biener wrote: apart from the fact that you'll post a new version you need to adjust GROUP_GAP. You also seem to somewhat "confuse" "first I stmts" and "a group of size I", those are not the same when the group has haps. I'd say "a group of size i" makes the most sense here thus I suggest to adjust the function comment accordingly. Ok, thanks for pointing this out. My objective had been to only split the store groups - which in BB vectorization, always seem to have gap 0 1 1 1. I didn't come up with a good scheme for how to split load groups, but it seemed that I didn't need to do anything there if I restricted to BB vectorization only. For example, consider (ignoring that we could multiply the first four elements by 1 and add 0 to the last four): a[0] = b[I] + 1; a[1] = b[J] + 2; a[2] = b[K] + 3; a[3] = b[L] + 4; a[4] = b[M] * 3; a[5] = b[N] * 4; a[6] = b[O] * 5; a[7] = b[P] * 7; with constants I,J,K,L,M,N,O,P. Even with those being a sequence 2 0 1 1 3 0 2 1 with overlaps and repetitions, this works fine for BB SLP (two subgroups of stores, *sharing* a load group but with different permutations). Likewise 0 1 2 3 0 2 4 6. For loop SLP, yes it looks like the load group needs to be split. So how; and what constraints to impose on those constants? (There is no single right answer!) A fairly-strict scheme could be that (I,J,K,L) must be within a contiguous block of memory, that does not overlap with the contiguous block containing (M,N,O,P). Then, splitting the load group on the boundary seems reasonable, and updating the gaps as you suggest. However, when you say "the group first elements GROUP_GAP is the gap at the _end_ of the whole group" - the gap at the end is the gap that comes after the last element and up towhat? Say I...P are consecutive, the input would have gaps 0 1 1 1 1 1 1 1. If we split the load group, we would want subgroups with gaps 0 1 1 1 and 0 1 1 1? (IIUC, you suggest and 0111?) If they are disjoint sets, but overlapping blocks of memory, say 0 2 4 6 1 3 5 7...then do we create two load groups, with gap 0 2 2 2 and 0 2 2 2 again? Does something record that the load groups access overlapping areas, and record the offset against each other? If there are repeated elements (as in the BB SLP case mentioned above), I'm not clear how we can split this effectively...so may have to rule out that case. (Moreover, if we are considering hybrid SLP, it may not be clear what the loop accesses are, we may be presented only with the SLP accesses. Do we necessarily want to pull those out of a load group?) So I expect I may resolve some of these issues as I progress, but I'm curious as to whether (and why) the patch was really broken (wrt gaps) as it stood... Thanks, Alan
Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86
On 10/27/2015 09:42 AM, Ramana Radhakrishnan wrote: On 27/10/15 14:50, H.J. Lu wrote: On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan wrote: OK, then it's fairly x86-64 specific optimization, because we can't do "call *mem" in aarch64 and some other targets. It is a fairly x86_64 specific optimization and doesn't apply to AArch64. The question really is what impact does removing the generic code handling have on aarch64 - is it a no-op or not for the existing -fno-plt implementation in the AArch64 backend ? The only case that is of interest is the bit below in calls.c and it looks like that may well be redundant with the logic in the backend already, but I have not done the full analysis to convince myself that the code in the backend is sufficient. - && (!flag_plt - || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type))) - && !targetm.binds_local_p (fndecl_or_type)) -fno-plt is a backend specific optimization and should be handled in backend. HJ, Thanks for committing the change even when we were discussing the change This is what I'm primarily concerned about. Bernd's message was pretty clear in my mind: https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02861.html It was conditional approval based on no other target using -fno-plt and agreement from the x86 maintainers. HJ replied that aarch64 uses -fno-plt: https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02865.html And then apparently HJ committed the patch. commit 47b727e5ec3f6f4f0a30ee899adce80185ad6999 Author: hjl Date: Tue Oct 27 14:29:31 2015 + When reviewers conditionally approve a patch, the conditions need to be satisfied before a patch can be committed. Ignoring the conditions seems like a significant breech of trust to me. HJ, why did you commit the patch given it didn't meet the conditions Bernd set forth for approval? Jeff
Re: [c++-delayed-folding] Introduce convert_to_pointer_nofold
On Sun, Oct 25, 2015 at 04:49:08AM -1000, Jason Merrill wrote: > On 10/19/2015 05:33 AM, Marek Polacek wrote: > >+if (fold_p) > >+ expr = fold_build1_loc (loc, NOP_EXPR, totype, expr); > >+else > >+ expr = build1_loc (loc, NOP_EXPR, totype, expr); > > Rather than duplicate code like this everywhere, maybe we should introduce a > maybe_fold_build1_loc macro that takes fold_p as an argument. Good point. Like the following? Testing in progress. diff --git gcc/convert.c gcc/convert.c index 1ce8099..4a6a70d 100644 --- gcc/convert.c +++ gcc/convert.c @@ -37,12 +37,17 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "ubsan.h" +#define maybe_fold_build1_loc(FOLD_P, LOC, CODE, TYPE, EXPR) \ + ((FOLD_P) ? fold_build1_loc (LOC, CODE, TYPE, EXPR) \ + : build1_loc (LOC, CODE, TYPE, EXPR)) + /* Convert EXPR to some pointer or reference type TYPE. EXPR must be pointer, reference, integer, enumeral, or literal zero; - in other cases error is called. */ + in other cases error is called. If FOLD_P is true, try to fold the + expression. */ -tree -convert_to_pointer (tree type, tree expr) +static tree +convert_to_pointer_1 (tree type, tree expr, bool fold_p) { location_t loc = EXPR_LOCATION (expr); if (TREE_TYPE (expr) == type) @@ -59,9 +64,10 @@ convert_to_pointer (tree type, tree expr) addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (expr))); if (to_as == from_as) - return fold_build1_loc (loc, NOP_EXPR, type, expr); + return maybe_fold_build1_loc (fold_p, loc, NOP_EXPR, type, expr); else - return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr); + return maybe_fold_build1_loc (fold_p, loc, ADDR_SPACE_CONVERT_EXPR, + type, expr); } case INTEGER_TYPE: @@ -75,20 +81,37 @@ convert_to_pointer (tree type, tree expr) unsigned int pprec = TYPE_PRECISION (type); unsigned int eprec = TYPE_PRECISION (TREE_TYPE (expr)); - if (eprec != pprec) - expr = fold_build1_loc (loc, NOP_EXPR, - lang_hooks.types.type_for_size (pprec, 0), - expr); + if (eprec != pprec) + expr + = maybe_fold_build1_loc (fold_p, loc, NOP_EXPR, +lang_hooks.types.type_for_size (pprec, 0), +expr); } - - return fold_build1_loc (loc, CONVERT_EXPR, type, expr); + return maybe_fold_build1_loc (fold_p, loc, CONVERT_EXPR, type, expr); default: error ("cannot convert to a pointer type"); - return convert_to_pointer (type, integer_zero_node); + return convert_to_pointer_1 (type, integer_zero_node, fold_p); } } +/* A wrapper around convert_to_pointer_1 that always folds the + expression. */ + +tree +convert_to_pointer (tree type, tree expr) +{ + return convert_to_pointer_1 (type, expr, true); +} + +/* A wrapper around convert_to_pointer_1 that only folds the + expression if it is CONSTANT_CLASS_P. */ + +tree +convert_to_pointer_nofold (tree type, tree expr) +{ + return convert_to_pointer_1 (type, expr, CONSTANT_CLASS_P (expr)); +} /* Convert EXPR to some floating-point type TYPE. diff --git gcc/convert.h gcc/convert.h index ac78f95..24fa6bf 100644 --- gcc/convert.h +++ gcc/convert.h @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see extern tree convert_to_integer (tree, tree); extern tree convert_to_integer_nofold (tree, tree); extern tree convert_to_pointer (tree, tree); +extern tree convert_to_pointer_nofold (tree, tree); extern tree convert_to_real (tree, tree); extern tree convert_to_fixed (tree, tree); extern tree convert_to_complex (tree, tree); diff --git gcc/cp/cvt.c gcc/cp/cvt.c index 0a30270..cb73bb7 100644 --- gcc/cp/cvt.c +++ gcc/cp/cvt.c @@ -241,7 +241,7 @@ cp_convert_to_pointer (tree type, tree expr, tsubst_flags_t complain) gcc_assert (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) == GET_MODE_SIZE (TYPE_MODE (type))); - return convert_to_pointer (type, expr); + return convert_to_pointer_nofold (type, expr); } if (type_unknown_p (expr)) Marek