[PATCH] Rename parameters which are within scop
--- gcc/graphite-isl-ast-to-gimple.c | 153 +++ 1 file changed, 122 insertions(+), 31 deletions(-) diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c index b32781a..3e2c1fa 100644 --- a/gcc/graphite-isl-ast-to-gimple.c +++ b/gcc/graphite-isl-ast-to-gimple.c @@ -124,9 +124,84 @@ void ivs_params_clear (ivs_params &ip) } } -static tree -gcc_expression_from_isl_expression (tree type, __isl_take isl_ast_expr *, - ivs_params &ip); +class translate_isl_ast_to_gimple +{ +public: + translate_isl_ast_to_gimple (sese r) + : region (r) + { } + + edge translate_isl_ast (loop_p context_loop, __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_node_for (loop_p context_loop, + __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_for_loop (loop_p context_loop, + __isl_keep isl_ast_node *node_for, + edge next_e, + tree type, tree lb, tree ub, + ivs_params &ip); + + edge translate_isl_ast_node_if (loop_p context_loop, + __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_node_user (__isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_node_block (loop_p context_loop, +__isl_keep isl_ast_node *node, +edge next_e, ivs_params &ip); + + tree unary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, +ivs_params &ip); + + tree binary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + tree ternary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + tree nary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + tree gcc_expression_from_isl_expression (tree type, + __isl_take isl_ast_expr *, + ivs_params &ip); + + tree gcc_expression_from_isl_ast_expr_id (tree type, + __isl_keep isl_ast_expr *expr_id, + ivs_params &ip); + + tree gcc_expression_from_isl_expr_int (tree type, +__isl_take isl_ast_expr *expr); + + tree gcc_expression_from_isl_expr_op (tree type, + __isl_take isl_ast_expr *expr, + ivs_params &ip); + + struct loop *graphite_create_new_loop (edge entry_edge, +__isl_keep isl_ast_node *node_for, +loop_p outer, tree type, +tree lb, tree ub, ivs_params &ip); + + edge graphite_create_new_guard (edge entry_edge, + __isl_take isl_ast_expr *if_cond, + ivs_params &ip); + + edge graphite_create_new_loop_guard (edge entry_edge, + __isl_keep isl_ast_node *node_for, + tree *type, + tree *lb, tree *ub, ivs_params &ip); + + void build_iv_mapping (vec iv_map, gimple_bb_p gbb, +__isl_keep isl_ast_expr *user_expr, ivs_params &ip, +sese region); +private: + sese region; +}; /* Return the tree variable that corresponds to the given isl ast identifier expression (an isl_ast_expr of type isl_ast_expr_id). @@ -136,7 +211,8 @@ gcc_expression_from_isl_expression (tree type, __isl_take isl_ast_expr *, converting type sizes may be problematic when we switch to smaller types. */ -static tree +tree +translate_isl_ast_to_gimple:: gcc_expression_from_isl_ast_expr_id (tree type, __isl_keep isl_ast_expr *expr_id, ivs_params &ip) @@ -147,7 +223,7 @@ gcc_expression_from_isl_ast_expr_id (tree type, res = ip.find (tmp_isl_id); isl_id_free (tmp_isl_id); gcc_assert (res != ip.end () && - "Could not map isl_id to tree expression"); + "Could not map isl_id to tree expression"); isl_ast_expr_free (expr_id); return fold_convert (type, res->second); } @@ -155,7 +231,8 @@ gcc_expression_from_isl_ast_expr_id (tree type, /* Converts an isl_ast_expr_int expression E to a GCC expression tree of type TYPE. */ -static tree +tree +translate_isl_ast_to_gimple:: gcc_expression_from_isl_expr_int
[PATCH] Refactor graphite-isl-ast-to-gimple.c
Refactor graphite-isl-ast-to-gimple.c: Refactor so that each function can access 'region'. This will help maintain a parameter rename_map within a region. No functional change intended. This patch will be followed by another set of patches where translate_isl_ast_to_gimple::region is used to keep parameters which needs renaming. Since we are planning to remove limit_scops, we now have to maintain a set of parameters which needs renaming. This refactoring helps avoid passing `region' to many functions. gcc/ChangeLog: 2015-07-19 Aditya Kumar * graphite-isl-ast-to-gimple.c: Refactor so that each function can access 'region'. This will help maintain a parameter rename_map within a region. --- gcc/graphite-isl-ast-to-gimple.c | 153 +++ 1 file changed, 122 insertions(+), 31 deletions(-) diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c index b32781a..86a921b 100644 --- a/gcc/graphite-isl-ast-to-gimple.c +++ b/gcc/graphite-isl-ast-to-gimple.c @@ -124,9 +124,84 @@ void ivs_params_clear (ivs_params &ip) } } -static tree -gcc_expression_from_isl_expression (tree type, __isl_take isl_ast_expr *, - ivs_params &ip); +class translate_isl_ast_to_gimple +{ +public: + translate_isl_ast_to_gimple (sese r) + : region (r) + { } + + edge translate_isl_ast (loop_p context_loop, __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_node_for (loop_p context_loop, + __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_for_loop (loop_p context_loop, + __isl_keep isl_ast_node *node_for, + edge next_e, + tree type, tree lb, tree ub, + ivs_params &ip); + + edge translate_isl_ast_node_if (loop_p context_loop, + __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_node_user (__isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + edge translate_isl_ast_node_block (loop_p context_loop, +__isl_keep isl_ast_node *node, +edge next_e, ivs_params &ip); + + tree unary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, +ivs_params &ip); + + tree binary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + tree ternary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + tree nary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + tree gcc_expression_from_isl_expression (tree type, + __isl_take isl_ast_expr *, + ivs_params &ip); + + tree gcc_expression_from_isl_ast_expr_id (tree type, + __isl_keep isl_ast_expr *expr_id, + ivs_params &ip); + + tree gcc_expression_from_isl_expr_int (tree type, +__isl_take isl_ast_expr *expr); + + tree gcc_expression_from_isl_expr_op (tree type, + __isl_take isl_ast_expr *expr, + ivs_params &ip); + + struct loop *graphite_create_new_loop (edge entry_edge, +__isl_keep isl_ast_node *node_for, +loop_p outer, tree type, +tree lb, tree ub, ivs_params &ip); + + edge graphite_create_new_guard (edge entry_edge, + __isl_take isl_ast_expr *if_cond, + ivs_params &ip); + + edge graphite_create_new_loop_guard (edge entry_edge, + __isl_keep isl_ast_node *node_for, + tree *type, + tree *lb, tree *ub, ivs_params &ip); + + void build_iv_mapping (vec iv_map, gimple_bb_p gbb, +__isl_keep isl_ast_expr *user_expr, ivs_params &ip, +sese region); +private: + sese region; +}; /* Return the tree variable that corresponds to the given isl ast identifier expression (an isl_ast_expr of type isl_ast_expr_id). @@ -136,7 +211,8 @@ gcc_expression_from_isl_expression (tree type, __isl_take isl_ast_expr *, converting type sizes may be problematic when we switch to smaller types. *
[PATCH] Refactor graphite-isl-ast-to-gimple.c
From: Aditya Kumar Refactor graphite-isl-ast-to-gimple.c: Refactor so that each function can access 'region'. This will help maintain a parameter rename_map within a region. No functional change intended. This patch will be followed by another set of patches where translate_isl_ast_to_gimple::region is used to keep parameters which needs renaming. Since we are planning to remove limit_scops, we now have to maintain a set of parameters which needs renaming. This refactoring helps avoid passing `region' to all the functions in this file. It passes bootstrap and regtest. gcc/ChangeLog: 2015-07-19 Aditya Kumar * graphite-isl-ast-to-gimple.c: Refactor so that each function can access 'region'. This will help maintain a parameter rename_map within a region. --- gcc/graphite-isl-ast-to-gimple.c | 212 +-- 1 file changed, 180 insertions(+), 32 deletions(-) diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c index b620a48..618c62d 100644 --- a/gcc/graphite-isl-ast-to-gimple.c +++ b/gcc/graphite-isl-ast-to-gimple.c @@ -125,9 +125,142 @@ void ivs_params_clear (ivs_params &ip) } } -static tree -gcc_expression_from_isl_expression (tree type, __isl_take isl_ast_expr *, - ivs_params &ip); +class translate_isl_ast_to_gimple +{ + public: + translate_isl_ast_to_gimple (sese r) +: region (r) + { } + + /* Translates an ISL AST node NODE to GCC representation in the + context of a SESE. */ + edge translate_isl_ast (loop_p context_loop, __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + /* Translates an isl_ast_node_for to Gimple. */ + edge translate_isl_ast_node_for (loop_p context_loop, + __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + /* Create the loop for a isl_ast_node_for. + + - NEXT_E is the edge where new generated code should be attached. */ + edge translate_isl_ast_for_loop (loop_p context_loop, + __isl_keep isl_ast_node *node_for, + edge next_e, + tree type, tree lb, tree ub, + ivs_params &ip); + + /* Translates an isl_ast_node_if to Gimple. */ + edge translate_isl_ast_node_if (loop_p context_loop, + __isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + /* Translates an isl_ast_node_user to Gimple. + + FIXME: We should remove iv_map.create (loop->num + 1), if it is + possible. */ + edge translate_isl_ast_node_user (__isl_keep isl_ast_node *node, + edge next_e, ivs_params &ip); + + /* Translates an isl_ast_node_block to Gimple. */ + edge translate_isl_ast_node_block (loop_p context_loop, +__isl_keep isl_ast_node *node, +edge next_e, ivs_params &ip); + + /* Converts a unary isl_ast_expr_op expression E to a GCC expression tree of + type TYPE. */ + tree unary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, +ivs_params &ip); + + /* Converts a binary isl_ast_expr_op expression E to a GCC expression tree of + type TYPE. */ + tree binary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + /* Converts a ternary isl_ast_expr_op expression E to a GCC expression tree of + type TYPE. */ + tree ternary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + /* Converts an isl_ast_expr_op expression E with unknown number of arguments + to a GCC expression tree of type TYPE. */ + tree nary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, + ivs_params &ip); + + /* Converts an ISL AST expression E back to a GCC expression tree of + type TYPE. */ + tree gcc_expression_from_isl_expression (tree type, + __isl_take isl_ast_expr *, + ivs_params &ip); + + /* Return the tree variable that corresponds to the given isl ast identifier + expression (an isl_ast_expr of type isl_ast_expr_id). + + FIXME: We should replace blind conversation of id's type with derivation + of the optimal type when we get the corresponding isl support. Blindly + converting type sizes may be problematic when we switch to smaller + types. */ + tree gcc_expression_from_isl_ast_expr_id (tree type, + __isl_keep isl_ast_expr *expr_id, + ivs_params &ip); + + /* Converts an isl_ast_expr_int expression E
[PATCH] [graphite] Reduce the number of params in a scop to 3
More than 3 params consumes too much memory while bootstrapping gcc with graphite enabled. BOOT_CFLAGS="-g -O2 -fgraphite-identity -floop-block -floop-interchange -floop-strip-mine" --- gcc/params.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/params.def b/gcc/params.def index 0ca3451..1f6e40e 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -850,7 +850,7 @@ DEFPARAM (PARAM_LOOP_UNROLL_JAM_DEPTH, DEFPARAM (PARAM_GRAPHITE_MAX_NB_SCOP_PARAMS, "graphite-max-nb-scop-params", "maximum number of parameters in a SCoP", - 10, 0, 0) + 3, 0, 0) /* Maximal number of basic blocks in the functions analyzed by Graphite. */ -- 2.1.0.243.g30d45f7
[Graphite] Redesign Graphite scop detection
From: hiraditya Redesign Graphite scop detection for faster compiler time and detecting more SCoPs. Existing algorithm for SCoP detection in graphite was based on dominator tree where a tree (CFG) traversal was required for analyzing an SESE. The tree traversal is linear in the number of basic blocks and SCoP detection is (probably) linear in number of instructions. That algorithm utilized a generic infrastructure of SESE which does not directly represent loops. With regards to graphite framework, we are only interested in subtrees with loops. The new algorithm is geared towards tree traversal on loop structure. The algorithm is linear in number of loops which is faster than the previous algorithm. Briefly, we start the traversal at a loop-nest and analyze it recursively for validity. Once a valid loop is found we find a valid adjacent loop. If an adjacent loop is found and is valid, we merge both loop nests otherwise we form a SCoP from the previous loop nest, and resume the algorithm from the adjacent loop nest. The data structure to represent an SESE is an ordered pair of edges (entry, exit). The new algoritm can extend a SCoP in both the directions. With this approach, the number of instructions to be analyzed for validity reduces to a minimal set. We start by analyzing those statements which are inside a loop, because validity of those statements is necessary for the validity of loop. The statements outside the loop nest can be just excluded from the SESE if they are not valid. This patch depends on: https://gcc.gnu.org/ml/gcc-patches/2015-09/msg02024.html Passes (c,c++,fortran) regtest and bootstrap. gcc/ChangeLog: 2015-09-27 Aditya Kumar Sebastian Pop * graphite-optimize-isl.c (optimize_isl): * graphite-scop-detection.c (struct sese_l): New type. (get_entry_bb): API for getting entry bb of SESE. (get_exit_bb): API for getting exit bb of SESE. (class debug_printer): New type. Simple printer in debug mode. (trivially_empty_bb_p): New. Return true when BB is empty or contains only debug instructions. (graphite_can_represent_expr): Call scalar_evoution_in_region instead of analyze_scalar_evolution. Pass in scop instead of only the scop entry. (stmt_has_simple_data_refs_p): Pass in scop instead of only the scop entry. (stmt_simple_for_scop_p): Same. (harmful_stmt_in_bb): Same. (graphite_can_represent_loop): Deleted. (struct scopdet_info): Deleted. (scopdet_basic_block_info): Deleted. (build_scops_1): Deleted. (bb_in_sd_region): Deleted. (find_single_entry_edge): Deleted. (find_single_exit_edge): Deleted. (create_single_entry_edge): Deleted. (sd_region_without_exit): Deleted. (create_single_exit_edge): Deleted. (unmark_exit_edges): Deleted. (mark_exit_edges): Deleted. (create_sese_edges): Deleted. (build_graphite_scops): Deleted. (canonicalize_loop_closed_ssa): Recompute all dominators at the end. (build_scops): Use the new scop_builder to build scops. (dot_all_scops_1): Use the new pretty printer. Print loop father as well. (loop_body_is_valid_scop): New. Return true if loop body is a valid scop. (class scop_builder): New. Builds SCoPs for polyhedral optimizatios. (scop_builder): New. Constructor. (static sese_l invalid_sese): sese_l with invalid edges. (get_sese): Get an sese (from a loop) if possible, invalid_sese otherwise. (get_nearest_dom_with_single_entry): Get nearest dominator of a basic_block with single entry. Return NULL if we get to the beginning of a function. (get_nearest_pdom_with_single_exit): Get nearest post-dominator of a basic_block with single exit. Return NULL if we get to the beginning of a function. (print_sese): Pretty-print SESE. (merge_sese): Merge two SESEs if possible and return the new SESE. (build_scop_depth): Start building the SCoP within a loop nest. (build_scop_breadth): Start building the SCoP at a single loop depth. Merge adjacent SESEs if valid. (can_represent_loop_1): Returns true if Graphite can represent loop inside SCoP. Helper for can_represent_loop. (can_represent_loop): Returns true if Graphite can represent LOOP and all its nested loops in SCoP. (loop_is_valid_scop): Returns true if LOOP and all its nests constitute a valid SCoP. (region_has_one_loop): Returns true of a region has only one loop. (add_scop): Add SCoP to the list of valid scops. Removes an already existing scop if it intersects with or subsumed by this one. (harmful_stmt_in_region): Returns true if SCoP has any statment which cannot be represented by Graphite
[PATCH] Make compute_deps, extend_schedule static
From: hiraditya No functional changes intended. Passes make check and bootstrap. gcc/ChangeLog: 2015-09-29 Aditya Kumar * graphite-dependences.c (scop_get_dependences): Moved in down in order to be visible to its caller. * graphite-poly.h: Removed compute_deps, and extend_schedule. --- gcc/graphite-dependences.c | 62 +++--- gcc/graphite-poly.h| 16 2 files changed, 31 insertions(+), 47 deletions(-) diff --git a/gcc/graphite-dependences.c b/gcc/graphite-dependences.c index 85f16f3..e39394a 100644 --- a/gcc/graphite-dependences.c +++ b/gcc/graphite-dependences.c @@ -47,35 +47,6 @@ along with GCC; see the file COPYING3. If not see #include "graphite-poly.h" -isl_union_map * -scop_get_dependences (scop_p scop) -{ - isl_union_map *dependences; - - if (!scop->must_raw) -compute_deps (scop, SCOP_BBS (scop), - &scop->must_raw, &scop->may_raw, - &scop->must_raw_no_source, &scop->may_raw_no_source, - &scop->must_war, &scop->may_war, - &scop->must_war_no_source, &scop->may_war_no_source, - &scop->must_waw, &scop->may_waw, - &scop->must_waw_no_source, &scop->may_waw_no_source); - - dependences = isl_union_map_copy (scop->must_raw); - dependences = isl_union_map_union (dependences, -isl_union_map_copy (scop->must_war)); - dependences = isl_union_map_union (dependences, -isl_union_map_copy (scop->must_waw)); - dependences = isl_union_map_union (dependences, -isl_union_map_copy (scop->may_raw)); - dependences = isl_union_map_union (dependences, -isl_union_map_copy (scop->may_war)); - dependences = isl_union_map_union (dependences, -isl_union_map_copy (scop->may_waw)); - - return dependences; -} - /* Add the constraints from the set S to the domain of MAP. */ static isl_map * @@ -252,7 +223,7 @@ extend_schedule_1 (__isl_take isl_map *map, void *user) /* Return a relation that has uniform output dimensions. */ -__isl_give isl_union_map * +static __isl_give isl_union_map * extend_schedule (__isl_take isl_union_map *x) { int max = 0; @@ -519,7 +490,7 @@ subtract_commutative_associative_deps (scop_p scop, /* Compute the original data dependences in SCOP for all the reads and writes in PBBS. */ -void +static void compute_deps (scop_p scop, vec pbbs, isl_union_map **must_raw, isl_union_map **may_raw, @@ -595,6 +566,35 @@ transform_is_safe (scop_p scop, isl_union_map *transform) return res; } +isl_union_map * +scop_get_dependences (scop_p scop) +{ + isl_union_map *dependences; + + if (!scop->must_raw) +compute_deps (scop, SCOP_BBS (scop), + &scop->must_raw, &scop->may_raw, + &scop->must_raw_no_source, &scop->may_raw_no_source, + &scop->must_war, &scop->may_war, + &scop->must_war_no_source, &scop->may_war_no_source, + &scop->must_waw, &scop->may_waw, + &scop->must_waw_no_source, &scop->may_waw_no_source); + + dependences = isl_union_map_copy (scop->must_raw); + dependences = isl_union_map_union (dependences, +isl_union_map_copy (scop->must_war)); + dependences = isl_union_map_union (dependences, +isl_union_map_copy (scop->must_waw)); + dependences = isl_union_map_union (dependences, +isl_union_map_copy (scop->may_raw)); + dependences = isl_union_map_union (dependences, +isl_union_map_copy (scop->may_war)); + dependences = isl_union_map_union (dependences, +isl_union_map_copy (scop->may_waw)); + + return dependences; +} + /* Return true when the SCOP transformed schedule is correct. */ bool diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h index 3bd22f0..b2dbd36 100644 --- a/gcc/graphite-poly.h +++ b/gcc/graphite-poly.h @@ -456,22 +456,6 @@ scop_set_nb_params (scop_p scop, graphite_dim_t nb_params) } bool graphite_legal_transform (scop_p); -__isl_give isl_union_map *extend_schedule (__isl_take isl_union_map *); - -void -compute_deps (scop_p scop, vec pbbs, - isl_union_map **must_raw, - isl_union_map **may_raw, - isl_union_map **must_raw_no_source, - isl_union_map **may_raw_no_source, - isl_union_map **must_war, - isl_union_map **may_war, - isl_union_map **must_war_no_source, -
[PATCH] Rename gimple_bb to gimple_poly_bb
From: hiraditya Renaming gimple_bb to gimple_poly_bb because there is a function gimple_bb by the same name in gimple.h. No functional change intended. Passes regtest and bootstrap. gcc/ChangeLog: 2015-10-01 Aditya Kumar Sebastian Pop * graphite-isl-ast-to-gimple.c (class translate_isl_ast_to_gimple): Renamed type from gimple_bb_p to gimple_poly_bb_p. (translate_isl_ast_node_user): Same. * graphite-poly.c (new_poly_bb): Same. * graphite-poly.h (gbb_from_bb): Same. * sese.h: Same. * graphite-sese-to-poly.c (new_gimple_bb): gimple_bb_p -> gimple_poly_bb_p (build_scop_scattering): Same. (find_params_in_bb): Same. (add_conditions_to_domain): Same. (sese_dom_walker::before_dom_children): Same. (analyze_drs_in_stmts): Same. (new_pbb_from_pbb): Same. (free_data_refs_aux): New pointer to type base_alias_pair. * graphite-sese-to-poly.h: Same. * sese.c (if_region_set_false_region): Fixed Indentation. (move_sese_in_condition): Same. --- gcc/graphite-isl-ast-to-gimple.c | 6 +++--- gcc/graphite-poly.c | 6 +++--- gcc/graphite-poly.h | 6 +++--- gcc/graphite-sese-to-poly.c | 30 +++--- gcc/graphite-sese-to-poly.h | 4 ++-- gcc/sese.c | 5 +++-- gcc/sese.h | 10 +- 7 files changed, 34 insertions(+), 33 deletions(-) diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c index 4bdeca0..18342d0 100644 --- a/gcc/graphite-isl-ast-to-gimple.c +++ b/gcc/graphite-isl-ast-to-gimple.c @@ -259,7 +259,7 @@ class translate_isl_ast_to_gimple FIXME: Instead of using a vec that maps each loop id to a possible chrec, we could consider using a map that maps loop ids to the corresponding tree expressions. */ - void build_iv_mapping (vec iv_map, gimple_bb_p gbb, + void build_iv_mapping (vec iv_map, gimple_poly_bb_p gbb, __isl_keep isl_ast_expr *user_expr, ivs_params &ip, sese region); private: @@ -739,7 +739,7 @@ translate_isl_ast_node_for (loop_p context_loop, __isl_keep isl_ast_node *node, void translate_isl_ast_to_gimple:: -build_iv_mapping (vec iv_map, gimple_bb_p gbb, +build_iv_mapping (vec iv_map, gimple_poly_bb_p gbb, __isl_keep isl_ast_expr *user_expr, ivs_params &ip, sese region) { @@ -775,7 +775,7 @@ translate_isl_ast_node_user (__isl_keep isl_ast_node *node, isl_id *name_id = isl_ast_expr_get_id (name_expr); poly_bb_p pbb = (poly_bb_p) isl_id_get_user (name_id); gcc_assert (pbb); - gimple_bb_p gbb = PBB_BLACK_BOX (pbb); + gimple_poly_bb_p gbb = PBB_BLACK_BOX (pbb); vec iv_map; isl_ast_expr_free (name_expr); isl_id_free (name_id); diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c index d439758..f70a542 100644 --- a/gcc/graphite-poly.c +++ b/gcc/graphite-poly.c @@ -180,7 +180,7 @@ new_poly_bb (scop_p scop, void *black_box) pbb_set_black_box (pbb, black_box); PBB_DRS (pbb).create (3); PBB_IS_REDUCTION (pbb) = false; - GBB_PBB ((gimple_bb_p) black_box) = pbb; + GBB_PBB ((gimple_poly_bb_p) black_box) = pbb; return pbb; } @@ -324,7 +324,7 @@ print_pbb_domain (FILE *file, poly_bb_p pbb, int verbosity ATTRIBUTE_UNUSED) /* Dump the cases of a graphite basic block GBB on FILE. */ static void -dump_gbb_cases (FILE *file, gimple_bb_p gbb) +dump_gbb_cases (FILE *file, gimple_poly_bb_p gbb) { int i; gimple *stmt; @@ -351,7 +351,7 @@ dump_gbb_cases (FILE *file, gimple_bb_p gbb) /* Dump conditions of a graphite basic block GBB on FILE. */ static void -dump_gbb_conditions (FILE *file, gimple_bb_p gbb) +dump_gbb_conditions (FILE *file, gimple_poly_bb_p gbb) { int i; gimple *stmt; diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h index b2dbd36..fd59d6f 100644 --- a/gcc/graphite-poly.h +++ b/gcc/graphite-poly.h @@ -277,7 +277,7 @@ struct poly_bb bool is_reduction; }; -#define PBB_BLACK_BOX(PBB) ((gimple_bb_p) PBB->black_box) +#define PBB_BLACK_BOX(PBB) ((gimple_poly_bb_p) PBB->black_box) #define PBB_SCOP(PBB) (PBB->scop) #define PBB_DRS(PBB) (PBB->drs) #define PBB_IS_REDUCTION(PBB) (PBB->is_reduction) @@ -319,10 +319,10 @@ extern void debug_gmp_value (mpz_t); /* Returns a gimple_bb from BB. */ -static inline gimple_bb_p +static inline gimple_poly_bb_p gbb_from_bb (basic_block bb) { - return (gimple_bb_p) bb->aux; + return (gimple_poly_bb_p) bb->aux; } /* The poly_bb of the BB. */ diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index 26f75e9..65d3c9b 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -198,12 +198,12 @@ reduction_phi_p (sese region, gphi_iterator *psi) /* Store the GRAPHITE representation of BB. */ -static gimple_bb_p +static gimple_poly_bb_
[PATCH] use sese_l throughout scop-detection
From: hiraditya Use sese_l throughout SCoP detection and create vec at the very end when all SCoPs have been identified. 'struct sese_l' is very lightweight (two pointers) compared to 'struct scop'. No functional change intended. Passes regtest and bootstrap. gcc/ChangeLog: 2015-10-01 Aditya Kumar * graphite-scop-detection.c (struct sese_l): New conversion constructor so that this type can be pushed into a vec. (class scop_builder): use sese_l to collect scops. (get_scops): New getter function. (remove_intersecting_scops): Use sese_l instead of scops_p. (intersects): Same. (add_scop): Same. (subsumes): Same. (remove_subscops): Same. (build_scops): Add scops to vec once all the scops have been detected. --- gcc/graphite-scop-detection.c | 119 +- 1 file changed, 70 insertions(+), 49 deletions(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index a498ddc..953781d 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -62,11 +62,26 @@ struct sese_l : entry (e), exit (x) { } + /* This is to push objects of sese_l in a vec. */ + sese_l (int i) + : entry (NULL), exit (NULL) + { +gcc_assert (i == 0); + } + operator bool () const { return entry && exit; } + const sese_l& + operator= (const sese_l &s) + { +entry = s.entry; +exit = s.exit; +return *this; + } + edge entry; edge exit; }; @@ -813,13 +828,20 @@ loop_body_is_valid_scop (loop_p loop, sese_l scop) class scop_builder { public: - scop_builder (vec *s) -: scops (s) + scop_builder () +: scops (vNULL) { } static sese_l invalid_sese; - sese_l get_sese (loop_p loop) + vec +get_scops () + { +return scops; + } + + sese_l +get_sese (loop_p loop) { if (!loop) return invalid_sese; @@ -898,10 +920,7 @@ class scop_builder } /* Merge scops at same loop depth and returns the new sese. - TODO: Free the already allocated sese's first and second, or reuse. - Returns SECOND when first is NULL. SECOND cannot be NULL. - Frees up SECOND and returns a new SESE when merge was successful. - */ + Returns a new SESE when merge was successful, INVALID_SESE otherwise. */ static sese_l merge_sese (sese_l first, sese_l second) @@ -952,7 +971,7 @@ class scop_builder /* For now we just want to bail out when exit does not post-dominate entry. TODO: We might just add a basic_block at the exit to make exit - post-dominate entry (the entrire region). */ + post-dominate entry (the entire region). */ if (!dominated_by_p (CDI_POST_DOMINATORS, get_entry_bb (entry), get_exit_bb (exit)) || !dominated_by_p (CDI_DOMINATORS, get_exit_bb (exit), @@ -1127,8 +1146,6 @@ class scop_builder add_scop (sese_l s) { gcc_assert (s); -edge scop_begin = s.entry; -edge scop_end = s.exit; /* Do not add scops with only one loop. */ if (region_has_one_loop (s)) @@ -1138,7 +1155,7 @@ class scop_builder return; } -if (get_exit_bb (scop_end) == EXIT_BLOCK_PTR_FOR_FN (cfun)) +if (get_exit_bb (s.exit) == EXIT_BLOCK_PTR_FOR_FN (cfun)) { DEBUG_PRINT (dp << "\n[scop-detection-fail] " << "Discarding SCoP exiting to return"; @@ -1146,16 +1163,13 @@ class scop_builder return; } -sese sese_reg = new_sese (scop_begin, scop_end); -scop_p newscop = new_scop (sese_reg); - /* Remove all the scops which are subsumed by s. */ -remove_subscops (newscop); +remove_subscops (s); /* Replace this with split-intersecting scops. */ -remove_intersecting_scops (newscop); +remove_intersecting_scops (s); -scops->safe_push (newscop); +scops.safe_push (s); DEBUG_PRINT (dp << "\nAdding SCoP "; print_sese (dump_file, s)); } @@ -1200,31 +1214,29 @@ class scop_builder /* Returns true if S1 subsumes/surrounds S2. */ static bool -subsumes (scop_p s1, scop_p s2) +subsumes (sese_l s1, sese_l s2) { -if (dominated_by_p (CDI_DOMINATORS, get_entry_bb (s2->region->entry), - get_entry_bb (s1->region->entry)) - && dominated_by_p (CDI_POST_DOMINATORS, get_entry_bb (s2->region->exit), - get_entry_bb (s1->region->exit))) +if (dominated_by_p (CDI_DOMINATORS, get_entry_bb (s2.entry), + get_entry_bb (s1.entry)) + && dominated_by_p (CDI_POST_DOMINATORS, get_entry_bb (s2.exit), + get_entry_bb (s1.exit))) return true; return false; } - /* TODO: Maybe vec can be made as vec so that it consumes - less memory and l
[PATCH] outline functions from stmt_simple_for_scop_p
From: hiraditya Outlined functions from stmt_simple_for_scop_p. No functional changes intended. Passes regtest and bootstrap. gcc/ChangeLog: 2015-10-01 Aditya Kumar * graphite-scop-detection.c (stmt_has_side_effects): New function outlined from stmt_simple_for_scop_p. (graphite_can_represent_stmt): Same. (stmt_simple_for_scop_p): Moved code out of this function for better readability. --- gcc/graphite-scop-detection.c | 75 ++- 1 file changed, 45 insertions(+), 30 deletions(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index a498ddc..38e2f18 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -304,44 +304,33 @@ stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt) return res; } -/* Return true only when STMT is simple enough for being handled by Graphite. - This depends on SCOP, as the parameters are initialized relatively to - this basic block, the linear functions are initialized based on the outermost - loop containing STMT inside the SCOP. BB is the place where we try to - evaluate the STMT. */ +/* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects. + Calls have side-effects, except those to const or pure + functions. */ static bool -stmt_simple_for_scop_p (sese_l scop, gimple *stmt, basic_block bb) +stmt_has_side_effects (gimple *stmt) { - loop_p loop = bb->loop_father; - - gcc_assert (scop); - - /* GIMPLE_ASM and GIMPLE_CALL may embed arbitrary side effects. - Calls have side-effects, except those to const or pure - functions. */ if (gimple_has_volatile_ops (stmt) || (gimple_code (stmt) == GIMPLE_CALL && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))) || (gimple_code (stmt) == GIMPLE_ASM)) { DEBUG_PRINT (dp << "[scop-detection-fail] " - << "Graphite cannot handle this stmt:\n"; + << "Statement has side-effects:\n"; print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS)); - return false; + return true; } + return false; +} - if (is_gimple_debug (stmt)) -return true; - - if (!stmt_has_simple_data_refs_p (scop, stmt)) -{ - DEBUG_PRINT (dp << "[scop-detection-fail] " - << "Graphite cannot handle data-refs in stmt:\n"; - print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);); - return false; -} +/* Returns true if STMT can be represented in polyhedral model. LABEL, + simple COND stmts, pure calls, and assignments can be repesented. */ +static bool +graphite_can_represent_stmt (sese_l scop, gimple *stmt, basic_block bb) +{ + loop_p loop = bb->loop_father; switch (gimple_code (stmt)) { case GIMPLE_LABEL: @@ -352,15 +341,15 @@ stmt_simple_for_scop_p (sese_l scop, gimple *stmt, basic_block bb) /* We can handle all binary comparisons. Inequalities are also supported as they can be represented with union of polyhedra. */ -enum tree_code code = gimple_cond_code (stmt); -if (!(code == LT_EXPR + enum tree_code code = gimple_cond_code (stmt); + if (!(code == LT_EXPR || code == GT_EXPR || code == LE_EXPR || code == GE_EXPR || code == EQ_EXPR || code == NE_EXPR)) - { - DEBUG_PRINT (dp << "[scop-detection-fail] " + { + DEBUG_PRINT (dp << "[scop-detection-fail] " << "Graphite cannot handle cond stmt:\n"; print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS)); return false; @@ -394,8 +383,34 @@ stmt_simple_for_scop_p (sese_l scop, gimple *stmt, basic_block bb) print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS)); return false; } +} - return false; +/* Return true only when STMT is simple enough for being handled by Graphite. + This depends on SCOP, as the parameters are initialized relatively to + this basic block, the linear functions are initialized based on the outermost + loop containing STMT inside the SCOP. BB is the place where we try to + evaluate the STMT. */ + +static bool +stmt_simple_for_scop_p (sese_l scop, gimple *stmt, basic_block bb) +{ + gcc_assert (scop); + + if (is_gimple_debug (stmt)) +return true; + + if (stmt_has_side_effects (stmt)) +return false; + + if (!stmt_has_simple_data_refs_p (scop, stmt)) +{ + DEBUG_PRINT (dp << "[scop-detection-fail] " + << "Graphite cannot handle data-refs in stmt:\n"; + print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);); + return false; +} + + return graphite_can_represent_stmt (scop, stmt, bb); } /* Return true when BB contains a harmful operation for a scop: that -- 2.1.0.243.g30d45f7
[PATCH] Reject loops early where ivs cannot be represented.
During scop detection we can figure out if loop IVs cannot be represeted in the polyhedral model. We now bail out early instead of waiting until graphite-sese-to-poly.c Passes regtest and bootstrap with BOOT_CFLAGS=="-g -O2 -fgraphite-identity -floop-nest-optimize" with ISL-0.15. gcc/ChangeLog: 2015-10-02 Aditya Kumar * graphite-scop-detection.c (loop_ivs_can_be_represented): New. Return true when a loop iv can be represented by a signed int. (loop_body_is_valid_scop): Call loop_ivs_can_be_represented. * graphite-sese-to-poly.c (remove_gbbs_in_scop): (new_gimple_poly_bb): Renamed from new_gimple_bb. (free_gimple_poly_bb): Renamed from free_gimple_bb. (try_generate_gimple_bb): Hoist loop invariant code. (analyze_drs_in_stmts): Same. (build_scop_drs): Call renamed functions. (new_pbb_from_pbb): Same. (scop_ivs_can_be_represented): Delete as functionality now moved to graphite-scop-detection.c (build_poly_scop): Remove call to scop_ivs_can_be_represented. --- gcc/graphite-scop-detection.c | 28 gcc/graphite-sese-to-poly.c | 74 +-- 2 files changed, 43 insertions(+), 59 deletions(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index a498ddc..d3f5a41 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -783,12 +783,40 @@ dot_scop (scop_p scop) #endif } +/* Can all ivs be represented by a signed integer? + As ISL might generate negative values in its expressions, signed loop ivs + are required in the backend. */ + +static bool +loop_ivs_can_be_represented (loop_p loop) +{ + for (gphi_iterator psi = gsi_start_phis (loop->header); + !gsi_end_p (psi); gsi_next (&psi)) +{ + gphi *phi = psi.phi (); + tree res = PHI_RESULT (phi); + tree type = TREE_TYPE (res); + + if (TYPE_UNSIGNED (type) + && TYPE_PRECISION (type) >= TYPE_PRECISION (long_long_integer_type_node)) +return false; +} + return true; +} + /* Return true when the body of LOOP has statements that can be represented as a valid scop. */ static bool loop_body_is_valid_scop (loop_p loop, sese_l scop) { + if (!loop_ivs_can_be_represented (loop)) +{ + DEBUG_PRINT (dp << "[scop-detection-fail] loop_" + << loop->num << "IV cannot be represented.\n"); + return false; +} + if (!loop_nest_has_data_refs (loop)) { DEBUG_PRINT (dp << "[scop-detection-fail] loop_" diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index 26f75e9..a219919 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -199,7 +199,7 @@ reduction_phi_p (sese region, gphi_iterator *psi) /* Store the GRAPHITE representation of BB. */ static gimple_bb_p -new_gimple_bb (basic_block bb, vec drs) +new_gimple_poly_bb (basic_block bb, vec drs) { struct gimple_bb *gbb; @@ -233,7 +233,7 @@ free_data_refs_aux (vec datarefs) /* Frees GBB. */ static void -free_gimple_bb (struct gimple_bb *gbb) +free_gimple_poly_bb (struct gimple_bb *gbb) { free_data_refs_aux (GBB_DATA_REFS (gbb)); free_data_refs (GBB_DATA_REFS (gbb)); @@ -253,7 +253,7 @@ remove_gbbs_in_scop (scop_p scop) poly_bb_p pbb; FOR_EACH_VEC_ELT (SCOP_BBS (scop), i, pbb) -free_gimple_bb (PBB_BLACK_BOX (pbb)); +free_gimple_poly_bb (PBB_BLACK_BOX (pbb)); } /* Deletes all scops in SCOPS. */ @@ -310,24 +310,21 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) drs.create (5); sese region = SCOP_REGION (scop); loop_p nest = outermost_loop_in_sese_1 (region, bb); - gimple_stmt_iterator gsi; + loop_p loop = bb->loop_father; + if (!loop_in_sese_p (loop, region)) +loop = nest; + gimple_stmt_iterator gsi; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); - loop_p loop; - if (is_gimple_debug (stmt)) continue; - loop = loop_containing_stmt (stmt); - if (!loop_in_sese_p (loop, region)) - loop = nest; - graphite_find_data_references_in_stmt (nest, loop, stmt, &drs); } - return new_gimple_bb (bb, drs); + return new_gimple_poly_bb (bb, drs); } /* Returns true if all predecessors of BB, that are not dominated by BB, are @@ -1887,7 +1884,7 @@ build_scop_drs (scop_p scop) for (i = 0; SCOP_BBS (scop).iterate (i, &pbb); i++) if (GBB_DATA_REFS (PBB_BLACK_BOX (pbb)).is_empty ()) { - free_gimple_bb (PBB_BLACK_BOX (pbb)); + free_gimple_poly_bb (PBB_BLACK_BOX (pbb)); free_poly_bb (pbb); SCOP_BBS (scop).ordered_remove (i); i--; @@ -1935,19 +1932,18 @@ analyze_drs_in_stmts (scop_p scop, basic_block bb, vec stmts) return; nest = outermost_l
[PATCH 1/2] [Refactoring graphite] Move declarations, assign types, renaming.
1. Move declarations near the assignment/usage. 2. Assign type to members which were void*. 3. Rename scop->context to scop::param_context, and scop::ctx to scop::isl_context No functional changes intended. Passes regtest and bootstrap. gcc/ChangeLog: 2015-10-05 Aditya Kumar * graphite-dependences.c (scop_get_reads): Renamed scop->context to scop->param_context. (scop_get_must_writes): Same. (scop_get_may_writes): Same. (scop_get_original_schedule): Same. (scop_get_transformed_schedule): Same. (subtract_commutative_associative_deps): Same. * graphite-isl-ast-to-gimple.c (add_parameters_to_ivs_params): Same. (generate_isl_context): Same. (generate_isl_schedule): Same. (scop_to_isl_ast): Same. (graphite_regenerate_ast_isl): Same. * graphite-optimize-isl.c (scop_get_domains): Same. (optimize_isl): Renamed scop->context to scop->param_context. * graphite-poly.c (new_poly_bb): Change the type of argument to gimple_poly_bb_p. (new_scop): Renamed scop->context to scop->param_context. (free_scop): Same. (print_scop_context): Same. * graphite-poly.h (new_poly_dr): Change the type of argument from void* to data_reference_p. (struct poly_bb): Change the type of black_box to gimple_poly_bb_p. (new_poly_bb): Change the type of argument from void* to gimple_poly_bb_p. (pbb_set_black_box): Same. (struct scop): Rename context to param_context, ctx to isl_context. * graphite-scop-detection.c (scop_detection::build_scop_bbs_1): Move declarations closer to assignment. (find_params_in_bb): Same. (find_scop_parameters): Same. * graphite-sese-to-poly.c (unsigned ssa_name_version_typesize): Global to be used for statement IDs. (isl_id_for_pbb): Use ssa_name_version_typesize. (simple_copy_phi_p): Move declarations closer to assignment. (build_pbb_scattering_polyhedrons): Same. (build_scop_scattering): Same. (isl_id_for_ssa_name): Same. (extract_affine_name): Same. (extract_affine_int): Same. (extract_affine): Same. (set_scop_parameter_dim): Use renamed member. (build_loop_iteration_domains): Same. (add_param_constraints): Same. (build_scop_iteration_domain): Same. (pdr_add_data_dimensions): Same. (build_poly_dr): Same. (build_scop_drs): Move declarations closer to assignment. (analyze_drs_in_stmts): Same. (insert_out_of_ssa_copy): Same. (insert_out_of_ssa_copy_on_edge): Same. (propagate_expr_outside_region): Same. (rewrite_phi_out_of_ssa): Same. (rewrite_degenerate_phi): Same. (rewrite_reductions_out_of_ssa): Same. (rewrite_cross_bb_scalar_dependence): Same. (handle_scalar_deps_crossing_scop_limits): Same. (rewrite_cross_bb_scalar_deps): Same. * graphite.c (graphite_transform_loops): Use renamed member. --- gcc/graphite-dependences.c | 12 +-- gcc/graphite-isl-ast-to-gimple.c | 13 +-- gcc/graphite-optimize-isl.c | 22 ++-- gcc/graphite-poly.c | 12 +-- gcc/graphite-poly.h | 14 +-- gcc/graphite-scop-detection.c| 21 ++-- gcc/graphite-sese-to-poly.c | 212 --- gcc/graphite.c | 2 +- 8 files changed, 134 insertions(+), 174 deletions(-) diff --git a/gcc/graphite-dependences.c b/gcc/graphite-dependences.c index e39394a..4752133 100644 --- a/gcc/graphite-dependences.c +++ b/gcc/graphite-dependences.c @@ -79,7 +79,7 @@ scop_get_reads (scop_p scop, vec pbbs) int i, j; poly_bb_p pbb; poly_dr_p pdr; - isl_space *space = isl_set_get_space (scop->context); + isl_space *space = isl_set_get_space (scop->param_context); isl_union_map *res = isl_union_map_empty (space); FOR_EACH_VEC_ELT (pbbs, i, pbb) @@ -100,7 +100,7 @@ scop_get_must_writes (scop_p scop, vec pbbs) int i, j; poly_bb_p pbb; poly_dr_p pdr; - isl_space *space = isl_set_get_space (scop->context); + isl_space *space = isl_set_get_space (scop->param_context); isl_union_map *res = isl_union_map_empty (space); FOR_EACH_VEC_ELT (pbbs, i, pbb) @@ -121,7 +121,7 @@ scop_get_may_writes (scop_p scop, vec pbbs) int i, j; poly_bb_p pbb; poly_dr_p pdr; - isl_space *space = isl_set_get_space (scop->context); + isl_space *space = isl_set_get_space (scop->param_context); isl_union_map *res = isl_union_map_empty (space); FOR_EACH_VEC_ELT (pbbs, i, pbb) @@ -141,7 +141,7 @@ scop_get_original_schedule (scop_p scop, vec pbbs) { int i; poly_bb_p pbb; - isl_space *space = isl_set_get_space (scop->context); + isl_space *space = isl_set_get_space (scop->param_context); isl_union_map *res = isl_union_map_empty (space); FOR_EACH_VEC_ELT (pbbs, i, pbb) @@ -
[PATCH] Early exit to avoid redundant computations
Analyze only those bbs which are outside the region for uses which might be defined inside the region. This is intended to improve the compile time. This algorithm may be further improved by only looking at the successors of region as these regions are sese. Added FIXMEs to make this improvement in future. Passes regtest and bootstrap on x86_64. gcc/ChangeLog: 2015-10-05 Aditya Kumar * graphite-sese-to-poly.c (build_loop_iteration_domains): Only loops which are in this region are passed so gcc_assert and remove redundant computation. * sese.c (sese_build_liveouts): Pass only those bbs which are not in region. (sese_bad_liveouts_use): Only BBs which are not in region are passed so gcc_assert on that and remove unnecessary computation. (sese_build_liveouts_use): Same. --- gcc/graphite-sese-to-poly.c | 3 ++- gcc/sese.c | 36 +++- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index 15d16c1..d0c7eb4 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -595,6 +595,7 @@ build_loop_iteration_domains (scop_p scop, struct loop *loop, tree nb_iters = number_of_latch_executions (loop); sese region = SCOP_REGION (scop); + gcc_assert (loop_in_sese_p (loop, region)); isl_set *inner = isl_set_copy (outer); int pos = isl_set_dim (outer, isl_dim_set); @@ -679,7 +680,7 @@ build_loop_iteration_domains (scop_p scop, struct loop *loop, else gcc_unreachable (); - if (loop->inner && loop_in_sese_p (loop->inner, region)) + if (loop->inner) build_loop_iteration_domains (scop, loop->inner, nb + 1, isl_set_copy (inner), doms); diff --git a/gcc/sese.c b/gcc/sese.c index 2c654bc..7637ebb 100644 --- a/gcc/sese.c +++ b/gcc/sese.c @@ -134,20 +134,16 @@ static void sese_build_liveouts_use (sese region, bitmap liveouts, basic_block bb, tree use) { - unsigned ver; - basic_block def_bb; - + gcc_assert (!bb_in_sese_p (bb, region)); if (TREE_CODE (use) != SSA_NAME) return; - ver = SSA_NAME_VERSION (use); - def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); + basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); - if (!def_bb - || !bb_in_sese_p (def_bb, region) - || bb_in_sese_p (bb, region)) + if (!def_bb || !bb_in_sese_p (def_bb, region)) return; + unsigned ver = SSA_NAME_VERSION (use); bitmap_set_bit (liveouts, ver); } @@ -188,24 +184,21 @@ static bool sese_bad_liveouts_use (sese region, bitmap liveouts, basic_block bb, tree use) { - unsigned ver; - basic_block def_bb; + gcc_assert (!bb_in_sese_p (bb, region)); if (TREE_CODE (use) != SSA_NAME) return false; - ver = SSA_NAME_VERSION (use); + unsigned ver = SSA_NAME_VERSION (use); /* If it's in liveouts, the variable will get a new PHI node, and the debug use will be properly adjusted. */ if (bitmap_bit_p (liveouts, ver)) return false; - def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); + basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (use)); - if (!def_bb - || !bb_in_sese_p (def_bb, region) - || bb_in_sese_p (bb, region)) + if (!def_bb || !bb_in_sese_p (def_bb, region)) return false; return true; @@ -248,10 +241,19 @@ sese_build_liveouts (sese region, bitmap liveouts) basic_block bb; FOR_EACH_BB_FN (bb, cfun) -sese_build_liveouts_bb (region, liveouts, bb); + { +/* FIXME: We could start iterating form the successor of sese. */ +if (!bb_in_sese_p (bb, region)) + sese_build_liveouts_bb (region, liveouts, bb); + } + if (MAY_HAVE_DEBUG_STMTS) FOR_EACH_BB_FN (bb, cfun) - sese_reset_debug_liveouts_bb (region, liveouts, bb); +{ + /* FIXME: We could start iterating form the successor of sese. */ + if (!bb_in_sese_p (bb, region)) + sese_reset_debug_liveouts_bb (region, liveouts, bb); +} } /* Builds a new SESE region from edges ENTRY and EXIT. */ -- 2.1.0.243.g30d45f7
RE: [PATCH] [graphite] use debug_printer throughout graphite.
I agree that the macro does not look good, but I think use cases do. This saves one level of indentation which were used only for debug messages. Mostly useful when the functions have deep indentations coupled with debug messages. Also, this saves three lines of code for (almost) each usage of fprintf style. One for the check ( if (dump_file...) ... ), and two for the braces. I first saw this style in llvm where they use something similar, but their debug printer is more extensive. I was trying to adopt that with a minimal functioning debug printer. Anyways, I'll revert these changes. -Aditya -Original Message- From: Richard Biener [mailto:richard.guent...@gmail.com] Sent: Wednesday, October 07, 2015 9:02 AM To: hiraditya Cc: GCC Patches; s@samsung.com; Tobias Grosser; aditya...@samsung.com; Richard Biener Subject: Re: [PATCH] [graphite] use debug_printer throughout graphite. On Wed, Oct 7, 2015 at 2:55 PM, hiraditya wrote: > The debug_printer provides an elegant way to represent debug related > statements, so we are extending its usage throughout graphite infrastructure. > No functional changes intended. Passes regtest and bootstrap. But the DEBUG_PRINT macro is super-ugly. I find the existing style with printfs better as it is what everybody else uses. So I'd rather kill DEBUG_PRINT and friends... Richard. > gcc/ChangeLog: > > 2015-10-07 hiraditya > > * graphite-isl-ast-to-gimple.c (graphite_regenerate_ast_isl): Use > debug_printer. > * graphite-optimize-isl.c (get_schedule_for_band): Same. > (optimize_isl): Same. > * graphite-scop-detection.c (canonicalize_loop_closed_ssa): Rename db > to dbgs. > (scop_detection::merge_sese): Same. > (scop_detection::build_scop_depth): Same. > (scop_detection::build_scop_breadth): Same. > (scop_detection::loop_is_valid_scop): Same. > (scop_detection::add_scop): Same. > (scop_detection::harmful_stmt_in_region): Same. > (scop_detection::remove_subscops): Same. > (scop_detection::remove_intersecting_scops): Same. > (stmt_has_side_effects): Same. > (scop_detection::graphite_can_represent_stmt): Same. > (scop_detection::stmt_simple_for_scop_p): Same. > (scop_detection::loop_body_is_valid_scop): Same. > (build_scops): Same. > (debug_printer): Move the declaration to sese.h > * graphite.c (graphite_initialize): Initialize the dump_file for > debug_printer. > (graphite_finalize): Use debug_printer. > (graphite_transform_loops): Use debug_printer. > * sese.h: Move the declaration of debug_printer here. > --- > gcc/graphite-isl-ast-to-gimple.c | 28 +--- > gcc/graphite-optimize-isl.c | 13 +++--- > gcc/graphite-scop-detection.c| 94 > +--- > gcc/graphite.c | 49 + > gcc/sese.h | 34 +++ > 5 files changed, 99 insertions(+), 119 deletions(-) > > diff --git a/gcc/graphite-isl-ast-to-gimple.c > b/gcc/graphite-isl-ast-to-gimple.c > index 12e6c61..f986866 100644 > --- a/gcc/graphite-isl-ast-to-gimple.c > +++ b/gcc/graphite-isl-ast-to-gimple.c > @@ -1153,12 +1153,9 @@ graphite_regenerate_ast_isl (scop_p scop) >graphite_regenerate_error = false; >root_node = scop_to_isl_ast (scop, ip); > > - if (dump_file && (dump_flags & TDF_DETAILS)) > -{ > - fprintf (dump_file, "\nISL AST generated by ISL: \n"); > - print_isl_ast_node (dump_file, root_node, scop->isl_context); > - fprintf (dump_file, "\n"); > -} > + DEBUG_PRINT (dbgs << "\nISL AST generated by ISL: \n"; > +print_isl_ast_node (dump_file, root_node, scop->isl_context); > +dbgs << "\n"); > >recompute_all_dominators (); >graphite_verify (); > @@ -1200,18 +1197,13 @@ graphite_regenerate_ast_isl (scop_p scop) >isl_ast_node_free (root_node); >timevar_pop (TV_GRAPHITE_CODE_GEN); > > - if (dump_file && (dump_flags & TDF_DETAILS)) > -{ > - loop_p loop; > - int num_no_dependency = 0; > - > - FOR_EACH_LOOP (loop, 0) > - if (loop->can_be_parallel) > - num_no_dependency++; > - > - fprintf (dump_file, "\n%d loops carried no dependency.\n", > - num_no_dependency); > -} > + DEBUG_PRINT ( > +loop_p loop; > +int num_no_dependency = 0; > +FOR_EACH_LOOP (loop, 0) > + if (loop->can_be_parallel) > + num_no_dependency++; > +dbgs << num_no_dependency << " loops carried no dependency.\n"); > >return !graphite_regenerate_error; > } > diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c > index 2bae417..03ab5e9 100644 > --- a/gcc/graphite-optimize-isl.c > +++ b/gcc/graphite-optimize-isl.c > @@ -177,14 +177,12 @@ get_schedule_for_band (isl_band *band, int *dimensions) >/* It does not make any sense to tile a band with just one dimension. */ >if (*dimensions
[PATCH] Refactoring sese.h and graphite-poly.h
Rename scop->region to scop->scop_info Removed conversion constructors for sese_l and dr_info. Removed macros. No functional changed intended. Passes regtest and bootstrap. gcc/ChangeLog: 2015-19-10 Aditya Kumar * graphite-poly.h (struct dr_info): Removed conversion constructor. (struct scop): Renamed scop::region to scop::scop_info (scop_set_region): Same. (SCOP_REGION): Removed (SCOP_CONTEXT): Removed. (POLY_SCOP_P): Removed. * graphite-isl-ast-to-gimple.c (translate_isl_ast_node_user): Rename scop->region to scop->scop_info. (add_parameters_to_ivs_params): Same. (graphite_regenerate_ast_isl): Same. * graphite-poly.c (new_scop): Same. (free_scop): Same. (print_scop_params): Same. * graphite-scop-detection.c (scop_detection::remove_subscops): Same. (scop_detection::remove_intersecting_scops): Use pointer to sese_l. (dot_all_scops_1): Rename scop->region to scop->scop_info. (scop_detection::nb_pbbs_in_loops): Same. (find_scop_parameters): Same. (try_generate_gimple_bb): Same. (gather_bbs::before_dom_children): Same. (gather_bbs::after_dom_children): Same. (build_scops): Same. * graphite-sese-to-poly.c (build_scop_scattering): Same. (extract_affine_chrec): Same. (extract_affine): Same. (set_scop_parameter_dim): Same. (build_loop_iteration_domains): Same. (create_pw_aff_from_tree): Same. (add_param_constraints): Same. (build_scop_iteration_domain): Same. (build_scop_drs): Same. (analyze_drs_in_stmts): Same. (insert_out_of_ssa_copy_on_edge): Same. (rewrite_close_phi_out_of_ssa):Same. (rewrite_reductions_out_of_ssa):Same. (handle_scalar_deps_crossing_scop_limits):Same. (rewrite_cross_bb_scalar_deps):Same. (rewrite_cross_bb_scalar_deps_out_of_ssa):Same. (build_poly_scop):Same. (build_alias_set): Use pointer to dr_info. * graphite.c (print_graphite_scop_statistics): (graphite_transform_loops): * sese.h (struct sese_l): Remove conversion constructor. --- gcc/graphite-isl-ast-to-gimple.c | 8 gcc/graphite-poly.c | 8 gcc/graphite-poly.h | 14 ++ gcc/graphite-scop-detection.c| 34 gcc/graphite-sese-to-poly.c | 42 gcc/graphite.c | 10 +- gcc/sese.h | 3 --- 7 files changed, 53 insertions(+), 66 deletions(-) diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c index 2f2e2ba..7f99bce 100644 --- a/gcc/graphite-isl-ast-to-gimple.c +++ b/gcc/graphite-isl-ast-to-gimple.c @@ -786,10 +786,10 @@ translate_isl_ast_node_user (__isl_keep isl_ast_node *node, iv_map.create (nb_loops); iv_map.safe_grow_cleared (nb_loops); - build_iv_mapping (iv_map, gbb, user_expr, ip, pbb->scop->region->region); + build_iv_mapping (iv_map, gbb, user_expr, ip, pbb->scop->scop_info->region); isl_ast_expr_free (user_expr); next_e = copy_bb_and_scalar_dependences (GBB_BB (gbb), - pbb->scop->region, next_e, + pbb->scop->scop_info, next_e, iv_map, &graphite_regenerate_error); iv_map.release (); @@ -909,7 +909,7 @@ print_isl_ast_node (FILE *file, __isl_keep isl_ast_node *node, static void add_parameters_to_ivs_params (scop_p scop, ivs_params &ip) { - sese_info_p region = scop->region; + sese_info_p region = scop->scop_info; unsigned nb_parameters = isl_set_dim (scop->param_context, isl_dim_param); gcc_assert (nb_parameters == SESE_PARAMS (region).length ()); unsigned i; @@ -1144,7 +1144,7 @@ bool graphite_regenerate_ast_isl (scop_p scop) { loop_p context_loop; - sese_info_p region = scop->region; + sese_info_p region = scop->scop_info; ifsese if_region = NULL; isl_ast_node *root_node; ivs_params ip; diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c index 0d1dc63..eb76f05 100644 --- a/gcc/graphite-poly.c +++ b/gcc/graphite-poly.c @@ -306,7 +306,7 @@ new_scop (edge entry, edge exit) scop->may_waw_no_source = NULL; scop_set_region (scop, region); scop->pbbs.create (3); - POLY_SCOP_P (scop) = false; + scop->poly_scop_p = false; scop->drs.create (3); return scop; @@ -321,7 +321,7 @@ free_scop (scop_p scop) poly_bb_p pbb; remove_gbbs_in_scop (scop); - free_sese_info (SCOP_REGION (scop)); + free_sese_info (scop->scop_info); FOR_EACH_VEC_ELT (scop->pbbs, i, pbb) free_poly_bb (pbb); @@ -475,13 +475,13 @@ print_pbb (FILE *file, poly_bb_p pbb) v
[PATCH] Refactor graphite-sese-to-poly, sese.h, graphite-poly.h
Now that scop contains a list of all the basic blocks inside, it makes sense to iterate over only those basic blocks in graphite-sese-to-poly.c:rewrite_reductions_out_of_ssa,rewrite_cross_bb_scalar_deps_out_of_ssa Passes regtest and bootstrap. gcc/ChangeLog: 2015-10-20 Aditya Kumar * graphite-poly.h (struct dr_info): Added invalid_alias_set number. (operator=): Removed. (dr_info): Make alias_set number the last argument with default value of invalid_alias_set. * graphite-sese-to-poly.c (build_scop_drs): Update constructor of dr_info. (rewrite_reductions_out_of_ssa): Iterate only through the basic blocks which are inside region. (rewrite_cross_bb_scalar_deps_out_of_ssa): Same. * sese.h (struct sese_l): Removed assignment operator. (split_region_for_bb): Removed dead code. --- gcc/graphite-poly.h | 26 +-- gcc/graphite-sese-to-poly.c | 50 ++--- gcc/sese.h | 37 - 3 files changed, 34 insertions(+), 79 deletions(-) diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h index 721e914..5298f85 100644 --- a/gcc/graphite-poly.h +++ b/gcc/graphite-poly.h @@ -373,29 +373,23 @@ pbb_set_black_box (poly_bb_p pbb, gimple_poly_bb_p black_box) struct dr_info { + enum { +invalid_alias_set = -1 + }; /* The data reference. */ data_reference_p dr; - /* ALIAS_SET is the SCC number assigned by a graph_dfs of the alias graph. -1 - is an invalid alias set. */ - int alias_set; - /* The polyhedral BB containing this DR. */ poly_bb_p pbb; + /* ALIAS_SET is the SCC number assigned by a graph_dfs of the alias graph. + -1 is an invalid alias set. */ + int alias_set; + /* Construct a DR_INFO from a data reference DR, an ALIAS_SET, and a PBB. */ - dr_info (data_reference_p dr, int alias_set, poly_bb_p pbb) -: dr (dr), alias_set (alias_set), pbb (pbb) {} - - /* Assignment operator, to be able to iterate over a vec of these objects. */ - const dr_info & - operator= (const dr_info &p) - { -dr = p.dr; -alias_set = p.alias_set; -pbb = p.pbb; -return *this; - } + dr_info (data_reference_p dr, poly_bb_p pbb, + int alias_set = invalid_alias_set) +: dr (dr), pbb (pbb), alias_set (alias_set) {} }; /* A SCOP is a Static Control Part of the program, simple enough to be diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index d75e6a2..d1eae90 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -1151,7 +1151,7 @@ build_scop_drs (scop_p scop) FOR_EACH_VEC_ELT (scop->pbbs, i, pbb) if (pbb) FOR_EACH_VEC_ELT (GBB_DATA_REFS (PBB_BLACK_BOX (pbb)), j, dr) - scop->drs.safe_push (dr_info (dr, -1, pbb)); + scop->drs.safe_push (dr_info (dr, pbb)); build_alias_set (scop); @@ -1497,31 +1497,29 @@ rewrite_degenerate_phi (gphi_iterator *psi) static void rewrite_reductions_out_of_ssa (scop_p scop) { + int i; basic_block bb; - sese_l region = scop->scop_info->region; - - FOR_EACH_BB_FN (bb, cfun) -if (bb_in_sese_p (bb, region)) - for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi);) - { - gphi *phi = psi.phi (); + FOR_EACH_VEC_ELT (scop->scop_info->bbs, i, bb) +for (gphi_iterator psi = gsi_start_phis (bb); !gsi_end_p (psi);) + { + gphi *phi = psi.phi (); - if (virtual_operand_p (gimple_phi_result (phi))) - { - gsi_next (&psi); - continue; - } + if (virtual_operand_p (gimple_phi_result (phi))) + { + gsi_next (&psi); + continue; + } - if (gimple_phi_num_args (phi) > 1 - && degenerate_phi_result (phi)) - rewrite_degenerate_phi (&psi); + if (gimple_phi_num_args (phi) > 1 + && degenerate_phi_result (phi)) + rewrite_degenerate_phi (&psi); - else if (scalar_close_phi_node_p (phi)) - rewrite_close_phi_out_of_ssa (scop, &psi); + else if (scalar_close_phi_node_p (phi)) + rewrite_close_phi_out_of_ssa (scop, &psi); - else if (reduction_phi_p (region, &psi)) - rewrite_phi_out_of_ssa (scop, &psi); - } + else if (reduction_phi_p (scop->scop_info->region, &psi)) + rewrite_phi_out_of_ssa (scop, &psi); + } update_ssa (TODO_update_ssa); #ifdef ENABLE_CHECKING @@ -1684,7 +1682,6 @@ rewrite_cross_bb_scalar_deps (scop_p scop, gimple_stmt_iterator *gsi) static void rewrite_cross_bb_scalar_deps_out_of_ssa (scop_p scop) { - basic_block bb; gimple_stmt_iterator psi; sese_l region = scop->scop_info->region; bool changed = false; @@ -1692,10 +1689,11 @@ rewrite_cross_bb_scalar_deps_out_of_ssa (scop_p scop) /* Create an
[PATCH] isl schedule tree
From: Abderrazek Zaafrani Use isl_schedule_node instead of isl_band_list for isl-0.15. Passes regtest and bootstrap for isl-0.15. gcc/ChangeLog: 2015-10-22 Abderrazek Zaafrani * graphite-optimize-isl.c (get_schedule_for_node_st): New callback function to schedule based on isl_schedule_node. (get_schedule_map_st): New schedule optimizer based on isl_schedule_node. (scop_get_domains): New. Return the isl_union_set containing the domains of all the pbbs. (optimize_isl): Call the new function get_schedule_map_st for isl-0.15 gcc/testsuite/ChangeLog: 2015-10-22 Abderrazek Zaafrani * gcc.dg/graphite/block-0.c: Changed to match pattern. * gcc.dg/graphite/interchange-1.c: Same. * gcc.dg/graphite/interchange-10.c: Same. * gcc.dg/graphite/interchange-11.c: Same. * gcc.dg/graphite/interchange-13.c: Same. * gcc.dg/graphite/interchange-3.c: Same. * gcc.dg/graphite/interchange-4.c: Same. * gcc.dg/graphite/interchange-7.c: Same. * gcc.dg/graphite/interchange-9.c: Same. * gcc.dg/graphite/uns-interchange-9.c: Same. * gfortran.dg/graphite/interchange-3.f90: Same. --- gcc/graphite-optimize-isl.c| 98 -- gcc/testsuite/gcc.dg/graphite/block-0.c| 2 +- gcc/testsuite/gcc.dg/graphite/interchange-1.c | 7 +- gcc/testsuite/gcc.dg/graphite/interchange-10.c | 2 +- gcc/testsuite/gcc.dg/graphite/interchange-11.c | 2 +- gcc/testsuite/gcc.dg/graphite/interchange-13.c | 3 +- gcc/testsuite/gcc.dg/graphite/interchange-3.c | 2 +- gcc/testsuite/gcc.dg/graphite/interchange-4.c | 2 +- gcc/testsuite/gcc.dg/graphite/interchange-7.c | 2 +- gcc/testsuite/gcc.dg/graphite/interchange-9.c | 2 +- gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c | 2 +- .../gfortran.dg/graphite/interchange-3.f90 | 2 +- 12 files changed, 105 insertions(+), 21 deletions(-) diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c index 090bc01..53355bb 100644 --- a/gcc/graphite-optimize-isl.c +++ b/gcc/graphite-optimize-isl.c @@ -34,6 +34,9 @@ along with GCC; see the file COPYING3. If not see #include #include #include +#ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS +#include +#endif #include "system.h" #include "coretypes.h" @@ -50,20 +53,78 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "dumpfile.h" -static isl_union_set * -scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) +#ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS + +/* get_schedule_for_node_st - Improve schedule for the schedule node. + Only Simple loop tiling is considered. */ + +static __isl_give isl_schedule_node * +get_schedule_for_node_st (__isl_take isl_schedule_node *node, void *user) { - int i; - poly_bb_p pbb; - isl_space *space = isl_set_get_space (scop->param_context); - isl_union_set *res = isl_union_set_empty (space); + if (user) +return node; - FOR_EACH_VEC_ELT (scop->pbbs, i, pbb) -res = isl_union_set_add_set (res, isl_set_copy (pbb->domain)); + if (isl_schedule_node_get_type (node) != isl_schedule_node_band + || isl_schedule_node_n_children (node) != 1) +return node; + + isl_space *space = isl_schedule_node_band_get_space (node); + unsigned dims = isl_space_dim (space, isl_dim_set); + isl_schedule_node *child = isl_schedule_node_get_child (node, 0); + isl_schedule_node_type type = isl_schedule_node_get_type (child); + isl_space_free (space); + isl_schedule_node_free (child); + + if (type != isl_schedule_node_leaf) +return node; + + if (dims <= 1 || !isl_schedule_node_band_get_permutable (node)) +{ + if (dump_file && dump_flags) + fprintf (dump_file, "not tiled\n"); + return node; +} + + /* Tile loops. */ + space = isl_schedule_node_band_get_space (node); + isl_multi_val *sizes = isl_multi_val_zero (space); + long tile_size = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE); + isl_ctx *ctx = isl_schedule_node_get_ctx (node); + + for (unsigned i = 0; i < dims; i++) +{ + sizes = isl_multi_val_set_val (sizes, i, +isl_val_int_from_si (ctx, tile_size)); + if (dump_file && dump_flags) + fprintf (dump_file, "tiled by %ld\n", tile_size); +} + + node = isl_schedule_node_band_tile (node, sizes); + node = isl_schedule_node_child (node, 0); - return res; + return node; } +/* get_schedule_map_st - Improve the schedule by performing other loop + optimizations. _st ending is for schedule tree version of this + function (see get_schedule_map below for the band forest version). + + Do a depth-first post-order traversal of the nodes in a schedule + tree and apply get_schedule_for_node_st on them to improve the schedule. + */ + +static __isl_give isl_union_map * +get_schedule_map_st (__isl_keep isl_schedule *schedule) +{ + + schedule = isl
[PATCH] Do not allow irreducible loops/regions in a scop
Irreducible regions are not going to be optimized by ISL so discard them early. Passes bootstrap and regtest. gcc/ChangeLog: 2015-11-06 Aditya Kumar * graphite-scop-detection.c (scop_detection::merge_sese): Entry and exit edges should not be a part of irreducible loop. (scop_detection::can_represent_loop_1): Loops should not be irreducible. (scop_detection::harmful_stmt_in_region): All the basic block should belong to reducible loops. --- gcc/graphite-scop-detection.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index ae8497d..b1f2ebc 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -794,7 +794,8 @@ scop_detection::merge_sese (sese_l first, sese_l second) const get_entry_bb (second)); edge entry = get_nearest_dom_with_single_entry (dom); - if (!entry) + + if (!entry || (entry->flags & EDGE_IRREDUCIBLE_LOOP)) return invalid_sese; basic_block pdom = nearest_common_dominator (CDI_POST_DOMINATORS, @@ -803,7 +804,8 @@ scop_detection::merge_sese (sese_l first, sese_l second) const pdom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, pdom); edge exit = get_nearest_pdom_with_single_exit (pdom); - if (!exit) + + if (!exit || (exit->flags & EDGE_IRREDUCIBLE_LOOP)) return invalid_sese; sese_l combined (entry, exit); @@ -923,6 +925,7 @@ scop_detection::can_represent_loop_1 (loop_p loop, sese_l scop) struct tree_niter_desc niter_desc; return single_exit (loop) +&& !(loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP) && number_of_iterations_exit (loop, single_exit (loop), &niter_desc, false) && niter_desc.control.no_overflow && (niter = number_of_latch_executions (loop)) @@ -1053,6 +1056,10 @@ scop_detection::harmful_stmt_in_region (sese_l scop) const if (!dominated_by_p (CDI_POST_DOMINATORS, bb, exit_bb)) continue; + /* The basic block should not be part of an irreducible loop. */ + if (bb->flags & BB_IRREDUCIBLE_LOOP) +return true; + if (harmful_stmt_in_bb (scop, bb)) return true; } -- 2.1.0.243.g30d45f7
[PATCH] Preserve the original program while using graphite.
Earlier, graphite used to translate portions of the original program after scop-detection in order to represent the SCoP into polyhedral model. This was required because each basic block was represented as independent basic block in the polyhedral model. So all the cross-basic-block dependencies were translated out-of-ssa. With this patch those dependencies are also exposed to the ISL, so there is no need to modify the original structure of the program. There is still a little bit of work required but we are posting it before the stage 1 closes. The complete patch would be ready in a few days. After this patch we should be able to enable graphite at some default optimization level. The patch is attached: Aditya Kumar Compiler Engineer 0001-Preserve-the-original-program-while-running-graphite.patch Description: Binary data
[PATCH] Preserve the original program while using graphite
Earlier, graphite used to translate portions of the original program after scop-detection in order to represent the SCoP into polyhedral model. This was required because each basic block was represented as independent basic block in the polyhedral model. So all the cross-basic-block dependencies were translated out-of-ssa. With this patch those dependencies are also exposed to the ISL, so there is no need to modify the original structure of the program. After this patch we should be able to enable graphite at some default optimization level. Highlights: Remove cross bb scalar to array translation For reductions, add support for more than just INT_CST Early bailout on codegen. Verify loop-closed ssa structure during copy of renames The uses of exprs should come from bb which dominates the bb Collect the init value of close phi in loop-guard Do not follow vuses for close-phi, postpone loop-close phi until the corresponding loop-phi is processed Bail out if no bb found to place cond/loop -phis Move insertion of liveouts at the end of codegen Insert loop-phis in the loop-header. This patch passes regtest and bootstrap with BOOT_CFLAGS='-O2 -fgraphite-identity -floop-nest-optimize' >From 706df301cdc8d2523408c663f62383308bc8a642 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Sat, 7 Nov 2015 13:39:23 -0600 Subject: [PATCH] Preserve the original program while using graphite. Earlier, graphite used to translate portions of the original program after scop-detection in order to represent the SCoP into polyhedral model. This was required because each basic block was represented as independent basic block in the polyhedral model. So all the cross-basic-block dependencies were translated out-of-ssa. With this patch those dependencies are also exposed to the ISL, so there is no need to modify the original structure of the program. After this patch we should be able to enable graphite at some default optimization level. Highlights: Remove cross bb scalar to array translation For reductions, add support for more than just INT_CST Early bailout on codegen. Verify loop-closed ssa structure during copy of renames The uses of exprs should come from bb which dominates the bb Collect the init value of close phi in loop-guard Do not follow vuses for close-phi, postpone loop-close phi until the corresponding loop-phi is processed Bail out if no bb found to place cond/loop -phis Move insertion of liveouts at the end of codegen Insert loop-phis in the loop-header. This patch passes regtest and bootstrap with BOOT_CFLAGS='-O2 -fgraphite-identity -floop-nest-optimize' 2015-11-08 Aditya Kumar Sebastian Pop * gcc.dg/graphite/isl-ast-gen-user-1.c: Remove calls to std. library. gcc/ChangeLog: 2015-11-08 Aditya Kumar Sebastian Pop * graphite-isl-ast-to-gimple.c (class translate_isl_ast_to_gimple): New member codegen_error (translate_isl_ast_for_loop): Remove call to single_succ_edge and early return. (translate_isl_ast_node_user): Early return in case of error. (translate_isl_ast_to_gimple::translate_isl_ast): Same. (translate_isl_ast_to_gimple::translate_pending_phi_nodes): New. (add_parameters_to_ivs_params): Remove macro. (graphite_regenerate_ast_isl): Add if_region pointer to region. * graphite-poly.c (new_poly_dr): Remove macro. (print_pdr): Same. (new_gimple_poly_bb): Same. (free_gimple_poly_bb): Same. (print_scop_params): Same. * graphite-poly.h (struct poly_dr): Same. (struct poly_bb): Add new_bb. (gbb_from_bb): Remove dead code. (pbb_from_bb): Same. * graphite-scop-detection.c (parameter_index_in_region_1): Same. (parameter_index_in_region): Same. (find_scop_parameters): Same. (build_cross_bb_scalars_def): New. (build_cross_bb_scalars_use): New. (graphite_find_cross_bb_scalar_vars): New (try_generate_gimple_bb): Reads and Writes. (build_alias_set): Move. (gather_bbs::before_dom_children): Gather bbs visited. (build_scops): call build_alias_set. * graphite-sese-to-poly.c (phi_arg_in_outermost_loop): Delete. (remove_simple_copy_phi): Delete. (remove_invariant_phi): Delete. (simple_copy_phi_p): Delete. (reduction_phi_p): Delete. (isl_id_for_dr): Remove unused param. (parameter_index_in_region_1): Remove macro usage. (set_scop_parameter_dim): Same. (add_param_constraints): Same. (add_conditions_to_constraints): Same (build_scop_iteration_domain): Same. (pdr_add_alias_set): Comment. (add_scalar_version_numbers): New. (build_poly_dr): ISL id. (build_scop_drs): Move. (build_poly_sr_1): Same. (insert_stmts): Remove. (build_poly_sr): New. (new_pbb_from_pbb): Delete. (insert_out_of_ssa_copy_on_edge): Delete. (create_zero_dim_array): Delete. (scalar_close_phi_node_p): Delete. (propagate_expr_outside_region): Delete. (rewrite_close_phi_out_of_ssa): Delete. (rewrite_phi_out_of_ssa): Delete. (rewrite_degenerate_phi): Delete. (rewrite_reductions_out_of_ssa): Delete. (rewrite_cross_bb_sca
Re: [PATCH] RFC: Enable graphite at -O3 -fprofile_use
Thanks all for supporting the idea of enabling graphite. We would collect compile-time, performance, code-size data and let you know. We are very positive that the compile time would have improved on an average, because of new scop detection algorithm. With the new set of patches graphite does not modify the code any more (if no optimizations have been done). This was one of the issues discussed at the Cauldron'15. Currently we have a couple more patches to put in before tomorrow, so collecting the data might take another few days, if that is okay. If we find serious regressions in any benchmark, we will address them as well. Aditya Kumar Compiler Engineer On Fri, Nov 13, 2015 at 5:32 AM, Richard Biener wrote: > On Fri, 13 Nov 2015, VandeVondele Joost wrote: > >> I'm all in favour of requiring isl and enabling graphite by default, but >> would suggest to enable it with -Ofast instead. >> >> One reason is that certainly extracting testcases from a PGO build is >> more difficult, and initially there will certainly be miscompiles with >> graphite (CP2K is right now). >> >> Furthermore, unless graphite is particularly effective with PGO (does it >> use average loop trip counts already?), I don't see a particular >> connection. > > The reason to choose FDO was so GRAPHITE can concentrate its computing > budget on the hot parts of a program (which profile estimation isn't > good enough identifying), reducing its compile-time cost. > > -Ofast isn't supposed to enable passes over -O3 so you're suggesting > to enable it with -O3 which I think is a bit premature. But we can > try doing that and revert at the end of stage3 if problems are just > too big. > > Richard.
[PATCH] [graphite] fix PR68693: Check for loop structure when extending the SCoP
The check for dominance while extending the scop assumed that multiple successors meant a loop which is not true in case of conditionals around the loop. Improved pretty printers for better debugging. gcc/ChangeLog: 2015-12-02 Aditya Kumar Sebastian Pop * graphite-scop-detection.c (dot_all_sese): New (dot_all_scops_1): Renamed to dot_all_sese. (dot_all_scops): Removed. (dot_sese): New. (dot_cfg): New. (scop_detection::get_nearest_dom_with_single_entry): Check that preds are from different loop levels. (scop_detection::get_nearest_pdom_with_single_exit): Check that succs are from different loop levels. (scop_detection::print_sese): Inlined. (scop_detection::print_edge): New. (scop_detection::merge_sese): Added dumps. * graphite.h: Add declarations. gcc/testsuite/ChangeLog: 2015-12-02 Aditya Kumar Sebastian Pop * gfortran.dg/graphite/pr68693.f90: New test. --- gcc/graphite-scop-detection.c | 153 - gcc/graphite.h | 6 +- gcc/testsuite/gfortran.dg/graphite/pr68693.f90 | 35 ++ 3 files changed, 110 insertions(+), 84 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/graphite/pr68693.f90 diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index 2f4231a..729a5fd 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -98,22 +98,16 @@ public: - "()" around the node number denotes the entry or the exit nodes of the SCOP. These are not part of SCoP. */ -static void -dot_all_scops_1 (FILE *file, vec scops) +DEBUG_FUNCTION void +dot_all_sese (FILE *file, vec& scops) { - basic_block bb; - edge e; - edge_iterator ei; - scop_p scop; - const char *color; - int i; - /* Disable debugging while printing graph. */ int tmp_dump_flags = dump_flags; dump_flags = 0; fprintf (file, "digraph all {\n"); + basic_block bb; FOR_ALL_BB_FN (bb, cfun) { int part_of_scop = false; @@ -126,12 +120,15 @@ dot_all_scops_1 (FILE *file, vec scops) fprintf (file, "CELLSPACING=\"0\">\n"); /* Select color for SCoP. */ - FOR_EACH_VEC_ELT (scops, i, scop) + sese_l *region; + int i; + FOR_EACH_VEC_ELT (scops, i, region) { - sese_l region = scop->scop_info->region; - if (bb_in_sese_p (bb, region) || (region.exit->dest == bb) - || (region.entry->dest == bb)) + bool sese_in_region = bb_in_sese_p (bb, *region); + if (sese_in_region || (region->exit->dest == bb) + || (region->entry->dest == bb)) { + const char *color; switch (i % 17) { case 0: /* red */ @@ -192,21 +189,21 @@ dot_all_scops_1 (FILE *file, vec scops) fprintf (file, "", color); - if (!bb_in_sese_p (bb, region)) + if (!sese_in_region) fprintf (file, " ("); - if (bb == region.entry->dest && bb == region.exit->dest) + if (bb == region->entry->dest && bb == region->exit->dest) fprintf (file, " %d*# ", bb->index); - else if (bb == region.entry->dest) + else if (bb == region->entry->dest) fprintf (file, " %d* ", bb->index); - else if (bb == region.exit->dest) + else if (bb == region->exit->dest) fprintf (file, " %d# ", bb->index); else fprintf (file, " %d ", bb->index); fprintf (file, "{lp_%d}", bb->loop_father->num); - if (!bb_in_sese_p (bb, region)) + if (!sese_in_region) fprintf (file, ")"); fprintf (file, "\n"); @@ -225,6 +222,8 @@ dot_all_scops_1 (FILE *file, vec scops) FOR_ALL_BB_FN (bb, cfun) { + edge e; + edge_iterator ei; FOR_EACH_EDGE (e, ei, bb->succs) fprintf (file, "%d -> %d;\n", bb->index, e->dest->index); } @@ -235,52 +234,29 @@ dot_all_scops_1 (FILE *file, vec scops) dump_flags = tmp_dump_flags; } -/* Display all SCoPs using dotty. */ - -DEBUG_FUNCTION void -dot_all_scops (vec scops) -{ - /* When debugging, enable the following code. This cannot be used - in production compilers because it calls "system". */ -#if 0 - int x; - FILE *stream = fopen ("/tmp/allscops.dot", "w"); - gcc_assert (stream); - - dot_all_scops_1 (stream, scops); - fclose (stream); - - x = system ("dotty /tmp/allscops.dot &"); -#else - dot_all_scop
[PATCH 1/2] [graphite] check that all the scev applied ops have are dominated by their defs
2015-12-02 Aditya Kumar Sebastian Pop * gcc.dg/graphite/id-29.c: New test. gcc/ChangeLog: 2015-12-02 Aditya Kumar Sebastian Pop * graphite-isl-ast-to-gimple.c (translate_isl_ast_node_user): Improve debug. (get_rename_from_scev): Check that all the ops in an expression have their uses dominated by corresponding defs. --- gcc/graphite-isl-ast-to-gimple.c | 35 ++- gcc/testsuite/gcc.dg/graphite/id-29.c | 17 + 2 files changed, 39 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/graphite/id-29.c diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c index 20eb80f..ed2a896 100644 --- a/gcc/graphite-isl-ast-to-gimple.c +++ b/gcc/graphite-isl-ast-to-gimple.c @@ -1116,16 +1116,17 @@ translate_isl_ast_node_user (__isl_keep isl_ast_node *node, build_iv_mapping (iv_map, gbb, user_expr, ip, pbb->scop->scop_info->region); isl_ast_expr_free (user_expr); + basic_block old_bb = GBB_BB (gbb); if (dump_file) { - fprintf (dump_file, "[codegen] copying from basic block\n"); + fprintf (dump_file, + "[codegen] copying from bb_%d on edge (bb_%d, bb_%d)\n", + old_bb->index, next_e->src->index, next_e->dest->index); print_loops_bb (dump_file, GBB_BB (gbb), 0, 3); - fprintf (dump_file, "[codegen] to new basic block\n"); - print_loops_bb (dump_file, next_e->src, 0, 3); + } - next_e = copy_bb_and_scalar_dependences (GBB_BB (gbb), next_e, - iv_map); + next_e = copy_bb_and_scalar_dependences (old_bb, next_e, iv_map); iv_map.release (); @@ -1598,8 +1599,8 @@ translate_isl_ast_to_gimple::collect_all_ssa_names (tree new_expr, } } -/* This is abridged version of the function: - tree.c:substitute_in_expr (tree exp, tree f, tree r). */ +/* This is abridged version of the function copied from: + tree.c:substitute_in_expr (tree exp, tree f, tree r). */ static tree substitute_ssa_name (tree exp, tree f, tree r) @@ -1804,15 +1805,23 @@ get_rename_from_scev (tree old_name, gimple_seq *stmts, loop_p loop, } new_expr = rename_all_uses (new_expr, new_bb, old_bb); - /* We should check all the operands and all of them should dominate the use at + + /* We check all the operands and all of them should dominate the use at new_expr. */ - if (TREE_CODE (new_expr) == SSA_NAME) + auto_vec new_ssa_names; + collect_all_ssa_names (new_expr, &new_ssa_names); + int i; + tree new_ssa_name; + FOR_EACH_VEC_ELT (new_ssa_names, i, new_ssa_name) { - basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (new_expr)); - if (bb && !dominated_by_p (CDI_DOMINATORS, new_bb, bb)) + if (TREE_CODE (new_ssa_name) == SSA_NAME) { - codegen_error = true; - return build_zero_cst (TREE_TYPE (old_name)); + basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (new_ssa_name)); + if (bb && !dominated_by_p (CDI_DOMINATORS, new_bb, bb)) + { + codegen_error = true; + return build_zero_cst (TREE_TYPE (old_name)); + } } } diff --git a/gcc/testsuite/gcc.dg/graphite/id-29.c b/gcc/testsuite/gcc.dg/graphite/id-29.c new file mode 100644 index 000..9554c0b --- /dev/null +++ b/gcc/testsuite/gcc.dg/graphite/id-29.c @@ -0,0 +1,17 @@ +/* { dg-options "-floop-nest-optimize -O2" } */ + +typedef struct { + unsigned lp, lc; + short *l; + short p[1 << 4]; +} foo; + +void LzmaEnc_Init(foo *p) { + unsigned i; + unsigned num = 0x300 << (p->lp + p->lc); + for (i = 0; i < num; i++) +p->l[i] = ((1 << 11) >> 1); + + for (i = 0; i < (1 << 4); i++) +p->p[i] = ((1 << 11) >> 1); +} -- 2.6.3
[PATCH] Break when has_sample is true
gcc/ChangeLog: 2015-05-26 Aditya Kumar * auto-profile.c (afdo_calculate_branch_prob): Break once has_sample is true. --- gcc/auto-profile.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c index 55dd8d1..1936487 100644 --- a/gcc/auto-profile.c +++ b/gcc/auto-profile.c @@ -1365,8 +1365,13 @@ afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge) bool has_sample = false; FOR_EACH_BB_FN (bb, cfun) - if (bb->count > 0) -has_sample = true; + { +if (bb->count > 0) + { + has_sample = true; + break; + } + } if (!has_sample) return; -- 2.1.0.243.g30d45f7
[PATCH] Vectorize loops with parameterized loop bounds
w.r.t. the PR48052, here is the patch which finds out if scev would wrap or not. The patch symbolically evaluates if valid_niter >= loop->nb_iterations is true. In that case the scev would not wrap. Currently, we only look for two special 'patterns', which are sufficient to analyze the test cases. valid_niter = ~s (= UNIT_MAX - s) We have to prove that valid_niter >= loop->nb_iterations Pattern1 loop->nb_iterations: s>= e ? s - e : 0 In this case we prove that valid_niter >= loop->nb_iterations in both the cases i.e., when s >= e and when not. Pattern2 loop->nb_iterations: (e - s) -1 In this case we prove valid_niter >= loop->nb_iterations, by simple analysis that UINT_MAXi >= e is true in all cases. We symbolically evaluate conditionals, and subtraction when additional constraints are provided. Adding this evaluation mechanism helps vectorize some loops on 64bit machines because on 64bit, a typecast appears which causes scev to bail out. This patch passes bootstrap and no additional failures on regression test. gcc/ChangeLog: 2015-05-21 Aditya Kumar 2015-05-21 Sebastian Pop 2015-05-21 Abderrazek Zaafrani * gcc.dg/vect/pr48052.c: New test. * tree-ssa-loop-niter.c (fold_binary_cond_p): Fold a conditional operation when additional constraints are available. (fold_binary_minus_p): Fold a subtraction operations of the form (A - B -1) when additional constraints are available. (scev_probably_wraps_p): Use the above two functions to find whether valid_niter>= loop->nb_iterations. --- gcc/testsuite/gcc.dg/vect/pr48052.c | 26 +++ gcc/tree-ssa-loop-niter.c | 138 2 files changed, 164 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/pr48052.c diff --git a/gcc/testsuite/gcc.dg/vect/pr48052.c b/gcc/testsuite/gcc.dg/vect/pr48052.c new file mode 100644 index 000..534fb54 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr48052.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ + +int foo(int* A, int* B, unsigned start, unsigned BS) +{ + int s; + for (unsigned k = start; k < start + BS; k++) +{ + s += A[k] * B[k]; +} + + return s; +} + +int bar(int* A, int* B, unsigned BS) +{ + int s; + for (unsigned k = 0; k < BS; k++) +{ + s += A[k] * B[k]; +} + + return s; +} diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index 042f8df..a14f7e5 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -3773,6 +3773,133 @@ nowrap_type_p (tree type) return false; } +/* Return true when op0>= op1. + For example: + Where, op0 = ~start_3(D); + op1 = start_3(D) <= stop_6(D) ? stop_6(D) - start_3(D) : 0; + In this case op0 = UINT_MAX - start_3(D); + So, op0>= op1 in all cases because UINT_MAX>= stop_6(D), + when TREE_TYPE(stop_6(D)) == unsigned int; */ +bool +fold_binary_cond_p (enum tree_code code, tree type, tree op0, tree op1) +{ + gcc_assert (type == boolean_type_node); + + if (TREE_TYPE (op0) != TREE_TYPE (op1)) +return false; + + /* TODO: Handle other operations. */ + if (code != GE_EXPR) +return false; + + /* The type of op0 and op1 should be unsigned. */ + if (!TYPE_UNSIGNED (TREE_TYPE (op0))) +return false; + + if ((TREE_CODE (op0) != BIT_NOT_EXPR) || (TREE_CODE (op1) != COND_EXPR)) +return false; + + /* We have to show that in both the cases, + (when cond is true and when cond is false) op (op0, op1) is true. */ + tree neg_op0 = TREE_OPERAND (op0, 0); + tree cond_op1 = TREE_OPERAND (op1, 0); + tree true_op1 = TREE_OPERAND (op1, 1); + tree false_op1 = TREE_OPERAND (op1, 2); + + gcc_assert (neg_op0 && cond_op1 && true_op1 && false_op1); + + /* When cond is false. Evaluate op (op0, false_op1). */ + tree running_exp = fold_binary (code, boolean_type_node, op0, false_op1); + + if (running_exp == NULL || integer_zerop (running_exp)) +/* TODO: Handle more cases here. */ +return false; + + /* When cond is true. Evaluate op (op0, true_op1). */ + running_exp = fold_binary (code, boolean_type_node, op0, true_op1); + + if (running_exp != NULL && integer_nonzerop (running_exp)) +return true; + + tree smaller, bigger; + if (TREE_CODE (cond_op1) == LE_EXPR) +{ + smaller = TREE_OPERAND (cond_op1, 0); + bigger = TREE_OPERAND (cond_op1, 1); +} + else +return false; + + if (TREE_CODE (true_op1) == MINUS_EXPR) +{ + tree minuend = TREE_OPERAND (true_op1, 0); + tree subtrahend = TREE_OPERAND (true_op1, 1); + + if (subtrahend == neg_op0 && subtrahend == smaller && minuend == bigger) + { + tree
[PATCH] Do not constrain on REAL_TYPE
From: Aditya Kumar gcc/ChangeLog: 2015-06-24 Aditya Kumar Sebastian Pop * graphite-sese-to-poly.c (parameter_index_in_region): Discard REAL_TYPE parameters. (scan_tree_for_params): Handle REAL_CST in scan_tree_for_params. (add_conditions_to_domain): Do not constrain on REAL_TYPE. --- gcc/graphite-sese-to-poly.c | 8 1 file changed, 8 insertions(+) diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index 271c499..5b37796 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -796,6 +796,9 @@ parameter_index_in_region (tree name, sese region) gcc_assert (SESE_ADD_PARAMS (region)); + /* Cannot constrain on REAL_TYPE parameters. */ + if (TREE_CODE (TREE_TYPE (name)) == REAL_TYPE) +return -1; i = SESE_PARAMS (region).length (); SESE_PARAMS (region).safe_push (name); return i; @@ -915,6 +918,7 @@ scan_tree_for_params (sese s, tree e) case INTEGER_CST: case ADDR_EXPR: +case REAL_CST: break; default: @@ -1194,6 +1198,10 @@ add_conditions_to_domain (poly_bb_p pbb) { case GIMPLE_COND: { +/* Don't constrain on REAL_TYPE. */ + if (TREE_CODE (TREE_TYPE (gimple_cond_lhs (stmt))) == REAL_TYPE) + break; + gcond *cond_stmt = as_a (stmt); enum tree_code code = gimple_cond_code (cond_stmt); -- 2.1.0.243.g30d45f7
[PATCH] Discard Scops for which entry==exit
In this patch we discard the scops where entry and exit are the same BB. This is an effort to remove graphite-scop-detection.c:limit_scops. Removing the limit_scops function introduces correctness regressions. We are making relevant changes in incremental steps to fix those bugs, and finally we intend to remove limit_scops. 2015-06-29 Aditya Kumar Sebastian Pop * graphite-scop-detection.c (build_scops_1): Discard scops for which entry==exit --- gcc/graphite-scop-detection.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index e8ddecd..f57cc4a 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -810,7 +810,14 @@ build_scops_1 (basic_block current, loop_p outermost_loop, { open_scop.exit = sinfo.exit; gcc_assert (open_scop.exit); - scops->safe_push (open_scop); + if (open_scop.entry != open_scop.exit) + scops->safe_push (open_scop); + else + { + sinfo.difficult = true; + sinfo.exits = false; + sinfo.exit = NULL; + } } result.exit = sinfo.exit; -- 2.1.0.243.g30d45f7
[PATCH] Graphite cannot handle return stmt
No regressions. 2015-06-29 Aditya Kumar Sebastian Pop * graphite-scop-detection.c (stmt_simple_for_scop_p): Bail out in case of a return statement. --- gcc/graphite-scop-detection.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index e8ddecd..a10702e 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -365,6 +365,8 @@ stmt_simple_for_scop_p (basic_block scop_entry, loop_p outermost_loop, switch (gimple_code (stmt)) { case GIMPLE_RETURN: + return false; + case GIMPLE_LABEL: return true; -- 2.1.0.243.g30d45f7
[PATCH] Graphite cannot handle return stmt
No regressions. 2015-06-29 Aditya Kumar Sebastian Pop * graphite-scop-detection.c (stmt_simple_for_scop_p): Bail out in case of a return statement. --- gcc/graphite-scop-detection.c | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index e8ddecd..a14142f 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -364,7 +364,6 @@ stmt_simple_for_scop_p (basic_block scop_entry, loop_p outermost_loop, switch (gimple_code (stmt)) { -case GIMPLE_RETURN: case GIMPLE_LABEL: return true; -- 2.1.0.243.g30d45f7
[PATCH] Restore previous change for gimple_phi_iterator
gcc/ChangeLog: 2015-07-02 Aditya Kumar Sebastian Pop * graphite-sese-to-poly.c (rewrite_cross_bb_scalar_deps): Point iterator to use_stmt. Bug introduced by patch: git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@217787 --- gcc/graphite-sese-to-poly.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index 271c499..78f10e4 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -2458,11 +2458,10 @@ rewrite_cross_bb_scalar_deps (scop_p scop, gimple_stmt_iterator *gsi) handle_scalar_deps_crossing_scop_limits (scop, def, stmt); FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) -if (gimple_code (use_stmt) == GIMPLE_PHI - && (res = true)) +if (gphi *phi = dyn_cast (use_stmt)) { - gphi_iterator psi = gsi_start_phis (gimple_bb (use_stmt)); - + res = true; + gphi_iterator psi = gsi_for_phi (phi); if (scalar_close_phi_node_p (gsi_stmt (psi))) rewrite_close_phi_out_of_ssa (scop, &psi); else -- 2.1.0.243.g30d45f7
[PATCH] remove whitespace
--- libstdc++-v3/include/bits/shared_ptr_base.h | 26 +- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h b/libstdc++-v3/include/bits/shared_ptr_base.h index 787dc9b..60b825c 100644 --- a/libstdc++-v3/include/bits/shared_ptr_base.h +++ b/libstdc++-v3/include/bits/shared_ptr_base.h @@ -70,7 +70,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION public: virtual char const* what() const noexcept; -virtual ~bad_weak_ptr() noexcept; +virtual ~bad_weak_ptr() noexcept; }; // Substitute for bad_weak_ptr object in the case of -fno-exceptions. @@ -108,31 +108,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION class _Sp_counted_base : public _Mutex_base<_Lp> { -public: +public: _Sp_counted_base() noexcept : _M_use_count(1), _M_weak_count(1) { } - + virtual ~_Sp_counted_base() noexcept { } - + // Called when _M_use_count drops to zero, to release the resources // managed by *this. virtual void _M_dispose() noexcept = 0; - + // Called when _M_weak_count drops to zero. virtual void _M_destroy() noexcept { delete this; } - + virtual void* _M_get_deleter(const std::type_info&) noexcept = 0; void _M_add_ref_copy() { __gnu_cxx::__atomic_add_dispatch(&_M_use_count, 1); } - + void _M_add_ref_lock(); @@ -167,7 +167,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } } - + void _M_weak_add_ref() noexcept { __gnu_cxx::__atomic_add_dispatch(&_M_weak_count, 1); } @@ -189,7 +189,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_destroy(); } } - + long _M_get_use_count() const noexcept { @@ -198,7 +198,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __atomic_load_n(&_M_use_count, __ATOMIC_RELAXED); } -private: +private: _Sp_counted_base(_Sp_counted_base const&) = delete; _Sp_counted_base& operator=(_Sp_counted_base const&) = delete; @@ -229,7 +229,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } - template<> + template<> inline void _Sp_counted_base<_S_atomic>:: _M_add_ref_lock() @@ -241,10 +241,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__count == 0) __throw_bad_weak_ptr(); // Replace the current counter value with the old value + 1, as - // long as it's not changed meanwhile. + // long as it's not changed meanwhile. } while (!__atomic_compare_exchange_n(&_M_use_count, &__count, __count + 1, - true, __ATOMIC_ACQ_REL, + true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)); } -- 2.6.3
[PATCH] Remove whitespace
--- libstdc++-v3/ChangeLog | 5 + libstdc++-v3/include/bits/algorithmfwd.h| 206 ++-- libstdc++-v3/include/bits/shared_ptr_base.h | 26 ++-- 3 files changed, 121 insertions(+), 116 deletions(-) diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 299bce6..88d908d 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,8 @@ +2016-08-22 Aditya Kumar + * libstdc++-v3/include/bits/algorithmfwd.h: Remove WS + * libstdc++-v3/include/bits/shared_ptr_base.h: Remove WS + + 2016-08-22 Gleb Natapov PR libstdc++/68297 diff --git a/libstdc++-v3/include/bits/algorithmfwd.h b/libstdc++-v3/include/bits/algorithmfwd.h index c648169..a61fea7 100644 --- a/libstdc++-v3/include/bits/algorithmfwd.h +++ b/libstdc++-v3/include/bits/algorithmfwd.h @@ -168,12 +168,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * * These algorithms are variations of a classic binary search, and * all assume that the sequence being searched is already sorted. - * + * * The number of comparisons will be logarithmic (and as few as * possible). The number of steps through the sequence will be * logarithmic for random-access iterators (e.g., pointers), and * linear otherwise. - * + * * The LWG has passed Defect Report 270, which notes: The * proposed resolution reinterprets binary search. Instead of * thinking about searching for a value in a sorted range, we view @@ -202,11 +202,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif template -bool +bool binary_search(_FIter, _FIter, const _Tp&); template -bool +bool binary_search(_FIter, _FIter, const _Tp&, _Compare); #if __cplusplus > 201402L @@ -222,7 +222,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif template -_OIter +_OIter copy(_IIter, _IIter, _OIter); template @@ -251,7 +251,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION equal_range(_FIter, _FIter, const _Tp&, _Compare); template -void +void fill(_FIter, _FIter, const _Tp&); template @@ -282,36 +282,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // generate_n template -bool +bool includes(_IIter1, _IIter1, _IIter2, _IIter2); template -bool +bool includes(_IIter1, _IIter1, _IIter2, _IIter2, _Compare); template -void +void inplace_merge(_BIter, _BIter, _BIter); template -void +void inplace_merge(_BIter, _BIter, _BIter, _Compare); #if __cplusplus >= 201103L template -bool +bool is_heap(_RAIter, _RAIter); template -bool +bool is_heap(_RAIter, _RAIter, _Compare); template -_RAIter +_RAIter is_heap_until(_RAIter, _RAIter); template -_RAIter +_RAIter is_heap_until(_RAIter, _RAIter, _Compare); template @@ -328,63 +328,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION is_permutation(_FIter1, _FIter1, _FIter2, _BinaryPredicate); template -bool +bool is_sorted(_FIter, _FIter); template -bool +bool is_sorted(_FIter, _FIter, _Compare); template -_FIter +_FIter is_sorted_until(_FIter, _FIter); template -_FIter +_FIter is_sorted_until(_FIter, _FIter, _Compare); #endif template -void +void iter_swap(_FIter1, _FIter2); template -_FIter +_FIter lower_bound(_FIter, _FIter, const _Tp&); template -_FIter +_FIter lower_bound(_FIter, _FIter, const _Tp&, _Compare); template -void +void make_heap(_RAIter, _RAIter); template -void +void make_heap(_RAIter, _RAIter, _Compare); - template + template _GLIBCXX14_CONSTEXPR -const _Tp& +const _Tp& max(const _Tp&, const _Tp&); template _GLIBCXX14_CONSTEXPR -const _Tp& +const _Tp& max(const _Tp&, const _Tp&, _Compare); // max_element // merge - template + template _GLIBCXX14_CONSTEXPR -const _Tp& +const _Tp& min(const _Tp&, const _Tp&); template _GLIBCXX14_CONSTEXPR -const _Tp& +const _Tp& min(const _Tp&, const _Tp&, _Compare); // min_element @@ -392,7 +392,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #if __cplusplus >= 201103L template _GLIBCXX14_CONSTEXPR -pair +pair minmax(const _Tp&, const _Tp&); template @@ -444,11 +444,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // mismatch template -bool +bool next_permutation(_BIter, _BIter); template -bool +bool next_permutation(_BIter, _BIter, _Compare); #if __cplusplus >= 201103L @@ -482,65 +482,65 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif template -void +void pop_heap(_RAIter, _RAIter); template -void +void pop_heap(_RAIter,
RE: [PATCH] improve string find algorithm
Yes, we do. Sorry for the mistake, it happened because I first wrote this for libcxx (https://reviews.llvm.org/D27068) and while porting that line got missed. Thanks, -Aditya diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc index df1e8dd..7942ee6 100644 --- a/libstdc++-v3/include/bits/basic_string.tcc +++ b/libstdc++-v3/include/bits/basic_string.tcc @@ -1194,14 +1194,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__n == 0) return __pos <= __size ? __pos : npos; - if (__n <= __size) - { - for (; __pos <= __size - __n; ++__pos) - if (traits_type::eq(__data[__pos], __s[0]) - && traits_type::compare(__data + __pos + 1, - __s + 1, __n - 1) == 0) - return __pos; - } + if (__n > __size) + return npos; + + const _CharT __elem0 = __s[0]; + const _CharT* __first1 = __data; + const _CharT* __first2 = __s; + const _CharT* __last1 = __data + __size; + ptrdiff_t __len2 = __n - __pos; + + while (true) { +ptrdiff_t __len1 = __last1 - __first1; + if (__len1 < __len2) + return npos; + + // Find the first match. + __first1 = traits_type::find(__first1, __len1 - __len2 + 1, __elem0); + if (__first1 == 0) + return npos; + // Compare the full string when first match is found. +if (traits_type::compare(__first1, __first2, __len2) == 0) + return __first1 - __data; + +++__first1; + } + return npos; } -- 2.6.3 -Original Message- From: Jonathan Wakely [mailto:jwak...@redhat.com] Sent: Friday, January 06, 2017 7:35 AM To: Aditya Kumar Cc: libstd...@gcc.gnu.org; gcc-patches@gcc.gnu.org; hiradi...@msn.com Subject: Re: [PATCH] improve string find algorithm On 07/12/16 11:46 -0600, Aditya Kumar wrote: >Here is an improved version of basic_string::find. The idea is to >split the string find in two parts: >1. search for the first match by using traits_type::find (this gets converted to memchr for x86) >2. see if there is a match (this gets converted to memcmp for x86) > >Passes bootstrap on x86-64. > >The patch results in good improvements on a synthetic test case I wrote using the google-benchmark. >following are the results. > > >Branch: master without patch >$ ./bin/string.libcxx.out >Run on (24 X 1899.12 MHz CPU s) >2016-12-06 16:41:55 >***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. >Benchmark Time CPU Iterations >- >BM_StringFindNoMatch/10 8 ns 8 ns 81880927 >BM_StringFindNoMatch/6452 ns 52 ns 13235018 >BM_StringFindNoMatch/512 355 ns355 ns1962488 >BM_StringFindNoMatch/4k 2769 ns 2772 ns 249090 >BM_StringFindNoMatch/32k22598 ns 22619 ns 30984 >BM_StringFindNoMatch/128k 89745 ns 89830 ns 7996 >BM_StringFindAllMatch/1 7 ns 7 ns 102893835 >BM_StringFindAllMatch/8 9 ns 9 ns 75403364 >BM_StringFindAllMatch/64 12 ns 12 ns 60766893 >BM_StringFindAllMatch/512 31 ns 31 ns 23163999 >BM_StringFindAllMatch/4k 141 ns141 ns4980386 >BM_StringFindAllMatch/32k1402 ns 1403 ns 483581 >BM_StringFindAllMatch/128k 5604 ns 5609 ns 126123 >BM_StringFindMatch1/1 44430 ns 44473 ns 15804 >BM_StringFindMatch1/8 44315 ns 44357 ns 15741 >BM_StringFindMatch1/64 44689 ns 44731 ns 15712 >BM_StringFindMatch1/512 44247 ns 44290 ns 15724 >BM_StringFindMatch1/4k 45010 ns 45053 ns 15678 >BM_StringFindMatch1/32k 45717 ns 45761 ns 15278 >BM_StringFindMatch2/1 44307 ns 44349 ns 15730 >BM_StringFindMatch2/8 44631 ns 44674 ns 15721 >BM_StringFindMatch2/64 44300 ns 44342 ns 15750 >BM_StringFindMatch2/512 44239 ns 44281 ns 15713 >BM_StringFindMatch2/4k 44886 ns 44928 ns 15787 > >Branch: master with patch >$ ./bin/string.libcxx.out >Run on (24 X 2892.28 MHz CPU s) >2016-12-06 18:51:38 >***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. >Benchmark Time CPU Iterations >- >BM_StringFindNoMatch/1011 ns 11 ns 63049677 >BM_StringFindNoMatch/6412 ns
RE: [PATCH] improve string find algorithm
Thanks for the correction and updating the comments. -Aditya -Original Message- From: Jonathan Wakely [mailto:jwak...@redhat.com] Sent: Friday, January 06, 2017 2:21 PM To: Aditya Kumar Cc: libstd...@gcc.gnu.org; gcc-patches@gcc.gnu.org; hiradi...@msn.com Subject: Re: [PATCH] improve string find algorithm On 06/01/17 08:42 -0600, Aditya Kumar wrote: >Yes, we do. >Sorry for the mistake, it happened because I first wrote this for >libcxx (https://reviews.llvm.org/D27068) and while porting that line >got missed. > >Thanks, >-Aditya > > >diff --git a/libstdc++-v3/include/bits/basic_string.tcc >b/libstdc++-v3/include/bits/basic_string.tcc >index df1e8dd..7942ee6 100644 >--- a/libstdc++-v3/include/bits/basic_string.tcc >+++ b/libstdc++-v3/include/bits/basic_string.tcc >@@ -1194,14 +1194,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION > if (__n == 0) > return __pos <= __size ? __pos : npos; > >- if (__n <= __size) >- { >- for (; __pos <= __size - __n; ++__pos) >- if (traits_type::eq(__data[__pos], __s[0]) >- && traits_type::compare(__data + __pos + 1, >- __s + 1, __n - 1) == 0) >- return __pos; >- } >+ if (__n > __size) >+ return npos; >+ >+ const _CharT __elem0 = __s[0]; >+ const _CharT* __first1 = __data; >+ const _CharT* __first2 = __s; >+ const _CharT* __last1 = __data + __size; >+ ptrdiff_t __len2 = __n - __pos; What's this variable for? >+ while (true) { >+ptrdiff_t __len1 = __last1 - __first1; >+ if (__len1 < __len2) >+ return npos; >+ >+ // Find the first match. >+ __first1 = traits_type::find(__first1, __len1 - __len2 + 1, __elem0); >+ if (__first1 == 0) >+ return npos; >+ // Compare the full string when first match is found. >+if (traits_type::compare(__first1, __first2, __len2) == 0) >+ return __first1 - __data; >+ >+++__first1; >+ } >+ > return npos; > } This is still wrong, consider std::string("abcd").find("ab", 2) This should return npos, but you return 0. The postcondition for the function is that the return value is not less than the pos argument. The attached patch should be a correct version of the improved algorithm.
[PATCH] Added noexcept on constructors
Thanks for the feedback. Updated patch is below. The noexcept on definition and the declaration of constructors _Sp_locker do not match. ChangeLog 2016-12-05 Aditya Kumar * src/c++11/shared_ptr.cc (_Sp_locker::_Sp_locker(const void* p)): Added noexcept on constructors. _Sp_locker::_Sp_locker(const void* p1, const void* p2): Same --- libstdc++-v3/src/c++11/shared_ptr.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/src/c++11/shared_ptr.cc b/libstdc++-v3/src/c++11/shared_ptr.cc index 9028040..b4addd0 100644 --- a/libstdc++-v3/src/c++11/shared_ptr.cc +++ b/libstdc++-v3/src/c++11/shared_ptr.cc @@ -56,7 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return _Hash_impl::hash(addr) & __gnu_internal::mask; } } - _Sp_locker::_Sp_locker(const void* p) + _Sp_locker::_Sp_locker(const void* p) noexcept { if (__gthread_active_p()) { @@ -67,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_key1 = _M_key2 = __gnu_internal::invalid; } - _Sp_locker::_Sp_locker(const void* p1, const void* p2) + _Sp_locker::_Sp_locker(const void* p1, const void* p2) noexcept { if (__gthread_active_p()) { -- 2.6.3
RE: [PATCH] Added noexcept on constructors
Since g++ correctly finds this error, I'm wondering how libstdc++-v3/src/c++11/shared_ptr.cc was compiling correctly before. With a small test case: $ cat tes.cpp class A { public: A() noexcept; }; A::A() { } $ ../../build/gcc/xg++ -B ../../build/gcc -c test.cpp -std=gnu++14 test.cpp:6:1: error: declaration of 'A::A()' has a different exception specifier A::A() { ^ test.cpp:3:3: note: from previous declaration 'A::A() noexcept' A() noexcept; Thanks, -Aditya -Original Message- From: Jonathan Wakely [mailto:jwak...@redhat.com] Sent: Tuesday, December 06, 2016 4:26 AM To: Aditya Kumar Cc: gcc-patches@gcc.gnu.org; libstd...@gcc.gnu.org; ville.voutilai...@gmail.com Subject: Re: [PATCH] Added noexcept on constructors On 05/12/16 15:34 -0600, Aditya Kumar wrote: >Thanks for the feedback. Updated patch is below. > > >The noexcept on definition and the declaration of constructors >_Sp_locker do not match. Thanks for the patch (and thanks, Ville, for reviewing it). I'll commit this today.
[PATCH] improve string find algorithm
Here is an improved version of basic_string::find. The idea is to split the string find in two parts: 1. search for the first match by using traits_type::find (this gets converted to memchr for x86) 2. see if there is a match (this gets converted to memcmp for x86) Passes bootstrap on x86-64. The patch results in good improvements on a synthetic test case I wrote using the google-benchmark. following are the results. Branch: master without patch $ ./bin/string.libcxx.out Run on (24 X 1899.12 MHz CPU s) 2016-12-06 16:41:55 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. Benchmark Time CPU Iterations - BM_StringFindNoMatch/10 8 ns 8 ns 81880927 BM_StringFindNoMatch/6452 ns 52 ns 13235018 BM_StringFindNoMatch/512 355 ns355 ns1962488 BM_StringFindNoMatch/4k 2769 ns 2772 ns 249090 BM_StringFindNoMatch/32k22598 ns 22619 ns 30984 BM_StringFindNoMatch/128k 89745 ns 89830 ns 7996 BM_StringFindAllMatch/1 7 ns 7 ns 102893835 BM_StringFindAllMatch/8 9 ns 9 ns 75403364 BM_StringFindAllMatch/64 12 ns 12 ns 60766893 BM_StringFindAllMatch/512 31 ns 31 ns 23163999 BM_StringFindAllMatch/4k 141 ns141 ns4980386 BM_StringFindAllMatch/32k1402 ns 1403 ns 483581 BM_StringFindAllMatch/128k 5604 ns 5609 ns 126123 BM_StringFindMatch1/1 44430 ns 44473 ns 15804 BM_StringFindMatch1/8 44315 ns 44357 ns 15741 BM_StringFindMatch1/64 44689 ns 44731 ns 15712 BM_StringFindMatch1/512 44247 ns 44290 ns 15724 BM_StringFindMatch1/4k 45010 ns 45053 ns 15678 BM_StringFindMatch1/32k 45717 ns 45761 ns 15278 BM_StringFindMatch2/1 44307 ns 44349 ns 15730 BM_StringFindMatch2/8 44631 ns 44674 ns 15721 BM_StringFindMatch2/64 44300 ns 44342 ns 15750 BM_StringFindMatch2/512 44239 ns 44281 ns 15713 BM_StringFindMatch2/4k 44886 ns 44928 ns 15787 Branch: master with patch $ ./bin/string.libcxx.out Run on (24 X 2892.28 MHz CPU s) 2016-12-06 18:51:38 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. Benchmark Time CPU Iterations - BM_StringFindNoMatch/1011 ns 11 ns 63049677 BM_StringFindNoMatch/6412 ns 12 ns 57259381 BM_StringFindNoMatch/512 27 ns 27 ns 25495432 BM_StringFindNoMatch/4k 130 ns130 ns5301301 BM_StringFindNoMatch/32k 858 ns859 ns 824048 BM_StringFindNoMatch/128k4091 ns 4095 ns 171493 BM_StringFindAllMatch/114 ns 14 ns 53023977 BM_StringFindAllMatch/814 ns 14 ns 51516536 BM_StringFindAllMatch/64 17 ns 17 ns 40992668 BM_StringFindAllMatch/512 37 ns 37 ns 18503267 BM_StringFindAllMatch/4k 153 ns153 ns4494458 BM_StringFindAllMatch/32k1460 ns 1461 ns 483380 BM_StringFindAllMatch/128k 5801 ns 5806 ns 120680 BM_StringFindMatch1/12062 ns 2064 ns 333144 BM_StringFindMatch1/82057 ns 2059 ns 335496 BM_StringFindMatch1/64 2083 ns 2085 ns 341469 BM_StringFindMatch1/512 2134 ns 2136 ns 336880 BM_StringFindMatch1/4k 2309 ns 2312 ns 308745 BM_StringFindMatch1/32k 3413 ns 3417 ns 208206 BM_StringFindMatch2/12053 ns 2055 ns 341523 BM_StringFindMatch2/82061 ns 2063 ns 343999 BM_StringFindMatch2/64 2075 ns 2077 ns 338479 BM_StringFindMatch2/512 2102 ns 2104 ns 332276 BM_StringFindMatch2/4k 2286 ns 2288 ns 300416 BM_StringFindMatch2/32k 3385 ns 3388 ns 204158 ChangeLog: 2016-12-07 Aditya Kumar * include/bits/basic_string.tcc(find(const _CharT* __s, size_type __pos, size_type __n) const)): Improve the algorithm --- libstdc++-v3/include/bits/basic_string.tcc | 31 ++ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc index df1e8dd..7942ee6 100644 --- a/libstdc++-v3/include/bits/basic_string.tcc +++ b/libstdc++-v3/include/bits/basic_string.tcc @@ -1194,14 +1194,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__n == 0
[PATCH] [graphite] Constrain only on INTEGER_TYPE
Passes bootstrap, no regressions. With this patch gcc bootstraps with graphite. make BOOT_CFLAGS="-g -O2 -fgraphite-identity -floop-interchange -floop-block" gcc/ChangeLog: 2015-08-12 Aditya Kumar * graphite-scop-detection.c (stmt_simple_for_scop_p): Constrain only on INTEGER_TYPE --- gcc/graphite-scop-detection.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index fb7247e..05cb5b9 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -410,7 +410,7 @@ stmt_simple_for_scop_p (basic_block scop_entry, loop_p outermost_loop, tree op = gimple_op (stmt, i); if (!graphite_can_represent_expr (scop_entry, loop, op) /* We can not handle REAL_TYPE. Failed for pr39260. */ - || TREE_CODE (TREE_TYPE (op)) == REAL_TYPE) + || (TREE_CODE (TREE_TYPE (op)) != INTEGER_TYPE)) { if (dump_file && (dump_flags & TDF_DETAILS)) { -- 2.1.0.243.g30d45f7
[PATCH] Specify the type of scop->region
From: Aditya Kumar Changing the type of scop::region from void* to sese, as this is the only type assigned to scop::region for now. No functional changes intended. Passes regtest and bootstrap. gcc/ChangeLog: 2015-08-17 Aditya Kumar * graphite-poly.c: Change type of region from void* to sese. * graphite-poly.h (struct scop): Changing the type of scop::region from void* to sese. Change accessor macro accordingly. * graphite-sese-to-poly.c (extract_affine_chrec): Use accessor macro. --- gcc/graphite-poly.c | 2 +- gcc/graphite-poly.h | 8 gcc/graphite-sese-to-poly.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/graphite-poly.c b/gcc/graphite-poly.c index dd4fcee..78b5d12 100644 --- a/gcc/graphite-poly.c +++ b/gcc/graphite-poly.c @@ -422,7 +422,7 @@ debug_pdr (poly_dr_p pdr, int verbosity) /* Creates a new SCOP containing REGION. */ scop_p -new_scop (void *region) +new_scop (sese region) { scop_p scop = XNEW (struct scop); diff --git a/gcc/graphite-poly.h b/gcc/graphite-poly.h index 062d927..4ca5d1f 100644 --- a/gcc/graphite-poly.h +++ b/gcc/graphite-poly.h @@ -1345,7 +1345,7 @@ lst_remove_all_before_excluding_pbb (lst_p loop, poly_bb_p pbb, bool before) struct scop { /* A SCOP is defined as a SESE region. */ - void *region; + sese region; /* Number of parameters in SCoP. */ graphite_dim_t nb_params; @@ -1390,14 +1390,14 @@ struct scop }; #define SCOP_BBS(S) (S->bbs) -#define SCOP_REGION(S) ((sese) S->region) +#define SCOP_REGION(S) (S->region) #define SCOP_CONTEXT(S) (NULL) #define SCOP_ORIGINAL_SCHEDULE(S) (S->original_schedule) #define SCOP_TRANSFORMED_SCHEDULE(S) (S->transformed_schedule) #define SCOP_SAVED_SCHEDULE(S) (S->saved_schedule) #define POLY_SCOP_P(S) (S->poly_scop_p) -extern scop_p new_scop (void *); +extern scop_p new_scop (sese); extern void free_scop (scop_p); extern void free_scops (vec ); extern void print_generated_program (FILE *, scop_p); @@ -1414,7 +1414,7 @@ extern bool graphite_legal_transform (scop_p); /* Set the region of SCOP to REGION. */ static inline void -scop_set_region (scop_p scop, void *region) +scop_set_region (scop_p scop, sese region) { scop->region = region; } diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c index fdcc790..0c97eba 100644 --- a/gcc/graphite-sese-to-poly.c +++ b/gcc/graphite-sese-to-poly.c @@ -604,7 +604,7 @@ extract_affine_chrec (scop_p s, tree e, __isl_take isl_space *space) isl_pw_aff *lhs = extract_affine (s, CHREC_LEFT (e), isl_space_copy (space)); isl_pw_aff *rhs = extract_affine (s, CHREC_RIGHT (e), isl_space_copy (space)); isl_local_space *ls = isl_local_space_from_space (space); - unsigned pos = sese_loop_depth ((sese) s->region, get_chrec_loop (e)) - 1; + unsigned pos = sese_loop_depth (SCOP_REGION (s), get_chrec_loop (e)) - 1; isl_aff *loop = isl_aff_set_coefficient_si (isl_aff_zero_on_domain (ls), isl_dim_in, pos, 1); isl_pw_aff *l = isl_pw_aff_from_aff (loop); -- 2.1.0.243.g30d45f7
[PATCH] [graphite] Remove limit_scops
This patch removes graphite-scop-detection.c:limit_scops function and fix related issues arising because of that. The functionality limit_scop was added as an intermediate step to discard the loops which graphite could not handle. Removing limit_scop required handling of different cases of loops and surrounding code. The scop is now larger so most test cases required 'number of scops detected' to be fixed. By increasing the size of scop we can now optimize loops which are 'siblings' of each other. This could enable loop fusion on a number of loops. Since in the graphite framework we mostly want to opimize loop-nests/adjacent-loops, we now discard scops with less than 2 loops. We also discard scops without any data references. Essentially: - Remove limite_scops. - Only select scops when there are at least two loops (loop nest or, side by side). - Discard loops without data-refs. - Fix test cases. Passes bootstrap and reg-test. gcc/ChangeLog: 2015-09-02 Aditya Kumar Sebastian Pop * graphite-isl-ast-to-gimple.c (gcc_expression_from_isl_ast_expr_id): Return the parameter if it was saved in corresponding parameter_rename_map of the region. (copy_def): Copy def from sese region to the newly created region. (copy_internal_parameters): Copy all the internal parameters defined within a region to the newly created region. (graphite_regenerate_ast_isl): Copy parameters to the new region before translating isl to gimple. * graphite-scop-detection.c (graphite_can_represent_loop): Bail out if the loop-nest does not have any data-references. (build_graphite_scops): Create a scop only when there is at least one loop inside it. (contains_only_close_phi_nodes): Deleted. (print_graphite_scop_statistics): Remove printing of scop-statistics before limit_scops. (limit_scops): Deleted. (build_scops): Removed call to limit_scops. * sese.c (new_sese): Construct. (free_sese): Destruct. (sese_add_exit_phis_edge): update_stmt after exit phi edge has been added. (set_rename): Pass sese region so that parameters inside the region can be added to its parameter_rename_map. (rename_uses): Pass sese region. (graphite_copy_stmts_from_block): Do not copy parameters that have been generated in the header of the scop. For each SSA_NAME in the parameter_rename_map rename its usage. (invariant_in_sese_p_rec): Return false if tree t is defined outside sese region. (scalar_evolution_in_region): If the tree t is invariant just return t. * sese.h: Added a parameter renamne map (parameter_rename_map_t) to struct sese to keep track of all the parameters which need renaming. * tree-data-ref.c (loop_nest_has_data_refs): Check if a loop nest has any data-refs. * tree-data-ref.h: Declaration of loop_nest_has_data_refs. gcc/testsuite/ChangeLog: 2015-09-02 Aditya Kumar Sebastian Pop * gcc.dg/graphite/block-0.c: Modifed test case to match current output. * gcc.dg/graphite/block-1.c: Same. * gcc.dg/graphite/block-5.c: Same. * gcc.dg/graphite/block-6.c: Same. * gcc.dg/graphite/interchange-1.c: Same. * gcc.dg/graphite/interchange-10.c: Same. * gcc.dg/graphite/interchange-11.c: Same. * gcc.dg/graphite/interchange-13.c: Same. * gcc.dg/graphite/interchange-14.c: Same. * gcc.dg/graphite/interchange-3.c: Same. * gcc.dg/graphite/interchange-4.c: Same. * gcc.dg/graphite/interchange-7.c: Same. * gcc.dg/graphite/interchange-8.c: Same. * gcc.dg/graphite/interchange-9.c: Same. * gcc.dg/graphite/isl-codegen-loop-dumping.c: Same. * gcc.dg/graphite/pr35356-1.c (foo): Same. * gcc.dg/graphite/pr37485.c: Same. * gcc.dg/graphite/scop-0.c (int toto): Same. * gcc.dg/graphite/scop-1.c: Same. * gcc.dg/graphite/scop-10.c: Same. * gcc.dg/graphite/scop-11.c: Same. * gcc.dg/graphite/scop-12.c: Same. * gcc.dg/graphite/scop-13.c: Same. * gcc.dg/graphite/scop-16.c: Same. * gcc.dg/graphite/scop-17.c: Same. * gcc.dg/graphite/scop-18.c: Same. * gcc.dg/graphite/scop-2.c: Same. * gcc.dg/graphite/scop-21.c (int test): Same. * gcc.dg/graphite/scop-22.c (void foo): Same. * gcc.dg/graphite/scop-4.c: Same. * gcc.dg/graphite/scop-5.c: Same. * gcc.dg/graphite/scop-6.c: Same. * gcc.dg/graphite/scop-7.c: Same. * gcc.dg/graphite/scop-8.c: Same. * gcc.dg/graphite/scop-9.c: Same. * gcc.dg/graphite/scop-mvt.c (void mvt): Introduced dependency so that data-refs remain inside the inner loop. * gcc.dg/graphite/uns-block-1.c: Modifed te
[PATCH] [graphite] Remove limit_scops
This patch removes graphite-scop-detection.c:limit_scops function and fix related issues arising because of that. The functionality limit_scop was added as an intermediate step to discard the loops which graphite could not handle. Removing limit_scop required handling of different cases of loops and surrounding code. The scop is now larger so most test cases required 'number of scops detected' to be fixed. By increasing the size of scop we can now optimize loops which are 'siblings' of each other. This could enable loop fusion on a number of loops. Since in the graphite framework we mostly want to opimize loop-nests/adjacent-loops, we now discard scops with less than 2 loops. We also discard scops without any data references. Essentially: - Remove limite_scops. - Only select scops when there are at least two loops (loop nest or, side by side). - Discard loops without data-refs. - Fix test cases. Passes bootstrap and reg-test. gcc/ChangeLog: 2015-09-02 Aditya Kumar Sebastian Pop * graphite-isl-ast-to-gimple.c (gcc_expression_from_isl_ast_expr_id): Return the parameter if it was saved in corresponding parameter_rename_map of the region. (copy_def): Copy def from sese region to the newly created region. (copy_internal_parameters): Copy all the internal parameters defined within a region to the newly created region. (graphite_regenerate_ast_isl): Copy parameters to the new region before translating isl to gimple. * graphite-scop-detection.c (graphite_can_represent_loop): Bail out if the loop-nest does not have any data-references. (build_graphite_scops): Create a scop only when there is at least one loop inside it. (contains_only_close_phi_nodes): Deleted. (print_graphite_scop_statistics): Deleted (print_graphite_statistics): Deleted (limit_scops): Deleted. (build_scops): Removed call to limit_scops. * sese.c (new_sese): Construct. (free_sese): Destruct. (sese_add_exit_phis_edge): update_stmt after exit phi edge has been added. (set_rename): Pass sese region so that parameters inside the region can be added to its parameter_rename_map. (rename_uses): Pass sese region. (graphite_copy_stmts_from_block): Do not copy parameters that have been generated in the header of the scop. For each SSA_NAME in the parameter_rename_map rename its usage. (invariant_in_sese_p_rec): Return false if tree t is defined outside sese region. (scalar_evolution_in_region): If the tree t is invariant just return t. * sese.h: Added a parameter renamne map (parameter_rename_map_t) to struct sese to keep track of all the parameters which need renaming. * tree-data-ref.c (loop_nest_has_data_refs): Check if a loop nest has any data-refs. * tree-data-ref.h: Declaration of loop_nest_has_data_refs. gcc/testsuite/ChangeLog: 2015-09-02 Aditya Kumar Sebastian Pop * gcc.dg/graphite/block-0.c: Modifed test case to match current output. * gcc.dg/graphite/block-1.c: Same. * gcc.dg/graphite/block-5.c: Same. * gcc.dg/graphite/block-6.c: Same. * gcc.dg/graphite/interchange-1.c: Same. * gcc.dg/graphite/interchange-10.c: Same. * gcc.dg/graphite/interchange-11.c: Same. * gcc.dg/graphite/interchange-13.c: Same. * gcc.dg/graphite/interchange-14.c: Same. * gcc.dg/graphite/interchange-3.c: Same. * gcc.dg/graphite/interchange-4.c: Same. * gcc.dg/graphite/interchange-7.c: Same. * gcc.dg/graphite/interchange-8.c: Same. * gcc.dg/graphite/interchange-9.c: Same. * gcc.dg/graphite/isl-codegen-loop-dumping.c: Same. * gcc.dg/graphite/pr35356-1.c (foo): Same. * gcc.dg/graphite/pr37485.c: Same. * gcc.dg/graphite/scop-0.c (int toto): Same. * gcc.dg/graphite/scop-1.c: Same. * gcc.dg/graphite/scop-10.c: Same. * gcc.dg/graphite/scop-11.c: Same. * gcc.dg/graphite/scop-12.c: Same. * gcc.dg/graphite/scop-13.c: Same. * gcc.dg/graphite/scop-16.c: Same. * gcc.dg/graphite/scop-17.c: Same. * gcc.dg/graphite/scop-18.c: Same. * gcc.dg/graphite/scop-2.c: Same. * gcc.dg/graphite/scop-21.c (int test): Same. * gcc.dg/graphite/scop-22.c (void foo): Same. * gcc.dg/graphite/scop-4.c: Same. * gcc.dg/graphite/scop-5.c: Same. * gcc.dg/graphite/scop-6.c: Same. * gcc.dg/graphite/scop-7.c: Same. * gcc.dg/graphite/scop-8.c: Same. * gcc.dg/graphite/scop-9.c: Same. * gcc.dg/graphite/scop-mvt.c (void mvt): Introduced dependency so that data-refs remain inside the inner loop. * gcc.dg/graphite/uns-block-1.c: Modifed test case to match o
[graphite] Refactor graphite-optimize-isl.c
Refactor graphite-optimize-isl.c. Renamed function name, variable names etc., and indented the source according to gcc style guidelines. Modified comments accordingly. No functional change intended. Passes regtest and bootstap on x86_64. gcc/ChangeLog: 2015-09-10 Aditya Kumar * graphite-optimize-isl.c (get_tile_map): Refactor. (get_schedule_for_band): Same. (getScheduleForBand): Same. (get_prevector_map): Same. (get_schedule_for_band_list): Same. (get_schedule_map): Same. (get_single_map): Same. (apply_schedule_map_to_scop): Same. (optimize_isl): Same. --- gcc/graphite-optimize-isl.c | 414 ++-- 1 file changed, 205 insertions(+), 209 deletions(-) diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c index 811a510..e988ee5 100644 --- a/gcc/graphite-optimize-isl.c +++ b/gcc/graphite-optimize-isl.c @@ -50,6 +50,9 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "dumpfile.h" +/* Set this to true to disable tiling of nested loops. */ +static bool disable_tiling = false; + static isl_union_set * scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) { @@ -64,152 +67,152 @@ scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) return res; } -/* getTileMap - Create a map that describes a n-dimensonal tiling. - - getTileMap creates a map from a n-dimensional scattering space into an +/* get_tile_map - Create a map that describes a n-dimensonal tiling. + + get_tile_map creates a map from a n-dimensional scattering space into an 2*n-dimensional scattering space. The map describes a rectangular tiling. - + Example: - scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32 - -tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: - t0 % 32 = 0 and t0 <= s0 < t0 + 32 and - t1 % 32 = 0 and t1 <= s1 < t1 + 32} - + SCHEDULE_DIMENSIONS = 2, PARAMETER_DIMENSIONS = 1, TILE_SIZE = 32 + +tile_map := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: +t0 % 32 = 0 and t0 <= s0 < t0 + 32 and +t1 % 32 = 0 and t1 <= s1 < t1 + 32} + Before tiling: - + for (i = 0; i < N; i++) for (j = 0; j < M; j++) - S(i,j) - + S(i,j) + After tiling: - + for (t_i = 0; t_i < N; i+=32) for (t_j = 0; t_j < M; j+=32) - for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 - for (j = t_j; j < t_j + 32; j++)| Known that M % 32 = 0 - S(i,j) - */ - + for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 + for (j = t_j; j < t_j + 32; j++)| Known that M % 32 = 0 + S(i,j) + */ + static isl_basic_map * -getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize) +get_tile_map (isl_ctx *ctx, int schedule_dimensions, int tile_size) { - int x; /* We construct - tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: - s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and - s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} + tile_map := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: + s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and + s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} and project out the auxilary dimensions a0 and a1. */ - isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions, - scheduleDimensions * 3); - isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space)); + isl_space *space + = isl_space_alloc (ctx, 0, schedule_dimensions, schedule_dimensions * 3); + isl_basic_map *tile_map = isl_basic_map_universe (isl_space_copy (space)); - isl_local_space *LocalSpace = isl_local_space_from_space (Space); + isl_local_space *local_space = isl_local_space_from_space (space); - for (x = 0; x < scheduleDimensions; x++) + for (int x = 0; x < schedule_dimensions; x++) { int sX = x; int tX = x; - int pX = scheduleDimensions + x; - int aX = 2 * scheduleDimensions + x; + int pX = schedule_dimensions + x; + int aX = 2 * schedule_dimensions + x; isl_constraint *c; - /* sX = aX * tileSize; */ - c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); + /* sX = aX * tile_size; */ + c = isl_equality_alloc (isl_local_space_copy (local_space)); isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1); - isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize); - tileMap = isl_basic_map_add_constraint (tileMap, c); + isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tile_size); + tile_map = isl_basic_map_add_con
[PATCH] Refactor optimize isl
Updated patch with corrections: Refactor graphite-optimize-isl.c. Renamed function name, variable names etc., and indented the source according to gcc style guidelines. Modified comments accordingly. No functional change intended. Passes regtest and bootstap on x86_64. gcc/ChangeLog: 2015-09-10 Aditya Kumar * graphite-optimize-isl.c (get_tile_map): Refactor. (get_schedule_for_band): Same. (getScheduleForBand): Same. (get_prevector_map): Same. (get_schedule_for_band_list): Same. (get_schedule_map): Same. (get_single_map): Same. (apply_schedule_map_to_scop): Same. (optimize_isl): Same. --- gcc/graphite-optimize-isl.c | 416 ++-- 1 file changed, 210 insertions(+), 206 deletions(-) diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c index 811a510..e891e91 100644 --- a/gcc/graphite-optimize-isl.c +++ b/gcc/graphite-optimize-isl.c @@ -50,6 +50,9 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "dumpfile.h" +/* Set this to true to disable tiling of nested loops. */ +static bool disable_tiling = false; + static isl_union_set * scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) { @@ -64,152 +67,153 @@ scop_get_domains (scop_p scop ATTRIBUTE_UNUSED) return res; } -/* getTileMap - Create a map that describes a n-dimensonal tiling. - - getTileMap creates a map from a n-dimensional scattering space into an +/* get_tile_map - Create a map that describes a n-dimensonal tiling. + + get_tile_map creates a map from a n-dimensional scattering space into an 2*n-dimensional scattering space. The map describes a rectangular tiling. - + Example: - scheduleDimensions = 2, parameterDimensions = 1, tileSize = 32 - -tileMap := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: - t0 % 32 = 0 and t0 <= s0 < t0 + 32 and - t1 % 32 = 0 and t1 <= s1 < t1 + 32} - + SCHEDULE_DIMENSIONS = 2, PARAMETER_DIMENSIONS = 1, TILE_SIZE = 32 + +tile_map := [p0] -> {[s0, s1] -> [t0, t1, s0, s1]: +t0 % 32 = 0 and t0 <= s0 < t0 + 32 and +t1 % 32 = 0 and t1 <= s1 < t1 + 32} + Before tiling: - + for (i = 0; i < N; i++) for (j = 0; j < M; j++) - S(i,j) - + S(i,j) + After tiling: - + for (t_i = 0; t_i < N; i+=32) for (t_j = 0; t_j < M; j+=32) - for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 - for (j = t_j; j < t_j + 32; j++)| Known that M % 32 = 0 - S(i,j) - */ - + for (i = t_i; i < min(t_i + 32, N); i++) | Unknown that N % 32 = 0 + for (j = t_j; j < t_j + 32; j++)| Known that M % 32 = 0 + S(i,j) + */ + static isl_basic_map * -getTileMap (isl_ctx *ctx, int scheduleDimensions, int tileSize) +get_tile_map (isl_ctx *ctx, int schedule_dimensions, int tile_size) { - int x; /* We construct - tileMap := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: - s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and - s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} + tile_map := [p0] -> {[s0, s1] -> [t0, t1, p0, p1, a0, a1]: + s0 = a0 * 32 and s0 = p0 and t0 <= p0 < t0 + 32 and + s1 = a1 * 32 and s1 = p1 and t1 <= p1 < t1 + 32} and project out the auxilary dimensions a0 and a1. */ - isl_space *Space = isl_space_alloc (ctx, 0, scheduleDimensions, - scheduleDimensions * 3); - isl_basic_map *tileMap = isl_basic_map_universe (isl_space_copy (Space)); + isl_space *space + = isl_space_alloc (ctx, 0, schedule_dimensions, schedule_dimensions * 3); + isl_basic_map *tile_map = isl_basic_map_universe (isl_space_copy (space)); - isl_local_space *LocalSpace = isl_local_space_from_space (Space); + isl_local_space *local_space = isl_local_space_from_space (space); - for (x = 0; x < scheduleDimensions; x++) + for (int x = 0; x < schedule_dimensions; x++) { int sX = x; int tX = x; - int pX = scheduleDimensions + x; - int aX = 2 * scheduleDimensions + x; + int pX = schedule_dimensions + x; + int aX = 2 * schedule_dimensions + x; isl_constraint *c; - /* sX = aX * tileSize; */ - c = isl_equality_alloc (isl_local_space_copy (LocalSpace)); + /* sX = aX * tile_size; */ + c = isl_equality_alloc (isl_local_space_copy (local_space)); isl_constraint_set_coefficient_si (c, isl_dim_out, sX, 1); - isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tileSize); - tileMap = isl_basic_map_add_constraint (tileMap, c); + isl_constraint_set_coefficient_si (c, isl_dim_out, aX, -tile_
[PATCH] Remove dead code from graphite-optimize-isl.c
The variable `static bool enable_polly_vector' is always assigned to false. This results in dead code in optimize-isl.c. Removing the dead code. No functional change intended. Passes bootstrap and regtest. gcc/ChangeLog: 2015-09-11 Aditya Kumar * graphite-optimize-isl.c (get_prevector_map): Delete function. (get_schedule_for_band_list): Remove dead code. --- gcc/graphite-optimize-isl.c | 136 ++-- 1 file changed, 4 insertions(+), 132 deletions(-) diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c index e891e91..b2a9a3f 100644 --- a/gcc/graphite-optimize-isl.c +++ b/gcc/graphite-optimize-isl.c @@ -204,119 +204,13 @@ get_schedule_for_band (isl_band *band, int *dimensions) return isl_union_map_apply_range (partial_schedule, tile_umap); } -/* Create a map that pre-vectorizes one scheduling dimension. - - get_prevector_map creates a map that maps each input dimension to the same - output dimension, except for the dimension DIM_TO_VECTORIZE. - DIM_TO_VECTORIZE is - strip mined by 'VECTOR_WIDTH' and the newly created point loop of - DIM_TO_VECTORIZE is moved to the innermost level. - - Example (DIM_TO_VECTORIZE=0, SCHEDULE_DIMENSIONS=2,VECTOR_WIDTH=4): - - | Before transformation - | - | A[i,j] -> [i,j] - | - | for (i = 0; i < 128; i++) - |for (j = 0; j < 128; j++) - | A(i,j); - - Prevector map: - [i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip - - | After transformation: - | - | A[i,j] -> [it,j,ip] : it % 4 = 0 and it <= ip <= it + 3 and i = ip - | - | for (it = 0; it < 128; it+=4) - |for (j = 0; j < 128; j++) - | for (ip = max(0,it); ip < min(128, it + 3); ip++) - |A(ip,j); - - The goal of this transformation is to create a trivially vectorizable loop. - This means a parallel loop at the innermost level that has a constant number - of iterations corresponding to the target vector width. - - This transformation creates a loop at the innermost level. The loop has a - constant number of iterations, if the number of loop iterations at - DIM_TO_VECTORIZE can be devided by VECTOR_WIDTH. The default VECTOR_WIDTH is - currently constant and not yet target specific. This function does not - reason about parallelism. */ -static isl_map * -get_prevector_map (isl_ctx *ctx, int dim_to_vectorize, int schedule_dimensions, - int vector_width) -{ - isl_space *space; - isl_local_space *local_space, *local_space_range; - isl_set *modulo; - isl_map *tiling_map; - isl_constraint *c; - isl_aff *aff; - int point_dimension; /* ip */ - int tile_dimension; /* it */ - isl_val *vector_widthMP; - int i; - - /* assert (0 <= DimToVectorize && DimToVectorize < ScheduleDimensions);*/ - - space - = isl_space_alloc (ctx, 0, schedule_dimensions, schedule_dimensions + 1); - tiling_map = isl_map_universe (isl_space_copy (space)); - local_space = isl_local_space_from_space (space); - point_dimension = schedule_dimensions; - tile_dimension = dim_to_vectorize; - - /* Create an identity map for everything except DimToVectorize and map - DimToVectorize to the point loop at the innermost dimension. */ - for (i = 0; i < schedule_dimensions; i++) -{ - c = isl_equality_alloc (isl_local_space_copy (local_space)); - isl_constraint_set_coefficient_si (c, isl_dim_in, i, -1); - - if (i == dim_to_vectorize) - isl_constraint_set_coefficient_si (c, isl_dim_out, point_dimension, 1); - else - isl_constraint_set_coefficient_si (c, isl_dim_out, i, 1); - - tiling_map = isl_map_add_constraint (tiling_map, c); -} - - /* it % 'VectorWidth' = 0 */ - local_space_range - = isl_local_space_range (isl_local_space_copy (local_space)); - aff = isl_aff_zero_on_domain (local_space_range); - aff = isl_aff_set_constant_si (aff, vector_width); - aff = isl_aff_set_coefficient_si (aff, isl_dim_in, tile_dimension, 1); - - vector_widthMP = isl_val_int_from_si (ctx, vector_width); - aff = isl_aff_mod_val (aff, vector_widthMP); - modulo = isl_pw_aff_zero_set (isl_pw_aff_from_aff (aff)); - tiling_map = isl_map_intersect_range (tiling_map, modulo); - - /* it <= ip */ - c = isl_inequality_alloc (isl_local_space_copy (local_space)); - isl_constraint_set_coefficient_si (c, isl_dim_out, tile_dimension, -1); - isl_constraint_set_coefficient_si (c, isl_dim_out, point_dimension, 1); - tiling_map = isl_map_add_constraint (tiling_map, c); - - /* ip <= it + ('VectorWidth' - 1) */ - c = isl_inequality_alloc (local_space); - isl_constraint_set_coefficient_si (c, isl_dim_out, tile_dimension, 1); - isl_constraint_set_coefficient_si (c, isl_dim_out, point_dimension, -1); - isl_constraint_set_constant_si (c, vector_width - 1); - tiling_map = isl_map_add_constraint (tiling_map, c); - - return til
RE: [PATCH] Refactor optimize isl
-Original Message- From: Tobias Grosser [mailto:tob...@grosser.es] Sent: Friday, September 11, 2015 1:16 PM To: Aditya Kumar; gcc-patches@gcc.gnu.org Cc: richard.guent...@gmail.com; s@samsung.com; seb...@gmail.com Subject: Re: [PATCH] Refactor optimize isl On 09/11/2015 07:07 PM, Aditya Kumar wrote: > Updated patch with corrections: > > Refactor graphite-optimize-isl.c. Renamed function name, variable > names etc., and indented the source according to gcc style guidelines. > Modified comments accordingly. No functional change intended. > Looks reasonable. > Just for history, this code was copied from Polly this is why the formatting does not match gcc's style. The relevant file in Polly has been evolved since then and might provide you with ideas > on how to improve this file in gcc. Thanks, I would look into Polly. -Aditya > Tobias
[PATCH] [graphite] Fix computation of single entry/exit of a region.
For basic block with two preds, allow (as single entry) only when the other edge is a backedge. Similarly for basic block with two succs, allow (as single exit) only when the other edge is a back edge. 2015-12-21 Aditya Kumar * graphite-scop-detection.c (scop_detection::get_nearest_dom_with_single_entry): Check l == l2. (scop_detection::get_nearest_pdom_with_single_exit): Same. (scop_detection::merge_sese): Whitespace. (scop_detection::add_scop): Comment. (build_scops): Whitespace. --- gcc/graphite-scop-detection.c | 31 +-- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c index dd506b5..ad11227 100644 --- a/gcc/graphite-scop-detection.c +++ b/gcc/graphite-scop-detection.c @@ -693,18 +693,22 @@ scop_detection::get_nearest_dom_with_single_entry (basic_block dom) { if (!dom->preds) return NULL; - /* If e1->src dominates e2->src then e1->src will also dominate dom. */ + + /* If any of the dominators has two predecessors but one of them is a back + edge, then that basic block also qualifies as a dominator with single + entry. */ if (dom->preds->length () == 2) { + /* If e1->src dominates e2->src then e1->src will also dominate dom. */ edge e1 = (*dom->preds)[0]; edge e2 = (*dom->preds)[1]; loop_p l = dom->loop_father; loop_p l1 = e1->src->loop_father; loop_p l2 = e2->src->loop_father; - if (l != l1 + if (l != l1 && l == l2 && dominated_by_p (CDI_DOMINATORS, e2->src, e1->src)) return e1; - if (l != l2 + if (l != l2 && l == l1 && dominated_by_p (CDI_DOMINATORS, e1->src, e2->src)) return e2; } @@ -728,17 +732,23 @@ scop_detection::get_nearest_pdom_with_single_exit (basic_block pdom) { if (!pdom->succs) return NULL; + + /* If any of the post-dominators has two successors but one of them is a back + edge, then that basic block also qualifies as a post-dominator with single + exit. */ if (pdom->succs->length () == 2) { + /* If e1->dest post-dominates e2->dest then e1->dest will also +post-dominate pdom. */ edge e1 = (*pdom->succs)[0]; edge e2 = (*pdom->succs)[1]; loop_p l = pdom->loop_father; loop_p l1 = e1->dest->loop_father; loop_p l2 = e2->dest->loop_father; - if (l != l1 + if (l != l1 && l == l2 && dominated_by_p (CDI_POST_DOMINATORS, e2->dest, e1->dest)) return e1; - if (l != l2 + if (l != l2 && l == l1 && dominated_by_p (CDI_POST_DOMINATORS, e1->dest, e2->dest)) return e2; } @@ -805,7 +815,7 @@ scop_detection::merge_sese (sese_l first, sese_l second) const EXIT->DEST should be in the same loop nest. */ if (!dominated_by_p (CDI_DOMINATORS, pdom, dom) || loop_depth (entry->src->loop_father) - != loop_depth (exit->dest->loop_father)) +!= loop_depth (exit->dest->loop_father)) return invalid_sese; /* For now we just want to bail out when exit does not post-dominate entry. @@ -1014,7 +1024,8 @@ scop_detection::add_scop (sese_l s) /* Remove all the scops which are subsumed by s. */ remove_subscops (s); - /* Replace this with split-intersecting scops. */ + /* Remove intersecting scops. FIXME: It will be a good idea to keep + the non-intersecting part of the scop already in the list. */ remove_intersecting_scops (s); scops.safe_push (s); @@ -1960,9 +1971,9 @@ build_scops (vec *scops) if (scop_nb_params (scop) > max_dim) { DEBUG_PRINT (dp << "[scop-detection-fail] too many parameters: " - << scop_nb_params (scop) - << " larger than --param graphite-max-nb-scop-params=" - << max_dim << ".\n"); + << scop_nb_params (scop) + << " larger than --param graphite-max-nb-scop-params=" + << max_dim << ".\n"); free_scop (scop); continue; } -- 2.6.3