On 18-03-15 18:25, Jakub Jelinek wrote:
On Wed, Mar 18, 2015 at 06:21:51PM +0100, Tom de Vries wrote:
this patch fixes PR65458.
The patch marks omp thread functions as parallelized, which means the
parloops pass no longer attempts to modify that function.
Bootstrapped and reg-tested on x86_64.
OK for stage4 trunk?
This will certainly not work with LTO.
IMHO you instead want some cgraph_node flag, and make sure you stream it
out and in for LTO.
Updated patch adds a parallelized_function flag to cgraph_node, and uses it
instead of the bitmap parallelized_functions in tree-parloops.c.
Bootstrapped and reg-tested on x86_64.
OK for stage4 trunk?
Thanks,
- Tom
Mark omp thread functions as parallelized
2015-03-19 Tom de Vries <t...@codesourcery.com>
PR tree-optimization/65458
* cgraph.c (cgraph_node::dump): Handle parallelized_function field.
* cgraph.h (cgraph_node): Add parallelized_function field.
* lto-cgraph.c (lto_output_node): Write parallelized_function field.
(input_overwrite_node): Read parallelized_function field.
* omp-low.c: Add include of tree-parloops.h.
(expand_omp_taskreg): Call mark_parallelized_function for child_fn.
* tree-parloops.c: Add include of plugin-api.h, ipa-ref.h and cgraph.h.
Remove include of gt-tree-parloops.h.
(parallelized_functions): Remove static variable.
(parallelized_function_p): Rewrite using parallelized_function field of
cgraph_node.
(mark_parallelized_function): New function.
(create_loop_fn): Remove adding to parallelized_functions.
* tree-parloops.h (mark_parallelized_function): Declare.
---
gcc/cgraph.c | 2 ++
gcc/cgraph.h | 2 ++
gcc/lto-cgraph.c | 2 ++
gcc/omp-low.c | 2 ++
gcc/tree-parloops.c | 35 +++++++++++++++++------------------
gcc/tree-parloops.h | 1 +
6 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index ede58bf..4573057 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2009,6 +2009,8 @@ cgraph_node::dump (FILE *f)
fprintf (f, " only_called_at_exit");
if (opt_for_fn (decl, optimize_size))
fprintf (f, " optimize_size");
+ if (parallelized_function)
+ fprintf (f, " parallelized_function");
fprintf (f, "\n");
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 52b15c5..650e689 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1317,6 +1317,8 @@ public:
unsigned nonfreeing_fn : 1;
/* True if there was multiple COMDAT bodies merged by lto-symtab. */
unsigned merged : 1;
+ /* True if function was created to be executed in parallel. */
+ unsigned parallelized_function : 1;
private:
/* Worker for call_for_symbol_and_aliases. */
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index c875fed..088de86 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -574,6 +574,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
bp_pack_value (&bp, node->icf_merged, 1);
bp_pack_value (&bp, node->nonfreeing_fn, 1);
bp_pack_value (&bp, node->thunk.thunk_p, 1);
+ bp_pack_value (&bp, node->parallelized_function, 1);
bp_pack_enum (&bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN, node->resolution);
bp_pack_value (&bp, node->instrumentation_clone, 1);
@@ -1209,6 +1210,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data,
node->icf_merged = bp_unpack_value (bp, 1);
node->nonfreeing_fn = bp_unpack_value (bp, 1);
node->thunk.thunk_p = bp_unpack_value (bp, 1);
+ node->parallelized_function = bp_unpack_value (bp, 1);
node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN);
node->instrumentation_clone = bp_unpack_value (bp, 1);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 48d73cb..a49a6eb 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "lto-section-names.h"
#include "gomp-constants.h"
+#include "tree-parloops.h"
/* Lowering of OMP parallel and workshare constructs proceeds in two
@@ -5569,6 +5570,7 @@ expand_omp_taskreg (struct omp_region *region)
/* Inform the callgraph about the new function. */
DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
cgraph_node::add_new_function (child_fn, true);
+ mark_parallelized_function (child_fn);
/* Fix the callgraph edges for child_cfun. Those for cfun will be
fixed in a following pass. */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index a584460..e952f2b 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -75,6 +75,9 @@ along with GCC; see the file COPYING3. If not see
#include "tree-parloops.h"
#include "omp-low.h"
#include "tree-nested.h"
+#include "plugin-api.h"
+#include "ipa-ref.h"
+#include "cgraph.h"
/* This pass tries to distribute iterations of loops into several threads.
The implementation is straightforward -- for each loop we test whether its
@@ -1422,21 +1425,24 @@ separate_decls_in_region (edge entry, edge exit,
}
}
-/* Bitmap containing uids of functions created by parallelization. We cannot
- allocate it from the default obstack, as it must live across compilation
- of several functions; we make it gc allocated instead. */
-
-static GTY(()) bitmap parallelized_functions;
-
-/* Returns true if FN was created by create_loop_fn. */
+/* Returns true if FN was created to run in parallel. */
bool
-parallelized_function_p (tree fn)
+parallelized_function_p (tree fndecl)
{
- if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
- return false;
+ cgraph_node *node = cgraph_node::get (fndecl);
+ gcc_assert (node != NULL);
+ return node->parallelized_function;
+}
+
+/* Mark that FN was created to run in parallel. */
- return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
+void
+mark_parallelized_function (tree fndecl)
+{
+ cgraph_node *node = cgraph_node::get (fndecl);
+ gcc_assert (node != NULL);
+ node->parallelized_function = 1;
}
/* Creates and returns an empty function that will receive the body of
@@ -1459,10 +1465,6 @@ create_loop_fn (location_t loc)
type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
decl = build_decl (loc, FUNCTION_DECL, name, type);
- if (!parallelized_functions)
- parallelized_functions = BITMAP_GGC_ALLOC ();
- bitmap_set_bit (parallelized_functions, DECL_UID (decl));
-
TREE_STATIC (decl) = 1;
TREE_USED (decl) = 1;
DECL_ARTIFICIAL (decl) = 1;
@@ -2314,6 +2316,3 @@ make_pass_parallelize_loops (gcc::context *ctxt)
{
return new pass_parallelize_loops (ctxt);
}
-
-
-#include "gt-tree-parloops.h"
diff --git a/gcc/tree-parloops.h b/gcc/tree-parloops.h
index d71f0a4..a742755 100644
--- a/gcc/tree-parloops.h
+++ b/gcc/tree-parloops.h
@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3. If not see
#define GCC_TREE_PARLOOPS_H
extern bool parallelized_function_p (tree);
+extern void mark_parallelized_function (tree);
#endif /* GCC_TREE_PARLOOPS_H */
--
1.9.1