On 17 June 2016 at 19:52, Prathamesh Kulkarni
<[email protected]> wrote:
> On 14 June 2016 at 18:31, Prathamesh Kulkarni
> <[email protected]> wrote:
>> On 13 June 2016 at 16:13, Jan Hubicka <[email protected]> wrote:
>>>> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
>>>> index ecafe63..41ac408 100644
>>>> --- a/gcc/cgraph.h
>>>> +++ b/gcc/cgraph.h
>>>> @@ -1874,6 +1874,9 @@ public:
>>>> if we did not do any inter-procedural code movement. */
>>>> unsigned used_by_single_function : 1;
>>>>
>>>> + /* Set if -fsection-anchors is set. */
>>>> + unsigned section_anchor : 1;
>>>> +
>>>> private:
>>>> /* Assemble thunks and aliases associated to varpool node. */
>>>> void assemble_aliases (void);
>>>> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
>>>> index 4bfcad7..e75d5c0 100644
>>>> --- a/gcc/cgraphunit.c
>>>> +++ b/gcc/cgraphunit.c
>>>> @@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl)
>>>> it is available to notice_global_symbol. */
>>>> node->definition = true;
>>>> notice_global_symbol (decl);
>>>> +
>>>> + node->section_anchor = flag_section_anchors;
>>>> +
>>>> if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl)
>>>> /* Traditionally we do not eliminate static variables when not
>>>> optimizing and when not doing toplevel reoder. */
>>>> diff --git a/gcc/common.opt b/gcc/common.opt
>>>> index f0d7196..e497795 100644
>>>> --- a/gcc/common.opt
>>>> +++ b/gcc/common.opt
>>>> @@ -1590,6 +1590,10 @@ fira-algorithm=
>>>> Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm)
>>>> Init(IRA_ALGORITHM_CB) Optimization
>>>> -fira-algorithm=[CB|priority] Set the used IRA algorithm.
>>>>
>>>> +fipa-increase_alignment
>>>> +Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization
>>>> +Option to gate increase_alignment ipa pass.
>>>> +
>>>> Enum
>>>> Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA
>>>> algorithm %qs)
>>>>
>>>> @@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic)
>>>> Init(1) Optimization
>>>> Enable the dependent count heuristic in the scheduler.
>>>>
>>>> fsection-anchors
>>>> -Common Report Var(flag_section_anchors) Optimization
>>>> +Common Report Var(flag_section_anchors)
>>>> Access data in the same section from shared anchor points.
>>>>
>>>> fsee
>>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>>>> index a0db3a4..1482566 100644
>>>> --- a/gcc/config/aarch64/aarch64.c
>>>> +++ b/gcc/config/aarch64/aarch64.c
>>>> @@ -8252,6 +8252,8 @@ aarch64_override_options (void)
>>>>
>>>> aarch64_register_fma_steering ();
>>>>
>>>> + /* Enable increase_alignment pass. */
>>>> + flag_ipa_increase_alignment = 1;
>>>
>>> I would rather enable it always on targets that do support anchors.
>> AFAIK aarch64 supports section anchors.
>>>> diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c
>>>> index ce9e146..7f09f3a 100644
>>>> --- a/gcc/lto/lto-symtab.c
>>>> +++ b/gcc/lto/lto-symtab.c
>>>> @@ -342,6 +342,13 @@ lto_symtab_merge (symtab_node *prevailing,
>>>> symtab_node *entry)
>>>> The type compatibility checks or the completing of types has properly
>>>> dealt with most issues. */
>>>>
>>>> + /* ??? is this assert necessary ? */
>>>> + varpool_node *v_prevailing = dyn_cast<varpool_node *> (prevailing);
>>>> + varpool_node *v_entry = dyn_cast<varpool_node *> (entry);
>>>> + gcc_assert (v_prevailing && v_entry);
>>>> + /* section_anchor of prevailing_decl wins. */
>>>> + v_entry->section_anchor = v_prevailing->section_anchor;
>>>> +
>>> Other flags are merged in lto_varpool_replace_node so please move this
>>> there.
>> Ah indeed, thanks for the pointers.
>> I wonder though if we need to set
>> prevailing_node->section_anchor = vnode->section_anchor ?
>> IIUC, the function merges flags from vnode into prevailing_node
>> and removes vnode. However we want prevailing_node->section_anchor
>> to always take precedence.
>>>> +/* Return true if alignment should be increased for this vnode.
>>>> + This is done if every function that references/referring to vnode
>>>> + has flag_tree_loop_vectorize set. */
>>>> +
>>>> +static bool
>>>> +increase_alignment_p (varpool_node *vnode)
>>>> +{
>>>> + ipa_ref *ref;
>>>> +
>>>> + for (int i = 0; vnode->iterate_reference (i, ref); i++)
>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referred))
>>>> + {
>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl);
>>>> + if (!opts->x_flag_tree_loop_vectorize)
>>>> + return false;
>>>> + }
>>>
>>> If you take address of function that has vectorizer enabled probably doesn't
>>> imply need to increase alignment of that var. So please drop the loop.
>>>
>>> You only want function that read/writes or takes address of the symbol. But
>>> onthe other hand, you need to walk all aliases of the symbol by
>>> call_for_symbol_and_aliases
>>>> +
>>>> + for (int i = 0; vnode->iterate_referring (i, ref); i++)
>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referring))
>>>> + {
>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl);
>>>> + if (!opts->x_flag_tree_loop_vectorize)
>>>> + return false;
>>>> + }
>>>> +
>>>> + return true;
>>>> +}
>>>> +
>>>> /* Entry point to increase_alignment pass. */
>>>> static unsigned int
>>>> increase_alignment (void)
>>>> @@ -914,9 +942,12 @@ increase_alignment (void)
>>>> tree decl = vnode->decl;
>>>> unsigned int alignment;
>>>>
>>>> - if ((decl_in_symtab_p (decl)
>>>> - && !symtab_node::get (decl)->can_increase_alignment_p ())
>>>> - || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
>>>> + if (!vnode->section_anchor
>>>> + || (decl_in_symtab_p (decl)
>>>> + && !symtab_node::get (decl)->can_increase_alignment_p ())
>>>> + || DECL_USER_ALIGN (decl)
>>>> + || DECL_ARTIFICIAL (decl)
>>>> + || !increase_alignment_p (vnode))
>>>
>>> Incrementally we probably should do more testing whether the variable looks
>>> like
>>> someting that can be vectorized, i.e. it contains array, has address taken
>>> or the
>>> accesses are array accesses within loop.
>>> This can be done by the analysis phase of the IPA pass inspecting the
>>> function
>>> bodies.
>> Thanks, I will try to check for array accesses are within a loop in
>> followup patch.
>> I was wondering if we could we treat a homogeneous global struct
>> (having members of one type),
>> as a global array of that type and increase it's alignment if required ?
>>>
>>> I think it is important waste to bump up everything including error
>>> messages etc.
>>> At least on i386 the effect on firefox datasegment of various alignment
>>> setting is
>>> very visible.
>> Um for a start, would it be OK to check if all functions referencing variable
>> have attribute noreturn, and in that case we skip increasing the alignment ?
>> I suppose that error functions would be having attribute noreturn set ?
>>>
>>> Looks OK to me otherwise. please send updated patch.
>> I have done the changes in the attached patch (stage-1 built).
>> I am not sure what to return from the callback function and
>> arbitrarily chose to return true.
> Hi,
> In this version (stage-1 built), I added read/write summaries which
> stream those variables
> which we want to increase alignment for.
>
> I have a few questions:
>
> a) To check if global var is used inside a loop, I obtained
> corresponding ipa_ref
> and checked loop_containing_stmt (ref->stmt), however it returned non-NULL
> even when ref->stmt was not inside a loop.
> for instance:
> int a[32];
> int f() { int x = a[0]; return x; }
> Um how to check if stmt is used inside a loop ?
>
> b) Is it necessary during WPA to check if function has
> flag_tree_vectorize_set since
> during analysis phase in increase_alignment_generate_summary() I check
> if cnode has flag_tree_loop_vectorize_set ?
>
> c) In LTO_increase_alignment_section, the following is streamed:
> i) a "count" of type uwhi, to represent number of variables
> ii) decls corresponding to the variables
> The variables are then obtained during read_summay using
> symtab_node::get_create().
> I suppose since decls for varpool_nodes would already be streamed in
> LTO_section_decls, I was wondering if I
> could somehow refer to the decls in that section to avoid duplicate
> streaming of decls ?
Hi,
In this version, the variable is streamed if it occurs within a loop
or it's address is taken.
To check if stmt is inside a loop I am using:
struct loop *l = loop_containing_stmt (ref->stmt);
if (l != DECL_STRUCT_FUNCTION (cnode->decl)->x_current_loops->tree_root)
vars->add (vnode);
Is this correct ?
I came across an unexpected issue in my previous patch with -ffat-lto-objects.
I am allocating vars = new hash_set<varpool_node *> () in
generate_summary() and freeing it in write_summary().
However with -ffat-lto-objects, it appears execute() gets called after
write_summary()
during LGEN and we hit segfault in execute() at:
for (hash_set<varpool_node *>::iterator it = vars.begin (); it !=
vars.end (); it++)
{ ... }
because write_summary() has freed vars.
To workaround the issue, I gated call to free vars in write_summary on
flag_fat_lto_objects,
I am not sure if that's a good solution.
Cross tested on arm*-*-*, aarch64*-*-*.
Thanks,
Prathamesh
>
> Thanks,
> Prathamesh
>>
>> Thanks,
>> Prathamesh
>>>
>>> Honza
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index ecafe63..41ac408 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1874,6 +1874,9 @@ public:
if we did not do any inter-procedural code movement. */
unsigned used_by_single_function : 1;
+ /* Set if -fsection-anchors is set. */
+ unsigned section_anchor : 1;
+
private:
/* Assemble thunks and aliases associated to varpool node. */
void assemble_aliases (void);
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 4bfcad7..e75d5c0 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl)
it is available to notice_global_symbol. */
node->definition = true;
notice_global_symbol (decl);
+
+ node->section_anchor = flag_section_anchors;
+
if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl)
/* Traditionally we do not eliminate static variables when not
optimizing and when not doing toplevel reoder. */
diff --git a/gcc/common.opt b/gcc/common.opt
index f0d7196..e497795 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1590,6 +1590,10 @@ fira-algorithm=
Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm)
Init(IRA_ALGORITHM_CB) Optimization
-fira-algorithm=[CB|priority] Set the used IRA algorithm.
+fipa-increase_alignment
+Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization
+Option to gate increase_alignment ipa pass.
+
Enum
Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA
algorithm %qs)
@@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic) Init(1)
Optimization
Enable the dependent count heuristic in the scheduler.
fsection-anchors
-Common Report Var(flag_section_anchors) Optimization
+Common Report Var(flag_section_anchors)
Access data in the same section from shared anchor points.
fsee
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a0db3a4..1482566 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -8252,6 +8252,8 @@ aarch64_override_options (void)
aarch64_register_fma_steering ();
+ /* Enable increase_alignment pass. */
+ flag_ipa_increase_alignment = 1;
}
/* Implement targetm.override_options_after_change. */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 3503c15..b7f448e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3458,6 +3458,9 @@ arm_option_override (void)
/* Init initial mode for testing. */
thumb_flipper = TARGET_THUMB;
+
+ /* Enable increase_alignment pass. */
+ flag_ipa_increase_alignment = 1;
}
static void
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2d7df6b..ed59068 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5011,6 +5011,9 @@ rs6000_option_override (void)
= { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
register_pass (&analyze_swaps_info);
+
+ /* Enable increase_alignment pass. */
+ flag_ipa_increase_alignment = 1;
}
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index 5cef2ba..289d9c3 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -627,6 +627,7 @@ lto_output_varpool_node (struct lto_simple_output_block
*ob, varpool_node *node,
bp_pack_value (&bp, node->tls_model, 3);
bp_pack_value (&bp, node->used_by_single_function, 1);
bp_pack_value (&bp, node->need_bounds_init, 1);
+ bp_pack_value (&bp, node->section_anchor, 1);
streamer_write_bitpack (&bp);
group = node->get_comdat_group ();
@@ -1401,6 +1402,7 @@ input_varpool_node (struct lto_file_decl_data *file_data,
node->tls_model = (enum tls_model)bp_unpack_value (&bp, 3);
node->used_by_single_function = (enum tls_model)bp_unpack_value (&bp, 1);
node->need_bounds_init = bp_unpack_value (&bp, 1);
+ node->section_anchor = bp_unpack_value (&bp, 1);
group = read_identifier (ib);
if (group)
{
diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c
index d8e74d7..d56f9d4 100644
--- a/gcc/lto-section-in.c
+++ b/gcc/lto-section-in.c
@@ -52,7 +52,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] =
"icf",
"offload_table",
"mode_table",
- "hsa"
+ "hsa",
+ "increase_alignment"
};
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index 92efdb8..725d0dc 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -245,6 +245,7 @@ enum lto_section_type
LTO_section_offload_table,
LTO_section_mode_table,
LTO_section_ipa_hsa,
+ LTO_section_increase_alignment,
LTO_N_SECTION_TYPES /* Must be last. */
};
diff --git a/gcc/passes.def b/gcc/passes.def
index 3647e90..3a8063c 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -138,12 +138,12 @@ along with GCC; see the file COPYING3. If not see
PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)
NEXT_PASS (pass_feedback_split_functions);
POP_INSERT_PASSES ()
- NEXT_PASS (pass_ipa_increase_alignment);
NEXT_PASS (pass_ipa_tm);
NEXT_PASS (pass_ipa_lower_emutls);
TERMINATE_PASS_LIST (all_small_ipa_passes)
INSERT_PASSES_AFTER (all_regular_ipa_passes)
+ NEXT_PASS (pass_ipa_increase_alignment);
NEXT_PASS (pass_ipa_whole_program_visibility);
NEXT_PASS (pass_ipa_profile);
NEXT_PASS (pass_ipa_icf);
diff --git a/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c
b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c
new file mode 100644
index 0000000..74eaed8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 32
+
+/* Clone of section-anchors-vect-70.c with foo() having
-fno-tree-loop-vectorize. */
+
+static struct A {
+ int p1, p2;
+ int e[N];
+} a, b, c;
+
+__attribute__((optimize("-fno-tree-loop-vectorize")))
+int foo(void)
+{
+ for (int i = 0; i < N; i++)
+ a.e[i] = b.e[i] + c.e[i];
+
+ return a.e[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-74.c
b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-74.c
new file mode 100644
index 0000000..574d917
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-74.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target section_anchors } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 32
+
+/* Do not increase alignment of a since it does not have it's
+ address taken and doesn't occur inside a loop. */
+
+static int a[N];
+
+int foo(void)
+{
+ return a[0];
+}
+
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target aarch64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target powerpc64*-*-* } } } */
+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0
"increase_alignment" { target arm*-*-* } } } */
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 36299a6..d36aa1d 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -483,7 +483,7 @@ extern simple_ipa_opt_pass
*make_pass_local_optimization_passes (gcc::context *c
extern ipa_opt_pass_d *make_pass_ipa_whole_program_visibility (gcc::context
*ctxt);
-extern simple_ipa_opt_pass *make_pass_ipa_increase_alignment (gcc::context
+extern ipa_opt_pass_d *make_pass_ipa_increase_alignment (gcc::context
*ctxt);
extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt);
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 2669813..f240747 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -75,7 +75,10 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-propagate.h"
#include "dbgcnt.h"
#include "tree-scalar-evolution.h"
-
+#include "tree-ssa-loop.h"
+#include "tree-streamer.h"
+#include "stringpool.h"
+#include "gimple-pretty-print.h"
/* Loop or bb location. */
source_location vect_location;
@@ -795,7 +798,8 @@ make_pass_slp_vectorize (gcc::context *ctxt)
array padding. */
static unsigned get_vec_alignment_for_type (tree);
-static hash_map<tree, unsigned> *type_align_map;
+static hash_map<tree, unsigned> *type_align_map = NULL;
+static hash_set<varpool_node *> *vars = NULL;
/* Return alignment of array's vector type corresponding to scalar type.
0 if no vector type exists. */
@@ -899,46 +903,199 @@ get_vec_alignment_for_type (tree type)
return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
}
+/* Return true if alignment should be increased for this vnode.
+ This is done if every function that referring to vnode
+ has flag_tree_loop_vectorize set. */
+
+static bool
+increase_alignment_p (varpool_node *vnode)
+{
+ ipa_ref *ref;
+ cgraph_node *cnode;
+
+ /* FIXME: Is this really necessary, since we check if cnode has
+ flag_tree_loop_vectorize_set during analysis phase in
+ increase_alignment_analyze_function ? */
+
+ for (int i = 0; vnode->iterate_referring (i, ref); i++)
+ /* Walk those functions that read/write/take address of vnode. */
+ if ((cnode = dyn_cast<cgraph_node *> (ref->referring)))
+ if (!opt_for_fn (cnode->decl, flag_tree_loop_vectorize))
+ return false;
+
+ return true;
+}
+
+static bool
+increase_alignment_callback (varpool_node *vnode, void *data ATTRIBUTE_UNUSED)
+{
+ tree decl = vnode->decl;
+
+ if (vnode->section_anchor
+ && decl_in_symtab_p (decl)
+ && symtab_node::get (decl)->can_increase_alignment_p ()
+ && !DECL_USER_ALIGN (decl)
+ && increase_alignment_p (vnode))
+ {
+ unsigned alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
+ if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
+ {
+ vnode->increase_alignment (alignment);
+ dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+
+ return true;
+}
+
/* Entry point to increase_alignment pass. */
static unsigned int
increase_alignment (void)
{
- varpool_node *vnode;
-
vect_location = UNKNOWN_LOCATION;
type_align_map = new hash_map<tree, unsigned>;
/* Increase the alignment of all global arrays for vectorization. */
- FOR_EACH_DEFINED_VARIABLE (vnode)
+ for (hash_set<varpool_node *>::iterator it = vars->begin (); it != vars->end
(); ++it)
{
- tree decl = vnode->decl;
- unsigned int alignment;
-
- if ((decl_in_symtab_p (decl)
- && !symtab_node::get (decl)->can_increase_alignment_p ())
- || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
- continue;
-
- alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
- if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
- {
- vnode->increase_alignment (alignment);
- dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
- dump_printf (MSG_NOTE, "\n");
- }
+ varpool_node *vnode = *it;
+ vnode->call_for_symbol_and_aliases (increase_alignment_callback, NULL,
true);
}
delete type_align_map;
+ delete vars;
return 0;
}
+static void
+increase_alignment_analyze_function (cgraph_node *cnode)
+{
+ ipa_ref *ref;
+
+ for (int i = 0; cnode->iterate_reference (i, ref); i++)
+ if (varpool_node *vnode = dyn_cast<varpool_node *> (ref->referred))
+ {
+ if (vars->contains (vnode))
+ return;
+
+ tree decl = vnode->decl;
+ if (! (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE
+ || TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE)
+ && !DECL_ARTIFICIAL (decl))
+ continue;
+
+ if (ref->use == IPA_REF_ADDR)
+ vars->add (vnode);
+ else
+ {
+ struct loop *l = loop_containing_stmt (ref->stmt);
+ if (l != DECL_STRUCT_FUNCTION
(cnode->decl)->x_current_loops->tree_root)
+ vars->add (vnode);
+ }
+ }
+}
+
+static void
+increase_alignment_generate_summary (void)
+{
+ vars = new hash_set<varpool_node *> ();
+ cgraph_node *cnode;
+
+ FOR_EACH_DEFINED_FUNCTION (cnode)
+ if (opt_for_fn (cnode->decl, flag_tree_loop_optimize))
+ increase_alignment_analyze_function (cnode);
+}
+
+static void
+increase_alignment_write_summary (void)
+{
+ /* Following is streamed to the section:
+ a) count of type uhwi, representing number of decls.
+ b) the decls. */
+
+ struct output_block *ob = create_output_block
(LTO_section_increase_alignment);
+ streamer_write_uhwi (ob, vars->elements ());
+
+ for (hash_set<varpool_node *>::iterator it = vars->begin (); it != vars->end
(); ++it)
+ {
+ varpool_node *vnode = *it;
+ stream_write_tree (ob, vnode->decl, true);
+ }
+
+ produce_asm (ob, NULL);
+ destroy_output_block (ob);
+
+ /* FIXME: This is a workaround for segfault when flag_fat_lto_objects gets
+ enabled either directly or by -fno-use-linker-plugin.
+ With flag_fat_lto_objects, execute() is called also during analysis
+ stage, after write_summary(). So if we delete vars here, we segfault
+ during execute() since it requires vars. I am not sure what's a good
+ solution to workaround the issue. */
+
+ if (!global_options.x_flag_fat_lto_objects)
+ delete vars;
+}
+
+static void
+increase_alignment_read_section (struct lto_file_decl_data *file_data,
+ const char *data, size_t len)
+{
+ const struct lto_function_header *header =
+ (const struct lto_function_header *) data;
+
+ const int cfg_offset = sizeof (struct lto_function_header);
+ const int main_offset = cfg_offset + header->cfg_size;
+ const int string_offset = main_offset + header->main_size;
+
+ lto_input_block ib (data + main_offset, header->main_size,
+ file_data->mode_table);
+
+ struct data_in *data_in =
+ lto_data_in_create (file_data, data + string_offset,
+ header->string_size, vNULL);
+
+ unsigned HOST_WIDE_INT count = streamer_read_uhwi (&ib);
+
+ for (unsigned HOST_WIDE_INT i = 0; i < count; i++)
+ {
+ tree decl = stream_read_tree (&ib, data_in);
+ varpool_node *vnode = varpool_node::get_create (decl);
+ if (vnode)
+ vars->add (vnode);
+ }
+
+ lto_free_section_data (file_data, LTO_section_increase_alignment, NULL,
+ data, len);
+
+ lto_data_in_delete (data_in);
+}
+
+static void
+increase_alignment_read_summary (void)
+{
+ struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
+ struct lto_file_decl_data *file_data;
+ unsigned i = 0;
+
+ vars = new hash_set<varpool_node *> ();
+
+ while ((file_data = file_data_vec[i++]))
+ {
+ size_t len;
+ const char *data = lto_get_section_data (file_data,
LTO_section_increase_alignment, NULL, &len);
+
+ if (data)
+ increase_alignment_read_section (file_data, data, len);
+ }
+}
namespace {
const pass_data pass_data_ipa_increase_alignment =
{
- SIMPLE_IPA_PASS, /* type */
+ IPA_PASS, /* type */
"increase_alignment", /* name */
OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
TV_IPA_OPT, /* tv_id */
@@ -949,17 +1106,26 @@ const pass_data pass_data_ipa_increase_alignment =
0, /* todo_flags_finish */
};
-class pass_ipa_increase_alignment : public simple_ipa_opt_pass
+class pass_ipa_increase_alignment : public ipa_opt_pass_d
{
public:
pass_ipa_increase_alignment (gcc::context *ctxt)
- : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
+ : ipa_opt_pass_d (pass_data_ipa_increase_alignment, ctxt,
+ increase_alignment_generate_summary,
+ increase_alignment_write_summary,
+ increase_alignment_read_summary,
+ NULL, /* write optimization summary */
+ NULL, /* read optimization summary */
+ NULL, /* stmt fixup */
+ 0, /* function_transform_todo_flags_start */
+ NULL, /* transform function */
+ NULL )/* variable transform */
{}
/* opt_pass methods: */
virtual bool gate (function *)
{
- return flag_section_anchors && flag_tree_loop_vectorize;
+ return flag_ipa_increase_alignment != 0;
}
virtual unsigned int execute (function *) { return increase_alignment (); }
@@ -968,7 +1133,7 @@ public:
} // anon namespace
-simple_ipa_opt_pass *
+ipa_opt_pass_d *
make_pass_ipa_increase_alignment (gcc::context *ctxt)
{
return new pass_ipa_increase_alignment (ctxt);