Hello,
I've been investigating undefined-symbol errors related to
http://gcc.gnu.org/PR57703. In Chromium there is the following asm code:
asm(".type SyscallAsm, @function\n" ...);
intptr_t SandboxSyscall(...)
{
asm volatile("call SyscallAsm");
}
The call to SandboxSyscall is inlined into a couple of functions. Our
problem is that the top-level asm is streamed only to the first ltrans
partition, so callers placed in other partitions hit an undefined symbol:
/tmp/cc9oZmpM.ltrans26.ltrans.o:cc9oZmpM.ltrans26.o:function
sandbox::Die::ExitGroup(): error: undefined reference to 'SyscallAsm'
There are a couple of possible fixes:
1) Add the -fno-lto option for that TU in the build system; this is not
easy because Chromium uses the ninja/gyp build system.
2) If the compiler sees a top-level asm, -flto can be disabled. I got
a segfault after I set the LTO flag to false; probably something
relies on the flag.
3) Put all functions with inline asm into the same partition as the
top-level asm. Many functions in Chromium contain inline asm (protocol
buffers), so the first partition becomes very big.
4) I tried introducing a new attribute (top_level_asm) that is used to
decorate SandboxSyscall. This approach works, but I'm not sure whether it
is acceptable to the Chromium developers. The patch for this solution is attached.
Thank you for any ideas,
Martin
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index abd96fb..e836a9a 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -319,6 +319,8 @@ static tree handle_noclone_attribute (tree *, tree, tree, int, bool *);
static tree handle_leaf_attribute (tree *, tree, tree, int, bool *);
static tree handle_always_inline_attribute (tree *, tree, tree, int,
bool *);
+static tree handle_top_level_asm_attribute (tree *, tree, tree, int,
+ bool *);
static tree handle_gnu_inline_attribute (tree *, tree, tree, int, bool *);
static tree handle_artificial_attribute (tree *, tree, tree, int, bool *);
static tree handle_flatten_attribute (tree *, tree, tree, int, bool *);
@@ -637,6 +639,8 @@ const struct attribute_spec c_common_attribute_table[] =
handle_leaf_attribute, false },
{ "always_inline", 0, 0, true, false, false,
handle_always_inline_attribute, false },
+ { "top_level_asm", 0, 0, true, false, false,
+ handle_top_level_asm_attribute, false },
{ "gnu_inline", 0, 0, true, false, false,
handle_gnu_inline_attribute, false },
{ "artificial", 0, 0, true, false, false,
@@ -6717,6 +6721,25 @@ handle_always_inline_attribute (tree *node, tree name,
return NULL_TREE;
}
+/* Handle a "top_level_asm" attribute; arguments as in
+ struct attribute_spec.handler.
+
+ The handler only validates the kind of declaration: when *NODE is not
+ a FUNCTION_DECL it emits an "attribute ignored" warning under
+ -Wattributes and sets *NO_ADD_ATTRS to true. For a FUNCTION_DECL it
+ does nothing. ARGS and FLAGS are unused. Always returns NULL_TREE. */
+
+static tree
+handle_top_level_asm_attribute (tree *node, tree name,
+ tree ARG_UNUSED (args),
+ int ARG_UNUSED (flags),
+ bool *no_add_attrs)
+{
+ if (TREE_CODE (*node) != FUNCTION_DECL)
+ {
+ warning (OPT_Wattributes, "%qE attribute ignored", name);
+ *no_add_attrs = true;
+ }
+
+ return NULL_TREE;
+}
+
+
/* Handle a "gnu_inline" attribute; arguments as in
struct attribute_spec.handler. */
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index a15b6bc..7f007b9 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1963,6 +1964,8 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node)
fprintf (f, " only_called_at_exit");
if (node->tm_clone)
fprintf (f, " tm_clone");
+ if (node->contains_asm)
+ fprintf (f, " contains_asm");
fprintf (f, "\n");
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 32b1ee1..46972e5 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -431,6 +431,7 @@ public:
/* True if this decl calls a COMDAT-local function. This is set up in
compute_inline_parameters and inline_call. */
unsigned calls_comdat_local : 1;
+ unsigned contains_asm: 1;
};
diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c
index ca69033..71bebc2 100644
--- a/gcc/cgraphclones.c
+++ b/gcc/cgraphclones.c
@@ -219,6 +219,7 @@ cgraph_clone_node (struct cgraph_node *n, tree decl, gcov_type count, int freq,
new_node->clone = n->clone;
new_node->clone.tree_map = NULL;
new_node->tp_first_run = n->tp_first_run;
+ new_node->contains_asm = n->contains_asm;
if (n->count)
{
if (new_node->count > n->count)
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 06283fc..2f97c3a 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -772,7 +772,11 @@ process_function_and_variable_attributes (struct cgraph_node *first,
&& !DECL_UNINLINABLE (decl))
warning_at (DECL_SOURCE_LOCATION (decl), OPT_Wattributes,
"always_inline function might not be inlinable");
-
+
+
+ if (lookup_attribute ("top_level_asm", DECL_ATTRIBUTES (decl)))
+ node->contains_asm = 1;
+
process_common_attributes (decl);
}
for (vnode = varpool_first_variable (); vnode != first_var;
diff --git a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c
index b2e0285..db9347a 100644
--- a/gcc/ipa-inline-transform.c
+++ b/gcc/ipa-inline-transform.c
@@ -240,6 +240,9 @@ inline_call (struct cgraph_edge *e, bool update_original,
/* Don't even think of inlining inline clone. */
gcc_assert (!callee->global.inlined_to);
+ if (e->callee->contains_asm)
+ e->caller->contains_asm = 1;
+
e->inline_failed = CIF_OK;
DECL_POSSIBLY_INLINED (callee->decl) = true;
diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c
index ef3890d..7ceeecd 100644
--- a/gcc/lto-cgraph.c
+++ b/gcc/lto-cgraph.c
@@ -522,6 +522,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
bp_pack_value (&bp, node->only_called_at_exit, 1);
bp_pack_value (&bp, node->tm_clone, 1);
bp_pack_value (&bp, node->calls_comdat_local, 1);
+ bp_pack_value (&bp, node->contains_asm, 1);
bp_pack_value (&bp, node->thunk.thunk_p && !boundary_p, 1);
bp_pack_enum (&bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN, node->resolution);
@@ -995,6 +996,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data,
node->only_called_at_exit = bp_unpack_value (bp, 1);
node->tm_clone = bp_unpack_value (bp, 1);
node->calls_comdat_local = bp_unpack_value (bp, 1);
+ node->contains_asm = bp_unpack_value (bp, 1);
node->thunk.thunk_p = bp_unpack_value (bp, 1);
node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN);
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 1ee5fbb..224ae4a 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -346,6 +346,11 @@ node_cmp (const void *pa, const void *pb)
const struct cgraph_node *a = *(const struct cgraph_node * const *) pa;
const struct cgraph_node *b = *(const struct cgraph_node * const *) pb;
+ if (a->contains_asm && !b->contains_asm)
+ return -1;
+ else if (!a->contains_asm && b->contains_asm)
+ return 1;
+
/* Profile reorder flag enables function reordering based on first execution
of a function. All functions with profile are placed in ascending
order at the beginning. */
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 91b43d9..c142183 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -2495,7 +2495,7 @@ wait_for_child ()
static void
stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
{
static int nruns;
if (lto_parallelism <= 1)
@@ -2590,10 +2590,12 @@ lto_wpa_write_files (void)
FIXME: Even when not reordering we may want to output one list for parallel make
and other for final link command. */
+ /*
if (!flag_profile_reorder_functions || !flag_profile_use)
ltrans_partitions.qsort (flag_toplevel_reorder
? cmp_partitions_size
: cmp_partitions_order);
+ */
for (i = 0; i < n_sets; i++)
{