On Mon, Nov 24, 2014 at 5:44 PM, Ilya Verbin <iver...@gmail.com> wrote: > On 17 Nov 10:57, Richard Biener wrote: >> On Fri, Nov 14, 2014 at 6:08 PM, Ilya Verbin <iver...@gmail.com> wrote: >> > On 14 Nov 09:01, H.J. Lu wrote: >> >> On Fri, Nov 14, 2014 at 8:51 AM, Ilya Verbin <iver...@gmail.com> wrote: >> >> > On 14 Nov 08:46, H.J. Lu wrote: >> >> >> What happens when -flto is used on command line? Will we >> >> >> generate both LTO IR and offload IR? >> >> > >> >> > Right. >> >> > >> >> > I'm not sure whether we should make slim objects in case of LTO + >> >> > offload IR... >> >> > >> >> >> >> Isn't __gnu_lto_slim only applied to regular LTO IR? Should offload IR be >> >> handled separately from regular LTO IR? It is odd to use >> >> flag_fat_lto_objects >> >> to control offload IR. >> > >> > It is handled separately, but it uses a common infrastructure with regular >> > LTO >> > for streaming, therefore compile_file automatically emits __gnu_lto_slim >> > when >> > there is at least one section with IR (flag_generate_lto is set). You >> > propose >> > to introduce a second flag like flag_fat_lto_objects to disable >> > __gnu_lto_slim? >> >> Err... why is offloading not guarded with a new symbol like >> __gnu_lto_offload? > > Well, it's possible to guard offload IR with a new symbol, using a patch like > this (it is not fully regtested). But I don't like it... Maybe we could just > change the meaning of "__gnu_lto_v1" from "object contains LTO IR" to "object > contains any IR"? In collect2 both LTO and offload cases are handled > identically. Is there other place where the symbol is used?
I don't think so (and even collect2.c should be changed to use simple-object to identify LTO objects rather than ar...). But I think libtool uses it as well. Adding flag_generate_offload, as the patch does, sounds like a good solution; I didn't like emitting fat LTO objects unconditionally just because we offload. Richard. > -- Ilya > > > diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c > index c133a22..f09d79d 100644 > --- a/gcc/ada/gcc-interface/decl.c > +++ b/gcc/ada/gcc-interface/decl.c > @@ -1490,7 +1490,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree > gnu_expr, int definition) > && definition > && debug_info_p > && !optimize > - && !flag_generate_lto) > + && !flag_generate_lto > + && !flag_generate_offload) > { > tree param = create_param_decl (gnu_entity_name, gnu_type, false); > gnat_pushdecl (param, gnat_entity); > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c > index 2fd99a7..fed1a3e 100644 > --- a/gcc/cgraphunit.c > +++ b/gcc/cgraphunit.c > @@ -2075,7 +2075,7 @@ ipa_passes (void) > } > > /* Some targets need to handle LTO assembler output specially. */ > - if (flag_generate_lto) > + if (flag_generate_lto || flag_generate_offload) > targetm.asm_out.lto_start (); > > if (!in_lto_p) > @@ -2092,7 +2092,7 @@ ipa_passes (void) > } > } > > - if (flag_generate_lto) > + if (flag_generate_lto || flag_generate_offload) > targetm.asm_out.lto_end (); > > if (!flag_ltrans && (in_lto_p || !flag_lto || flag_fat_lto_objects)) > @@ -2176,10 +2176,10 @@ symbol_table::compile (void) > > /* Offloading requires LTO infrastructure. */ > if (!in_lto_p && g->have_offload) > - flag_generate_lto = 1; > + flag_generate_offload = 1; > > /* If LTO is enabled, initialize the streamer hooks needed by GIMPLE. */ > - if (flag_generate_lto) > + if (flag_generate_lto || flag_generate_offload) > lto_streamer_hooks_init (); > > /* Don't run the IPA passes if there was any error or sorry messages. 
*/ > diff --git a/gcc/collect2.c b/gcc/collect2.c > index 9c3a1c5..2dcebcd 100644 > --- a/gcc/collect2.c > +++ b/gcc/collect2.c > @@ -2392,12 +2392,16 @@ scan_prog_file (const char *prog_name, scanpass > which_pass, > if (found_lto) > continue; > > - /* Look for the LTO info marker symbol, and add filename to > + /* Look for the LTO or offload info marker symbol, and add > filename to > the LTO objects list if found. */ > for (p = buf; (ch = *p) != '\0' && ch != '\n'; p++) > if (ch == ' ' && p[1] == '_' && p[2] == '_' > - && (strncmp (p + (p[3] == '_' ? 2 : 1), "__gnu_lto_v1", 12) > == 0) > - && ISSPACE (p[p[3] == '_' ? 14 : 13])) > + && (((strncmp (p + (p[3] == '_' ? 2 : 1), > + "__gnu_lto_v1", 12) == 0) > + && ISSPACE (p[p[3] == '_' ? 14 : 13])) > + || ((strncmp (p + (p[3] == '_' ? 2 : 1), > + "__gnu_offload_v1", 16) == 0) > + && ISSPACE (p[p[3] == '_' ? 18 : 17])))) > { > add_lto_object (&lto_objects, prog_name); > > diff --git a/gcc/common.opt b/gcc/common.opt > index 41c8d4e..11a5500 100644 > --- a/gcc/common.opt > +++ b/gcc/common.opt > @@ -67,6 +67,10 @@ int *param_values > Variable > int flag_generate_lto > > +; Nonzero if we should write GIMPLE bytecode for offload compilation. > +Variable > +int flag_generate_offload > + > ; True to warn about any objects definitions whose size is larger > ; than N bytes. Also want about function definitions whose returned > ; values are larger than N bytes, where N is 'larger_than_size'. > diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c > index 3d50ac9..3e83213 100644 > --- a/gcc/dwarf2out.c > +++ b/gcc/dwarf2out.c > @@ -24390,7 +24390,8 @@ dwarf2out_finish (const char *filename) > /* When generating LTO bytecode we can not generate new assembler > names at this point and all important decls got theirs via > free-lang-data. 
*/ > - if ((!flag_generate_lto || DECL_ASSEMBLER_NAME_SET_P (decl)) > + if (((!flag_generate_lto && !flag_generate_offload) > + || DECL_ASSEMBLER_NAME_SET_P (decl)) > && DECL_ASSEMBLER_NAME (decl) != DECL_NAME (decl)) > { > add_linkage_attr (node->die, decl); > diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c > index 2f2993c..9d62722 100644 > --- a/gcc/ipa-inline-analysis.c > +++ b/gcc/ipa-inline-analysis.c > @@ -4031,7 +4031,7 @@ inline_generate_summary (void) > > /* When not optimizing, do not bother to analyze. Inlining is still done > because edge redirection needs to happen there. */ > - if (!optimize && !flag_generate_lto && !flag_wpa) > + if (!optimize && !flag_generate_lto && !flag_generate_offload && !flag_wpa) > return; > > function_insertion_hook_holder = > diff --git a/gcc/lto-streamer.c b/gcc/lto-streamer.c > index e8347dc..af20330 100644 > --- a/gcc/lto-streamer.c > +++ b/gcc/lto-streamer.c > @@ -328,7 +328,7 @@ lto_streamer_init (void) > bool > gate_lto_out (void) > { > - return ((flag_generate_lto || in_lto_p) > + return ((flag_generate_lto || flag_generate_offload || in_lto_p) > /* Don't bother doing anything if the program has errors. */ > && !seen_error ()); > } > diff --git a/gcc/passes.c b/gcc/passes.c > index a3be0bb..74b40e5 100644 > --- a/gcc/passes.c > +++ b/gcc/passes.c > @@ -2466,7 +2466,7 @@ ipa_write_summaries (bool offload_lto_mode) > struct cgraph_node *node; > struct cgraph_node **order; > > - if (!flag_generate_lto || seen_error ()) > + if ((!flag_generate_lto && !flag_generate_offload) || seen_error ()) > return; > > select_what_to_stream (offload_lto_mode); > diff --git a/gcc/toplev.c b/gcc/toplev.c > index 6e6adfa..2d84b0a 100644 > --- a/gcc/toplev.c > +++ b/gcc/toplev.c > @@ -672,6 +672,27 @@ compile_file (void) > } > } > > + /* Emit offload marker if offload info has been previously emitted. > + This is used by collect2 to determine whether an object file contains > IL. 
> + We used to emit an undefined reference here, but this produces > + link errors if an object file with IL is stored into a shared > + library without invoking lto1. */ > + if (flag_generate_offload) > + { > +#if defined ASM_OUTPUT_ALIGNED_DECL_COMMON > + ASM_OUTPUT_ALIGNED_DECL_COMMON (asm_out_file, NULL_TREE, > + "__gnu_offload_v1", > + (unsigned HOST_WIDE_INT) 1, 8); > +#elif defined ASM_OUTPUT_ALIGNED_COMMON > + ASM_OUTPUT_ALIGNED_COMMON (asm_out_file, "__gnu_offload_v1", > + (unsigned HOST_WIDE_INT) 1, 8); > +#else > + ASM_OUTPUT_COMMON (asm_out_file, "__gnu_offload_v1", > + (unsigned HOST_WIDE_INT) 1, > + (unsigned HOST_WIDE_INT) 1); > +#endif > + } > + > /* Attach a special .ident directive to the end of the file to identify > the version of GCC which compiled this code. The format of the .ident > string is patterned after the ones produced by native SVR4 compilers. > */ > diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c > index d4864ae..d4ebf52 100644 > --- a/gcc/tree-inline.c > +++ b/gcc/tree-inline.c > @@ -4383,7 +4383,8 @@ expand_call_inline (basic_block bb, gimple stmt, > copy_body_data *id) > || !optimize > || cgraph_inline_failed_type (reason) == CIF_FINAL_ERROR) > /* PR 20090218-1_0.c. Body can be provided by another module. */ > - && (reason != CIF_BODY_NOT_AVAILABLE || !flag_generate_lto)) > + && (reason != CIF_BODY_NOT_AVAILABLE > + || (!flag_generate_lto && !flag_generate_offload))) > { > error ("inlining failed in call to always_inline %q+F: %s", fn, > cgraph_inline_failed_string (reason)); > diff --git a/gcc/tree.c b/gcc/tree.c > index 1d5e4f6..f6a6d04 100644 > --- a/gcc/tree.c > +++ b/gcc/tree.c > @@ -5730,7 +5730,7 @@ free_lang_data (void) > > /* If we are the LTO frontend we have freed lang-specific data already. */ > if (in_lto_p > - || !flag_generate_lto) > + || (!flag_generate_lto && !flag_generate_offload)) > return 0; > > /* Allocate and assign alias sets to the standard integer types