On Thu, 21 Nov 2013, Jan Hubicka wrote:
> >
> > Why do you need an additional -fparallelism? Wouldn't
> > -fwpa=... be a better match, matching -flto=...? As we already
> > pass down a -fwpa option to WPA this would make things easier, no?
>
> My plan was to possibly use the same option later for parallelizing more parts
> of the compiler, not only WPA streaming. Streaming in may have some chance if
> we get into thread safety of GGC or move a sufficient amount of stuff out of
> GGC. Also we can parallelize the inliner heuristics or IPA-PTA if it ever
> works. So it would make sense with -flto-partition=none and perhaps with local
> optimization, too.
I'd like to drop -flto-partition=none eventually. It's just one more
path through the compiler to support ...
> But I can definitely update the patch to use -fwpa=N and we can deal with this
> once this becomes real. (i.e. I have no clue how to parallelize the inliner
> without making its decisions dependent on the parallelism and declining as
> parallelism increases, nor do I have real plans for the stream-in procedure)
Please.
Richard.
> Honza
> >
> > Thanks,
> > Richard.
> >
> > > Honza
> > >
> > > * lto-cgraph.c (asm_nodes_output): Make global.
> > > * lto-streamer.h (asm_nodes_output): Declare.
> > > * lto-wrapper.c (parallel, jobserver): Make global.
> > > (run_gcc): Pass down -fparallelism
> > >
> > > * lto.c (lto_parallelism): New variable.
> > > (do_stream_out): New function.
> > > (stream_out): New function.
> > > (lto_wpa_write_files): Use it.
> > > * lang.opt (fparallelism): New.
> > > * lto.h (lto_parallelism): Declare.
> > > * lto-lang.c (lto_handle_option): Add fparallelism.
> > >
> > > Index: lto-cgraph.c
> > > ===================================================================
> > > --- lto-cgraph.c (revision 201891)
> > > +++ lto-cgraph.c (working copy)
> > > @@ -50,6 +50,9 @@ along with GCC; see the file COPYING3.
> > > #include "context.h"
> > > #include "pass_manager.h"
> > >
> > > +/* True when asm nodes has been output. */
> > > +bool asm_nodes_output = false;
> > > +
> > > static void output_cgraph_opt_summary (void);
> > > static void input_cgraph_opt_summary (vec<symtab_node> nodes);
> > >
> > > @@ -852,7 +855,6 @@ output_symtab (void)
> > > lto_symtab_encoder_iterator lsei;
> > > int i, n_nodes;
> > > lto_symtab_encoder_t encoder;
> > > - static bool asm_nodes_output = false;
> > >
> > > if (flag_wpa)
> > > output_cgraph_opt_summary ();
> > > Index: lto-streamer.h
> > > ===================================================================
> > > --- lto-streamer.h (revision 201891)
> > > +++ lto-streamer.h (working copy)
> > > @@ -870,6 +870,7 @@ void lto_output_location (struct output_
> > >
> > >
> > > /* In lto-cgraph.c */
> > > +extern bool asm_nodes_output;
> > > lto_symtab_encoder_t lto_symtab_encoder_new (bool);
> > > int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node);
> > > void lto_symtab_encoder_delete (lto_symtab_encoder_t);
> > > Index: lto-wrapper.c
> > > ===================================================================
> > > --- lto-wrapper.c (revision 201891)
> > > +++ lto-wrapper.c (working copy)
> > > @@ -56,6 +56,9 @@ along with GCC; see the file COPYING3.
> > >
> > > int debug; /* true if -save-temps. */
> > > int verbose; /* true if -v. */
> > > +int parallel = 0; /* number of parallel builds
> > > specified
> > > + by -flto=N */
> > > +int jobserver = 0; /* true if -flto=jobserver was
> > > used. */
> > >
> > > enum lto_mode_d {
> > > LTO_MODE_NONE, /* Not doing LTO. */
> > > @@ -445,8 +448,6 @@ run_gcc (unsigned argc, char *argv[])
> > > char *list_option_full = NULL;
> > > const char *linker_output = NULL;
> > > const char *collect_gcc, *collect_gcc_options;
> > > - int parallel = 0;
> > > - int jobserver = 0;
> > > bool no_partition = false;
> > > struct cl_decoded_option *fdecoded_options = NULL;
> > > unsigned int fdecoded_options_count = 0;
> > > @@ -630,6 +631,16 @@ run_gcc (unsigned argc, char *argv[])
> > > if (parallel <= 1)
> > > parallel = 0;
> > > }
> > > + if (jobserver)
> > > + {
> > > + obstack_ptr_grow (&argv_obstack, xstrdup
> > > ("-fparallelism=jobserver"));
> > > + }
> > > + else if (parallel > 1)
> > > + {
> > > + char buf[256];
> > > + sprintf (buf, "-fparallelism=%i", parallel);
> > > + obstack_ptr_grow (&argv_obstack, xstrdup (buf));
> > > + }
> > > /* Fallthru. */
> > >
> > > case OPT_flto:
> > > Index: lto/lto.c
> > > ===================================================================
> > > --- lto/lto.c (revision 201891)
> > > +++ lto/lto.c (working copy)
> > > @@ -49,6 +49,9 @@ along with GCC; see the file COPYING3.
> > > #include "context.h"
> > > #include "pass_manager.h"
> > >
> > > +/* Number of parallel tasks to run, -1 if we want to use GNU Make
> > > jobserver. */
> > > +int lto_parallelism;
> > > +
> > > static GTY(()) tree first_personality_decl;
> > >
> > > /* Returns a hash code for P. */
> > > @@ -3002,6 +3005,98 @@ cmp_partitions_order (const void *a, con
> > > return orderb - ordera;
> > > }
> > >
> > > +/* Actually stream out ENCODER into TEMP_FILENAME. */
> > > +
> > > +void
> > > +do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder)
> > > +{
> > > + lto_file *file = lto_obj_file_open (temp_filename, true);
> > > + if (!file)
> > > + fatal_error ("lto_obj_file_open() failed");
> > > + lto_set_current_out_file (file);
> > > +
> > > + ipa_write_optimization_summaries (encoder);
> > > +
> > > + lto_set_current_out_file (NULL);
> > > + lto_obj_file_close (file);
> > > + free (file);
> > > +}
> > > +
> > > +/* Wait for forked process and signal errors. */
> > > +#ifdef HAVE_WORKING_FORK
> > > +void
> > > +wait_for_child ()
> > > +{
> > > + int status;
> > > + do
> > > + {
> > > + int w = waitpid(0, &status, WUNTRACED | WCONTINUED);
> > > + if (w == -1)
> > > + fatal_error ("waitpid failed");
> > > +
> > > + if (WIFEXITED (status) && WEXITSTATUS (status))
> > > + fatal_error ("streaming subprocess failed");
> > > + else if (WIFSIGNALED (status))
> > > + fatal_error ("streaming subprocess was killed by signal");
> > > + }
> > > + while (!WIFEXITED(status) && !WIFSIGNALED(status));
> > > +}
> > > +#endif
> > > +
> > > +/* Stream out ENCODER into TEMP_FILENAME
> > > + Fork if that seems to help. */
> > > +
> > > +void
> > > +stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
> > > +{
> > > +#ifdef HAVE_WORKING_FORK
> > > + static int nruns;
> > > +
> > > + if (!lto_parallelism || lto_parallelism == 1)
> > > + {
> > > + do_stream_out (temp_filename, encoder);
> > > + return;
> > > + }
> > > +
> > > + /* Do not run more than LTO_PARALLELISM streamings
> > > + FIXME: we ignore limits on jobserver. */
> > > + if (lto_parallelism > 0 && nruns >= lto_parallelism)
> > > + {
> > > + wait_for_child ();
> > > + nruns --;
> > > + }
> > > + /* If this is not the last parallel partition, execute new
> > > + streaming process. */
> > > + if (!last)
> > > + {
> > > + pid_t cpid = fork ();
> > > +
> > > + if (!cpid)
> > > + {
> > > + setproctitle ("lto1-wpa-streaming");
> > > + do_stream_out (temp_filename, encoder);
> > > + exit (0);
> > > + }
> > > + /* Fork failed; lets do the job ourseleves. */
> > > + else if (cpid == -1)
> > > + do_stream_out (temp_filename, encoder);
> > > + else
> > > + nruns++;
> > > + }
> > > + /* Last partition; stream it and wait for all children to die. */
> > > + else
> > > + {
> > > + int i;
> > > + do_stream_out (temp_filename, encoder);
> > > + for (i = 0; i < nruns; i++)
> > > + wait_for_child ();
> > > + }
> > > + asm_nodes_output = true;
> > > +#else
> > > + do_stream_out (temp_filename, encoder);
> > > +#endif
> > > +}
> > > +
> > > /* Write all output files in WPA mode and the file with the list of
> > > LTRANS units. */
> > >
> > > @@ -3009,18 +3104,15 @@ static void
> > > lto_wpa_write_files (void)
> > > {
> > > unsigned i, n_sets;
> > > - lto_file *file;
> > > ltrans_partition part;
> > > FILE *ltrans_output_list_stream;
> > > char *temp_filename;
> > > + vec <char *>temp_filenames = vNULL;
> > > size_t blen;
> > >
> > > /* Open the LTRANS output list. */
> > > if (!ltrans_output_list)
> > > fatal_error ("no LTRANS output list filename provided");
> > > - ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> > > - if (ltrans_output_list_stream == NULL)
> > > - fatal_error ("opening LTRANS output list %s: %m",
> > > ltrans_output_list);
> > >
> > > timevar_push (TV_WHOPR_WPA);
> > >
> > > @@ -3056,14 +3148,10 @@ lto_wpa_write_files (void)
> > > : cmp_partitions_order);
> > > for (i = 0; i < n_sets; i++)
> > > {
> > > - size_t len;
> > > ltrans_partition part = ltrans_partitions[i];
> > >
> > > /* Write all the nodes in SET. */
> > > sprintf (temp_filename + blen, "%u.o", i);
> > > - file = lto_obj_file_open (temp_filename, true);
> > > - if (!file)
> > > - fatal_error ("lto_obj_file_open() failed");
> > >
> > > if (!quiet_flag)
> > > fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name,
> > > part->insns);
> > > @@ -3105,21 +3193,25 @@ lto_wpa_write_files (void)
> > > }
> > > gcc_checking_assert (lto_symtab_encoder_size (part->encoder) ||
> > > !i);
> > >
> > > - lto_set_current_out_file (file);
> > > -
> > > - ipa_write_optimization_summaries (part->encoder);
> > > + stream_out (temp_filename, part->encoder, i == n_sets - 1);
> > >
> > > - lto_set_current_out_file (NULL);
> > > - lto_obj_file_close (file);
> > > - free (file);
> > > part->encoder = NULL;
> > >
> > > - len = strlen (temp_filename);
> > > - if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len
> > > + temp_filenames.safe_push (xstrdup (temp_filename));
> > > + }
> > > + ltrans_output_list_stream = fopen (ltrans_output_list, "w");
> > > + if (ltrans_output_list_stream == NULL)
> > > + fatal_error ("opening LTRANS output list %s: %m",
> > > ltrans_output_list);
> > > + for (i = 0; i < n_sets; i++)
> > > + {
> > > + unsigned int len = strlen (temp_filenames[i]);
> > > + if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream)
> > > < len
> > > || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
> > > fatal_error ("writing to LTRANS output list %s: %m",
> > > ltrans_output_list);
> > > + free (temp_filenames[i]);
> > > }
> > > + temp_filenames.release();
> > >
> > > lto_stats.num_output_files += n_sets;
> > >
> > > Index: lto/lang.opt
> > > ===================================================================
> > > --- lto/lang.opt (revision 201891)
> > > +++ lto/lang.opt (working copy)
> > > @@ -32,6 +32,10 @@ fltrans-output-list=
> > > LTO Joined Var(ltrans_output_list)
> > > Specify a file to which a list of files output by LTRANS is written.
> > >
> > > +fparallelism=
> > > +LTO Joined
> > > +Run the link-time optimizer in whole program analysis (WPA) mode.
> > > +
> > > fwpa
> > > LTO Driver Report Var(flag_wpa)
> > > Run the link-time optimizer in whole program analysis (WPA) mode.
> > > Index: lto/lto.h
> > > ===================================================================
> > > --- lto/lto.h (revision 201891)
> > > +++ lto/lto.h (working copy)
> > > @@ -39,6 +39,7 @@ extern const char *resolution_file_name;
> > > extern tree lto_eh_personality (void);
> > > extern void lto_main (void);
> > > extern void lto_read_all_file_options (void);
> > > +extern int lto_parallelism;
> > >
> > > /* In lto-elf.c or lto-coff.c */
> > > extern lto_file *lto_obj_file_open (const char *filename, bool writable);
> > > Index: lto/lto-lang.c
> > > ===================================================================
> > > --- lto/lto-lang.c (revision 201891)
> > > +++ lto/lto-lang.c (working copy)
> > > @@ -735,6 +735,19 @@ lto_handle_option (size_t scode, const c
> > > warn_psabi = value;
> > > break;
> > >
> > > + case OPT_fparallelism_:
> > > + if (!arg)
> > > + lto_parallelism = 1;
> > > + else if (!strcmp (arg, "jobserver"))
> > > + lto_parallelism = -1;
> > > + else
> > > + {
> > > + lto_parallelism = atoi (arg);
> > > + if (lto_parallelism <= 0)
> > > + lto_parallelism = 0;
> > > + }
> > > + break;
> > > +
> > > default:
> > > break;
> > > }
> > >
> > >
> >
> > --
> > Richard Biener <[email protected]>
> > SUSE / SUSE Labs
> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
> > GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer
>
>
--
Richard Biener <[email protected]>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer