> On Thu, 21 Nov 2013, Jan Hubicka wrote:
>
> > >
> > > Why do you need an additional -fparallelism? Wouldn't
> > > -fwpa=... be a better match, matching -flto=...? As we already
> > > pass down a -fwpa option to WPA this would make things easier, no?
> >
> > My plan was to possibly use the same option later for parallelizing more
> > parts of the compiler, not only WPA streaming. Streaming in may have some
> > chance if we get into thread safety of GGC or move a sufficient amount of
> > stuff out of GGC. Also we can parallelize the inliner heuristic or IPA-PTA
> > if it will ever work. So it would make sense with -flto-partition=none and
> > perhaps with local optimization, too.
>
> I'd like to drop -flto-partition=none eventually. It's just one more
> path through the compiler to support ...
>
> > But I can definitely update the patch to use -fwpa=N and we can deal with
> > this once this becomes real. (i.e. I have no clue how to parallelize the
> > inliner without making its decisions dependent on the parallelism and
> > declining as parallelism increases, nor do I have real plans for the
> > stream-in procedure)
>
> Please.
>
Hi,
here is the updated patch. Sorry for taking so long; I should have more time
for hacking again now...
Honza
* lto-cgraph.c (asm_nodes_output): Make global.
* lto-wrapper.c (run_gcc): Pass down parallelism to WPA.
* lto.c (lto_parallelism): New static var.
(do_stream_out, wait_for_child, stream_out): New static functions.
(lto_wpa_write_files): Add support for parallel streaming.
(do_whole_program_analysis): Set parallelism.
* lang.opt (fwpa): Add parameter.
* lto-lang.c (lto_handle_option): Handle flag_wpa.
(lto_init): Update use of flag_wpa.
* lto-streamer.h (asm_nodes_output): Declare.
Index: lto-cgraph.c
===================================================================
*** lto-cgraph.c (revision 205646)
--- lto-cgraph.c (working copy)
*************** along with GCC; see the file COPYING3.
*** 53,58 ****
--- 53,61 ----
#include "pass_manager.h"
#include "ipa-utils.h"
+ /* True when asm nodes have been output. */
+ bool asm_nodes_output = false;
+
static void output_cgraph_opt_summary (void);
static void input_cgraph_opt_summary (vec<symtab_node *> nodes);
*************** output_symtab (void)
*** 889,895 ****
lto_symtab_encoder_iterator lsei;
int i, n_nodes;
lto_symtab_encoder_t encoder;
- static bool asm_nodes_output = false;
if (flag_wpa)
output_cgraph_opt_summary ();
--- 892,897 ----
Index: lto-wrapper.c
===================================================================
*** lto-wrapper.c (revision 205646)
--- lto-wrapper.c (working copy)
*************** run_gcc (unsigned argc, char *argv[])
*** 745,751 ****
tmp += list_option_len;
strcpy (tmp, ltrans_output_file);
! obstack_ptr_grow (&argv_obstack, "-fwpa");
}
/* Append the input objects and possible preceding arguments. */
--- 746,761 ----
tmp += list_option_len;
strcpy (tmp, ltrans_output_file);
! if (jobserver)
! obstack_ptr_grow (&argv_obstack, xstrdup ("-fwpa=jobserver"));
! else if (parallel > 1)
! {
! char buf[256];
! sprintf (buf, "-fwpa=%i", parallel);
! obstack_ptr_grow (&argv_obstack, xstrdup (buf));
! }
! else
! obstack_ptr_grow (&argv_obstack, "-fwpa");
}
/* Append the input objects and possible preceding arguments. */
Index: lto/lto.c
===================================================================
*** lto/lto.c (revision 205646)
--- lto/lto.c (working copy)
*************** along with GCC; see the file COPYING3.
*** 53,58 ****
--- 53,61 ----
/* Vector to keep track of external variables we've seen so far. */
vec<tree, va_gc> *lto_global_var_decls;
+ /* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver. */
+ static int lto_parallelism;
+
static GTY(()) tree first_personality_decl;
/* Returns a hash code for P. */
*************** cmp_partitions_order (const void *a, con
*** 2454,2459 ****
--- 2457,2554 ----
return orderb - ordera;
}
+ /* Actually stream out ENCODER into TEMP_FILENAME.
+    Opens TEMP_FILENAME with lto_obj_file_open, makes it the current LTO
+    output file while ipa_write_optimization_summaries runs for ENCODER,
+    then resets the current output file to NULL, closes the file and
+    frees it.  Calls fatal_error if the file cannot be opened. */
+
+ static void
+ do_stream_out (char *temp_filename, lto_symtab_encoder_t encoder)
+ {
+ lto_file *file = lto_obj_file_open (temp_filename, true);
+ if (!file)
+ fatal_error ("lto_obj_file_open() failed");
+ lto_set_current_out_file (file);
+
+ ipa_write_optimization_summaries (encoder);
+
+ lto_set_current_out_file (NULL);
+ lto_obj_file_close (file);
+ free (file);
+ }
+
+ /* Wait for a forked streaming process to terminate and signal errors.
+    Only defined when HAVE_WORKING_FORK is set.  Calls waitpid with pid 0
+    and WUNTRACED | WCONTINUED and keeps waiting until the reported status
+    is a normal exit or a termination by signal.  Calls fatal_error when
+    waitpid itself fails, when the child exits with a nonzero status, or
+    when the child was killed by a signal. */
+ #ifdef HAVE_WORKING_FORK
+ static void
+ wait_for_child ()
+ {
+ int status;
+ do
+ {
+ int w = waitpid(0, &status, WUNTRACED | WCONTINUED);
+ if (w == -1)
+ fatal_error ("waitpid failed");
+
+ if (WIFEXITED (status) && WEXITSTATUS (status))
+ fatal_error ("streaming subprocess failed");
+ else if (WIFSIGNALED (status))
+ fatal_error ("streaming subprocess was killed by signal");
+ }
+ while (!WIFEXITED(status) && !WIFSIGNALED(status)); /* Stop/continue reports are not termination; wait again. */
+ }
+ #endif
+
+ /* Stream out ENCODER into TEMP_FILENAME.
+    Fork if that seems to help.
+    LAST is true for the final partition.  When HAVE_WORKING_FORK is not
+    defined, or when lto_parallelism is 0 or 1, the streaming is done
+    directly in this process.  Otherwise every partition except the last
+    is streamed by a forked child, and the last one is streamed by this
+    process, which then waits for all children still counted in NRUNS. */
+
+ static void
+ stream_out (char *temp_filename, lto_symtab_encoder_t encoder, bool last)
+ {
+ #ifdef HAVE_WORKING_FORK
+ static int nruns; /* Forked children not yet waited for; kept across calls. */
+
+ if (!lto_parallelism || lto_parallelism == 1)
+ {
+ do_stream_out (temp_filename, encoder);
+ return;
+ }
+
+ /* Do not run more than LTO_PARALLELISM streamings at once; wait for one
+ child to finish first.  FIXME: we ignore limits on jobserver. */
+ if (lto_parallelism > 0 && nruns >= lto_parallelism)
+ {
+ wait_for_child ();
+ nruns --;
+ }
+ /* If this is not the last parallel partition, execute new
+ streaming process. */
+ if (!last)
+ {
+ pid_t cpid = fork ();
+
+ if (!cpid)
+ {
+ setproctitle ("lto1-wpa-streaming");
+ do_stream_out (temp_filename, encoder);
+ exit (0);
+ }
+ /* Fork failed; let's do the job ourselves. */
+ else if (cpid == -1)
+ do_stream_out (temp_filename, encoder);
+ else
+ nruns++;
+ }
+ /* Last partition; stream it and wait for all children to die. */
+ else
+ {
+ int i;
+ do_stream_out (temp_filename, encoder);
+ for (i = 0; i < nruns; i++)
+ wait_for_child ();
+ }
+ asm_nodes_output = true; /* NOTE(review): presumably keeps later partitions from emitting asm nodes again; confirm against output_symtab. */
+ #else
+ do_stream_out (temp_filename, encoder);
+ #endif
+ }
+
/* Write all output files in WPA mode and the file with the list of
LTRANS units. */
*************** static void
*** 2461,2478 ****
lto_wpa_write_files (void)
{
unsigned i, n_sets;
- lto_file *file;
ltrans_partition part;
FILE *ltrans_output_list_stream;
char *temp_filename;
size_t blen;
/* Open the LTRANS output list. */
if (!ltrans_output_list)
fatal_error ("no LTRANS output list filename provided");
- ltrans_output_list_stream = fopen (ltrans_output_list, "w");
- if (ltrans_output_list_stream == NULL)
- fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
timevar_push (TV_WHOPR_WPA);
--- 2556,2570 ----
lto_wpa_write_files (void)
{
unsigned i, n_sets;
ltrans_partition part;
FILE *ltrans_output_list_stream;
char *temp_filename;
+ vec <char *>temp_filenames = vNULL;
size_t blen;
/* Open the LTRANS output list. */
if (!ltrans_output_list)
fatal_error ("no LTRANS output list filename provided");
timevar_push (TV_WHOPR_WPA);
*************** lto_wpa_write_files (void)
*** 2508,2521 ****
: cmp_partitions_order);
for (i = 0; i < n_sets; i++)
{
- size_t len;
ltrans_partition part = ltrans_partitions[i];
/* Write all the nodes in SET. */
sprintf (temp_filename + blen, "%u.o", i);
- file = lto_obj_file_open (temp_filename, true);
- if (!file)
- fatal_error ("lto_obj_file_open() failed");
if (!quiet_flag)
fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name,
part->insns);
--- 2600,2609 ----
*************** lto_wpa_write_files (void)
*** 2557,2577 ****
}
gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
! lto_set_current_out_file (file);
!
! ipa_write_optimization_summaries (part->encoder);
- lto_set_current_out_file (NULL);
- lto_obj_file_close (file);
- free (file);
part->encoder = NULL;
! len = strlen (temp_filename);
! if (fwrite (temp_filename, 1, len, ltrans_output_list_stream) < len
|| fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
fatal_error ("writing to LTRANS output list %s: %m",
ltrans_output_list);
}
lto_stats.num_output_files += n_sets;
--- 2645,2669 ----
}
gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
! stream_out (temp_filename, part->encoder, i == n_sets - 1);
part->encoder = NULL;
! temp_filenames.safe_push (xstrdup (temp_filename));
! }
! ltrans_output_list_stream = fopen (ltrans_output_list, "w");
! if (ltrans_output_list_stream == NULL)
! fatal_error ("opening LTRANS output list %s: %m", ltrans_output_list);
! for (i = 0; i < n_sets; i++)
! {
! unsigned int len = strlen (temp_filenames[i]);
! if (fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len
|| fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
fatal_error ("writing to LTRANS output list %s: %m",
ltrans_output_list);
+ free (temp_filenames[i]);
}
+ temp_filenames.release();
lto_stats.num_output_files += n_sets;
*************** do_whole_program_analysis (void)
*** 3126,3131 ****
--- 3218,3235 ----
{
symtab_node *node;
+ lto_parallelism = 1;
+
+ /* TODO: jobserver communication is not supported, yet. */
+ if (!strcmp (flag_wpa, "jobserver"))
+ lto_parallelism = -1;
+ else
+ {
+ lto_parallelism = atoi (flag_wpa);
+ if (lto_parallelism <= 0)
+ lto_parallelism = 0;
+ }
+
timevar_start (TV_PHASE_OPT_GEN);
/* Note that since we are in WPA mode, materialize_cgraph will not
Index: lto/lang.opt
===================================================================
*** lto/lang.opt (revision 205646)
--- lto/lang.opt (working copy)
*************** LTO Joined Var(ltrans_output_list)
*** 33,41 ****
Specify a file to which a list of files output by LTRANS is written.
fwpa
! LTO Driver Report Var(flag_wpa)
Run the link-time optimizer in whole program analysis (WPA) mode.
fresolution=
LTO Joined
The resolution file
--- 33,45 ----
Specify a file to which a list of files output by LTRANS is written.
fwpa
! LTO Driver Report
Run the link-time optimizer in whole program analysis (WPA) mode.
+ fwpa=
+ LTO Driver RejectNegative Joined Var(flag_wpa)
+ Whole program analysis (WPA) mode with number of parallel jobs specified.
+
fresolution=
LTO Joined
The resolution file
Index: lto/lto-lang.c
===================================================================
*** lto/lto-lang.c (revision 205646)
--- lto/lto-lang.c (working copy)
*************** lto_handle_option (size_t scode, const c
*** 749,754 ****
--- 749,758 ----
warn_psabi = value;
break;
+ case OPT_fwpa:
+ flag_wpa = value ? "" : NULL;
+ break;
+
default:
break;
}
*************** static bool
*** 1148,1154 ****
lto_init (void)
{
/* We need to generate LTO if running in WPA mode. */
! flag_generate_lto = flag_wpa;
/* Create the basic integer types. */
build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
--- 1152,1158 ----
lto_init (void)
{
/* We need to generate LTO if running in WPA mode. */
! flag_generate_lto = (flag_wpa != NULL);
/* Create the basic integer types. */
build_common_tree_nodes (flag_signed_char, /*short_double=*/false);
Index: lto-streamer.h
===================================================================
*** lto-streamer.h (revision 205646)
--- lto-streamer.h (working copy)
*************** void lto_output_location (struct output_
*** 873,878 ****
--- 873,879 ----
/* In lto-cgraph.c */
+ extern bool asm_nodes_output;
lto_symtab_encoder_t lto_symtab_encoder_new (bool);
int lto_symtab_encoder_encode (lto_symtab_encoder_t, symtab_node *);
void lto_symtab_encoder_delete (lto_symtab_encoder_t);