This patch adds a new dump flag that dumps PMU profile information using
the -pmu dump option.

This patch should be applied to google/main.

Tested with crosstools.

2012-09-06  Chris Manghane  <cm...@google.com>

        * gcc/doc/invoke.texi: Modified pmu-profile-use option.
        * gcc/tree-dump.c: Added new dump flag.
        * gcc/tree-pretty-print.c
        (dump_load_latency_details): New function.
        (dump_pmu): New function.
        (dump_generic_node): Added support for new dump flag.
        * gcc/tree-pretty-print.h: Added new function to global header.
        * gcc/tree-pass.h (enum tree_dump_index): Added new dump flag.
        * gcc/gcov.c:
        (process_pmu_profile): Fixed assertion conditions.
        * gcc/gcov-io.h (struct gcov_pmu_summary): Added new struct.
        * gcc/opts.c (common_handle_option): Added support for modified option.
        * gcc/gimple-pretty-print.c
        (dump_gimple_phi): Added support for new dump flag.
        (dump_gimple_stmt): Ditto.
        * gcc/coverage.c
        (htab_counts_entry_hash): Added new hash table for PMU info.
        (htab_pmu_entry_hash): Ditto.
        (htab_counts_entry_eq): Ditto.
        (htab_pmu_entry_eq): Ditto.
        (htab_counts_entry_del): Ditto.
        (htab_pmu_entry_del): Ditto.
        (read_counts_file): Ditto.
        (read_pmu_file): Ditto.
        (get_coverage_pmu_latency): Ditto.
        (get_coverage_pmu_branch_mispredict): Ditto.
        (pmu_data_present): Added new function.
        (coverage_init): Added pmu file reading support.
        * gcc/coverage.h: Added pmu functions to global header.
        * gcc/common.opt: Modified pmu-profile-use option.

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 190817)
+++ gcc/doc/invoke.texi (working copy)
@@ -399,7 +399,7 @@ Objective-C and Objective-C++ Dialects}.
 -fprofile-generate=@var{path} -fprofile-generate-sampling @gol
 -fprofile-use -fprofile-use=@var{path} -fprofile-values @gol
 -fpmu-profile-generate=@var{pmuoption} @gol
--fpmu-profile-use=@var{pmuoption} @gol
+-fpmu-profile-use=@var{pmudata} @gol
 -freciprocal-math -free -fregmove -frename-registers -freorder-blocks @gol
 -frecord-gcc-switches-in-elf@gol
 -freorder-blocks-and-partition -freorder-functions @gol
@@ -8381,12 +8381,11 @@ displayed using coverage tool gcov. The params var
 "pmu_profile_n_addresses" can be used to restrict PMU data collection
 to only this many addresses.
 
-@item -fpmu-profile-use=@var{pmuoption}
+@item -fpmu-profile-use=@var{pmudata}
 @opindex fpmu-profile-use
 
-Enable performance monitoring unit (PMU) profiling based
-optimizations.  Currently only @var{load-latency} and
-@var{branch-mispredict} are supported.
+If @var{pmudata} is specified, GCC will read PMU data from @var{pmudata}.  If
+unspecified, PMU data will be read from @file{pmuprofile.gcda}.
 
 @item -fprofile-strip=@var{base_suffix}
 @opindex fprofile-strip
Index: gcc/tree-dump.c
===================================================================
--- gcc/tree-dump.c     (revision 190817)
+++ gcc/tree-dump.c     (working copy)
@@ -824,9 +824,11 @@ static const struct dump_option_value_info dump_op
   {"nouid", TDF_NOUID},
   {"enumerate_locals", TDF_ENUMERATE_LOCALS},
   {"scev", TDF_SCEV},
+  {"pmu", TDF_PMU},
   {"all", ~(TDF_RAW | TDF_SLIM | TDF_LINENO | TDF_TREE | TDF_RTL | TDF_IPA
            | TDF_STMTADDR | TDF_GRAPH | TDF_DIAGNOSTIC | TDF_VERBOSE
-           | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV)},
+           | TDF_RHS_ONLY | TDF_NOUID | TDF_ENUMERATE_LOCALS | TDF_SCEV
+            | TDF_PMU)},
   {NULL, 0}
 };
 
Index: gcc/tree-pretty-print.c
===================================================================
--- gcc/tree-pretty-print.c     (revision 190817)
+++ gcc/tree-pretty-print.c     (working copy)
@@ -25,6 +25,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm.h"
 #include "tree.h"
 #include "output.h"
+#include "basic-block.h"
+#include "gcov-io.h"
+#include "coverage.h"
 #include "tree-pretty-print.h"
 #include "hashtab.h"
 #include "tree-flow.h"
@@ -51,6 +54,7 @@ static void do_niy (pretty_printer *, const_tree);
 
 static pretty_printer buffer;
 static int initialized = 0;
+static char *file_prefix = NULL;
 
 /* Try to print something for an unknown tree code.  */
 
@@ -461,7 +465,32 @@ dump_omp_clauses (pretty_printer *buffer, tree cla
     }
 }
 
+/* Dump detailed information about PMU load latency events.  */
 
+void
+dump_load_latency_details (pretty_printer *buffer, gcov_pmu_ll_info_t *ll_info)
+{
+  /* Without latency data there is nothing to print.  */
+  if (!ll_info)
+    return;
+
+  /* Overall contribution first.  Each stored value is divided by 100 and
+     printed with two decimals followed by a percent sign.  */
+  pp_string (buffer, "\n[load latency contribution: ");
+  pp_scalar (buffer, "%.2f%%\n", ll_info->self / 100.f);
+
+  /* Then one line per latency bucket, from fastest to slowest.  */
+  pp_string (buffer, "average cycle distribution:\n");
+  pp_scalar (buffer, "%.2f%% <= 10 cycles\n", ll_info->lt_10 / 100.f);
+  pp_scalar (buffer, "%.2f%% <= 32 cycles\n", ll_info->lt_32 / 100.f);
+  pp_scalar (buffer, "%.2f%% <= 64 cycles\n", ll_info->lt_64 / 100.f);
+  pp_scalar (buffer, "%.2f%% <= 256 cycles\n", ll_info->lt_256 / 100.f);
+  pp_scalar (buffer, "%.2f%% <= 1024 cycles\n", ll_info->lt_1024 / 100.f);
+  pp_scalar (buffer, "%.2f%% > 1024 cycles\n", ll_info->gt_1024 / 100.f);
+
+  pp_string (buffer, "] ");
+}
+
 /* Dump location LOC to BUFFER.  */
 
 static void
@@ -485,7 +514,51 @@ dump_location (pretty_printer *buffer, location_t
   pp_string (buffer, "] ");
 }
 
+/* Dump PMU info about LOC to BUFFER.  */
 
+static void
+dump_pmu (pretty_printer *buffer, location_t loc)
+{
+  expanded_location xloc = expand_location (loc);
+  gcov_pmu_ll_info_t *ll_info;
+  gcov_pmu_brm_info_t *brm_info;
+  char *src;
+
+  if (!xloc.file)
+    return;
+
+  /* FILE_PREFIX caches the working directory.  getpwd returns NULL when
+     it cannot be determined, so the lookup is retried until it
+     succeeds.  */
+  if (!file_prefix)
+    file_prefix = getpwd ();
+
+  /* Build the name used as PMU lookup key: relative names are prefixed
+     with the working directory.  Without a usable directory, fall back
+     to the name as given instead of passing NULL to strlen.  */
+  if (!IS_ABSOLUTE_PATH (xloc.file) && file_prefix)
+    src = concat (file_prefix, "/", xloc.file, NULL);
+  else
+    src = xstrdup (xloc.file);
+
+  ll_info = get_coverage_pmu_latency (src, xloc.line);
+  brm_info =
+      get_coverage_pmu_branch_mispredict (src, xloc.line);
+
+  if (ll_info)
+    dump_load_latency_details (buffer, ll_info);
+
+  if (brm_info)
+    {
+      pp_string (buffer, "[branch misprediction contribution: ");
+      pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
+      pp_string (buffer, "] ");
+    }
+
+  XDELETE (src);
+}
+
 /* Dump lexical block BLOCK.  BUFFER, SPC and FLAGS are as in
    dump_generic_node.  */
 
@@ -622,6 +695,9 @@ dump_generic_node (pretty_printer *buffer, tree no
   if ((flags & TDF_LINENO) && EXPR_HAS_LOCATION (node))
     dump_location (buffer, EXPR_LOCATION (node));
 
+  if ((flags & TDF_PMU) && pmu_data_present () && EXPR_HAS_LOCATION (node))
+    dump_pmu (buffer, EXPR_LOCATION (node));
+
   switch (TREE_CODE (node))
     {
     case ERROR_MARK:
Index: gcc/tree-pretty-print.h
===================================================================
--- gcc/tree-pretty-print.h     (revision 190817)
+++ gcc/tree-pretty-print.h     (working copy)
@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
 #define GCC_TREE_PRETTY_PRINT_H
 
 #include "pretty-print.h"
+#include "basic-block.h"
+#include "gcov-io.h"
 
 #define pp_tree_identifier(PP, T)                      \
   pp_base_tree_identifier (pp_base (PP), T)
@@ -45,6 +47,7 @@ extern void print_generic_expr (FILE *, tree, int)
 extern void print_generic_decl (FILE *, tree, int);
 extern void debug_c_tree (tree);
 extern void dump_omp_clauses (pretty_printer *, tree, int, int);
+extern void dump_load_latency_details (pretty_printer *, gcov_pmu_ll_info_t *);
 extern void print_call_name (pretty_printer *, tree, int);
 extern void debug_generic_expr (tree);
 extern void debug_generic_stmt (tree);
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h     (revision 190817)
+++ gcc/tree-pass.h     (working copy)
@@ -84,8 +84,8 @@ enum tree_dump_index
 #define TDF_ENUMERATE_LOCALS (1 << 22) /* Enumerate locals by uid.  */
 #define TDF_CSELIB     (1 << 23)       /* Dump cselib details.  */
 #define TDF_SCEV       (1 << 24)       /* Dump SCEV details.  */
+#define TDF_PMU         (1 << 25)       /* Dump PMU Profiling details  */
 
-
 /* In tree-dump.c */
 
 extern char *get_dump_file_name (int);
Index: gcc/gcov.c
===================================================================
--- gcc/gcov.c  (revision 190817)
+++ gcc/gcov.c  (working copy)
@@ -2350,6 +2350,7 @@ filter_pmu_data_lines (source_t *src)
         }
     }
 
+
   /* Sort the load latency data according to the line numbers because
      we later iterate over sources in line number order. Normally we
      expect the PMU tool to provide sorted data, but a few entries can
@@ -3022,9 +3023,9 @@ static void process_pmu_profile (void)
         {
           gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
           gcov_read_pmu_string_table_entry (st_entry, length);
+          string_table->st_count++;
           /* Verify that we read string table entries in the right order */
           gcc_assert (st_entry->index == string_table->st_count);
-          string_table->st_count++;
           if (string_table->st_count >= string_table->alloc_st_count)
             {
               string_table->alloc_st_count *= 2;
Index: gcc/gcov-io.h
===================================================================
--- gcc/gcov-io.h       (revision 190817)
+++ gcc/gcov-io.h       (working copy)
@@ -702,6 +702,14 @@ typedef struct string_table
   gcov_pmu_tool_header_t *pmu_tool_header;
 } string_table_t;
 
+/* Cumulative PMU data.  */
+struct gcov_pmu_summary
+{
+  ll_infos_t ll_infos;         /* Load latency infos.  */
+  brm_infos_t brm_infos;       /* Branch misprediction infos.  */
+  string_table_t string_table; /* String table entries.  */
+};
+
 /* Structures embedded in coveraged program.  The structures generated
    by write_profile must match these.  */
 
Index: gcc/opts.c
===================================================================
--- gcc/opts.c  (revision 190817)
+++ gcc/opts.c  (working copy)
@@ -1645,6 +1645,11 @@ common_handle_option (struct gcc_options *opts,
        opts->x_flag_gcse_after_reload = value;
       break;
 
+    case OPT_fpmu_profile_use_:
+      opts->x_pmu_profile_data = xstrdup (arg);
+      value = true;
+      break;
+
     case OPT_fprofile_generate_:
       opts->x_profile_data_prefix = xstrdup (arg);
       value = true;
Index: gcc/gimple-pretty-print.c
===================================================================
--- gcc/gimple-pretty-print.c   (revision 190817)
+++ gcc/gimple-pretty-print.c   (working copy)
@@ -26,8 +26,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "tm.h"
 #include "tree.h"
 #include "diagnostic.h"
+#include "basic-block.h"
 #include "tree-pretty-print.h"
 #include "gimple-pretty-print.h"
+#include "gcov-io.h"
+#include "coverage.h"
 #include "hashtab.h"
 #include "tree-flow.h"
 #include "tree-pass.h"
@@ -40,6 +43,7 @@ along with GCC; see the file COPYING3.  If not see
 
 static pretty_printer buffer;
 static bool initialized = false;
+static char *file_prefix = NULL;
 
 #define GIMPLE_NIY do_niy (buffer,gs)
 
@@ -1629,6 +1633,51 @@ dump_gimple_phi (pretty_printer *buffer, gimple ph
          pp_decimal_int (buffer, xloc.column);
          pp_string (buffer, "] ");
        }
+      if ((flags & TDF_PMU) && pmu_data_present ()
+          && (gimple_phi_arg_location (phi, i)))
+        {
+          expanded_location xloc;
+          gcov_pmu_ll_info_t *ll_info;
+          gcov_pmu_brm_info_t *brm_info;
+          char *src;
+
+          xloc = expand_location (gimple_phi_arg_location (phi, i));
+          if (xloc.file)
+            {
+              /* FILE_PREFIX caches the working directory.  getpwd returns
+                 NULL when it cannot be determined, so the lookup is
+                 retried until it succeeds.  */
+              if (!file_prefix)
+                file_prefix = getpwd ();
+
+              /* Build the name used as PMU lookup key: relative names are
+                 prefixed with the working directory.  Without a usable
+                 directory, fall back to the name as given instead of
+                 passing NULL to strlen.  */
+              if (!IS_ABSOLUTE_PATH (xloc.file) && file_prefix)
+                src = concat (file_prefix, "/", xloc.file, NULL);
+              else
+                src = xstrdup (xloc.file);
+
+              ll_info = get_coverage_pmu_latency (src, xloc.line);
+              brm_info =
+                  get_coverage_pmu_branch_mispredict (src, xloc.line);
+
+              if (ll_info)
+                dump_load_latency_details (buffer, ll_info);
+
+              if (brm_info)
+                {
+                  pp_string (buffer, "\n[branch misprediction contribution: ");
+                  pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
+                  pp_string (buffer, "] ");
+                }
+
+              XDELETE (src);
+            }
+
+        }
+
       dump_generic_node (buffer, gimple_phi_arg_def (phi, i), spc, flags,
                         false);
       pp_character (buffer, '(');
@@ -1875,6 +1924,50 @@ dump_gimple_stmt (pretty_printer *buffer, gimple g
       pp_string (buffer, "] ");
     }
 
+  if ((flags & TDF_PMU) && pmu_data_present () && gimple_has_location (gs))
+    {
+      expanded_location xloc;
+      gcov_pmu_ll_info_t *ll_info;
+      gcov_pmu_brm_info_t *brm_info;
+      char *src;
+
+      xloc = expand_location (gimple_location (gs));
+      if (xloc.file)
+        {
+          /* FILE_PREFIX caches the working directory.  getpwd returns
+             NULL when it cannot be determined, so the lookup is retried
+             until it succeeds.  */
+          if (!file_prefix)
+            file_prefix = getpwd ();
+
+          /* Build the name used as PMU lookup key: relative names are
+             prefixed with the working directory.  Without a usable
+             directory, fall back to the name as given instead of
+             passing NULL to strlen.  */
+          if (!IS_ABSOLUTE_PATH (xloc.file) && file_prefix)
+            src = concat (file_prefix, "/", xloc.file, NULL);
+          else
+            src = xstrdup (xloc.file);
+
+          ll_info = get_coverage_pmu_latency (src, xloc.line);
+          brm_info =
+              get_coverage_pmu_branch_mispredict (src, xloc.line);
+
+          if (ll_info)
+            dump_load_latency_details (buffer, ll_info);
+
+          if (brm_info)
+            {
+              pp_string (buffer, "\n[branch misprediction contribution: ");
+              pp_scalar (buffer, "%.2f%%", brm_info->self / 100.f);
+              pp_string (buffer, "] ");
+            }
+
+          XDELETE (src);
+        }
+    }
+
+
   if (flags & TDF_EH)
     {
       int lp_nr = lookup_stmt_eh_lp (gs);
Index: gcc/coverage.c
===================================================================
--- gcc/coverage.c      (revision 190817)
+++ gcc/coverage.c      (working copy)
@@ -96,6 +96,17 @@ typedef struct counts_entry
   struct gcov_ctr_summary summary;
 } counts_entry_t;
 
+typedef struct pmu_entry
+{
+  /* We hash by source file name and line number.  */
+  gcov_unsigned_t lineno;
+  char *filename;
+
+  /* Store the PMU info recorded for that line; either may be NULL.  */
+  gcov_pmu_ll_info_t *ll_info;
+  gcov_pmu_brm_info_t *brm_info;
+} pmu_entry_t;
+
 static GTY(()) struct coverage_data *functions_head = 0;
 static struct coverage_data **functions_tail = &functions_head;
 static unsigned no_coverage = 0;
@@ -129,6 +140,9 @@ static char pmu_profile_filename[] = "pmuprofile";
 /* Hash table of count data.  */
 static htab_t counts_hash = NULL;
 
+/* Hash table of pmu data.  */
+static htab_t pmu_hash = NULL;
+
 /* The names of merge functions for counters.  */
 static const char *const ctr_merge_functions[GCOV_COUNTERS] = GCOV_MERGE_FUNCTIONS;
 static const char *const ctr_names[GCOV_COUNTERS] = GCOV_COUNTER_NAMES;
@@ -159,11 +173,17 @@ static tree gcov_pmu_top_n_address_decl = NULL_TRE
 /* To ensure that the above variables are initialized only once.  */
 static int pmu_profiling_initialized = 0;
 
+struct gcov_pmu_summary pmu_global_summary;
+
 /* Forward declarations.  */
 static hashval_t htab_counts_entry_hash (const void *);
+static hashval_t htab_pmu_entry_hash (const void *);
 static int htab_counts_entry_eq (const void *, const void *);
+static int htab_pmu_entry_eq (const void *, const void *);
 static void htab_counts_entry_del (void *);
+static void htab_pmu_entry_del (void *);
 static void read_counts_file (const char *, unsigned);
+static void read_pmu_file (const char*);
 static tree build_var (tree, tree, int);
 static void build_fn_info_type (tree, unsigned, tree);
 static void build_info_type (tree, tree);
@@ -211,6 +231,14 @@ htab_counts_entry_hash (const void *of)
   return entry->ident * GCOV_COUNTERS + entry->ctr;
 }
 
+static hashval_t
+htab_pmu_entry_hash (const void *of)
+{
+  const pmu_entry_t *const key = (const pmu_entry_t *) of;
+  /* Mix the line number into the hash of the file name.  */
+  return htab_hash_string (key->filename) + key->lineno;
+}
+
 static int
 htab_counts_entry_eq (const void *of1, const void *of2)
 {
@@ -220,6 +248,16 @@ htab_counts_entry_eq (const void *of1, const void
   return entry1->ident == entry2->ident && entry1->ctr == entry2->ctr;
 }
 
+static int
+htab_pmu_entry_eq (const void *of1, const void *of2)
+{
+  const pmu_entry_t *const lhs = (const pmu_entry_t *) of1;
+  const pmu_entry_t *const rhs = (const pmu_entry_t *) of2;
+  /* Entries match when both the line number and the file name agree.  */
+  return (lhs->lineno == rhs->lineno
+          && strcmp (lhs->filename, rhs->filename) == 0);
+}
+
 static void
 htab_counts_entry_del (void *of)
 {
@@ -233,6 +271,17 @@ htab_counts_entry_del (void *of)
     }
 }
 
+static void
+htab_pmu_entry_del (void *of)
+{
+  pmu_entry_t *const entry = (pmu_entry_t *) of;
+  /* The info records are also referenced from pmu_global_summary.  */
+  free (entry->filename);
+  free (entry->ll_info);
+  free (entry->brm_info);
+  free (entry);
+}
+
 /* Returns true if MOD_ID is the id of the last source module.  */
 
 int
@@ -722,6 +771,247 @@ read_counts_file (const char *da_file_name, unsign
   gcov_close ();
 }
 
+/* Read the PMU profile DA_FILE_NAME, if it can be opened, and index its
+   load latency and branch misprediction records by file and line.  */
+
+static void read_pmu_file (const char* da_file_name)
+{
+  gcov_unsigned_t tag;
+  ll_infos_t* ll_infos = &pmu_global_summary.ll_infos;
+  brm_infos_t* brm_infos = &pmu_global_summary.brm_infos;
+  string_table_t* string_table = &pmu_global_summary.string_table;
+  int is_error = 0;
+  unsigned i;
+  pmu_entry_t **slot, *entry, elt;
+  gcov_pmu_ll_info_t *ll_info;
+  gcov_pmu_brm_info_t *brm_info;
+  gcov_pmu_st_entry_t *st_entry;
+
+  /* A file that cannot be opened is not an error; pmu_hash is untouched.  */
+  if (!gcov_open (da_file_name, 1))
+    {
+      if (PARAM_VALUE (PARAM_GCOV_DEBUG))
+        {
+          /* Try to find .gcda file in the current working dir.  */
+          da_file_name = lbasename (da_file_name);
+          if (!gcov_open (da_file_name, 1))
+            return;
+        }
+      else
+        return;
+    }
+
+  if (!gcov_magic (gcov_read_unsigned (), GCOV_DATA_MAGIC))
+    {
+      warning (0, "%qs is not a gcov data file", da_file_name);
+      gcov_close ();
+      return;
+    }
+  else if ((tag = gcov_read_unsigned ()) != GCOV_VERSION)
+    {
+      char v[4], e[4];
+
+      GCOV_UNSIGNED2STRING (v, tag);
+      GCOV_UNSIGNED2STRING (e, GCOV_VERSION);
+
+      warning (0, "%qs is version %q.*s, expected version %q.*s",
+               da_file_name, 4, v, 4, e);
+      gcov_close ();
+      return;
+    }
+
+  /* Read and discard the version. */
+  tag = gcov_read_unsigned ();
+
+  /* Read and discard the stamp.  */
+  tag = gcov_read_unsigned ();
+
+  /* Initialize PMU data fields. */
+  ll_infos->ll_count = 0;
+  ll_infos->alloc_ll_count = 64;
+  ll_infos->ll_array = XCNEWVEC (gcov_pmu_ll_info_t *, ll_infos->alloc_ll_count);
+
+  brm_infos->brm_count = 0;
+  brm_infos->alloc_brm_count = 64;
+  brm_infos->brm_array = XCNEWVEC (gcov_pmu_brm_info_t *,
+                                   brm_infos->alloc_brm_count);
+
+  string_table->st_count = 0;
+  string_table->alloc_st_count = 64;
+  string_table->st_array = XCNEWVEC (gcov_pmu_st_entry_t *,
+                                     string_table->alloc_st_count);
+
+  while ((tag = gcov_read_unsigned ()))
+    {
+      unsigned length = gcov_read_unsigned ();
+      unsigned long base = gcov_position ();
+
+      if (tag == GCOV_TAG_PMU_LOAD_LATENCY_INFO)
+        {
+          gcov_pmu_ll_info_t *ll_info = XCNEW (gcov_pmu_ll_info_t);
+          gcov_read_pmu_load_latency_info (ll_info, length);
+          ll_infos->ll_count++;
+          if (ll_infos->ll_count >= ll_infos->alloc_ll_count)
+            {
+              ll_infos->alloc_ll_count *= 2;
+              ll_infos->ll_array = XRESIZEVEC (gcov_pmu_ll_info_t *,
+                ll_infos->ll_array, ll_infos->alloc_ll_count);
+            }
+          ll_infos->ll_array[ll_infos->ll_count - 1] = ll_info;
+        }
+      else if (tag == GCOV_TAG_PMU_BRANCH_MISPREDICT_INFO)
+        {
+          gcov_pmu_brm_info_t *brm_info = XCNEW (gcov_pmu_brm_info_t);
+          gcov_read_pmu_branch_mispredict_info (brm_info, length);
+          brm_infos->brm_count++;
+          if (brm_infos->brm_count >= brm_infos->alloc_brm_count)
+            {
+              brm_infos->alloc_brm_count *= 2;
+              brm_infos->brm_array = XRESIZEVEC (gcov_pmu_brm_info_t *,
+                brm_infos->brm_array, brm_infos->alloc_brm_count);
+            }
+          brm_infos->brm_array[brm_infos->brm_count - 1] = brm_info;
+        }
+      else if (tag == GCOV_TAG_PMU_TOOL_HEADER)
+        {
+          gcov_pmu_tool_header_t *tool_header = XCNEW (gcov_pmu_tool_header_t);
+          gcov_read_pmu_tool_header (tool_header, length);
+          ll_infos->pmu_tool_header = tool_header;
+          brm_infos->pmu_tool_header = tool_header;
+        }
+      else if (tag == GCOV_TAG_PMU_STRING_TABLE_ENTRY)
+       {
+         gcov_pmu_st_entry_t *st_entry = XCNEW (gcov_pmu_st_entry_t);
+         gcov_read_pmu_string_table_entry(st_entry, length);
+         string_table->st_count++;
+         if (string_table->st_count >= string_table->alloc_st_count)
+           {
+             string_table->alloc_st_count *= 2;
+             string_table->st_array
+               = XRESIZEVEC (gcov_pmu_st_entry_t *, string_table->st_array,
+                             string_table->alloc_st_count);
+           }
+
+         string_table->st_array[string_table->st_count - 1] = st_entry;
+       }
+
+      gcov_sync (base, length);
+      if ((is_error = gcov_is_error ()))
+       {
+         error (is_error < 0 ? "%qs has overflowed" : "%qs is corrupted",
+                da_file_name);
+         /* The file is closed once, right after the loop.  */
+         break;
+       }
+    }
+
+  gcov_close();
+
+  /* Construct hash table with information from gcda file. Entry keys are a
+     unique combination of the filename and the line number for easy access */
+  if (!pmu_hash)
+    pmu_hash = htab_create (10,
+                            htab_pmu_entry_hash, htab_pmu_entry_eq,
+                            htab_pmu_entry_del);
+
+  gcc_assert (pmu_hash != NULL);
+  /* Either kind of record may be missing from the file; the loops below
+     then do nothing instead of aborting the compilation.  */
+
+  for (i = 0; i < ll_infos->ll_count; ++i)
+    {
+      ll_info = ll_infos->ll_array[i];
+      /* Skip records whose string table index is out of range.  */
+      if (ll_info->filetag < 1 || ll_info->filetag > string_table->st_count)
+        continue;
+      st_entry = string_table->st_array[ll_info->filetag - 1];
+      elt.lineno = ll_info->line;
+      elt.filename = xstrdup (st_entry->str);
+      slot = (pmu_entry_t **) htab_find_slot (pmu_hash, &elt, INSERT);
+      entry = *slot;
+      XDELETE (elt.filename);
+      if (!entry)
+        {
+          *slot = entry = XCNEW (pmu_entry_t);
+          entry->lineno = elt.lineno;
+          entry->filename = xstrdup (st_entry->str);
+          entry->ll_info = ll_info;
+        }
+      /* An existing entry is kept: the first record for a line wins.  */
+    }
+
+  for (i = 0; i < brm_infos->brm_count; ++i)
+    {
+      brm_info = brm_infos->brm_array[i];
+      /* Skip records whose string table index is out of range.  */
+      if (brm_info->filetag < 1
+          || brm_info->filetag > string_table->st_count)
+        continue;
+      st_entry = string_table->st_array[brm_info->filetag - 1];
+      elt.lineno = brm_info->line;
+      elt.filename = xstrdup (st_entry->str);
+      slot = (pmu_entry_t **) htab_find_slot (pmu_hash, &elt, INSERT);
+      entry = *slot;
+      XDELETE (elt.filename);
+      if (!entry)
+        {
+          *slot = entry = XCNEW (pmu_entry_t);
+          entry->lineno = elt.lineno;
+          entry->filename = xstrdup(st_entry->str);
+        }
+
+      /* ENTRY is either new or was created above with load latency info
+         only; in both cases attach the branch misprediction info.  */
+      entry->brm_info = brm_info;
+    }
+}
+
+/* Returns the load latency info for line number LINENO of source file
+   FILENAME. */
+
+gcov_pmu_ll_info_t *
+get_coverage_pmu_latency (const char* filename, gcov_unsigned_t lineno)
+{
+  pmu_entry_t key;
+  pmu_entry_t *found = NULL;
+
+  /* With no hash table there is no PMU data to consult.  */
+  if (pmu_hash)
+    {
+      /* The key holds a private copy of FILENAME for the duration of
+         the lookup only.  */
+      key.lineno = lineno;
+      key.filename = xstrdup (filename);
+      found = (pmu_entry_t *) htab_find(pmu_hash, &key);
+      XDELETE (key.filename);
+    }
+
+  return found ? found->ll_info : NULL;
+}
+
+/* Returns the branch misprediction info for line number LINENO of source file
+   FILENAME. */
+
+gcov_pmu_brm_info_t *
+get_coverage_pmu_branch_mispredict (const char* filename, gcov_unsigned_t lineno)
+{
+  pmu_entry_t key;
+  pmu_entry_t *found = NULL;
+
+  /* With no hash table there is no PMU data to consult.  */
+  if (pmu_hash)
+    {
+      /* The key holds a private copy of FILENAME for the duration of
+         the lookup only.  */
+      key.lineno = lineno;
+      key.filename = xstrdup(filename);
+      found = (pmu_entry_t *) htab_find(pmu_hash, &key);
+      XDELETE (key.filename);
+    }
+
+  return found ? found->brm_info : NULL;
+}
+
 /* Returns the coverage data entry for counter type COUNTER of function
    FUNC. EXPECTED is the number of expected counter entries.  */
 
@@ -1125,6 +1415,14 @@ coverage_function_present (unsigned fn_ident)
   return item != NULL;
 }
 
+/* True if there is PMU data present in this compilation */
+
+bool
+pmu_data_present (void)
+{
+  return pmu_hash ? true : false;
+}
+
 /* Update function and program direct-call coverage counts.  */
 
 void
@@ -2271,6 +2569,10 @@ coverage_init (const char *filename, const char* s
   if (flag_branch_probabilities)
     read_counts_file (da_file_name, 0);
 
+  /* Read at most one PMU data file; merging is not supported.  */
+  if (pmu_profile_data != 0)
+    read_pmu_file (pmu_profile_data);
+
   /* Rebuild counts_hash and read the auxiliary GCDA files.  */
   if (flag_profile_use && L_IPO_COMP_MODE)
     {
Index: gcc/coverage.h
===================================================================
--- gcc/coverage.h      (revision 190817)
+++ gcc/coverage.h      (working copy)
@@ -45,7 +45,12 @@ extern int coverage_counter_alloc (unsigned /*coun
 extern tree tree_coverage_counter_ref (unsigned /*counter*/, unsigned/*num*/);
 /* Use a counter address from the most recent allocation.  */
 extern tree tree_coverage_counter_addr (unsigned /*counter*/, unsigned/*num*/);
-
+/* Get the load latency info for the current file and line */
+extern gcov_pmu_ll_info_t *get_coverage_pmu_latency (const char*,
+                                                     gcov_unsigned_t);
+/* Get the branch misprediction info for the given file and line.  */
+extern gcov_pmu_brm_info_t *
+get_coverage_pmu_branch_mispredict (const char*, gcov_unsigned_t);
 /* Get all the counters for the current function.  */
 extern gcov_type *get_coverage_counts (unsigned /*counter*/,
                                       unsigned /*expected*/,
@@ -70,6 +75,9 @@ extern void coverage_dc_end_function (void);
    is present in the coverage internal data structures.  */
 extern bool coverage_function_present (unsigned fn_ident);
 
+/* True if there is PMU data present in this compilation. */
+extern bool pmu_data_present (void);
+
 extern tree get_gcov_type (void);
 extern tree get_gcov_unsigned_t (void);
 
Index: gcc/common.opt
===================================================================
--- gcc/common.opt      (revision 190817)
+++ gcc/common.opt      (working copy)
@@ -1684,8 +1684,8 @@ Common Joined RejectNegative Var(flag_pmu_profile_
 -fpmu-profile-generate=[load-latency]  Generate pmu profile for cache misses. Currently only pfmon based load latency profiling is supported on Intel/PEBS and AMD/IBS platforms.
 
 fpmu-profile-use=
-Common Joined RejectNegative Var(flag_pmu_profile_use)
--fpmu-profile-use=[load-latency]  Use pmu profile data while optimizing.  Currently only perfmon based load latency profiling is supported on Intel/PEBS and AMD/IBS platforms.
+Common Joined RejectNegative Var(pmu_profile_data)
+-fpmu-profile-use=[pmuprofile.gcda]  The pmu profile data file to use for pmu feedback.
 
 fpredictive-commoning
 Common Report Var(flag_predictive_commoning) Optimization

--
This patch is available for review at http://codereview.appspot.com/6489092

Reply via email to