On Fri, 2019-11-15 at 20:23 -0500, David Malcolm wrote: > This patch adds template classes for directed graphs, their nodes > and edges, and for finding the shortest path through such a graph. > > gcc/ChangeLog: > * analyzer/digraph.cc: New file. > * analyzer/digraph.h: New file. > * analyzer/shortest-paths.h: New file. Nothing too worrisome here. I'm kindof surprised if haven't needed some of these capabilities before now. Thoughts on moving them outside of analyzer into a more generic location?
jeff > --- > gcc/analyzer/digraph.cc | 189 ++++++++++++++++++++++++++++++++ > gcc/analyzer/digraph.h | 248 > ++++++++++++++++++++++++++++++++++++++++++ > gcc/analyzer/shortest-paths.h | 147 +++++++++++++++++++++++++ > 3 files changed, 584 insertions(+) > create mode 100644 gcc/analyzer/digraph.cc > create mode 100644 gcc/analyzer/digraph.h > create mode 100644 gcc/analyzer/shortest-paths.h > > diff --git a/gcc/analyzer/digraph.cc b/gcc/analyzer/digraph.cc > new file mode 100644 > index 0000000..c1fa46e > --- /dev/null > +++ b/gcc/analyzer/digraph.cc > @@ -0,0 +1,189 @@ > +/* Template classes for directed graphs. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalc...@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>;. */ > + > +#include "config.h" > +#include "gcc-plugin.h" > +#include "system.h" > +#include "coretypes.h" > +#include "diagnostic.h" > +#include "analyzer/graphviz.h" > +#include "analyzer/digraph.h" > +#include "analyzer/shortest-paths.h" > +#include "selftest.h" > + > +#if CHECKING_P > + > +namespace selftest { > + > +/* A family of digraph classes for writing selftests. */ > + > +struct test_node; > +struct test_edge; > +struct test_graph; > +struct test_dump_args_t {}; > +struct test_cluster; > + > +struct test_graph_traits > +{ > + typedef test_node node_t; > + typedef test_edge edge_t; > + typedef test_graph graph_t; > + typedef test_dump_args_t dump_args_t; > + typedef test_cluster cluster_t; > +}; > + > +struct test_node : public dnode<test_graph_traits> > +{ > + test_node (const char *name, int index) : m_name (name), m_index > (index) {} > + void dump_dot (graphviz_out *, const dump_args_t &) const OVERRIDE > + { > + } > + > + const char *m_name; > + int m_index; > +}; > + > +struct test_edge : public dedge<test_graph_traits> > +{ > + test_edge (node_t *src, node_t *dest) > + : dedge (src, dest) > + {} > + > + void dump_dot (graphviz_out *gv, const dump_args_t &) const > OVERRIDE > + { > + gv->println ("%s -> %s;", m_src->m_name, m_dest->m_name); > + } > +}; > + > +struct test_graph : public digraph<test_graph_traits> > +{ > + test_node *add_test_node (const char *name) > + { > + test_node *result = new test_node (name, m_nodes.length ()); > + add_node (result); > + return result; > + } > + > + test_edge *add_test_edge (test_node *src, test_node *dst) > + { > + test_edge *result = new test_edge (src, dst); > + add_edge (result); > + return result; > + } > +}; > + > +struct test_cluster : public cluster<test_graph_traits> > +{ > +}; > + > +struct test_path > +{ > + auto_vec<const test_edge *> m_edges; > +}; > + > +/* Smoketest of digraph dumping. */ > + > +static void > +test_dump_to_dot () > +{ > + test_graph g; > + test_node *a = g.add_test_node ("a"); > + test_node *b = g.add_test_node ("b"); > + g.add_test_edge (a, b); > + > + pretty_printer pp; > + pp.buffer->stream = NULL; > + test_dump_args_t dump_args; > + g.dump_dot_to_pp (&pp, NULL, dump_args); > + > + ASSERT_STR_CONTAINS (pp_formatted_text (&pp), > + "a -> b;\n"); > +} > + > +/* Test shortest paths from A in this digraph, > + where edges run top-to-bottom if not otherwise labeled: > + > + A > + / \ > + B C-->D > + | | > + E | > + \ / > + F. */ > + > +static void > +test_shortest_paths () > +{ > + test_graph g; > + test_node *a = g.add_test_node ("a"); > + test_node *b = g.add_test_node ("b"); > + test_node *c = g.add_test_node ("d"); > + test_node *d = g.add_test_node ("d"); > + test_node *e = g.add_test_node ("e"); > + test_node *f = g.add_test_node ("f"); > + > + test_edge *ab = g.add_test_edge (a, b); > + test_edge *ac = g.add_test_edge (a, c); > + test_edge *cd = g.add_test_edge (c, d); > + test_edge *be = g.add_test_edge (b, e); > + g.add_test_edge (e, f); > + test_edge *cf = g.add_test_edge (c, f); > + > + shortest_paths<test_graph_traits, test_path> sp (g, a); > + > + test_path path_to_a = sp.get_shortest_path (a); > + ASSERT_EQ (path_to_a.m_edges.length (), 0); > + > + test_path path_to_b = sp.get_shortest_path (b); > + ASSERT_EQ (path_to_b.m_edges.length (), 1); > + ASSERT_EQ (path_to_b.m_edges[0], ab); > + > + test_path path_to_c = sp.get_shortest_path (c); > + ASSERT_EQ (path_to_c.m_edges.length (), 1); > + ASSERT_EQ (path_to_c.m_edges[0], ac); > + > + test_path path_to_d = sp.get_shortest_path (d); > + ASSERT_EQ (path_to_d.m_edges.length (), 2); > + ASSERT_EQ (path_to_d.m_edges[0], ac); > + ASSERT_EQ (path_to_d.m_edges[1], cd); > + > + test_path path_to_e = sp.get_shortest_path (e); > + ASSERT_EQ (path_to_e.m_edges.length (), 2); > + ASSERT_EQ (path_to_e.m_edges[0], ab); > + ASSERT_EQ (path_to_e.m_edges[1], be); > + > + test_path path_to_f = sp.get_shortest_path (f); > + ASSERT_EQ (path_to_f.m_edges.length (), 2); > + ASSERT_EQ (path_to_f.m_edges[0], ac); > + ASSERT_EQ (path_to_f.m_edges[1], cf); > +} > + > +/* Run all of the selftests within this file. */ > + > +void > +analyzer_digraph_cc_tests () > +{ > + test_dump_to_dot (); > + test_shortest_paths (); > +} > + > +} // namespace selftest > + > +#endif /* #if CHECKING_P */ > diff --git a/gcc/analyzer/digraph.h b/gcc/analyzer/digraph.h > new file mode 100644 > index 0000000..5c6a496 > --- /dev/null > +++ b/gcc/analyzer/digraph.h > @@ -0,0 +1,248 @@ > +/* Template classes for directed graphs. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalc...@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>;. */ > + > +#ifndef GCC_ANALYZER_DIGRAPH_H > +#define GCC_ANALYZER_DIGRAPH_H > + > +#include "diagnostic.h" > +#include "tree-diagnostic.h" /* for default_tree_printer. */ > +#include "analyzer/graphviz.h" > + > +/* Templates for a family of classes: digraph, node, edge, and > cluster. > + This assumes a traits type with the following typedefs: > + node_t: the node class > + edge_t: the edge class > + dump_args_t: additional args for dot-dumps > + cluster_t: the cluster class (for use when generating .dot > files). > + > + Using a template allows for typesafe nodes and edges: a node's > + predecessor and successor edges can be of a node-specific edge > + subclass, without needing casting. */ > + > +/* Abstract base class for a node in a directed graph. */ > + > +template <typename GraphTraits> > +class dnode > +{ > + public: > + typedef typename GraphTraits::edge_t edge_t; > + typedef typename GraphTraits::dump_args_t dump_args_t; > + > + virtual ~dnode () {} > + virtual void dump_dot (graphviz_out *gv, const dump_args_t &args) > const = 0; > + > + auto_vec<edge_t *> m_preds; > + auto_vec<edge_t *> m_succs; > +}; > + > +/* Abstract base class for an edge in a directed graph. */ > + > +template <typename GraphTraits> > +class dedge > +{ > + public: > + typedef typename GraphTraits::node_t node_t; > + typedef typename GraphTraits::dump_args_t dump_args_t; > + > + dedge (node_t *src, node_t *dest) > + : m_src (src), m_dest (dest) {} > + > + virtual ~dedge () {} > + > + virtual void dump_dot (graphviz_out *gv, const dump_args_t &args) > const = 0; > + > + node_t *const m_src; > + node_t *const m_dest; > +}; > + > +/* Abstract base class for a directed graph. > + This class maintains the vectors of nodes and edges, > + and owns the nodes and edges. */ > + > +template <typename GraphTraits> > +class digraph > +{ > + public: > + typedef typename GraphTraits::node_t node_t; > + typedef typename GraphTraits::edge_t edge_t; > + typedef typename GraphTraits::dump_args_t dump_args_t; > + typedef typename GraphTraits::cluster_t cluster_t; > + > + digraph () {} > + virtual ~digraph () {} > + > + void dump_dot_to_pp (pretty_printer *pp, > + cluster_t *root_cluster, > + const dump_args_t &args) const; > + void dump_dot_to_file (FILE *fp, > + cluster_t *root_cluster, > + const dump_args_t &args) const; > + void dump_dot (const char *path, > + cluster_t *root_cluster, > + const dump_args_t &args) const; > + > + void add_node (node_t *node); > + void add_edge (edge_t *edge); > + > + auto_delete_vec<node_t> m_nodes; > + auto_delete_vec<edge_t> m_edges; > +}; > + > +/* Abstract base class for splitting dnodes into hierarchical > clusters > + in the generated .dot file. > + > + See "Subgraphs and Clusters" within > + https://www.graphviz.org/doc/info/lang.html > + and e.g. > + https://graphviz.gitlab.io/_pages/Gallery/directed/cluster.html > + > + If a root_cluster is passed to dump_dot*, then all nodes will be > + added to it at the start of dumping, via calls to add_node. > + > + The root cluster can organize the nodes into a hierarchy of > + child clusters. > + > + After all nodes are added to the root cluster, dump_dot will then > + be called on it (and not on the nodes themselves). */ > + > +template <typename GraphTraits> > +class cluster > +{ > + public: > + typedef typename GraphTraits::node_t node_t; > + typedef typename GraphTraits::dump_args_t dump_args_t; > + > + virtual ~cluster () {} > + > + virtual void add_node (node_t *node) = 0; > + > + /* Recursively dump the cluster, all nodes, and child > clusters. */ > + virtual void dump_dot (graphviz_out *gv, const dump_args_t &) > const = 0; > +}; > + > +//////////////////////////////////////////////////////////////////// > //////// > + > +/* Write .dot information for this graph to PP, passing ARGS to the > nodes > + and edges. > + If ROOT_CLUSTER is non-NULL, use it to organize the nodes into > clusters. */ > + > +template <typename GraphTraits> > +inline void > +digraph<GraphTraits>::dump_dot_to_pp (pretty_printer *pp, > + cluster_t *root_cluster, > + const dump_args_t &args) const > +{ > + graphviz_out gv (pp); > + > + pp_string (pp, "digraph \""); > + pp_string (pp, "base"); > + pp_string (pp, "\" {\n"); > + > + gv.indent (); > + > + pp_string (pp, "overlap=false;\n"); > + pp_string (pp, "compound=true;\n"); > + > + /* If using clustering, emit all nodes via clusters. */ > + if (root_cluster) > + { > + int i; > + node_t *n; > + FOR_EACH_VEC_ELT (m_nodes, i, n) > + root_cluster->add_node (n); > + root_cluster->dump_dot (&gv, args); > + } > + else > + { > + /* Otherwise, display all nodes at top level. */ > + int i; > + node_t *n; > + FOR_EACH_VEC_ELT (m_nodes, i, n) > + n->dump_dot (&gv, args); > + } > + > + /* Edges. */ > + int i; > + edge_t *e; > + FOR_EACH_VEC_ELT (m_edges, i, e) > + e->dump_dot (&gv, args); > + > + /* Terminate "digraph" */ > + gv.outdent (); > + pp_string (pp, "}"); > + pp_newline (pp); > +} > + > +/* Write .dot information for this graph to FP, passing ARGS to the > nodes > + and edges. > + If ROOT_CLUSTER is non-NULL, use it to organize the nodes into > clusters. */ > + > +template <typename GraphTraits> > +inline void > +digraph<GraphTraits>::dump_dot_to_file (FILE *fp, > + cluster_t *root_cluster, > + const dump_args_t &args) const > +{ > + pretty_printer pp; > + // TODO: > + pp_format_decoder (&pp) = default_tree_printer; > + pp.buffer->stream = fp; > + dump_dot_to_pp (&pp, root_cluster, args); > + pp_flush (&pp); > +} > + > +/* Write .dot information for this graph to a file at PATH, passing > ARGS > + to the nodes and edges. > + If ROOT_CLUSTER is non-NULL, use it to organize the nodes into > clusters. */ > + > +template <typename GraphTraits> > +inline void > +digraph<GraphTraits>::dump_dot (const char *path, > + cluster_t *root_cluster, > + const dump_args_t &args) const > +{ > + FILE *fp = fopen (path, "w"); > + dump_dot_to_file (fp, root_cluster, args); > + fclose (fp); > +} > + > +/* Add NODE to this DIGRAPH, taking ownership. */ > + > +template <typename GraphTraits> > +inline void > +digraph<GraphTraits>::add_node (node_t *node) > +{ > + m_nodes.safe_push (node); > +} > + > +/* Add EDGE to this digraph, and to the preds/succs of its > endpoints. > + Take ownership of EDGE. */ > + > +template <typename GraphTraits> > +inline void > +digraph<GraphTraits>::add_edge (edge_t *edge) > +{ > + m_edges.safe_push (edge); > + edge->m_dest->m_preds.safe_push (edge); > + edge->m_src->m_succs.safe_push (edge); > + > +} > + > +#endif /* GCC_ANALYZER_DIGRAPH_H */ > diff --git a/gcc/analyzer/shortest-paths.h b/gcc/analyzer/shortest- > paths.h > new file mode 100644 > index 0000000..4738f18 > --- /dev/null > +++ b/gcc/analyzer/shortest-paths.h > @@ -0,0 +1,147 @@ > +/* Template class for Dijkstra's algorithm on directed graphs. > + Copyright (C) 2019 Free Software Foundation, Inc. > + Contributed by David Malcolm <dmalc...@redhat.com>. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it > +under the terms of the GNU General Public License as published by > +the Free Software Foundation; either version 3, or (at your option) > +any later version. > + > +GCC is distributed in the hope that it will be useful, but > +WITHOUT ANY WARRANTY; without even the implied warranty of > +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +General Public License for more details. > + > +You should have received a copy of the GNU General Public License > +along with GCC; see the file COPYING3. If not see > +<http://www.gnu.org/licenses/>;. */ > + > +#ifndef GCC_ANALYZER_SHORTEST_PATHS_H > +#define GCC_ANALYZER_SHORTEST_PATHS_H > + > +#include "timevar.h" > + > +/* A record of the shortest path to each node in an graph > + from the origin node. > + The constructor runs Dijkstra's algorithm, and the results are > + stored in this class. */ > + > +template <typename GraphTraits, typename Path_t> > +class shortest_paths > +{ > +public: > + typedef typename GraphTraits::graph_t graph_t; > + typedef typename GraphTraits::node_t node_t; > + typedef typename GraphTraits::edge_t edge_t; > + typedef Path_t path_t; > + > + shortest_paths (const graph_t &graph, const node_t *origin); > + > + path_t get_shortest_path (const node_t *to) const; > + > +private: > + const graph_t &m_graph; > + > + /* For each node (by index), the minimal distance to that node > from the > + origin. */ > + auto_vec<int> m_dist; > + > + /* For each exploded_node (by index), the previous edge in the > shortest > + path from the origin. */ > + auto_vec<const edge_t *> m_prev; > +}; > + > +//////////////////////////////////////////////////////////////////// > //////// > + > +/* shortest_paths's constructor. > + > + Use Dijkstra's algorithm relative to ORIGIN to populate m_dist > and > + m_prev with enough information to be able to generate Path_t > instances > + to give the shortest path to any node in GRAPH from ORIGIN. */ > + > +template <typename GraphTraits, typename Path_t> > +inline > +shortest_paths<GraphTraits, Path_t>::shortest_paths (const graph_t > &graph, > + const node_t > *origin) > +: m_graph (graph), > + m_dist (graph.m_nodes.length ()), > + m_prev (graph.m_nodes.length ()) > +{ > + auto_client_timevar tv ("shortest_paths"); > + > + auto_vec<int> queue (graph.m_nodes.length ()); > + > + for (unsigned i = 0; i < graph.m_nodes.length (); i++) > + { > + m_dist.quick_push (INT_MAX); > + m_prev.quick_push (NULL); > + queue.quick_push (i); > + } > + m_dist[origin->m_index] = 0; > + > + while (queue.length () > 0) > + { > + /* Get minimal distance in queue. > + FIXME: this is O(N^2); replace with a priority queue. */ > + int idx_with_min_dist = -1; > + int idx_in_queue_with_min_dist = -1; > + int min_dist = INT_MAX; > + for (unsigned i = 0; i < queue.length (); i++) > + { > + int idx = queue[i]; > + if (m_dist[queue[i]] < min_dist) > + { > + min_dist = m_dist[idx]; > + idx_with_min_dist = idx; > + idx_in_queue_with_min_dist = i; > + } > + } > + gcc_assert (idx_with_min_dist != -1); > + gcc_assert (idx_in_queue_with_min_dist != -1); > + > + // FIXME: this is confusing: there are two indices here > + > + queue.unordered_remove (idx_in_queue_with_min_dist); > + > + node_t *n > + = static_cast <node_t *> (m_graph.m_nodes[idx_with_min_dist]); > + > + int i; > + edge_t *succ; > + FOR_EACH_VEC_ELT (n->m_succs, i, succ) > + { > + // TODO: only for dest still in queue > + node_t *dest = succ->m_dest; > + int alt = m_dist[n->m_index] + 1; > + if (alt < m_dist[dest->m_index]) > + { > + m_dist[dest->m_index] = alt; > + m_prev[dest->m_index] = succ; > + } > + } > + } > +} > + > +/* Generate an Path_t instance giving the shortest path to the node > + TO from the origin node. */ > + > +template <typename GraphTraits, typename Path_t> > +inline Path_t > +shortest_paths<GraphTraits, Path_t>::get_shortest_path (const node_t > *to) const > +{ > + Path_t result; > + > + while (m_prev[to->m_index]) > + { > + result.m_edges.safe_push (m_prev[to->m_index]); > + to = m_prev[to->m_index]->m_src; > + } > + > + result.m_edges.reverse (); > + > + return result; > +} > + > +#endif /* GCC_ANALYZER_SHORTEST_PATHS_H */