johannes updated this revision to Diff 103147.
johannes added a comment.
- style fixes
- do not compare nodes from system headers
https://reviews.llvm.org/D34329
Files:
include/clang/Tooling/ASTDiff/ASTDiff.h
lib/Tooling/ASTDiff/ASTDiff.cpp
lib/Tooling/ASTDiff/CMakeLists.txt
lib/Tooling/CMakeLists.txt
tools/CMakeLists.txt
tools/clang-diff/CMakeLists.txt
tools/clang-diff/ClangDiff.cpp
Index: tools/clang-diff/ClangDiff.cpp
===================================================================
--- /dev/null
+++ tools/clang-diff/ClangDiff.cpp
@@ -0,0 +1,94 @@
+//===- ClangDiff.cpp - compare source files by AST nodes ------*- C++ -*- -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/ASTDiff/ASTDiff.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace tooling;
+
+static cl::OptionCategory ClangDiffCategory("clang-diff options");
+
+static cl::opt<bool>
+ DumpAST("ast-dump",
+ cl::desc("Print the internal representation of the AST as JSON."),
+ cl::init(false), cl::cat(ClangDiffCategory));
+
+static cl::opt<std::string> SourcePath(cl::Positional, cl::desc("<source>"),
+ cl::Required,
+ cl::cat(ClangDiffCategory));
+
+static cl::opt<std::string> DestinationPath(cl::Positional,
+ cl::desc("<destination>"),
+ cl::Optional,
+ cl::cat(ClangDiffCategory));
+
+static std::unique_ptr<ASTUnit> getAST(const StringRef Filename) {
+ std::string ErrorMessage;
+ std::unique_ptr<CompilationDatabase> Compilations =
+ CompilationDatabase::autoDetectFromSource(Filename, ErrorMessage);
+ if (!Compilations) {
+ errs() << "Error while trying to load a compilation database, running "
+ "without flags.\n"
+ << ErrorMessage;
+ Compilations.reset(
+ new FixedCompilationDatabase(".", std::vector<std::string>()));
+ }
+ std::array<std::string, 1> Files = {{Filename}};
+ ClangTool Tool(*Compilations, Files);
+ std::vector<std::unique_ptr<ASTUnit>> ASTs;
+ Tool.buildASTs(ASTs);
+ if (ASTs.size() != Files.size()) {
+ return nullptr;
+ }
+ return std::move(ASTs[0]);
+}
+
+int main(int argc, const char **argv) {
+ cl::HideUnrelatedOptions(ClangDiffCategory);
+ if (!cl::ParseCommandLineOptions(argc, argv)) {
+ cl::PrintOptionValues();
+ return 1;
+ }
+
+ if (DumpAST) {
+ if (!DestinationPath.empty()) {
+ errs() << "Error: Please specify exactly one filename.\n";
+ return 1;
+ }
+ std::unique_ptr<ASTUnit> AST = getAST(SourcePath);
+ if (!AST) {
+ return 1;
+ }
+ clang::diff::TreeRoot Tree(*AST);
+ Tree.dumpAsJson();
+ return 0;
+ }
+
+ if (DestinationPath.empty()) {
+ errs() << "Error: Exactly two paths are required.\n";
+ return 1;
+ }
+
+ std::unique_ptr<ASTUnit> Src = getAST(SourcePath);
+ std::unique_ptr<ASTUnit> Dst = getAST(DestinationPath);
+ if (!Src || !Dst) {
+ return 1;
+ }
+
+ clang::diff::runDiff(*Src, *Dst);
+
+ return 0;
+}
Index: tools/clang-diff/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/clang-diff/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+add_clang_executable(clang-diff
+ ClangDiff.cpp
+ )
+
+target_link_libraries(clang-diff
+ clangAST
+ clangBasic
+ clangFrontend
+ clangLex
+ clangTooling
+ clangToolingASTDiff
+ )
Index: tools/CMakeLists.txt
===================================================================
--- tools/CMakeLists.txt
+++ tools/CMakeLists.txt
@@ -2,6 +2,7 @@
add_clang_subdirectory(diagtool)
add_clang_subdirectory(driver)
+add_clang_subdirectory(clang-diff)
add_clang_subdirectory(clang-format)
add_clang_subdirectory(clang-format-vs)
add_clang_subdirectory(clang-fuzzer)
Index: lib/Tooling/CMakeLists.txt
===================================================================
--- lib/Tooling/CMakeLists.txt
+++ lib/Tooling/CMakeLists.txt
@@ -5,6 +5,7 @@
add_subdirectory(Core)
add_subdirectory(Refactoring)
+add_subdirectory(ASTDiff)
add_clang_library(clangTooling
ArgumentsAdjusters.cpp
Index: lib/Tooling/ASTDiff/CMakeLists.txt
===================================================================
--- /dev/null
+++ lib/Tooling/ASTDiff/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(LLVM_LINK_COMPONENTS
+ # Option
+ Support
+ )
+
+add_clang_library(clangToolingASTDiff
+ ASTDiff.cpp
+ LINK_LIBS
+ clangBasic
+ clangAST
+ # clangToolingCore
+ )
Index: lib/Tooling/ASTDiff/ASTDiff.cpp
===================================================================
--- /dev/null
+++ lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -0,0 +1,836 @@
+//===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/ASTDiff/ASTDiff.h"
+
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#include <limits>
+#include <memory>
+#include <unordered_set>
+
+using namespace llvm;
+using namespace clang;
+
+namespace clang {
+namespace diff {
+
+static bool isRelevantNode(Decl *D) { return D != nullptr; }
+static bool isRelevantNode(Stmt *S) { return S != nullptr; }
+
+namespace {
+// Count the number of non-null nodes
+struct NodeCountVisitor : public RecursiveASTVisitor<NodeCountVisitor> {
+ int Count = 0;
+ bool TraverseDecl(Decl *D) {
+ if (isRelevantNode(D)) {
+ ++Count;
+ RecursiveASTVisitor<NodeCountVisitor>::TraverseDecl(D);
+ }
+ return true;
+ }
+ bool TraverseStmt(Stmt *S) {
+ if (isRelevantNode(S)) {
+ ++Count;
+ RecursiveASTVisitor<NodeCountVisitor>::TraverseStmt(S);
+ }
+ return true;
+ }
+ bool TraverseType(QualType T) { return true; }
+};
+} // namespace
+
+namespace {
+// Sets Height, Parent and Children for each node.
+struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> {
+ int Id = 0, Depth = 0;
+ NodeId Parent = NoNodeId;
+ TreeRoot &Root;
+
+ PreorderVisitor(TreeRoot &Root) : Root(Root) {}
+
+ template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) {
+ assert(ASTNode != nullptr);
+ NodeId MyId = Id;
+ Node &N = Root.getMutableNode(MyId);
+ N.Parent = Parent;
+ N.Depth = Depth;
+ N.ASTNode = ast_type_traits::DynTypedNode::create(*ASTNode);
+ assert(!N.ASTNode.getNodeKind().isNone());
+ if (Parent != NoNodeId) {
+ Node &P = Root.getMutableNode(Parent);
+ P.Children.push_back(MyId);
+ }
+ Parent = MyId;
+ ++Id;
+ ++Depth;
+ return {MyId, Root.getNode(MyId).Parent};
+ }
+ void PostTraverse(std::tuple<NodeId, NodeId> State) {
+ NodeId MyId, PreviousParent;
+ std::tie(MyId, PreviousParent) = State;
+ Parent = PreviousParent;
+ --Depth;
+ if (MyId != NoNodeId) {
+ Node &N = Root.getMutableNode(MyId);
+ if (N.isLeaf()) {
+ Root.Leaves.push_back(MyId);
+ }
+ N.Height = 1;
+ for (NodeId Child : N.Children) {
+ N.Height = std::max(N.Height, 1 + Root.getNode(Child).Height);
+ }
+ }
+ }
+ bool TraverseDecl(Decl *D) {
+ if (isRelevantNode(D)) {
+ auto SavedState = PreTraverse(D);
+ RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D);
+ PostTraverse(SavedState);
+ }
+ return true;
+ }
+ bool TraverseStmt(Stmt *S) {
+ if (isRelevantNode(S)) {
+ auto SavedState = PreTraverse(S);
+ RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S);
+ PostTraverse(SavedState);
+ }
+ return true;
+ }
+ bool TraverseType(QualType T) { return true; }
+};
+} // namespace
+
+TreeRoot::TreeRoot(ASTUnit &AST) : AST(AST) {
+ auto *TUD = AST.getASTContext().getTranslationUnitDecl();
+ // Run the above visitors to initialize the tree.
+ NodeCountVisitor NodeCounter;
+ NodeCounter.TraverseDecl(TUD);
+ setSize(NodeCounter.Count);
+ PreorderVisitor PreorderWalker(*this);
+ PreorderWalker.TraverseDecl(TUD);
+ setLeftMostDescendant();
+ int PostorderId = 0;
+ PostorderIds.resize(Nodes.size());
+ std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) {
+ for (NodeId Child : getNode(Id).Children) {
+ PostorderTraverse(Child);
+ }
+ PostorderIds[Id] = PostorderId;
+ ++PostorderId;
+ };
+ PostorderTraverse(root());
+}
+
+int TreeRoot::getSubtreePostorder(std::vector<NodeId> &Ids, NodeId Root) const {
+ int Leaves = 0;
+ std::function<void(NodeId)> Traverse = [&](NodeId Id) {
+ const Node &N = getNode(Id);
+ for (NodeId Child : N.Children) {
+ Traverse(Child);
+ }
+ if (N.isLeaf()) {
+ ++Leaves;
+ }
+ Ids.push_back(Id);
+ };
+ Traverse(Root);
+ return Leaves;
+}
+std::vector<NodeId> TreeRoot::getSubtreeBfs(NodeId Id) const {
+ std::vector<NodeId> Ids;
+ size_t Expanded = 0;
+ Ids.push_back(Id);
+ while (Expanded < Ids.size()) {
+ for (NodeId Child : getNode(Ids[Expanded++]).Children) {
+ Ids.push_back(Child);
+ }
+ }
+ return Ids;
+}
+
+int TreeRoot::getNumberOfDescendants(NodeId Id) const {
+ return PostorderIds[Id] - getNode(Id).LeftMostDescendant;
+}
+
+std::string TreeRoot::label(NodeId Id) const {
+ assert(isValidNodeId(Id));
+ const Node &N = getNode(Id);
+ const ast_type_traits::DynTypedNode &DTN = N.ASTNode;
+ if (auto *X = DTN.get<FieldDecl>()) {
+ return X->getName();
+ }
+ if (auto *X = DTN.get<BinaryOperator>()) {
+ return X->getOpcodeStr();
+ }
+ if (auto *X = DTN.get<IntegerLiteral>()) {
+ auto Tmp = X->getValue();
+ SmallString<256> Str;
+ Tmp.toString(Str, /*Radix=*/10, /*Signed=*/false);
+ return Str.str();
+ }
+ if (auto *X = DTN.get<StringLiteral>()) {
+ return X->getString();
+ }
+ if (auto *X = DTN.get<CompoundStmt>()) {
+ return "";
+ }
+ if (auto *X = DTN.get<DeclStmt>()) {
+ return "";
+ }
+ if (auto *X = DTN.get<DeclRefExpr>()) {
+ return "";
+ }
+ if (auto *X = DTN.get<Decl>()) {
+ return "";
+ }
+ if (auto *X = DTN.get<Stmt>()) {
+ return "";
+ }
+ if (auto *X = DTN.get<QualType>()) {
+ return "";
+ }
+ llvm_unreachable("Fatal: unhandled AST node\n");
+}
+void TreeRoot::dumpTree(NodeId Id) const {
+ if (Id == NoNodeId) {
+ Id = root();
+ }
+ const Node &N = getNode(Id);
+ for (int I = 0; I < N.Depth; ++I) {
+ outs() << " ";
+ }
+ outs() << showNode(Id) << "\n";
+ for (NodeId Child : N.Children) {
+ dumpTree(Child);
+ }
+}
+std::string TreeRoot::showNode(NodeId Id) const {
+ if (Id == NoNodeId) {
+ return "None";
+ }
+ std::string Label;
+ if (label(Id) != "") {
+ Label = formatv(": {0}", label(Id));
+ }
+ return formatv("{0}{1}({2})", getNode(Id).getTypeLabel(), Label,
+ PostorderIds[Id]);
+}
+void TreeRoot::dumpNodeAsJson(NodeId Id) const {
+ auto N = getNode(Id);
+ std::string Label;
+ if (label(Id) != "") {
+ Label = formatv(R"(,"label":"{0}")", label(Id));
+ }
+ outs() << formatv(R"({"typeLabel":"{0}"{1},"children":[)", N.getTypeLabel(),
+ Label);
+ if (N.Children.size() > 0) {
+ dumpNodeAsJson(N.Children[0]);
+ for (size_t I = 1; I < N.Children.size(); ++I) {
+ outs() << ",";
+ dumpNodeAsJson(N.Children[I]);
+ }
+ }
+ outs() << "]}";
+}
+void TreeRoot::dumpAsJson() const {
+ outs() << R"({"root":)";
+ dumpNodeAsJson(root());
+ outs() << "}\n";
+}
+
+void TreeRoot::setLeftMostDescendant() {
+ for (NodeId Id : Leaves) {
+ Node &N = getMutableNode(Id);
+ N.LeftMostDescendant = Id;
+ NodeId Cur = Id;
+ while (N.Parent != NoNodeId && getNode(N.Parent).Children.front() == Cur) {
+ Cur = N.Parent;
+ getMutableNode(Cur).LeftMostDescendant = Id;
+ }
+ }
+}
+
+Mappings::Mappings(const TreeRoot &T1, const TreeRoot &T2) : T1(T1), T2(T2) {
+ int Size = T1.getSize() + T2.getSize();
+ SrcToDst = new NodeId[Size];
+ DstToSrc = new NodeId[Size];
+ // set everything to NoNodeId == -1
+ memset(SrcToDst, NoNodeId, Size * sizeof(*SrcToDst));
+ memset(DstToSrc, NoNodeId, Size * sizeof(*DstToSrc));
+}
+Mappings::~Mappings() {
+ delete[] SrcToDst;
+ delete[] DstToSrc;
+}
+void Mappings::link(NodeId Src, NodeId Dst) {
+ SrcToDst[Src] = Dst;
+ DstToSrc[Dst] = Src;
+}
+
+void Mappings::dumpMapping() const {
+ for (NodeId Id1 = 0, Id2; Id1 < T1.getSize(); ++Id1) {
+ if ((Id2 = getDst(Id1)) != NoNodeId) {
+ outs() << formatv("Match {0} to {1}\n", T1.showNode(Id1),
+ T2.showNode(Id2));
+ }
+ }
+}
+
+class Subtree {
+private:
+ /// Identifies a node in this subtree by its postorder offset.
+ using SNodeId = int;
+ /// The parent tree.
+ const TreeRoot &Tree;
+ /// Maps SNodeIds to original ids.
+ std::vector<NodeId> RootIds;
+ std::vector<NodeId> LeftMostDescendants;
+
+public:
+ std::vector<NodeId> KeyRoots;
+
+ Subtree(const TreeRoot &Tree, NodeId SubtreeRoot) : Tree(Tree) {
+ int Leaves = Tree.getSubtreePostorder(RootIds, SubtreeRoot);
+ setLeftMostDescendants();
+ computeKeyRoots(Leaves);
+ }
+
+ int getSizeS() const { return RootIds.size(); }
+ NodeId getIdInRoot(SNodeId Id) const {
+ assert(Id > 0 && Id <= getSizeS());
+ return RootIds[Id - 1];
+ }
+ const Node &getNodeS(SNodeId Id) const {
+ return Tree.getNode(getIdInRoot(Id));
+ }
+ const std::string labelS(SNodeId Id) const {
+ return Tree.label(getIdInRoot(Id));
+ }
+ SNodeId getLeftMostDescendant(SNodeId Id) const {
+ assert(Id > 0 && Id <= getSizeS());
+ return LeftMostDescendants[Id - 1];
+ }
+
+private:
+ void setLeftMostDescendants() {
+ LeftMostDescendants.resize(getSizeS());
+ for (SNodeId I = 0; I < getSizeS(); ++I) {
+ NodeId Cur = getIdInRoot(I + 1);
+ while (!Tree.getNode(Cur).isLeaf()) {
+ Cur = Tree.getNode(Cur).Children[0];
+ }
+ // TODO why does this work
+ LeftMostDescendants[I] = 1 + (Tree.PostorderIds[Cur] - getIdInRoot(1));
+ }
+ assert(LeftMostDescendants[0] == 0);
+ }
+ void computeKeyRoots(int Leaves) {
+ KeyRoots.resize(Leaves);
+ std::unordered_set<SNodeId> Visited;
+ int K = Leaves - 1;
+ for (int I = getSizeS(); I > 0; --I) {
+ SNodeId LeftDesc = getLeftMostDescendant(I);
+ if (Visited.count(LeftDesc) != 0u) {
+ continue;
+ }
+ assert(K >= 0);
+ KeyRoots[K] = I;
+ Visited.insert(LeftDesc);
+ --K;
+ }
+ }
+};
+
+// Computes an optimal mapping between two trees
+class ZsMatcher {
+ using Pairs = std::vector<std::pair<NodeId, NodeId>>;
+
+private:
+ Subtree S1;
+ Subtree S2;
+ double **TreeDist;
+ double **ForestDist;
+
+ // TODO Use string editing distance if applicable.
+ double getUpdateCost(NodeId Id1, NodeId Id2) {
+ if (!S1.getNodeS(Id1).hasSameType(S2.getNodeS(Id2))) {
+ return std::numeric_limits<double>::max();
+ }
+ if (S1.labelS(Id1) == S2.labelS(Id2)) {
+ return 0;
+ }
+ return 1;
+ }
+ void computeTreeDist() {
+ for (NodeId Id1 : S1.KeyRoots) {
+ for (NodeId Id2 : S2.KeyRoots) {
+ computeForestDist(Id1, Id2);
+ }
+ }
+ }
+ void dumpForestDist() const {
+ for (int I = 0; I < S1.getSizeS() + 1; ++I) {
+ outs() << "[";
+ for (int J = 0; J < S2.getSizeS() + 1; ++J) {
+ outs() << formatv("{0}{1: 2}", (J == 0 ? "" : " "), ForestDist[I][J]);
+ }
+ outs() << "]\n";
+ }
+ }
+ void computeForestDist(NodeId Id1, NodeId Id2) {
+ assert(Id1 > 0 && Id2 > 0);
+ NodeId LMD1 = S1.getLeftMostDescendant(Id1);
+ NodeId LMD2 = S2.getLeftMostDescendant(Id2);
+
+ ForestDist[LMD1][LMD2] = 0;
+ for (NodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) {
+ double DeletionCost = 1.0;
+ ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost;
+ for (NodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) {
+ double InsertionCost = 1;
+ ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost;
+ NodeId DLMD1 = S1.getLeftMostDescendant(D1);
+ NodeId DLMD2 = S2.getLeftMostDescendant(D2);
+ if (DLMD1 == LMD1 && DLMD2 == LMD2) {
+ double UpdateCost = getUpdateCost(D1, D2);
+ ForestDist[D1][D2] =
+ std::min(std::min(ForestDist[D1 - 1][D2] + DeletionCost,
+ ForestDist[D1][D2 - 1] + InsertionCost),
+ ForestDist[D1 - 1][D2 - 1] + UpdateCost);
+ TreeDist[D1][D2] = ForestDist[D1][D2];
+ } else {
+ ForestDist[D1][D2] =
+ std::min(std::min(ForestDist[D1 - 1][D2] + DeletionCost,
+ ForestDist[D1][D2 - 1] + InsertionCost),
+ ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]);
+ }
+ }
+ }
+ }
+
+public:
+ ZsMatcher(const TreeRoot &T1, const TreeRoot &T2, NodeId Id1, NodeId Id2)
+ : S1(T1, Id1), S2(T2, Id2) {
+ TreeDist = new double *[S1.getSizeS() + 1];
+ ForestDist = new double *[S1.getSizeS() + 1];
+ for (int I = 0; I < S1.getSizeS() + 1; ++I) {
+ TreeDist[I] = new double[S2.getSizeS() + 1]();
+ ForestDist[I] = new double[S2.getSizeS() + 1]();
+ }
+ }
+
+ ~ZsMatcher() {
+ for (int I = 0; I < S1.getSizeS() + 1; ++I) {
+ delete[] TreeDist[I];
+ delete[] ForestDist[I];
+ }
+ delete[] TreeDist;
+ delete[] ForestDist;
+ }
+
+ Pairs matchZs() {
+ Pairs Matches;
+
+ computeTreeDist();
+
+ bool RootNodePair = true;
+ Pairs TreePairs{{S1.getSizeS(), S2.getSizeS()}};
+
+ NodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col;
+ while (!TreePairs.empty()) {
+ std::tie(LastRow, LastCol) = TreePairs.back();
+ TreePairs.pop_back();
+
+ if (!RootNodePair) {
+ computeForestDist(LastRow, LastCol);
+ }
+
+ RootNodePair = false;
+
+ FirstRow = S1.getLeftMostDescendant(LastRow);
+ FirstCol = S2.getLeftMostDescendant(LastCol);
+
+ Row = LastRow;
+ Col = LastCol;
+
+ while (Row > FirstRow || Col > FirstCol) {
+ if (Row > FirstRow &&
+ ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) {
+ --Row;
+ } else if (Col > FirstCol &&
+ ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) {
+ --Col;
+ } else {
+ NodeId LMD1 = S1.getLeftMostDescendant(Row);
+ NodeId LMD2 = S2.getLeftMostDescendant(Col);
+ if (LMD1 == S1.getLeftMostDescendant(LastRow) &&
+ LMD2 == S2.getLeftMostDescendant(LastCol)) {
+ assert(S1.getNodeS(Row).hasSameType(S2.getNodeS(Col)));
+ Matches.emplace_back(S1.getIdInRoot(Row), S2.getIdInRoot(Col));
+ --Row;
+ --Col;
+ } else {
+ TreePairs.emplace_back(Row, Col);
+ Row = LMD1;
+ Col = LMD2;
+ }
+ }
+ }
+ }
+ return Matches;
+ }
+};
+
+namespace {
+// Compares nodes by their depth.
+struct HeightLess {
+ const TreeRoot &Tree;
+ HeightLess(const TreeRoot &Tree) : Tree(Tree) {}
+ bool operator()(NodeId Id1, NodeId Id2) const {
+ return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height;
+ }
+};
+} // namespace
+
+// Priority queue for nodes, sorted descendingly by their height.
+class PriorityList {
+ const TreeRoot &Tree;
+ HeightLess Comparator;
+ std::vector<NodeId> Container;
+ PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List;
+
+public:
+ PriorityList(const TreeRoot &Tree)
+ : Tree(Tree), Comparator(Tree), List(Comparator, Container) {}
+
+ void push(NodeId id) { List.push(id); }
+
+ std::vector<NodeId> pop() {
+ int Max = peekMax();
+ std::vector<NodeId> Result;
+ if (Max != 0) {
+ while (peekMax() == Max) {
+ Result.push_back(List.top());
+ List.pop();
+ }
+ }
+ // TODO this is here to get a stable output, not a good heuristic
+ std::sort(Result.begin(), Result.end());
+ return Result;
+ }
+
+ int peekMax() const {
+ if (List.empty()) {
+ return 0;
+ }
+ return Tree.getNode(List.top()).Height;
+ }
+
+ void open(NodeId Id) {
+ for (NodeId Child : Tree.getNode(Id).Children) {
+ push(Child);
+ }
+ }
+};
+
+/// Computes the differences between two ASTs
+class ClangDiff {
+private:
+ TreeRoot &T1;
+ TreeRoot &T2;
+
+ // Returns true if the two subtrees have the same structure
+ // and equal node kinds (does not compare labels).
+ bool isomorphic(NodeId Id1, NodeId Id2) const {
+ const Node &N1 = T1.getNode(Id1);
+ const Node &N2 = T2.getNode(Id2);
+ if (!N1.hasSameType(N2) || N1.Children.size() != N2.Children.size() ||
+ T1.label(Id1) != T2.label(Id2)) {
+ return false;
+ }
+ for (size_t Id = 0; Id < N1.Children.size(); ++Id) {
+ if (!isomorphic(N1.Children[Id], N2.Children[Id])) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // TODO This is too restrictive, we want to allow multiple mapping candidates
+ // for nodes and resolve the ambiguity later.
+ bool isMappingAllowed(const Mappings &M, NodeId Id1, NodeId Id2) const {
+ const Node &N1 = T1.getNode(Id1);
+ const Node &N2 = T2.getNode(Id2);
+ bool AnyMapped = M.hasSrc(Id1) || M.hasDst(Id2);
+ bool SameType = N1.hasSameType(N2);
+ NodeId P1 = N1.Parent;
+ NodeId P2 = N2.Parent;
+ bool ParentsSameType = (P1 == NoNodeId && P2 == NoNodeId) ||
+ (P1 != NoNodeId && P2 != NoNodeId &&
+ T1.getNode(P1).hasSameType(T2.getNode(P2)));
+ return !AnyMapped && SameType && ParentsSameType;
+ }
+
+ // Adds all corresponding subtrees of the two nodes to the mappings.
+ // The two nodes must be isomorphic.
+ void addIsomorphicSubTrees(Mappings &M, NodeId Id1, NodeId Id2) const {
+ M.link(Id1, Id2);
+ const Node &N1 = T1.getNode(Id1);
+ const Node &N2 = T2.getNode(Id2);
+ assert(isomorphic(Id1, Id2));
+ assert(N1.Children.size() == N2.Children.size());
+ for (size_t Id = 0; Id < N1.Children.size(); ++Id) {
+ addIsomorphicSubTrees(M, N1.Children[Id], N2.Children[Id]);
+ }
+ }
+
+ // Uses an optimal albeit slow algorithm to compute mappings for two
+ // subtrees, but only if both have fewer nodes than MaxSize.
+ void addOptimalMappings(Mappings &M, NodeId Id1, NodeId Id2) const {
+ if (std::max(T1.getNumberOfDescendants(Id1),
+ T2.getNumberOfDescendants(Id2)) < MaxSize) {
+ ZsMatcher Matcher(T1, T2, Id1, Id2);
+ std::vector<std::pair<NodeId, NodeId>> R = Matcher.matchZs();
+ for (const auto Tuple : R) {
+ NodeId Src = Tuple.first;
+ NodeId Dst = Tuple.second;
+ if (isMappingAllowed(M, Src, Dst)) {
+ M.link(Src, Dst);
+ }
+ }
+ }
+ }
+
+ // Computes the ratio of common descendants between the two nodes.
+ // Descendants are only considered to be equal when they are mapped in M.
+ double dice(const Mappings &M, NodeId Id1, NodeId Id2) const {
+ if (Id1 == NoNodeId || Id2 == NoNodeId) {
+ return 0.0;
+ }
+ int CommonDescendants = 0;
+ const Node &N1 = T1.getNode(Id1);
+ for (NodeId Id = N1.LeftMostDescendant; Id < Id1; ++Id) {
+ CommonDescendants += int(M.hasSrc(Id));
+ }
+ return 2.0 * CommonDescendants /
+ (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2));
+ }
+
+ // Returns the node that has the highest degree of similarity.
+ NodeId findCandidate(const Mappings &M, NodeId Id1) const {
+ NodeId Candidate = NoNodeId;
+ double BestDiceValue = 0.0;
+ const Node &N1 = T1.getNode(Id1);
+ for (NodeId Id2 = 0; Id2 < T2.getSize(); ++Id2) {
+ const Node &N2 = T2.getNode(Id2);
+ if (!N1.hasSameType(N2)) {
+ continue;
+ }
+ if (M.hasDst(Id2)) {
+ continue;
+ }
+ double DiceValue = dice(M, Id1, Id2);
+ if (DiceValue > BestDiceValue) {
+ BestDiceValue = DiceValue;
+ Candidate = Id2;
+ }
+ }
+ return Candidate;
+ }
+
+ // Tries to match any yet unmapped nodes, in a bottom-up fashion.
+ void matchBottomUp(Mappings &M) const {
+ for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) {
+ if (Id1 == T1.root()) {
+ M.link(T1.root(), T2.root());
+ addOptimalMappings(M, T1.root(), T2.root());
+ break;
+ }
+ assert(Id1 != NoNodeId);
+ const Node &N1 = T1.getNode(Id1);
+ bool Matched = M.hasSrc(Id1);
+ bool MatchedChildren =
+ std::any_of(N1.Children.begin(), N1.Children.end(),
+ [&](NodeId Child) { return M.hasSrc(Child); });
+ if (Matched || !MatchedChildren) {
+ continue;
+ }
+ NodeId Id2 = findCandidate(M, Id1);
+ if (Id2 == NoNodeId || dice(M, Id1, Id2) < MinDice ||
+ !isMappingAllowed(M, Id1, Id2)) {
+ continue;
+ }
+ M.link(Id1, Id2);
+ addOptimalMappings(M, Id1, Id2);
+ }
+ }
+
+ // Returns a set of mappings of isomorphic subtrees.
+ Mappings matchTopDown() const {
+ PriorityList L1(T1);
+ PriorityList L2(T2);
+
+ Mappings M(T1, T2);
+
+ L1.push(T1.root());
+ L2.push(T2.root());
+
+ int Max1, Max2;
+ while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) > MinHeight) {
+ if (Max1 > Max2) {
+ for (NodeId Id : L1.pop()) {
+ L1.open(Id);
+ }
+ continue;
+ }
+ if (Max2 > Max1) {
+ for (NodeId Id : L2.pop()) {
+ L2.open(Id);
+ }
+ continue;
+ }
+ std::vector<NodeId> H1, H2;
+ H1 = L1.pop();
+ H2 = L2.pop();
+ for (NodeId Id1 : H1) {
+ for (NodeId Id2 : H2) {
+ if (isomorphic(Id1, Id2) && isMappingAllowed(M, Id1, Id2)) {
+ addIsomorphicSubTrees(M, Id1, Id2);
+ }
+ }
+ }
+ for (NodeId Id1 : H1) {
+ if (!M.hasSrc(Id1)) {
+ L1.open(Id1);
+ }
+ }
+ for (NodeId Id2 : H2) {
+ if (!M.hasDst(Id2)) {
+ L2.open(Id2);
+ }
+ }
+ }
+ return M;
+ }
+
+ // Finds an edit script that converts T1 to T2.
+ std::vector<Change> computeChanges(Mappings &M) {
+ std::vector<Change> Changes;
+ for (NodeId Id2 : T2.getSubtreeBfs(T2.root())) {
+ const Node &N2 = T2.getNode(Id2);
+ NodeId Id1 = M.getSrc(Id2);
+ if (Id1 != NoNodeId) {
+ assert(T1.getNode(Id1).hasSameType(N2));
+ if (T1.label(Id1) != T2.label(Id2)) {
+ Changes.emplace_back(Update, Id1, Id2, /*UNUSED Position=*/0);
+ }
+ continue;
+ }
+ NodeId P2 = N2.Parent;
+ NodeId P1 = M.getSrc(P2);
+ assert(P1 != NoNodeId);
+ Node &Parent1 = T1.getMutableNode(P1);
+ const Node &Parent2 = T2.getNode(P2);
+ auto &Siblings1 = Parent1.Children;
+ const auto &Siblings2 = Parent2.Children;
+ size_t Position;
+ for (Position = 0; Position < Siblings2.size(); ++Position) {
+ if (Siblings2[Position] == Id2 || Position >= Siblings1.size()) {
+ break;
+ }
+ }
+ Changes.emplace_back(Insert, Id2, P2, Position);
+ Node PatchNode;
+ PatchNode.Parent = P1;
+ PatchNode.LeftMostDescendant = N2.LeftMostDescendant;
+ PatchNode.Depth = N2.Depth;
+ PatchNode.ASTNode = N2.ASTNode;
+ // TODO update Depth if needed
+ NodeId PatchNodeId = T1.getSize();
+ assert(Position <= Siblings1.size());
+ // TODO maybe choose a different data structure for Children.
+ Siblings1.insert(Siblings1.begin() + Position, PatchNodeId);
+ T1.addNode(PatchNode);
+ M.link(PatchNodeId, Id2);
+ }
+ for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) {
+ NodeId Id2 = M.getDst(Id1);
+ if (Id2 == NoNodeId) {
+ Changes.emplace_back(Delete, Id1, Id2, /*UNUSED Position=*/0);
+ }
+ }
+ return Changes;
+ }
+
+ // Prints an edit action.
+ void dumpChange(const Change &Chg) const {
+ ChangeKind Kind;
+ NodeId Id1, Id2;
+ size_t Position;
+ std::tie(Kind, Id1, Id2, Position) = Chg;
+ std::string S;
+ switch (Kind) {
+ case Delete:
+ S = formatv("Delete {0}", T1.showNode(Id1));
+ break;
+ case Update:
+ S = formatv("Update {0} to {1}", T1.showNode(Id1), T2.label(Id2));
+ break;
+ case Insert:
+ S = formatv("Insert {0} into {1} at {2}", T2.showNode(Id1),
+ T2.showNode(Id2), Position);
+ break;
+ case Move:
+ llvm_unreachable("TODO");
+ break;
+ };
+ outs() << S << "\n";
+ }
+
+ Mappings match() const {
+ Mappings M = matchTopDown();
+ matchBottomUp(M);
+ return M;
+ }
+
+public:
+ int MinHeight = 2;
+ double MinDice = 0.2;
+ int MaxSize = 100;
+
+ ClangDiff(TreeRoot &T1, TreeRoot &T2) : T1(T1), T2(T2) {}
+
+ void printDiff() {
+ Mappings M = match();
+ M.dumpMapping();
+ auto Changes = computeChanges(M);
+ for (const auto &C : Changes) {
+ dumpChange(C);
+ }
+ }
+};
+
+void runDiff(ASTUnit &AST1, ASTUnit &AST2) {
+ TreeRoot T1(AST1);
+ TreeRoot T2(AST2);
+ ClangDiff DiffTool(T1, T2);
+ DiffTool.printDiff();
+}
+
+} // namespace diff
+} // namespace clang
Index: include/clang/Tooling/ASTDiff/ASTDiff.h
===================================================================
--- /dev/null
+++ include/clang/Tooling/ASTDiff/ASTDiff.h
@@ -0,0 +1,121 @@
+//===- ASTDiff.h - AST differencing API -----------------------*- C++ -*- -===//
+//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFF_H
+#define LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFF_H
+
+#include "clang/AST/ASTTypeTraits.h"
+#include "clang/Frontend/ASTUnit.h"
+
+using namespace llvm;
+using namespace clang;
+
+namespace clang {
+namespace diff {
+
+/// Within a tree, this identifies a node by its preorder offset.
+using NodeId = int;
+
+/// Sentinel value for invalid nodes.
+const NodeId NoNodeId = -1;
+
+/// Represents a Clang AST node, alongside some additional information.
+struct Node {
+ NodeId Parent, LeftMostDescendant;
+ int Depth, Height;
+ ast_type_traits::DynTypedNode ASTNode;
+ // Maybe there is a better way to store children than this.
+ SmallVector<NodeId, 4> Children;
+
+ ast_type_traits::ASTNodeKind getType() const { return ASTNode.getNodeKind(); }
+ bool hasSameType(const Node &Other) const {
+ return getType().isSame(Other.getType()) ||
+ (getType().isNone() && Other.getType().isNone());
+ }
+ const StringRef getTypeLabel() const { return getType().asStringRef(); }
+ bool isLeaf() const { return Children.empty(); }
+};
+
+/// Represents the AST of a TranslationUnit.
+class TreeRoot {
+private:
+ /// Nodes in preorder.
+ std::vector<Node> Nodes;
+ void setRightMostDescendant();
+ void setLeftMostDescendant();
+ void setSize(size_t Size) { Nodes.resize(Size); }
+
+public:
+ ASTUnit &AST;
+ std::vector<NodeId> Leaves;
+ std::vector<int> PostorderIds;
+
+ TreeRoot(ASTUnit &AST);
+
+ int getSize() const { return Nodes.size(); }
+ NodeId root() const { return 0; }
+
+ const Node &getNode(NodeId Id) const { return Nodes[Id]; }
+ Node &getMutableNode(NodeId Id) { return Nodes[Id]; }
+ bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); }
+ void addNode(Node &N) { Nodes.push_back(N); }
+
+ int getSubtreePostorder(std::vector<NodeId> &Ids, NodeId Root) const;
+ std::vector<NodeId> getSubtreeBfs(NodeId Id) const;
+ int getNumberOfDescendants(NodeId Id) const;
+
+ /// Returns the value of the node.
+ std::string label(NodeId Id) const;
+ void dumpTree(NodeId Id = NoNodeId) const;
+ /// Return the node as <kind>[: <label>](<postorder-id)
+ std::string showNode(NodeId Id) const;
+ void dumpNodeAsJson(NodeId Id) const;
+ void dumpAsJson() const;
+};
+
+/// Maps nodes of the left tree to ones on the right, and vice versa.
+// Supports fast insertion and lookup.
+// TODO allow multiple mappings
+class Mappings {
+ NodeId *SrcToDst, *DstToSrc;
+ const TreeRoot &T1, &T2;
+
+public:
+ Mappings(const TreeRoot &T1, const TreeRoot &T2);
+ ~Mappings();
+
+ void link(NodeId Src, NodeId Dst);
+
+ NodeId getDst(NodeId Src) const { return SrcToDst[Src]; }
+ NodeId getSrc(NodeId Dst) const { return DstToSrc[Dst]; }
+ bool hasSrc(NodeId Src) const { return SrcToDst[Src] != NoNodeId; }
+ bool hasDst(NodeId Dst) const { return DstToSrc[Dst] != NoNodeId; }
+
+ void dumpMapping() const;
+};
+
+/// (Update, src, dst, _ ): update the value of node src to match dst.
+/// (Insert, src, dst, pos): insert src as child of dst at offset pos.
+/// (Delete, src, _, _ ): delete node src.
+/// (Move, src, dst, pos): move src to a child of dst at offset pos.
+enum ChangeKind { Delete, Update, Insert, Move };
+using Change = std::tuple<ChangeKind, NodeId, NodeId, size_t>;
+
+void runDiff(ASTUnit &AST1, ASTUnit &AST2);
+
+} // namespace diff
+} // namespace clang
+
+#endif
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits