johannes updated this revision to Diff 103151.
https://reviews.llvm.org/D34329
Files:
include/clang/Tooling/ASTDiff/ASTDiff.h
lib/Tooling/ASTDiff/ASTDiff.cpp
lib/Tooling/ASTDiff/CMakeLists.txt
lib/Tooling/CMakeLists.txt
tools/CMakeLists.txt
tools/clang-diff/CMakeLists.txt
tools/clang-diff/ClangDiff.cpp
Index: tools/clang-diff/ClangDiff.cpp
===================================================================
--- /dev/null
+++ tools/clang-diff/ClangDiff.cpp
@@ -0,0 +1,92 @@
+//===- ClangDiff.cpp - compare source files by AST nodes ------*- C++ -*- -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a tool for syntax tree based comparison using
+// Tooling/ASTDiff.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/ASTDiff/ASTDiff.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace tooling;
+
+static cl::OptionCategory ClangDiffCategory("clang-diff options");
+
+static cl::opt<bool>
+ DumpAST("ast-dump",
+ cl::desc("Print the internal representation of the AST as JSON."),
+ cl::init(false), cl::cat(ClangDiffCategory));
+
+static cl::opt<std::string> SourcePath(cl::Positional, cl::desc("<source>"),
+ cl::Required,
+ cl::cat(ClangDiffCategory));
+
+static cl::opt<std::string> DestinationPath(cl::Positional,
+ cl::desc("<destination>"),
+ cl::Optional,
+ cl::cat(ClangDiffCategory));
+
+static std::unique_ptr<ASTUnit> getAST(const StringRef Filename) {
+ std::string ErrorMessage;
+ std::unique_ptr<CompilationDatabase> Compilations =
+ CompilationDatabase::autoDetectFromSource(Filename, ErrorMessage);
+ if (!Compilations) {
+ llvm::errs()
+ << "Error while trying to load a compilation database, running "
+ "without flags.\n"
+ << ErrorMessage;
+ Compilations.reset(
+ new FixedCompilationDatabase(".", std::vector<std::string>()));
+ }
+ std::array<std::string, 1> Files = {{Filename}};
+ ClangTool Tool(*Compilations, Files);
+ std::vector<std::unique_ptr<ASTUnit>> ASTs;
+ Tool.buildASTs(ASTs);
+ if (ASTs.size() != Files.size())
+ return nullptr;
+ return std::move(ASTs[0]);
+}
+
+int main(int argc, const char **argv) {
+ cl::HideUnrelatedOptions(ClangDiffCategory);
+ if (!cl::ParseCommandLineOptions(argc, argv)) {
+ cl::PrintOptionValues();
+ return 1;
+ }
+
+ if (DumpAST) {
+ if (!DestinationPath.empty()) {
+ llvm::errs() << "Error: Please specify exactly one filename.\n";
+ return 1;
+ }
+ std::unique_ptr<ASTUnit> AST = getAST(SourcePath);
+ if (!AST)
+ return 1;
+ clang::diff::TreeRoot Tree(AST->getASTContext());
+ Tree.printAsJson();
+ return 0;
+ }
+
+ if (DestinationPath.empty()) {
+ llvm::errs() << "Error: Exactly two paths are required.\n";
+ return 1;
+ }
+
+ std::unique_ptr<ASTUnit> Src = getAST(SourcePath);
+ std::unique_ptr<ASTUnit> Dst = getAST(DestinationPath);
+ if (!Src || !Dst)
+ return 1;
+
+ clang::diff::runDiff(Src->getASTContext(), Dst->getASTContext());
+
+ return 0;
+}
Index: tools/clang-diff/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/clang-diff/CMakeLists.txt
@@ -0,0 +1,16 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+add_clang_executable(clang-diff
+ ClangDiff.cpp
+ )
+
+target_link_libraries(clang-diff
+ clangAST
+ clangBasic
+ clangFrontend
+ clangLex
+ clangTooling
+ clangToolingASTDiff
+ )
Index: tools/CMakeLists.txt
===================================================================
--- tools/CMakeLists.txt
+++ tools/CMakeLists.txt
@@ -2,6 +2,7 @@
add_clang_subdirectory(diagtool)
add_clang_subdirectory(driver)
+add_clang_subdirectory(clang-diff)
add_clang_subdirectory(clang-format)
add_clang_subdirectory(clang-format-vs)
add_clang_subdirectory(clang-fuzzer)
Index: lib/Tooling/CMakeLists.txt
===================================================================
--- lib/Tooling/CMakeLists.txt
+++ lib/Tooling/CMakeLists.txt
@@ -5,6 +5,7 @@
add_subdirectory(Core)
add_subdirectory(Refactoring)
+add_subdirectory(ASTDiff)
add_clang_library(clangTooling
ArgumentsAdjusters.cpp
Index: lib/Tooling/ASTDiff/CMakeLists.txt
===================================================================
--- /dev/null
+++ lib/Tooling/ASTDiff/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(LLVM_LINK_COMPONENTS
+ # Option
+ Support
+ )
+
+add_clang_library(clangToolingASTDiff
+ ASTDiff.cpp
+ LINK_LIBS
+ clangBasic
+ clangAST
+ # clangToolingCore
+ )
Index: lib/Tooling/ASTDiff/ASTDiff.cpp
===================================================================
--- /dev/null
+++ lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -0,0 +1,814 @@
+//===- ASTDiff.cpp - AST differencing implementation-----------*- C++ -*- -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitons for the AST differencing interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/ASTDiff/ASTDiff.h"
+
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#include <limits>
+#include <memory>
+#include <unordered_set>
+
+using namespace llvm;
+using namespace clang;
+
+namespace clang {
+namespace diff {
+
+namespace {
+// Count the number of non-null nodes
+struct NodeCountVisitor : public RecursiveASTVisitor<NodeCountVisitor> {
+ int Count = 0;
+ const TreeRoot &Root;
+ NodeCountVisitor(const TreeRoot &Root) : Root(Root) {}
+ bool TraverseDecl(Decl *D) {
+ if (Root.discardNode(D))
+ return true;
+ ++Count;
+ RecursiveASTVisitor<NodeCountVisitor>::TraverseDecl(D);
+ return true;
+ }
+ bool TraverseStmt(Stmt *S) {
+ if (Root.discardNode(S))
+ return true;
+ ++Count;
+ RecursiveASTVisitor<NodeCountVisitor>::TraverseStmt(S);
+ return true;
+ }
+ bool TraverseType(QualType T) { return true; }
+};
+} // end anonymous namespace
+
+namespace {
+// Sets Height, Parent and Children for each node.
+struct PreorderVisitor : public RecursiveASTVisitor<PreorderVisitor> {
+ int Id = 0, Depth = 0;
+ NodeId Parent = NoNodeId;
+ TreeRoot &Root;
+
+ PreorderVisitor(TreeRoot &Root) : Root(Root) {}
+
+ template <class T> std::tuple<NodeId, NodeId> PreTraverse(T *ASTNode) {
+ NodeId MyId = Id;
+ Node &N = Root.getMutableNode(MyId);
+ N.Parent = Parent;
+ N.Depth = Depth;
+ N.ASTNode = ast_type_traits::DynTypedNode::create(*ASTNode);
+ assert(!N.ASTNode.getNodeKind().isNone() &&
+ "Expected nodes to have a valid kind.");
+ if (Parent != NoNodeId) {
+ Node &P = Root.getMutableNode(Parent);
+ P.Children.push_back(MyId);
+ }
+ Parent = MyId;
+ ++Id;
+ ++Depth;
+ return {MyId, Root.getNode(MyId).Parent};
+ }
+ void PostTraverse(std::tuple<NodeId, NodeId> State) {
+ NodeId MyId, PreviousParent;
+ std::tie(MyId, PreviousParent) = State;
+ Parent = PreviousParent;
+ --Depth;
+ if (MyId == NoNodeId)
+ return;
+ Node &N = Root.getMutableNode(MyId);
+ if (N.isLeaf())
+ Root.Leaves.push_back(MyId);
+ N.Height = 1;
+ for (NodeId Child : N.Children)
+ N.Height = std::max(N.Height, 1 + Root.getNode(Child).Height);
+ }
+ bool TraverseDecl(Decl *D) {
+ if (Root.discardNode(D))
+ return true;
+ auto SavedState = PreTraverse(D);
+ RecursiveASTVisitor<PreorderVisitor>::TraverseDecl(D);
+ PostTraverse(SavedState);
+ return true;
+ }
+ bool TraverseStmt(Stmt *S) {
+ if (Root.discardNode(S))
+ return true;
+ auto SavedState = PreTraverse(S);
+ RecursiveASTVisitor<PreorderVisitor>::TraverseStmt(S);
+ PostTraverse(SavedState);
+ return true;
+ }
+ bool TraverseType(QualType T) { return true; }
+};
+} // end anonymous namespace
+
+TreeRoot::TreeRoot(ASTContext &AST) : AST(AST) {
+ auto *TUD = AST.getTranslationUnitDecl();
+ // Run the above visitors to initialize the tree.
+ NodeCountVisitor NodeCounter(*this);
+ NodeCounter.TraverseDecl(TUD);
+ setSize(NodeCounter.Count);
+ PreorderVisitor PreorderWalker(*this);
+ PreorderWalker.TraverseDecl(TUD);
+ setLeftMostDescendant();
+ int PostorderId = 0;
+ PostorderIds.resize(Nodes.size());
+ std::function<void(NodeId)> PostorderTraverse = [&](NodeId Id) {
+ for (NodeId Child : getNode(Id).Children)
+ PostorderTraverse(Child);
+ PostorderIds[Id] = PostorderId;
+ ++PostorderId;
+ };
+ PostorderTraverse(root());
+}
+
+template <class T> bool TreeRoot::discardNode(T *N) const {
+ if (!N)
+ return true;
+ SourceLocation SLoc = N->getLocStart();
+ const SourceManager &SrcMgr = AST.getSourceManager();
+ return SLoc.isValid() && SrcMgr.isInSystemHeader(SLoc);
+}
+
+int TreeRoot::getSubtreePostorder(std::vector<NodeId> &Ids, NodeId Root) const {
+ int Leaves = 0;
+ std::function<void(NodeId)> Traverse = [&](NodeId Id) {
+ const Node &N = getNode(Id);
+ for (NodeId Child : N.Children)
+ Traverse(Child);
+ if (N.isLeaf())
+ ++Leaves;
+ Ids.push_back(Id);
+ };
+ Traverse(Root);
+ return Leaves;
+}
+
+std::vector<NodeId> TreeRoot::getSubtreeBfs(NodeId Id) const {
+ std::vector<NodeId> Ids;
+ size_t Expanded = 0;
+ Ids.push_back(Id);
+ while (Expanded < Ids.size())
+ for (NodeId Child : getNode(Ids[Expanded++]).Children)
+ Ids.push_back(Child);
+ return Ids;
+}
+
+int TreeRoot::getNumberOfDescendants(NodeId Id) const {
+ return PostorderIds[Id] - getNode(Id).LeftMostDescendant;
+}
+
+std::string TreeRoot::getSourceString(SourceLocation SourceBegin,
+ SourceLocation SourceEnd) const {
+ const SourceManager &SrcMgr = AST.getSourceManager();
+ const char *Begin = SrcMgr.getCharacterData(SourceBegin);
+ const char *End = SrcMgr.getCharacterData(SourceEnd);
+ return {Begin, static_cast<size_t>(End - Begin)};
+}
+
+std::string TreeRoot::label(NodeId Id) const {
+ const Node &N = getNode(Id);
+ const ast_type_traits::DynTypedNode &DTN = N.ASTNode;
+ if (auto *X = DTN.get<BinaryOperator>())
+ return X->getOpcodeStr();
+ if (auto *X = DTN.get<AccessSpecDecl>())
+ return getSourceString(X->getAccessSpecifierLoc(), X->getColonLoc());
+ if (auto *X = DTN.get<IntegerLiteral>()) {
+ SmallString<256> Str;
+ X->getValue().toString(Str, /*Radix=*/10, /*Signed=*/false);
+ return Str.str();
+ }
+ if (auto *X = DTN.get<StringLiteral>())
+ return X->getString();
+ if (auto *X = DTN.get<ValueDecl>())
+ return X->getNameAsString() + "(" + X->getType().getAsString() + ")";
+ if (auto *X = DTN.get<DeclStmt>())
+ return "";
+ if (auto *X = DTN.get<TranslationUnitDecl>())
+ return "";
+ std::string Label;
+ if (auto *X = DTN.get<DeclRefExpr>()) {
+ if (X->hasQualifier() && X->getQualifier()->getAsIdentifier())
+ Label += std::string(X->getQualifier()->getAsIdentifier()->getName());
+ Label += X->getDecl()->getNameAsString();
+ return Label;
+ }
+ if (auto *X = DTN.get<NamedDecl>())
+ Label += X->getNameAsString() + ";";
+ if (auto *X = DTN.get<TypedefNameDecl>())
+ return Label + X->getUnderlyingType().getAsString() + ";";
+ if (auto *X = DTN.get<NamespaceDecl>())
+ return Label;
+ if (auto *X = DTN.get<TypeDecl>())
+ if (X->getTypeForDecl())
+ Label +=
+ X->getTypeForDecl()->getCanonicalTypeInternal().getAsString() + ";";
+ if (auto *X = DTN.get<Decl>())
+ return Label;
+ if (auto *X = DTN.get<Stmt>())
+ return Label;
+ if (auto *X = DTN.get<QualType>())
+ llvm_unreachable("Types are not included.\n");
+ llvm_unreachable("Fatal: unhandled AST node.\n");
+}
+
+void TreeRoot::printTree(raw_ostream &OS, NodeId Id) const {
+ if (Id == NoNodeId)
+ Id = root();
+ const Node &N = getNode(Id);
+ for (int I = 0; I < N.Depth; ++I)
+ OS << " ";
+ OS << showNode(Id) << "\n";
+ for (NodeId Child : N.Children)
+ printTree(OS, Child);
+}
+
+std::string TreeRoot::showNode(NodeId Id) const {
+ if (Id == NoNodeId)
+ return "None";
+ std::string Label;
+ if (label(Id) != "")
+ Label = formatv(": {0}", label(Id));
+ return formatv("{0}{1}({2})", getNode(Id).getTypeLabel(), Label,
+ PostorderIds[Id]);
+}
+
+void TreeRoot::printNodeAsJson(raw_ostream &OS, NodeId Id) const {
+ auto N = getNode(Id);
+ std::string Label;
+ if (label(Id) != "")
+ Label = formatv(R"(,"label":"{0}")", label(Id));
+ OS << formatv(R"({"typeLabel":"{0}"{1},"children":[)", N.getTypeLabel(),
+ Label);
+ if (N.Children.size() > 0) {
+ printNodeAsJson(OS, N.Children[0]);
+ for (size_t I = 1; I < N.Children.size(); ++I) {
+ OS << ",";
+ printNodeAsJson(OS, N.Children[I]);
+ }
+ }
+ OS << "]}";
+}
+
+void TreeRoot::printAsJson(raw_ostream &OS) const {
+ OS << R"({"root":)";
+ printNodeAsJson(OS, root());
+ OS << "}\n";
+}
+
+void TreeRoot::printAsJson() const { printAsJson(llvm::outs()); }
+
+void TreeRoot::setLeftMostDescendant() {
+ for (NodeId Id : Leaves) {
+ Node &N = getMutableNode(Id);
+ N.LeftMostDescendant = Id;
+ NodeId Cur = Id;
+ while (N.Parent != NoNodeId && getNode(N.Parent).Children.front() == Cur) {
+ Cur = N.Parent;
+ getMutableNode(Cur).LeftMostDescendant = Id;
+ }
+ }
+}
+
+Mappings::Mappings(const TreeRoot &T1, const TreeRoot &T2) : T1(T1), T2(T2) {
+ // Maximum possible size after patching one tree.
+ int Size = T1.getSize() + T2.getSize();
+ SrcToDst = llvm::make_unique<NodeId[]>(Size);
+ DstToSrc = llvm::make_unique<NodeId[]>(Size);
+ // set everything to NoNodeId == -1
+ memset(SrcToDst.get(), NoNodeId, Size * sizeof(NodeId));
+ memset(DstToSrc.get(), NoNodeId, Size * sizeof(NodeId));
+}
+
+void Mappings::link(NodeId Src, NodeId Dst) {
+ SrcToDst[Src] = Dst;
+ DstToSrc[Dst] = Src;
+}
+
+void Mappings::printMapping(raw_ostream &OS) const {
+ for (NodeId Id1 = 0, Id2; Id1 < T1.getSize(); ++Id1)
+ if ((Id2 = getDst(Id1)) != NoNodeId)
+ OS << formatv("Match {0} to {1}\n", T1.showNode(Id1), T2.showNode(Id2));
+}
+
+void Mappings::printMapping() const { printMapping(llvm::outs()); }
+
+/// Identifies a node in this subtree by its postorder offset.
+using SNodeId = int;
+
+class Subtree {
+private:
+ /// The parent tree.
+ const TreeRoot &Tree;
+ /// Maps SNodeIds to original ids.
+ std::vector<NodeId> RootIds;
+ std::vector<NodeId> LeftMostDescendants;
+
+public:
+ std::vector<NodeId> KeyRoots;
+
+ Subtree(const TreeRoot &Tree, NodeId SubtreeRoot) : Tree(Tree) {
+ int Leaves = Tree.getSubtreePostorder(RootIds, SubtreeRoot);
+ setLeftMostDescendants();
+ computeKeyRoots(Leaves);
+ }
+
+ int getSizeS() const { return RootIds.size(); }
+ NodeId getIdInRoot(SNodeId Id) const {
+ assert(Id > 0 && Id <= getSizeS() && "Invalid subtree node index.");
+ return RootIds[Id - 1];
+ }
+ const Node &getNodeS(SNodeId Id) const {
+ return Tree.getNode(getIdInRoot(Id));
+ }
+ const std::string labelS(SNodeId Id) const {
+ return Tree.label(getIdInRoot(Id));
+ }
+ SNodeId getLeftMostDescendant(SNodeId Id) const {
+ assert(Id > 0 && Id <= getSizeS() && "Invalid subtree node index.");
+ return LeftMostDescendants[Id - 1];
+ }
+
+private:
+ void setLeftMostDescendants() {
+ LeftMostDescendants.resize(getSizeS());
+ for (SNodeId I = 0; I < getSizeS(); ++I) {
+ NodeId Cur = getIdInRoot(I + 1);
+ while (!Tree.getNode(Cur).isLeaf())
+ Cur = Tree.getNode(Cur).Children[0];
+ // TODO why does this work
+ LeftMostDescendants[I] = 1 + (Tree.PostorderIds[Cur] - getIdInRoot(1));
+ }
+ assert(LeftMostDescendants[0] == 0 &&
+ "The leftmost node should have subtree index 0.");
+ }
+ void computeKeyRoots(int Leaves) {
+ KeyRoots.resize(Leaves);
+ std::unordered_set<SNodeId> Visited;
+ int K = Leaves - 1;
+ for (int I = getSizeS(); I > 0; --I) {
+ SNodeId LeftDesc = getLeftMostDescendant(I);
+ if (Visited.count(LeftDesc))
+ continue;
+ assert(K >= 0 && "K should be non-negative");
+ KeyRoots[K] = I;
+ Visited.insert(LeftDesc);
+ --K;
+ }
+ }
+};
+
+// Computes an optimal mapping between two trees
+class ZsMatcher {
+ using Pairs = std::vector<std::pair<NodeId, NodeId>>;
+
+ Subtree S1;
+ Subtree S2;
+ std::unique_ptr<std::unique_ptr<double[]>[]> TreeDist, ForestDist;
+
+public:
+ ZsMatcher(const TreeRoot &T1, const TreeRoot &T2, NodeId Id1, NodeId Id2)
+ : S1(T1, Id1), S2(T2, Id2) {
+ TreeDist =
+ llvm::make_unique<std::unique_ptr<double[]>[]>(S1.getSizeS() + 1);
+ ForestDist =
+ llvm::make_unique<std::unique_ptr<double[]>[]>(S1.getSizeS() + 1);
+ for (int I = 0; I < S1.getSizeS() + 1; ++I) {
+ TreeDist[I] = llvm::make_unique<double[]>(S2.getSizeS() + 1);
+ ForestDist[I] = llvm::make_unique<double[]>(S2.getSizeS() + 1);
+ }
+ }
+
+ Pairs matchZs() {
+ Pairs Matches;
+
+ computeTreeDist();
+
+ bool RootNodePair = true;
+ Pairs TreePairs{{S1.getSizeS(), S2.getSizeS()}};
+
+ NodeId LastRow, LastCol, FirstRow, FirstCol, Row, Col;
+ while (!TreePairs.empty()) {
+ std::tie(LastRow, LastCol) = TreePairs.back();
+ TreePairs.pop_back();
+
+ if (!RootNodePair) {
+ computeForestDist(LastRow, LastCol);
+ }
+
+ RootNodePair = false;
+
+ FirstRow = S1.getLeftMostDescendant(LastRow);
+ FirstCol = S2.getLeftMostDescendant(LastCol);
+
+ Row = LastRow;
+ Col = LastCol;
+
+ while (Row > FirstRow || Col > FirstCol) {
+ if (Row > FirstRow &&
+ ForestDist[Row - 1][Col] + 1 == ForestDist[Row][Col]) {
+ --Row;
+ } else if (Col > FirstCol &&
+ ForestDist[Row][Col - 1] + 1 == ForestDist[Row][Col]) {
+ --Col;
+ } else {
+ NodeId LMD1 = S1.getLeftMostDescendant(Row);
+ NodeId LMD2 = S2.getLeftMostDescendant(Col);
+ if (LMD1 == S1.getLeftMostDescendant(LastRow) &&
+ LMD2 == S2.getLeftMostDescendant(LastCol)) {
+ assert(S1.getNodeS(Row).hasSameType(S2.getNodeS(Col)) &&
+ "Must not match nodes of different kind.");
+ Matches.emplace_back(S1.getIdInRoot(Row), S2.getIdInRoot(Col));
+ --Row;
+ --Col;
+ } else {
+ TreePairs.emplace_back(Row, Col);
+ Row = LMD1;
+ Col = LMD2;
+ }
+ }
+ }
+ }
+ return Matches;
+ }
+
+private:
+ // TODO Use string editing distance if applicable.
+ double getUpdateCost(NodeId Id1, NodeId Id2) {
+ if (!S1.getNodeS(Id1).hasSameType(S2.getNodeS(Id2)))
+ return std::numeric_limits<double>::max();
+ if (S1.labelS(Id1) == S2.labelS(Id2))
+ return 0;
+ return 1;
+ }
+ void computeTreeDist() {
+ for (NodeId Id1 : S1.KeyRoots)
+ for (NodeId Id2 : S2.KeyRoots)
+ computeForestDist(Id1, Id2);
+ }
+ void dumpForestDist() const {
+ for (int I = 0; I < S1.getSizeS() + 1; ++I) {
+ llvm::errs() << "[";
+ for (int J = 0; J < S2.getSizeS() + 1; ++J)
+ llvm::errs() << formatv("{0}{1: 2}", (J == 0 ? "" : " "),
+ ForestDist[I][J]);
+ llvm::errs() << "]\n";
+ }
+ }
+ void computeForestDist(SNodeId Id1, SNodeId Id2) {
+ assert(Id1 > 0 && Id2 > 0 && "Expecting offsets greater than 0.");
+ SNodeId LMD1 = S1.getLeftMostDescendant(Id1);
+ SNodeId LMD2 = S2.getLeftMostDescendant(Id2);
+
+ ForestDist[LMD1][LMD2] = 0;
+ for (SNodeId D1 = LMD1 + 1; D1 <= Id1; ++D1) {
+ double DeletionCost = 1.0;
+ ForestDist[D1][LMD2] = ForestDist[D1 - 1][LMD2] + DeletionCost;
+ for (SNodeId D2 = LMD2 + 1; D2 <= Id2; ++D2) {
+ double InsertionCost = 1;
+ ForestDist[LMD1][D2] = ForestDist[LMD1][D2 - 1] + InsertionCost;
+ SNodeId DLMD1 = S1.getLeftMostDescendant(D1);
+ SNodeId DLMD2 = S2.getLeftMostDescendant(D2);
+ if (DLMD1 == LMD1 && DLMD2 == LMD2) {
+ double UpdateCost = getUpdateCost(D1, D2);
+ ForestDist[D1][D2] =
+ std::min(std::min(ForestDist[D1 - 1][D2] + DeletionCost,
+ ForestDist[D1][D2 - 1] + InsertionCost),
+ ForestDist[D1 - 1][D2 - 1] + UpdateCost);
+ TreeDist[D1][D2] = ForestDist[D1][D2];
+ } else {
+ ForestDist[D1][D2] =
+ std::min(std::min(ForestDist[D1 - 1][D2] + DeletionCost,
+ ForestDist[D1][D2 - 1] + InsertionCost),
+ ForestDist[DLMD1][DLMD2] + TreeDist[D1][D2]);
+ }
+ }
+ }
+ }
+};
+
+namespace {
+// Compares nodes by their depth.
+struct HeightLess {
+ const TreeRoot &Tree;
+ HeightLess(const TreeRoot &Tree) : Tree(Tree) {}
+ bool operator()(NodeId Id1, NodeId Id2) const {
+ return Tree.getNode(Id1).Height < Tree.getNode(Id2).Height;
+ }
+};
+} // end anonymous namespace
+
+// Priority queue for nodes, sorted descendingly by their height.
+class PriorityList {
+ const TreeRoot &Tree;
+ HeightLess Comparator;
+ std::vector<NodeId> Container;
+ PriorityQueue<NodeId, std::vector<NodeId>, HeightLess> List;
+
+public:
+ PriorityList(const TreeRoot &Tree)
+ : Tree(Tree), Comparator(Tree), List(Comparator, Container) {}
+
+ void push(NodeId id) { List.push(id); }
+
+ std::vector<NodeId> pop() {
+ int Max = peekMax();
+ std::vector<NodeId> Result;
+ if (Max == 0)
+ return Result;
+ while (peekMax() == Max) {
+ Result.push_back(List.top());
+ List.pop();
+ }
+ // TODO this is here to get a stable output, not a good heuristic
+ std::sort(Result.begin(), Result.end());
+ return Result;
+ }
+ int peekMax() const {
+ if (List.empty())
+ return 0;
+ return Tree.getNode(List.top()).Height;
+ }
+ void open(NodeId Id) {
+ for (NodeId Child : Tree.getNode(Id).Children)
+ push(Child);
+ }
+};
+
+/// Computes the differences between two ASTs
+class ClangDiff {
+private:
+ TreeRoot &T1;
+ TreeRoot &T2;
+
+ // Returns true if the two subtrees have the same structure
+ // and equal node kinds (does not compare labels).
+ bool isomorphic(NodeId Id1, NodeId Id2) const {
+ const Node &N1 = T1.getNode(Id1);
+ const Node &N2 = T2.getNode(Id2);
+ if (!N1.hasSameType(N2) || N1.Children.size() != N2.Children.size() ||
+ T1.label(Id1) != T2.label(Id2))
+ return false;
+ for (size_t Id = 0; Id < N1.Children.size(); ++Id)
+ if (!isomorphic(N1.Children[Id], N2.Children[Id]))
+ return false;
+ return true;
+ }
+
+ // TODO This is too restrictive, we want to allow multiple mapping
+ // candidates
+ // for nodes and resolve the ambiguity later.
+ bool isMappingAllowed(const Mappings &M, NodeId Id1, NodeId Id2) const {
+ const Node &N1 = T1.getNode(Id1);
+ const Node &N2 = T2.getNode(Id2);
+ bool AnyMapped = M.hasSrc(Id1) || M.hasDst(Id2);
+ bool SameType = N1.hasSameType(N2);
+ NodeId P1 = N1.Parent;
+ NodeId P2 = N2.Parent;
+ bool ParentsSameType = (P1 == NoNodeId && P2 == NoNodeId) ||
+ (P1 != NoNodeId && P2 != NoNodeId &&
+ T1.getNode(P1).hasSameType(T2.getNode(P2)));
+ return !AnyMapped && SameType && ParentsSameType;
+ }
+
+ // Adds all corresponding subtrees of the two nodes to the mappings.
+ // The two nodes must be isomorphic.
+ void addIsomorphicSubTrees(Mappings &M, NodeId Id1, NodeId Id2) const {
+ assert(isomorphic(Id1, Id2) &&
+ "Can only be called on isomorphic subtrees.");
+ M.link(Id1, Id2);
+ const Node &N1 = T1.getNode(Id1);
+ const Node &N2 = T2.getNode(Id2);
+ for (size_t Id = 0; Id < N1.Children.size(); ++Id)
+ addIsomorphicSubTrees(M, N1.Children[Id], N2.Children[Id]);
+ }
+
+ // Uses an optimal albeit slow algorithm to compute mappings for two
+ // subtrees, but only if both have fewer nodes than MaxSize.
+ void addOptimalMappings(Mappings &M, NodeId Id1, NodeId Id2) const {
+ if (std::max(T1.getNumberOfDescendants(Id1),
+ T2.getNumberOfDescendants(Id2)) >= MaxSize)
+ return;
+ ZsMatcher Matcher(T1, T2, Id1, Id2);
+ std::vector<std::pair<NodeId, NodeId>> R = Matcher.matchZs();
+ for (const auto Tuple : R) {
+ NodeId Src = Tuple.first;
+ NodeId Dst = Tuple.second;
+ if (isMappingAllowed(M, Src, Dst))
+ M.link(Src, Dst);
+ }
+ }
+
+ // Computes the ratio of common descendants between the two nodes.
+ // Descendants are only considered to be equal when they are mapped in M.
+ double dice(const Mappings &M, NodeId Id1, NodeId Id2) const {
+ if (Id1 == NoNodeId || Id2 == NoNodeId)
+ return 0.0;
+ int CommonDescendants = 0;
+ const Node &N1 = T1.getNode(Id1);
+ for (NodeId Id = N1.LeftMostDescendant; Id < Id1; ++Id)
+ CommonDescendants += int(M.hasSrc(Id));
+ return 2.0 * CommonDescendants /
+ (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2));
+ }
+
+ // Returns the node that has the highest degree of similarity.
+ NodeId findCandidate(const Mappings &M, NodeId Id1) const {
+ NodeId Candidate = NoNodeId;
+ double BestDiceValue = 0.0;
+ const Node &N1 = T1.getNode(Id1);
+ for (NodeId Id2 = 0; Id2 < T2.getSize(); ++Id2) {
+ const Node &N2 = T2.getNode(Id2);
+ if (!N1.hasSameType(N2))
+ continue;
+ if (M.hasDst(Id2))
+ continue;
+ double DiceValue = dice(M, Id1, Id2);
+ if (DiceValue > BestDiceValue) {
+ BestDiceValue = DiceValue;
+ Candidate = Id2;
+ }
+ }
+ return Candidate;
+ }
+
+ // Tries to match any yet unmapped nodes, in a bottom-up fashion.
+ void matchBottomUp(Mappings &M) const {
+ for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) {
+ if (Id1 == T1.root()) {
+ M.link(T1.root(), T2.root());
+ addOptimalMappings(M, T1.root(), T2.root());
+ break;
+ }
+ const Node &N1 = T1.getNode(Id1);
+ bool Matched = M.hasSrc(Id1);
+ bool MatchedChildren =
+ std::any_of(N1.Children.begin(), N1.Children.end(),
+ [&](NodeId Child) { return M.hasSrc(Child); });
+ if (Matched || !MatchedChildren)
+ continue;
+ NodeId Id2 = findCandidate(M, Id1);
+ if (Id2 == NoNodeId || dice(M, Id1, Id2) < MinDice ||
+ !isMappingAllowed(M, Id1, Id2))
+ continue;
+ M.link(Id1, Id2);
+ addOptimalMappings(M, Id1, Id2);
+ }
+ }
+
+ // Returns a set of mappings of isomorphic subtrees.
+ Mappings matchTopDown() const {
+ PriorityList L1(T1);
+ PriorityList L2(T2);
+
+ Mappings M(T1, T2);
+
+ L1.push(T1.root());
+ L2.push(T2.root());
+
+ int Max1, Max2;
+ while (std::min(Max1 = L1.peekMax(), Max2 = L2.peekMax()) > MinHeight) {
+ if (Max1 > Max2) {
+ for (NodeId Id : L1.pop())
+ L1.open(Id);
+ continue;
+ }
+ if (Max2 > Max1) {
+ for (NodeId Id : L2.pop())
+ L2.open(Id);
+ continue;
+ }
+ std::vector<NodeId> H1, H2;
+ H1 = L1.pop();
+ H2 = L2.pop();
+ for (NodeId Id1 : H1)
+ for (NodeId Id2 : H2)
+ if (isomorphic(Id1, Id2) && isMappingAllowed(M, Id1, Id2))
+ addIsomorphicSubTrees(M, Id1, Id2);
+ for (NodeId Id1 : H1)
+ if (!M.hasSrc(Id1))
+ L1.open(Id1);
+ for (NodeId Id2 : H2)
+ if (!M.hasDst(Id2))
+ L2.open(Id2);
+ }
+ return M;
+ }
+
+ // Finds an edit script that converts T1 to T2.
+ std::vector<Change> computeChanges(Mappings &M) {
+ std::vector<Change> Changes;
+ for (NodeId Id2 : T2.getSubtreeBfs(T2.root())) {
+ const Node &N2 = T2.getNode(Id2);
+ NodeId Id1 = M.getSrc(Id2);
+ if (Id1 != NoNodeId) {
+ assert(T1.getNode(Id1).hasSameType(N2) &&
+ "Matched nodes with different kinds.");
+ if (T1.label(Id1) != T2.label(Id2)) {
+ Changes.emplace_back(Update, Id1, Id2, /*UNUSED Position=*/0);
+ }
+ continue;
+ }
+ NodeId P2 = N2.Parent;
+ NodeId P1 = M.getSrc(P2);
+ assert(P1 != NoNodeId);
+ Node &Parent1 = T1.getMutableNode(P1);
+ const Node &Parent2 = T2.getNode(P2);
+ auto &Siblings1 = Parent1.Children;
+ const auto &Siblings2 = Parent2.Children;
+ size_t Position;
+ for (Position = 0; Position < Siblings2.size(); ++Position)
+ if (Siblings2[Position] == Id2 || Position >= Siblings1.size())
+ break;
+ Changes.emplace_back(Insert, Id2, P2, Position);
+ Node PatchNode;
+ PatchNode.Parent = P1;
+ PatchNode.LeftMostDescendant = N2.LeftMostDescendant;
+ PatchNode.Depth = N2.Depth;
+ PatchNode.ASTNode = N2.ASTNode;
+ // TODO update Depth if needed
+ NodeId PatchNodeId = T1.getSize();
+ // TODO maybe choose a different data structure for Children.
+ Siblings1.insert(Siblings1.begin() + Position, PatchNodeId);
+ T1.addNode(PatchNode);
+ M.link(PatchNodeId, Id2);
+ }
+ for (NodeId Id1 = 0; Id1 < T1.getSize(); ++Id1) {
+ NodeId Id2 = M.getDst(Id1);
+ if (Id2 == NoNodeId)
+ Changes.emplace_back(Delete, Id1, Id2, /*UNUSED Position=*/0);
+ }
+ return Changes;
+ }
+
+ // Prints an edit action.
+ void printChange(const Change &Chg) const { printChange(llvm::outs(), Chg); }
+ void printChange(raw_ostream &OS, const Change &Chg) const {
+ ChangeKind Kind;
+ NodeId Id1, Id2;
+ size_t Position;
+ std::tie(Kind, Id1, Id2, Position) = Chg;
+ std::string S;
+ switch (Kind) {
+ case Delete:
+ S = formatv("Delete {0}", T1.showNode(Id1));
+ break;
+ case Update:
+ S = formatv("Update {0} to {1}", T1.showNode(Id1), T2.label(Id2));
+ break;
+ case Insert:
+ S = formatv("Insert {0} into {1} at {2}", T2.showNode(Id1),
+ T2.showNode(Id2), Position);
+ break;
+ case Move:
+ llvm_unreachable("TODO");
+ break;
+ };
+ OS << S << "\n";
+ }
+
+ Mappings match() const {
+ Mappings M = matchTopDown();
+ matchBottomUp(M);
+ return M;
+ }
+
+public:
+ // During top-down matching, only consider nodes of at least this height.
+ int MinHeight = 2;
+ // During bottom-up matching, match only nodes with at least this value as
+ // the ratio of their common descendants.
+ double MinDice = 0.2;
+ // Whenever two subtrees are matched in the bottom-up phase, the optimal
+ // mapping is computed, unless the size of either subtrees exceeds this.
+ int MaxSize = 100;
+
+ ClangDiff(TreeRoot &T1, TreeRoot &T2) : T1(T1), T2(T2) {}
+
+ void printDiff() {
+ Mappings M = match();
+ M.printMapping();
+ auto Changes = computeChanges(M);
+ for (const auto &C : Changes)
+ printChange(C);
+ }
+};
+
+void runDiff(ASTContext &AST1, ASTContext &AST2) {
+ TreeRoot T1(AST1);
+ TreeRoot T2(AST2);
+ ClangDiff DiffTool(T1, T2);
+ DiffTool.printDiff();
+}
+
+} // end namespace diff
+} // end namespace clang
Index: include/clang/Tooling/ASTDiff/ASTDiff.h
===================================================================
--- /dev/null
+++ include/clang/Tooling/ASTDiff/ASTDiff.h
@@ -0,0 +1,128 @@
+//===- ASTDiff.h - AST differencing API -----------------------*- C++ -*- -===//
+//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file specifies an interface that can be used to compare C++ syntax
+// trees.
+//
+// We use the gumtree algorithm which combines a heuristic top-down search that
+// is able to match large subtrees that are equivalent, with an optimal
+// algorithm to match small subtrees.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFF_H
+#define LLVM_CLANG_TOOLING_ASTDIFF_ASTDIFF_H
+
+#include "clang/AST/ASTTypeTraits.h"
+
+using namespace llvm;
+using namespace clang;
+
+namespace clang {
+namespace diff {
+
+/// Within a tree, this identifies a node by its preorder offset.
+using NodeId = int;
+
+/// Sentinel value for invalid nodes.
+const NodeId NoNodeId = -1;
+
+/// Represents a Clang AST node, alongside some additional information.
+struct Node {
+ NodeId Parent, LeftMostDescendant;
+ int Depth, Height;
+ ast_type_traits::DynTypedNode ASTNode;
+ // Maybe there is a better way to store children than this.
+ SmallVector<NodeId, 4> Children;
+
+ ast_type_traits::ASTNodeKind getType() const { return ASTNode.getNodeKind(); }
+ bool hasSameType(const Node &Other) const {
+ return getType().isSame(Other.getType()) ||
+ (getType().isNone() && Other.getType().isNone());
+ }
+ const StringRef getTypeLabel() const { return getType().asStringRef(); }
+ bool isLeaf() const { return Children.empty(); }
+};
+
+/// Represents the AST of a TranslationUnit.
+class TreeRoot {
+private:
+ /// Nodes in preorder.
+ std::vector<Node> Nodes;
+ void setRightMostDescendant();
+ void setLeftMostDescendant();
+ void setSize(size_t Size) { Nodes.resize(Size); }
+ std::string getSourceString(SourceLocation SourceBegin,
+ SourceLocation SourceEnd) const;
+
+public:
+ ASTContext &AST;
+ std::vector<NodeId> Leaves;
+ std::vector<int> PostorderIds;
+
+ TreeRoot(ASTContext &AST);
+
+ int getSize() const { return Nodes.size(); }
+ NodeId root() const { return 0; }
+
+ const Node &getNode(NodeId Id) const { return Nodes[Id]; }
+ Node &getMutableNode(NodeId Id) { return Nodes[Id]; }
+ bool isValidNodeId(NodeId Id) const { return Id >= 0 && Id < getSize(); }
+ void addNode(Node &N) { Nodes.push_back(N); }
+ template <class T> bool discardNode(T *N) const;
+
+ int getSubtreePostorder(std::vector<NodeId> &Ids, NodeId Root) const;
+ std::vector<NodeId> getSubtreeBfs(NodeId Id) const;
+ int getNumberOfDescendants(NodeId Id) const;
+
+ /// Returns the value of the node.
+ std::string label(NodeId Id) const;
+ void printTree(raw_ostream &OS, NodeId Id = NoNodeId) const;
+ /// Return the node as "<kind>[: <label>](<postorder-id)"
+ std::string showNode(NodeId Id) const;
+ void printNodeAsJson(raw_ostream &OS, NodeId Id) const;
+ void printAsJson() const;
+ void printAsJson(raw_ostream &OS) const;
+};
+
+/// Maps nodes of the left tree to ones on the right, and vice versa.
+// Supports fast insertion and lookup.
+// TODO allow multiple mappings
+class Mappings {
+ std::unique_ptr<NodeId[]> SrcToDst, DstToSrc;
+ const TreeRoot &T1, &T2;
+
+public:
+ Mappings(const TreeRoot &T1, const TreeRoot &T2);
+
+ void link(NodeId Src, NodeId Dst);
+
+ NodeId getDst(NodeId Src) const { return SrcToDst[Src]; }
+ NodeId getSrc(NodeId Dst) const { return DstToSrc[Dst]; }
+ bool hasSrc(NodeId Src) const { return SrcToDst[Src] != NoNodeId; }
+ bool hasDst(NodeId Dst) const { return DstToSrc[Dst] != NoNodeId; }
+
+ void printMapping() const;
+ void printMapping(raw_ostream &OS) const;
+};
+
+/// (Update, src, dst, _ ): update the value of node src to match dst.
+/// (Insert, src, dst, pos): insert src as child of dst at offset pos.
+/// (Delete, src, _, _ ): delete node src.
+/// (Move, src, dst, pos): move src to a child of dst at offset pos.
+enum ChangeKind { Delete, Update, Insert, Move };
+using Change = std::tuple<ChangeKind, NodeId, NodeId, size_t>;
+
+void runDiff(ASTContext &AST1, ASTContext &AST2);
+
+} // end namespace diff
+} // end namespace clang
+
+#endif
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits