stettberger updated this revision to Diff 125309.
stettberger marked 9 inline comments as done.
stettberger added a comment.

@Eugene.Zelenko Thank you for pointing out these issues. I ran clang-tidy
and clang-format on CHashVisitor.h.

@rsmith You're right, there is already more than one implementation of
{partial,unstable} AST hashes within LLVM, as we already discussed on cfe-dev
in August[1]. Therefore, I rewrote our original CHash implementation to extend
the already existing StmtDataCollectors approach. However, you are right,
efforts should be coordinated to get an AST hashing implementation that can be
adapted to various use-case scenarios.

[1] http://lists.llvm.org/pipermail/cfe-dev/2017-August/054911.html


Repository:
  rC Clang

https://reviews.llvm.org/D40731

Files:
  include/clang/AST/AttrDataCollectors.td
  include/clang/AST/CHashVisitor.h
  include/clang/AST/CMakeLists.txt
  include/clang/AST/DeclDataCollectors.td
  include/clang/AST/StmtDataCollectors.td
  include/clang/AST/TypeDataCollectors.td
  unittests/AST/CHashTest.cpp
  unittests/AST/CMakeLists.txt

Index: unittests/AST/CMakeLists.txt
===================================================================
--- unittests/AST/CMakeLists.txt
+++ unittests/AST/CMakeLists.txt
@@ -18,6 +18,7 @@
   PostOrderASTVisitor.cpp
   SourceLocationTest.cpp
   StmtPrinterTest.cpp
+  CHashTest.cpp
   )
 
 target_link_libraries(ASTTests
Index: unittests/AST/CHashTest.cpp
===================================================================
--- /dev/null
+++ unittests/AST/CHashTest.cpp
@@ -0,0 +1,91 @@
+//===- unittests/AST/CHashTest.cpp ----------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains tests for the DataCollection module.
+//
+// They work by hashing the collected data of two nodes and asserting that the
+// hash values are equal iff the nodes are considered equal.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CHashVisitor.h"
+#include "clang/Tooling/Tooling.h"
+#include "gtest/gtest.h"
+#include <memory>
+
+using namespace clang;
+using namespace tooling;
+
+
+class CHashConsumer : public ASTConsumer {
+    CompilerInstance &CI;
+    llvm::MD5::MD5Result *ASTHash;
+
+public:
+
+    CHashConsumer(CompilerInstance &CI, llvm::MD5::MD5Result *ASTHash)
+        : CI(CI), ASTHash(ASTHash){}
+
+    virtual void HandleTranslationUnit(clang::ASTContext &Context) override {
+        TranslationUnitDecl *TU = Context.getTranslationUnitDecl();
+
+        // Traversing the translation unit decl via a RecursiveASTVisitor
+        // will visit all nodes in the AST.
+        CHashVisitor<> Visitor(Context);
+        Visitor.TraverseDecl(TU);
+        // Copy Away the resulting hash
+        *ASTHash = *Visitor.getHash(TU);
+
+    }
+
+    ~CHashConsumer() override {}
+};
+
+struct CHashAction : public ASTFrontendAction {
+    llvm::MD5::MD5Result *Hash;
+
+    CHashAction(llvm::MD5::MD5Result *Hash) : Hash(Hash) {}
+
+    std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                   StringRef) override {
+        return std::unique_ptr<ASTConsumer>(new CHashConsumer(CI, Hash));
+    }
+};
+
+static testing::AssertionResult
+isASTHashEqual(StringRef Code1, StringRef Code2) {
+    llvm::MD5::MD5Result Hash1, Hash2;
+    if (!runToolOnCode(new CHashAction(&Hash1), Code1)) {
+        return testing::AssertionFailure()
+            << "Parsing error in (A)\"" << Code1.str() << "\"";
+    }
+    if (!runToolOnCode(new CHashAction(&Hash2), Code2)) {
+        return testing::AssertionFailure()
+            << "Parsing error in (B) \"" << Code2.str() << "\"";
+    }
+    return testing::AssertionResult(Hash1 == Hash2);
+}
+
+TEST(CHashVisitor, TestRecordTypes) {
+    ASSERT_TRUE(isASTHashEqual( // Unused struct
+                     "struct foobar { int a0; char a1; unsigned long a2; };",
+                     "struct foobar { int a0; char a1;};"
+                     ));
+
+}
+
+TEST(CHashVisitor, TestSourceStructure) {
+    ASSERT_FALSE(isASTHashEqual(
+                     "void foo() { int c; if (0) { c = 1; } }",
+                     "void foo() { int c; if (0) { } c = 1; }"));
+
+    ASSERT_FALSE(isASTHashEqual(
+                     "void f1() {} void f2() {       }",
+                     "void f1() {} void f2() { f1(); }"));
+}
Index: include/clang/AST/TypeDataCollectors.td
===================================================================
--- include/clang/AST/TypeDataCollectors.td
+++ include/clang/AST/TypeDataCollectors.td
@@ -0,0 +1,67 @@
+//--- Types ---------------------------------------------------------------//
+
+class Type {
+  code Code = [{
+     addData(S->getTypeClass());
+  }];
+}
+
+class BuiltinType {
+   code Code = [{
+      addData(S->getKind());
+   }];
+}
+
+class ArrayType  {
+   code Code = [{
+      addData(S->getSizeModifier());
+      addData(S->getIndexTypeCVRQualifiers());
+   }];
+}
+
+class ConstantArrayType {
+   code Code = [{
+      addData(S->getSize().getZExtValue());
+   }];
+}
+
+class VectorType {
+   code Code = [{
+      addData(S->getNumElements());
+      addData(S->getVectorKind());
+   }];
+}
+
+class FunctionType {
+   code Code = [{
+      addData(S->getRegParmType());
+      addData(S->getCallConv());
+   }];
+}
+
+class FunctionProtoType {
+   code Code = [{
+      addData(S->getExceptionSpecType());
+      addData(S->isVariadic());
+      addData(S->getRefQualifier());
+      addData(S->hasTrailingReturn());
+   }];
+}
+
+class UnaryTransformType {
+   code Code = [{
+        addData(S->getUTTKind());
+   }];
+}
+
+class AttributedType {
+   code Code = [{
+        addData(S->getAttrKind());
+   }];
+}
+
+class ElaboratedType {
+   code Code = [{
+        addData(S->getKeyword());
+   }];
+}
Index: include/clang/AST/StmtDataCollectors.td
===================================================================
--- include/clang/AST/StmtDataCollectors.td
+++ include/clang/AST/StmtDataCollectors.td
@@ -28,6 +28,7 @@
 class PredefinedExpr {
   code Code = [{
     addData(S->getIdentType());
+    addData(S->getFunctionName()->getString());
   }];
 }
 class TypeTraitExpr {
@@ -38,6 +39,16 @@
   }];
 }
 
+class UnaryExprOrTypeTraitExpr {
+  code Code = [{
+    addData(S->getKind());
+    if (S->isArgumentType()) {
+       addData(S->getArgumentType());
+    }
+  }];
+}
+
+
 //--- Calls --------------------------------------------------------------//
 class CallExpr {
   code Code = [{
@@ -72,6 +83,7 @@
 }
 class MemberExpr {
   code Code = [{
+    // I suspect this should be included: addData(S->isArrow());
     addData(S->getMemberDecl()->getName());
   }];
 }
@@ -124,6 +136,12 @@
   }];
 }
 
+class CastExpr {
+  code Code = [{
+    addData(S->getCastKind());
+  }];
+}
+
 //--- Miscellaneous Exprs ------------------------------------------------//
 class BinaryOperator {
   code Code = [{
@@ -136,6 +154,12 @@
   }];
 }
 
+class VAArgExpr {
+  code Code = [{
+       addData(S->isMicrosoftABI());
+  }];
+}
+
 //--- Control flow -------------------------------------------------------//
 class GotoStmt {
   code Code = [{
@@ -210,6 +234,7 @@
   code Code = [{
     auto numDecls = std::distance(S->decl_begin(), S->decl_end());
     addData(static_cast<unsigned>(numDecls));
+    // FIXME?
     for (const Decl *D : S->decls()) {
       if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
         addData(VD->getType());
@@ -236,7 +261,23 @@
 class AttributedStmt {
   code Code = [{
     for (const Attr *A : S->getAttrs()) {
+       // We duplicate class Attr here to not rely on being integrated
+       // into a RecursiveASTVisitor.
+       std::string AttrString;
+       llvm::raw_string_ostream OS(AttrString);
+       A->printPretty(OS, Context.getLangOpts());
+       OS.flush();
+       // Hash the printed form, mirroring the Attr data collector.
+       addData(AttrString);
       addData(std::string(A->getSpelling()));
     }
   }];
 }
+
+class CompoundStmt {
+  code Code = [{
+    addData(S->size());
+  }];
+}
+
+
Index: include/clang/AST/DeclDataCollectors.td
===================================================================
--- include/clang/AST/DeclDataCollectors.td
+++ include/clang/AST/DeclDataCollectors.td
@@ -0,0 +1,105 @@
+//--- Declarations -------------------------------------------------------//
+
+class ValueDecl  {
+  code Code = [{
+      addData(S->getType());
+      addData(S->isWeak());
+  }];
+}
+
+class NamedDecl {
+  code Code = [{
+      addData(S->getName());
+  }];
+}
+
+class TypeDecl {
+  code Code = [{
+      addData(QualType(S->getTypeForDecl(),0));
+  }];
+}
+
+class EnumDecl {
+  code Code = [{
+      addData(S->getNumPositiveBits());
+      addData(S->getNumNegativeBits());
+  }];
+}
+
+class EnumConstantDecl {
+  code Code = [{
+       /* Not every enum has a init expression. Therefore, 
+          we extract the actual enum value from it. */
+       addData(S->getInitVal().getExtValue());
+  }];
+}
+
+class TagDecl {
+  code Code = [{
+     addData(S->getTagKind());
+  }];
+}
+
+
+class TypedefNameDecl {
+  code Code = [{
+     addData(S->getUnderlyingType());
+  }];
+}
+
+class VarDecl {
+  code Code = [{
+      addData(S->getStorageClass());
+      addData(S->getTLSKind());
+      addData(S->isModulePrivate());
+      addData(S->isNRVOVariable());
+  }];
+}
+
+class ParmVarDecl {
+  code Code = [{
+       addData(S->isParameterPack());
+       addData(S->getOriginalType());
+  }];
+}
+
+class ImplicitParamDecl {
+  code Code = [{
+       addData(S->getParameterKind());
+  }];
+}
+
+class FunctionDecl {
+  code Code = [{
+       addData(S->isExternC());
+       addData(S->isGlobal());
+       addData(S->isNoReturn());
+       addData(S->getStorageClass());
+       addData(S->isInlineSpecified());
+       addData(S->isInlined());
+  }];
+}
+
+class LabelDecl {
+  code Code = [{
+       addData(S->isGnuLocal());
+       addData(S->isMSAsmLabel());
+       if (S->isMSAsmLabel()) {
+          addData(S->getMSAsmLabel());
+       }
+  }];
+}
+
+class FieldDecl {
+  code Code = [{
+      addData(S->isBitField());
+  }];
+}
+
+class CapturedDecl {
+  code Code = [{
+      addData(S->isNothrow());
+  }];
+}
+
+
Index: include/clang/AST/CMakeLists.txt
===================================================================
--- include/clang/AST/CMakeLists.txt
+++ include/clang/AST/CMakeLists.txt
@@ -53,3 +53,15 @@
 clang_tablegen(StmtDataCollectors.inc -gen-clang-data-collectors
   SOURCE StmtDataCollectors.td
   TARGET StmtDataCollectors)
+
+clang_tablegen(DeclDataCollectors.inc -gen-clang-data-collectors
+  SOURCE DeclDataCollectors.td
+  TARGET DeclDataCollectors)
+
+clang_tablegen(AttrDataCollectors.inc -gen-clang-data-collectors
+  SOURCE AttrDataCollectors.td
+  TARGET AttrDataCollectors)
+
+clang_tablegen(TypeDataCollectors.inc -gen-clang-data-collectors
+  SOURCE TypeDataCollectors.td
+  TARGET TypeDataCollectors)
Index: include/clang/AST/CHashVisitor.h
===================================================================
--- /dev/null
+++ include/clang/AST/CHashVisitor.h
@@ -0,0 +1,457 @@
+//===--- CHashVisitor.h - Stable AST Hashing -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the CHashVisitor class template.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_CHASHVISITOR_H
+#define LLVM_CLANG_AST_CHASHVISITOR_H
+
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/DataCollection.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "llvm/Support/MD5.h"
+#include <map>
+#include <string>
+
+namespace clang {
+
+namespace CHashConstants {
+
+enum {
+  Attr = 0x56b6cba9,
+  InheritableAttr = 0x7c0b04ce,
+  InheritableParamAttr = 0x6a4fdb90,
+
+  NamedDecl = 0,
+  TypeDecl = 0,
+  Decl = 0,
+  TagDecl = 0,
+  VarDecl = 0xb19c2ee2,
+  ImplicitParamDecl = 0xd04f138f,
+  ParmVarDecl = 0x1fe2fcb9,
+  TypedefNameDecl = 0xe8cca403,
+  BlockDecl = 0x761e230f,
+  FunctionDecl = 0x2a34b689,
+  LabelDecl = 0xff6db781,
+  EnumDecl = 0xc564aed1,
+  EnumConstantDecl = 0x11050d85,
+  IndirectFieldDecl = 0x937408ea,
+  ValueDecl = 0xbb06d011,
+  FileScopeAsmDecl = 0x381879fa,
+  CapturedDecl = 0xa3a884ed,
+  FieldDecl = 0xac0c83d4,
+  RecordDecl = 0x27892cea,
+
+  Expr = 0,
+  StmtExpr = 0xf4bb377e,
+  CastExpr = 0x7c505e88,
+  DeclRefExpr = 0xa33a24f3,
+  PredefinedExpr = 0xffb3cc20,
+  InitListExpr = 0xe23aaddd,
+  UnaryExprOrTypeTraitExpr = 0xb4995380,
+  MemberExpr = 0xe682fc67,
+  AddrLabelExpr = 0xe511b92e,
+  CompoundLiteralExpr = 0xc54ffefa,
+  CallExpr = 0x427cc6e8,
+  OffsetOfExpr = 0x48232f36,
+  ParenExpr = 0xf1a9c911,
+  AtomicExpr = 0x7e5497b7,
+  ParenListExpr = 0x64600f,
+  DesignatedInitExpr = 0x8d017154,
+  ArraySubscriptExpr = 0x8c7ab6b2,
+  ImplicitValueInitExpr = 0xfe7647fa,
+  VAArgExpr = 0xdf10fedc,
+  BlockExpr = 0xcc75aacd,
+  ShuffleVectorExpr = 0x2e2321ad,
+  ConvertVectorExpr = 0xfe447195,
+  TypeTraitExpr = 0xe9bda7a,
+  ArrayTypeTraitExpr = 0xd6b02f4,
+  CXXBoolLiteralExpr = 0x45b2a746,
+  CXXDeleteExpr = 0xbcfa92ec,
+  CXXFoldExpr = 0x1cc7935,
+  ObjCPropertyRefExpr = 0x6636c2c,
+  ObjCIndirectCopyRestoreExpr = 0xb53e833,
+  ObjCBridgedCastExpr = 0xcc79223,
+  LambdaExpr = 0xd799f74,
+  GenericSelectionExpr = 0x51b395c,
+  ExpressionTraitExpr = 0x8f308a7,
+  CharacterLiteral = 0x2a1c033f,
+  IntegerLiteral = 0x7b2daa87,
+  FloatingLiteral = 0xceee8473,
+  StringLiteral = 0xe5846c45,
+  ImaginaryLiteral = 0xe340180e,
+
+  UnaryOperator = 0x496a1fb5,
+  BinaryOperator = 0xa6339d46,
+  CompoundAssignmentOperator = 0x9c582bf3,
+  AbstractConditionalOperator = 0x151982b7,
+  BinaryConditionalOperator = 0x40d2aa93,
+
+  Stmt = 0,
+  ForStmt = 0xec4e334f,
+  IfStmt = 0x3de06c3c,
+  NullStmt = 0x777400e0,
+  DoStmt = 0xa80405bd,
+  GotoStmt = 0xec2a6be8,
+  ContinueStmt = 0x2c518360,
+  ReturnStmt = 0x1cf8354e,
+  WhileStmt = 0x6cb85f96,
+  LabelStmt = 0xe3d17613,
+  SwitchStmt = 0x6ef423db,
+  CaseStmt = 0x9640cc21,
+  DefaultStmt = 0x2f6febe9,
+  DeclStmt = 0xbe748556,
+  CompoundStmt = 0x906b6fb4,
+  BreakStmt = 0x530ae0a9,
+  GCCAsmStmt = 0x652782d6,
+  MSAsmStmt = 0xccd123ef,
+  AttributedStmt = 0x8e36d148,
+  CaptureStmt = 0x1cafe3db,
+  IndirectGotoStmt = 0x98888356,
+  AsmStmt = 0xe8cca40,
+  CXXCatchStmt = 0xc853e2ac,
+  ObjCAtCatchStmt = 0xd6ce349,
+  MSDependentExistsStmt = 0xf2097b9,
+
+  Type = 0xf13daabe,
+  PointerType = 0x5b868718,
+  ArrayType = 0xd0b37bef,
+  ConstantArrayType = 0x6439c9ef,
+  VariableArrayType = 0x74887cd4,
+  ComplexType = 0x75d5304a,
+  AtomicType = 0x8a024d89,
+  TypeOfExprType = 0x3417cfda,
+  TypeOfType = 0x98090139,
+  ParenType = 0x7c2df2fc,
+  FunctionType = 0x8647819b,
+  FunctionProtoType = 0x4dd5f204,
+  EnumType = 0x4acd4cde,
+  TagType = 0x94c7a399,
+  AttributedType = 0xddc8426,
+  UnaryTransformType = 0xca8afa5b,
+  DecayedType = 0x707c703e,
+  AdjustedType = 0x9936193,
+  ElaboratedType = 0x96681107,
+  StructureType = 0xa5b0d36d,
+  UnionType = 0x5057c896,
+  VectorType = 0x4ed393c3,
+  BuiltinType = 0xb190dc73,
+  PipeType = 0xe9bb85af,
+  RValueReferenceType = 0xa6c4b308,
+  LValueReferenceType = 0xdb0b2b7d,
+  FunctionNoProtoType = 0x6a185f1b,
+  ExtVectorType = 0x7d816c99,
+  IncompleteArrayType = 0xb07dce69,
+  MemberPointerType = 0x68ce4241,
+  DependentAddressSpaceType = 0x4bfa546,
+  BlockPointerType = 0x6ab898b3,
+  DependentSizedExtVectorType = 0xb5eb2dc1,
+  DependentSizedArrayType = 0xfbbc5e13,
+  DecltypeType = 0xe61ede42,
+  AutoType = 0x15937adc,
+  DependentSizedExtVector = 0x80161e92,
+};
+
+} // namespace CHashConstants
+
+template <typename H = llvm::MD5, typename HR = llvm::MD5::MD5Result>
+class CHashVisitor : public clang::RecursiveASTVisitor<CHashVisitor<H, HR>> {
+
+  using Inherited = clang::RecursiveASTVisitor<CHashVisitor<H, HR>>;
+
+public:
+  using Hash = H;
+  using HashResult = HR;
+
+  /// Configure the RecursiveASTVisitor
+  bool shouldWalkTypesOfTypeLocs() const { return false; }
+
+protected:
+  ASTContext &Context;
+
+  // For the DataCollector, we implement a few addData() functions
+  void addData(uint64_t data) { topHash().update(data); }
+  void addData(const StringRef &str) { topHash().update(str); }
+  // On our way down, we meet a lot of qualified types.
+  void addData(const QualType &T) {
+    // 1. Hash referenced type
+    const Type *const ActualType = T.getTypePtr();
+    assert(ActualType != nullptr);
+
+    // FIXME: Structural hash
+    // 1.1 Was it already hashed?
+    const HashResult *const SavedDigest = getHash(ActualType);
+    if (SavedDigest) {
+      // 1.1.1 Use cached value
+      topHash().update(SavedDigest->Bytes);
+    } else {
+      // 1.1.2 Calculate hash for type
+      const Hash *const CurrentHash = pushHash();
+      Inherited::TraverseType(T); // Uses getTypePtr() internally
+      const HashResult TypeDigest = popHash(CurrentHash);
+      topHash().update(TypeDigest.Bytes);
+
+      // Store hash for underlying type
+      storeHash(ActualType, TypeDigest);
+    }
+
+    // Add the qualifiers at this specific usage of the type
+    addData(T.getCVRQualifiers());
+  }
+
+public:
+#define DEF_ADD_DATA_STORED(CLASS, CODE)                                       \
+  template <class = void> bool Visit##CLASS(const CLASS *S) {                  \
+    unsigned tag = CHashConstants::CLASS;                                      \
+    if (tag != 0) {                                                            \
+      addData(tag);                                                            \
+    }                                                                          \
+    CODE;                                                                      \
+    return true;                                                               \
+  }
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/StmtDataCollectors.inc"
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/AttrDataCollectors.inc"
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/DeclDataCollectors.inc"
+#define DEF_ADD_DATA(CLASS, CODE) DEF_ADD_DATA_STORED(CLASS, CODE)
+#include "clang/AST/TypeDataCollectors.inc"
+
+  CHashVisitor(ASTContext &Context) : Context(Context) {}
+
+  /* For some special nodes, override the traverse function, since we
+     need both pre- and post order traversal */
+  bool TraverseTranslationUnitDecl(TranslationUnitDecl *TU) {
+    if (!TU)
+      return true;
+    // First, we push a new hash onto the hashing stack. This hash
+    // will capture everything within the TU.
+    Hash *CurrentHash = pushHash();
+
+    Inherited::WalkUpFromTranslationUnitDecl(TU);
+
+    // Do recursion on our own, since we want to exclude some children
+    const auto DC = cast<DeclContext>(TU);
+    for (auto *Child : DC->noload_decls()) {
+      if (isa<TypedefDecl>(Child) || isa<RecordDecl>(Child) ||
+          isa<EnumDecl>(Child))
+        continue;
+
+      // Extern variable definitions at the top-level
+      if (const auto VD = dyn_cast<VarDecl>(Child)) {
+        if (VD->hasExternalStorage()) {
+          continue;
+        }
+      }
+
+      if (const auto FD = dyn_cast<FunctionDecl>(Child)) {
+        // We try to avoid hashing of declarations that have no definition
+        if (!FD->isThisDeclarationADefinition()) {
+          bool doHashing = false;
+          // HOWEVER! If this declaration is an alias Declaration, we
+          // hash it no matter what
+          if (FD->hasAttrs()) {
+            for (const Attr *const A : FD->getAttrs()) {
+              if (A->getKind() == attr::Kind::Alias) {
+                doHashing = true;
+                break;
+              }
+            }
+          }
+          if (!doHashing)
+            continue;
+        }
+      }
+
+      TraverseDecl(Child);
+    }
+
+    storeHash(TU, popHash(CurrentHash));
+
+    return true;
+  }
+
+  /* Traverses D and, for defined functions, global variables and complete
+     records, caches the subtree digest so later visits reuse it. Storing
+     of type hashes is done in addData(). */
+  bool TraverseDecl(Decl *D) {
+    if (!D)
+      return true;
+    /* For some declarations, we store the calculated hash value. */
+    bool CacheHash = false;
+    if (isa<FunctionDecl>(D) && cast<FunctionDecl>(D)->isDefined())
+      CacheHash = true;
+    if (isa<VarDecl>(D) && cast<VarDecl>(D)->hasGlobalStorage())
+      CacheHash = true;
+    if (isa<RecordDecl>(D) && cast<RecordDecl>(D)->isCompleteDefinition())
+      CacheHash = true;
+
+    if (!CacheHash) {
+      return Inherited::TraverseDecl(D);
+    }
+
+    const HashResult *const SavedDigest = getHash(D);
+    if (SavedDigest) {
+      topHash().update(SavedDigest->Bytes);
+      return true;
+    }
+    Hash *CurrentHash = pushHash();
+    bool Ret = Inherited::TraverseDecl(D);
+    HashResult CurrentHashResult = popHash(CurrentHash);
+    storeHash(D, CurrentHashResult);
+    if (!isa<TranslationUnitDecl>(D)) {
+      topHash().update(CurrentHashResult.Bytes);
+    }
+
+    return Ret;
+  }
+
+  /*****************************************************************
+   * When doing a semantic hash, we have to use cross-tree links to
+   * other parts of the AST, here we establish these links
+   */
+
+#define DEF_TYPE_GOTO_DECL(CLASS, EXPR)                                        \
+  bool Visit##CLASS(CLASS *T) {                                                \
+    Inherited::Visit##CLASS(T);                                                \
+    return TraverseDecl(EXPR);                                                 \
+  }
+
+  DEF_TYPE_GOTO_DECL(TypedefType, T->getDecl());
+  DEF_TYPE_GOTO_DECL(RecordType, T->getDecl());
+  // The EnumType forwards to the declaration. The declaration does
+  // not hand back to the type.
+  DEF_TYPE_GOTO_DECL(EnumType, T->getDecl());
+  bool TraverseEnumDecl(EnumDecl *E) {
+    /* In the original RecursiveASTVisitor
+       > if (D->getTypeForDecl()) {
+       >    TRY_TO(TraverseType(QualType(D->getTypeForDecl(), 0)));
+       > }
+       => NO, NO, NO, to avoid endless recursion
+    */
+    return Inherited::WalkUpFromEnumDecl(E);
+  }
+
+  bool VisitTypeDecl(TypeDecl *D) {
+    addData(CHashConstants::TypeDecl);
+    // If we would hash the resulting type for a typedef, we
+    // would get into an endless recursion.
+    if (!isa<TypedefNameDecl>(D) && !isa<RecordDecl>(D) && !isa<EnumDecl>(D)) {
+      addData(QualType(D->getTypeForDecl(), 0));
+    }
+    return true;
+  }
+
+  bool VisitDeclRefExpr(DeclRefExpr *E) {
+    addData(CHashConstants::DeclRefExpr);
+    ValueDecl *ValDecl = E->getDecl();
+    // Function Declarations are handled in VisitCallExpr
+    if (!ValDecl) {
+      return true;
+    }
+    if (isa<VarDecl>(ValDecl)) {
+      /* We emulate TraverseDecl here for VarDecl, because we
+       * are not allowed to call TraverseDecl here, since the
+       * initial expression of a DeclRefExpr might reference a
+       * surrounding declaration itself. For example:
+       *
+       * struct foo {int N;};
+       * struct foo a = { sizeof(a) };
+       */
+      VarDecl *VD = static_cast<VarDecl *>(ValDecl);
+      VisitNamedDecl(VD);
+      Inherited::TraverseType(VD->getType());
+      VisitVarDecl(VD);
+    } else if (isa<FunctionDecl>(ValDecl)) {
+      /* Hash Functions without their body */
+      FunctionDecl *FD = static_cast<FunctionDecl *>(ValDecl);
+      Stmt *Body = FD->getBody();
+      FD->setBody(nullptr);
+      TraverseDecl(FD);
+      FD->setBody(Body);
+    } else {
+      TraverseDecl(ValDecl);
+    }
+    return true;
+  }
+
+  bool VisitValueDecl(ValueDecl *D) {
+    addData(CHashConstants::ValueDecl);
+    /* Field Declarations can induce recursions */
+    if (isa<FieldDecl>(D)) {
+      addData(std::string(D->getType().getAsString()));
+    } else {
+      addData(D->getType());
+    }
+    addData(D->isWeak());
+    return true;
+  }
+
+  /*****************************************************************
+   * For performance reasons, we cache some of the hashes for types
+   * and declarations.
+   */
+
+public:
+  // We store hashes for declarations and types in separate maps.
+  std::map<const Type *, HashResult> TypeSilo;
+  std::map<const Decl *, HashResult> DeclSilo;
+
+  void storeHash(const Type *Obj, HashResult Dig) { TypeSilo[Obj] = Dig; }
+
+  void storeHash(const Decl *Obj, HashResult Dig) { DeclSilo[Obj] = Dig; }
+
+  /// Returns the cached digest for \p Obj, or nullptr if none was stored.
+  const HashResult *getHash(const Type *Obj) {
+    // Query the map once and reuse the iterator for the result.
+    const auto Found = TypeSilo.find(Obj);
+    return Found == TypeSilo.end() ? nullptr : &Found->second;
+  }
+
+  /// Returns the cached digest for \p Obj, or nullptr if none was stored.
+  const HashResult *getHash(const Decl *Obj) {
+    // Query the map once and reuse the iterator for the result.
+    const auto Found = DeclSilo.find(Obj);
+    return Found == DeclSilo.end() ? nullptr : &Found->second;
+  }
+
+  /*****************************************************************
+   * In order to produce hashes for subtrees on the way, a hash
+   * stack is used. When a new subhash is meant to be calculated, we
+   * push a new hash on the stack. All hashing functions always use
+   * the top of the hashing stack.
+   */
+protected:
+  llvm::SmallVector<Hash, 32> HashStack;
+
+public:
+  Hash *pushHash() {
+    HashStack.push_back(Hash());
+    return &HashStack.back();
+  }
+
+  HashResult popHash(const Hash *ShouldBe = nullptr) {
+    assert(!ShouldBe || ShouldBe == &HashStack.back());
+
+    // Finalize the Hash and return the digest.
+    HashResult CurrentDigest;
+    topHash().final(CurrentDigest);
+    HashStack.pop_back();
+    return CurrentDigest;
+  }
+
+  Hash &topHash() { return HashStack.back(); }
+};
+
+} // namespace clang
+#endif // LLVM_CLANG_AST_CHASHVISITOR_H
Index: include/clang/AST/AttrDataCollectors.td
===================================================================
--- include/clang/AST/AttrDataCollectors.td
+++ include/clang/AST/AttrDataCollectors.td
@@ -0,0 +1,10 @@
+//--- Attributes ---------------------------------------------------------//
+class Attr {
+  code Code = [{
+    std::string AttrString;      
+    llvm::raw_string_ostream OS(AttrString);
+    S->printPretty(OS, Context.getLangOpts());
+    OS.flush();
+    addData(AttrString);
+  }];
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to