teemperor updated this revision to Diff 62888.
teemperor added a comment.
- Using doxygen-style comments for all private members.
http://reviews.llvm.org/D20795
Files:
include/clang/AST/CloneDetection.h
include/clang/StaticAnalyzer/Checkers/Checkers.td
lib/AST/CMakeLists.txt
lib/AST/CloneDetection.cpp
lib/StaticAnalyzer/Checkers/CMakeLists.txt
lib/StaticAnalyzer/Checkers/CloneChecker.cpp
test/Analysis/copypaste/test-min-max.cpp
test/Analysis/copypaste/test-sub-sequences.cpp
Index: test/Analysis/copypaste/test-sub-sequences.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+ log2(a);
+ log(); // expected-warning{{Detected code clone.}}
+ if (a > b)
+ return a;
+ return b;
+}
+
+int maxClone(int a, int b) {
+ log(); // expected-note{{Related code clone is here.}}
+ if (a > b)
+ return a;
+ return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+ return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+ log();
+ if (a > b)
+ return a;
+ return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+ log();
+ if (a > b)
+ return a;
+ return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+ log();
+ if (a < b)
+ return a;
+ return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+ log();
+ if (b > a)
+ return a;
+ return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+ return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===================================================================
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker<check::EndOfTranslationUnit> {
+
+public:
+ void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+ CloneDetector CloneDetector;
+ CloneDetector.AnalyzeTranslationUnitDecl(
+ TU->getASTContext().getTranslationUnitDecl());
+
+ std::vector<CloneDetector::CloneGroup> CloneGroups =
+ CloneDetector.findClones();
+
+ DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+ unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+ "Detected code clone.");
+
+ unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+ "Related code clone is here.");
+
+ for (CloneDetector::CloneGroup &Group : CloneGroups) {
+ DiagEngine.Report(Group.front().getStartLoc(), WarnID);
+ for (unsigned J = 1; J < Group.size(); ++J) {
+ DiagEngine.Report(Group[J].getStartLoc(), NoteID);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Register CloneChecker
+//===----------------------------------------------------------------------===//
+
+void ento::registerCloneChecker(CheckerManager &Mgr) {
+ Mgr.registerChecker<CloneChecker>();
+}
Index: lib/StaticAnalyzer/Checkers/CMakeLists.txt
===================================================================
--- lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -22,6 +22,7 @@
CheckerDocumentation.cpp
ChrootChecker.cpp
ClangCheckers.cpp
+ CloneChecker.cpp
DeadStoresChecker.cpp
DebugCheckers.cpp
DereferenceChecker.cpp
Index: lib/AST/CloneDetection.cpp
===================================================================
--- /dev/null
+++ lib/AST/CloneDetection.cpp
@@ -0,0 +1,310 @@
+//===--- CloneDetection.cpp - Analyses the structure of an AST --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements classes for searching and anlyzing source code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/AST/CloneDetection.h"
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Stmt.h"
+
+using namespace clang;
+
+StmtSequence::StmtSequence(CompoundStmt *Stmt, ASTContext *Context,
+ unsigned StartIndex, unsigned EndIndex)
+ : S(Stmt), Context(Context), StartIndex(StartIndex), EndIndex(EndIndex) {
+ assert(Stmt && Context && "Both Stmt and Context need to be valid pointers");
+ assert(StartIndex < EndIndex && "Given array should not be empty");
+ assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt");
+}
+
+StmtSequence::StmtSequence(Stmt *Stmt, ASTContext *Context)
+ : S(Stmt), Context(Context), StartIndex(0), EndIndex(0) {
+ // Test against passing StmtSequence(nullptr, validptr) or
+ // StmtSequence(validptr, nullptr) to this constructor.
+ assert((!Stmt && !Context) || (Stmt && Context));
+}
+
+bool StmtSequence::contains(const StmtSequence &Other) const {
+ // If both sequences reside in different translation units, they can
+ // never contain each other.
+ if (Context != Other.Context)
+ return false;
+
+ // Otherwise check if the start and end locations of the current sequence
+ // surround the other sequence.
+ bool StartIsInBounds = Context->getSourceManager().isBeforeInTranslationUnit(
+ getStartLoc(), Other.getStartLoc()) ||
+ getStartLoc() == Other.getStartLoc();
+ bool EndIsInBounds = Context->getSourceManager().isBeforeInTranslationUnit(
+ Other.getEndLoc(), getEndLoc()) ||
+ Other.getEndLoc() == getEndLoc();
+ return StartIsInBounds && EndIsInBounds;
+}
+
+Stmt *const *StmtSequence::begin() const {
+ if (holdsSequence()) {
+ auto CS = static_cast<CompoundStmt *>(S);
+ return CS->body_begin() + StartIndex;
+ }
+ return &S;
+}
+
+Stmt *const *StmtSequence::end() const {
+ if (holdsSequence()) {
+ auto CS = static_cast<CompoundStmt *>(S);
+ return CS->body_begin() + EndIndex;
+ }
+ return &S;
+}
+
+SourceLocation StmtSequence::getStartLoc() const {
+ return front()->getLocStart();
+}
+
+SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
+
+namespace {
+/// Utility class for computing a hash value.
+class HashValue {
+ unsigned Value = 0;
+
+public:
+ /// Computes the given data into the hash value.
+ void addData(unsigned Data) {
+ Value *= 53;
+ Value += Data;
+ }
+
+ /// Returns the hash value.
+ unsigned getValue() { return Value; }
+};
+} // end anonymous namespace
+
+namespace {
+/// Traverses the AST and calculates hash values and complexity values
+/// for all statements. The results are stored in a CloneDetector
+/// object.
+///
+/// This calculation happens in linear time and each statement is only
+/// visited a fixed amount of times during this process.
+/// The hash value for a statement is first calculated from the data stored
+/// directly in the statement object.
+/// Afterwards, the hash values of the children are calculated into the
+/// computed hash value.
+class HashVisitor : public RecursiveASTVisitor<HashVisitor> {
+
+ static const unsigned NullChildHashValue = 313;
+
+public:
+ explicit HashVisitor(CloneDetector &CD, ASTContext &Context)
+ : CD(CD), Context(Context) {}
+
+ // We need to traverse postorder over the AST for our algorithm
+ // to work as each parent expects that its children were already hashed.
+ bool shouldTraversePostOrder() const { return true; }
+
+ bool VisitStmt(Stmt *S);
+
+private:
+ void HandleChildHashes(HashValue &Hash, unsigned &Complexity,
+ Stmt::child_iterator Iterator, unsigned StartPos,
+ unsigned Length) {
+ auto StartIterator = Iterator;
+ for (unsigned I = 0; I < StartPos; ++I)
+ ++StartIterator;
+ auto EndIterator = Iterator;
+ for (unsigned I = 0; I < Length; ++I)
+ ++EndIterator;
+ HandleChildHashes(Hash, Complexity, StartIterator, EndIterator);
+ }
+
+ /// Iterates over the specified Stmt array and computes the hash
+ /// values of all statements into the given hash value.
+ /// Also, the complexity of all statements is added to the given
+ /// complexity value.
+ void HandleChildHashes(HashValue &Hash, unsigned &Complexity,
+ Stmt::child_iterator StartIterator,
+ Stmt::child_iterator EndIterator) {
+ // Iterate over each child in the sub-sequence.
+ for (auto I = StartIterator; I != EndIterator; ++I) {
+ Stmt *Child = *I;
+ if (Child == nullptr) {
+ ++Complexity;
+ // We use an placeholder hash value for null children.
+ Hash.addData(NullChildHashValue);
+ } else {
+ StmtSequence ChildSequence(StmtSequence(Child, &Context));
+
+ bool Error;
+ CloneDetector::StmtData Data = CD.findHash(ChildSequence, Error);
+ assert(!Error && "Child statement wasn't hashed before parent.");
+
+ Complexity += Data.Complexity;
+ Hash.addData(Data.Hash);
+ }
+ }
+ }
+
+ /// Saves the current hash value into the persistent storage of this
+ /// CloneDetector.
+ void SaveData(StmtSequence CurrentStmt, HashValue Hash,
+ unsigned Complexity);
+
+ CloneDetector &CD;
+ ASTContext &Context;
+};
+} // end anonymous namespace
+
+bool HashVisitor::VisitStmt(Stmt *S) {
+ StmtSequence CurrentStmt(S, &Context);
+ HashValue Hash;
+ unsigned Complexity = 1;
+
+ // The only relevant data for now is the class of the statement,
+ // so we calculate the hash class into the current hash value.
+ // TODO: Handle statement class specific data.
+ Hash.addData(S->getStmtClass());
+
+ // Incorporate the hash values of all child Stmts into the current
+ // hash value.
+ HandleChildHashes(Hash, Complexity, S->child_begin(), S->child_end());
+
+ SaveData(CurrentStmt, Hash, Complexity);
+ return true;
+}
+
+void HashVisitor::SaveData(StmtSequence CurrentStmt, HashValue Hash,
+ unsigned Complexity) {
+ // Save current data to the CloneDetector.
+ CloneDetector::StmtData Data(Hash.getValue(), Complexity);
+ CD.add(CurrentStmt, Data);
+
+ // If the currently hashed statement is a CompoundStmt, we also
+ // need to calculate the hash values for all possible sub-sequences
+ // in the body of the CompoundStmt.
+ auto CS = dyn_cast<CompoundStmt>(CurrentStmt.front());
+ if (!CS)
+ return;
+
+ // The length of the sub-sequence. We don't need to hash sequences
+ // with the length 1 as they are already handled when we hashed the
+ // children.
+ // We also don't need to hash the sub-sequence that contains the
+ // whole body as this would be equal to the hash of the CompoundStmt itself.
+ for (unsigned Length = 2; Length < CS->size(); ++Length) {
+ // The start index in the body of the CompoundStmt.
+ // We increase the position and rehash until the end of the sub-sequence
+ // reaches the end of the CompoundStmt body.
+ for (unsigned Pos = 0; Pos <= CS->size() - Length; ++Pos) {
+ HashValue SubHash;
+ unsigned SubComplexity = 1;
+ StmtSequence Sequence(CS, &Context, Pos, Pos + Length);
+
+ // We consider a sub-sequence to be CompoundStmt that only contains
+ // the children in the sub-sequence. This is necessary for matching
+ // sub-sequences with full CompoundStmts.
+ SubHash.addData(CS->getStmtClass());
+
+ HandleChildHashes(SubHash, SubComplexity, CS->child_begin(), Pos, Length);
+
+ CloneDetector::StmtData SubData(SubHash.getValue(), SubComplexity);
+ CD.add(Sequence, SubData);
+ }
+ }
+}
+
+void CloneDetector::AnalyzeTranslationUnitDecl(TranslationUnitDecl *TU) {
+ HashVisitor visitor(*this, TU->getASTContext());
+ visitor.TraverseDecl(TU);
+}
+
+void CloneDetector::add(const StmtSequence &S, const StmtData &Data) {
+ assert(!S.empty());
+ HashedStmts[S] = Data;
+}
+
+namespace {
+/// Returns true if and only if \p Stmt contains at least one
+/// other sequence in the \p Group.
+bool StmtContainsAnyInGroup(StmtSequence &Stmt,
+ CloneDetector::CloneGroup &Group) {
+ for (StmtSequence &GroupStmt : Group) {
+ if (Stmt.contains(GroupStmt))
+ return true;
+ }
+ return false;
+}
+
+/// Returns true if and only if all sequences in \p OtherGroup are contained
+/// by a sequence in \p Group.
+bool GroupContains(CloneDetector::CloneGroup &Group,
+ CloneDetector::CloneGroup &OtherGroup) {
+ // We have less sequences in the current group than we have in the other,
+ // so we will never fulfill the requirement for returning true.
+ // This is only possible because we know that a sequence in Group can
+ // contain at most one sequence in OtherGroup.
+ if (Group.size() < OtherGroup.size())
+ return false;
+
+ for (StmtSequence &Stmt : Group) {
+ if (!StmtContainsAnyInGroup(Stmt, OtherGroup))
+ return false;
+ }
+ return true;
+}
+} // end anonymous namespace
+
+std::vector<CloneDetector::CloneGroup>
+CloneDetector::findClones(unsigned MinGroupComplexity) {
+ std::vector<CloneGroup> Result;
+
+ std::map<unsigned, CloneGroup> GroupsByHash;
+
+ // Create a CloneGroup for every known statement.
+ for (auto &Pair : HashedStmts) {
+ // But we ignore statements that don't fulfill the complexity requirement.
+ if (Pair.second.Complexity > MinGroupComplexity) {
+ GroupsByHash[Pair.second.Hash].push_back(Pair.first);
+ }
+ }
+
+ // Put all groups that now contain at least one sequence into the
+ // result. We still need to filter the Result.
+ for (auto &HashGroupPair : GroupsByHash) {
+ if (HashGroupPair.second.size() > 1) {
+ Result.push_back(HashGroupPair.second);
+ }
+ }
+
+ std::set<unsigned> IndexesToRemove;
+
+ // Compare every group in the result with the others. If one groups
+ // contains another group, we only need to return the bigger group.
+ for (unsigned I = 0; I < Result.size(); ++I) {
+ for (unsigned J = 0; J < Result.size(); ++J) {
+ if (I != J) {
+ if (GroupContains(Result[J], Result[I])) {
+ IndexesToRemove.insert(I);
+ break;
+ }
+ }
+ }
+ }
+
+ for (auto Iter = IndexesToRemove.rbegin(); Iter != IndexesToRemove.rend();
+ ++Iter) {
+ Result.erase(Result.begin() + (*Iter));
+ }
+
+ return Result;
+}
Index: lib/AST/CMakeLists.txt
===================================================================
--- lib/AST/CMakeLists.txt
+++ lib/AST/CMakeLists.txt
@@ -9,6 +9,7 @@
ASTImporter.cpp
ASTTypeTraits.cpp
AttrImpl.cpp
+ CloneDetection.cpp
CXXInheritance.cpp
Comment.cpp
CommentBriefParser.cpp
Index: include/clang/StaticAnalyzer/Checkers/Checkers.td
===================================================================
--- include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -77,6 +77,8 @@
def LLVM : Package<"llvm">;
def Debug : Package<"debug">;
+def CloneDetection : Package<"clone">;
+
//===----------------------------------------------------------------------===//
// Core Checkers.
//===----------------------------------------------------------------------===//
@@ -657,3 +659,17 @@
DescFile<"DebugCheckers.cpp">;
} // end "debug"
+
+
+//===----------------------------------------------------------------------===//
+// Clone Detection
+//===----------------------------------------------------------------------===//
+
+let ParentPackage = CloneDetection in {
+
+def CloneChecker : Checker<"CloneChecker">,
+ HelpText<"Reports similar pieces of code.">,
+ DescFile<"CloneChecker.cpp">;
+
+} // end "clone"
+
Index: include/clang/AST/CloneDetection.h
===================================================================
--- /dev/null
+++ include/clang/AST/CloneDetection.h
@@ -0,0 +1,224 @@
+//===--- CloneDetection.h - Analyses the structure of an AST ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// /file
+/// This file defines classes for searching and anlyzing source code clones.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
+#define LLVM_CLANG_AST_CLONEDETECTION_H
+
+#include "clang/Basic/SourceLocation.h"
+
+#include <map>
+#include <vector>
+
+namespace clang {
+
+class Stmt;
+class ASTContext;
+class CompoundStmt;
+class TranslationUnitDecl;
+
+/// \brief Identifies a list of statements.
+///
+/// Can either identify a single arbitrary Stmt object, a continuous sequence of
+/// child statements inside a CompoundStmt or no statements at all.
+class StmtSequence {
+ /// If this object identifies a sequence of statements inside a CompoundStmt,
+ /// S points to this CompoundStmt.
+ /// If this object only identifies a single Stmt, then S is a pointer to this
+ /// Stmt.
+ Stmt *S;
+
+ /// The related ASTContext for S.
+ ASTContext *Context;
+
+ /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
+ /// instance is representing the CompoundStmt children inside the array
+ /// [StartIndex, EndIndex).
+ unsigned StartIndex;
+ unsigned EndIndex;
+
+public:
+ /// \brief Constructs a StmtSequence holding multiple statements.
+ ///
+ /// The resulting StmtSequence identifies a continuous sequence of statements
+ /// in the body of the given CompoundStmt.
+ /// Which statements of the body should be identified needs to be specified
+ /// by providing a start and end index that describe a non-empty sub-array
+ /// in the body of the given CompoundStmt.
+ ///
+ /// \param Stmt A CompoundStmt that contains all statements in its body.
+ /// \param Context The ASTContext for the given CompoundStmt.
+ /// \param StartIndex the inclusive start index in the children array of
+ /// \p Stmt
+ /// \param EndIndex the exclusive end index in the children array of \p Stmt.
+ StmtSequence(CompoundStmt *Stmt, ASTContext *Context, unsigned StartIndex,
+ unsigned EndIndex);
+
+ /// \brief Constructs a StmtSequence holding a single statement.
+ ///
+ /// \param Stmt An arbitrary Stmt.
+ /// \param Context The ASTContext for the given Stmt.
+ StmtSequence(Stmt *Stmt, ASTContext *Context);
+
+ /// \brief Constructs an empty StmtSequence.
+ StmtSequence() : StmtSequence(nullptr, nullptr) {}
+
+ /// Returns an iterator-style pointer to the first statement in this sequence.
+ Stmt *const *begin() const;
+
+ /// Returns an iterator-style pointer that points behind the last statement
+ /// in this sequence.
+ Stmt *const *end() const;
+
+ /// Returns the first statement in this sequence.
+ /// This method should only be called on a non-empty StmtSequence object.
+ Stmt *front() const {
+ assert(!empty());
+ return begin()[0];
+ }
+
+ /// Returns the last statement in this sequence.
+ /// This method should only be called on a non-empty StmtSequence object.
+ Stmt *back() const {
+ assert(!empty());
+ return begin()[size() - 1];
+ }
+
+ /// Returns the number of statements this object holds.
+ unsigned size() const {
+ if (holdsSequence())
+ return EndIndex - StartIndex;
+ if (S == nullptr)
+ return 0;
+ return 1;
+ }
+
+ /// Returns true if and only if this StmtSequence contains no statements.
+ bool empty() const { return size() == 0; }
+
+ /// Returns the related ASTContext for the stored Stmts.
+ ASTContext &getASTContext() const {
+ assert(Context);
+ return *Context;
+ }
+
+ /// Returns true if this objects holds a list of statements.
+ bool holdsSequence() const { return EndIndex != 0; }
+
+ /// Returns the start sourcelocation of the first statement in this sequence.
+ /// This method should only be called on a non-empty StmtSequence object.
+ SourceLocation getStartLoc() const;
+
+ /// Returns the end sourcelocation of the last statement in this sequence.
+ /// This method should only be called on a non-empty StmtSequence object.
+ SourceLocation getEndLoc() const;
+
+ /// Provides an strict weak ordering operator.
+ bool operator<(const StmtSequence &Other) const {
+ return std::tie(S, StartIndex, EndIndex) <
+ std::tie(Other.S, Other.StartIndex, Other.EndIndex);
+ }
+
+ /// Returns true if and only if this sequence covers a source range that
+ /// contains the source range of the given sequence.
+ ///
+ /// This method should only be called on a non-empty StmtSequence object
+ /// and passed a non-empty StmtSequence object.
+ bool contains(const StmtSequence &Other) const;
+};
+
+/// \brief Searches clones in source code.
+///
+/// This class first needs to be provided with (possibly multiple) translation
+/// units by passing them \p AnalyzeTranslationUnitDecl .
+/// Afterwards all provided translation units can be searched for clones
+/// by calling \p findClones .
+///
+/// This class only searches for clones in exectuable source code
+/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
+/// are not supported.
+///
+/// CloneDetector generates search data for efficiently finding clones
+/// in the given statements.
+/// This is done by hashing all statements using a locality-sensitive hash
+/// function that generates identical hash values for similar statement
+/// trees.
+class CloneDetector {
+public:
+ /// Holds the generated data for a StmtSequence
+ struct StmtData {
+ /// The generated hash value.
+ unsigned Hash;
+ /// \brief The complexity of the StmtSequence.
+ ///
+ /// This scalar values serves as a simple way of filtering clones
+ /// that are too small to be reported.
+ /// A greater value indicates that the related StmtSequence is probably
+ /// more interesting to the user.
+ unsigned Complexity;
+ StmtData() : Hash(0), Complexity(0) {}
+
+ StmtData(unsigned Hash, unsigned Complexity)
+ : Hash(Hash), Complexity(Complexity) {}
+ };
+
+ /// \brief Generates and stores search data for all statements in the
+ /// given translation unit declarations.
+ ///
+ /// This method can be called multiple times on the same CloneDetector
+ /// to search clones across translation unit boundaries.
+ void AnalyzeTranslationUnitDecl(TranslationUnitDecl *TU);
+
+ /// \brief Returns the generated search data for the given StmtSequence.
+ ///
+ /// \param S the given StmtSequence
+ /// \param Error output parameter that is set to true if no
+ /// data was generated for the given StmtSequence.
+ /// This can happen if the given StmtSequence is not part of a
+ /// translation unit that was passed to \p AnalyzeTranslationUnitDecl
+ /// so far.
+ StmtData findHash(const StmtSequence &S, bool &Error) {
+ auto I = HashedStmts.find(S);
+ if (I == HashedStmts.end()) {
+ Error = true;
+ return StmtData();
+ }
+ Error = false;
+ return I->second;
+ }
+
+ /// \brief Stores the StmtSequence with its associated data in the search
+ /// database of this CloneDetector.
+ ///
+ /// StmtSequence objects added by this method are included in the clone
+ /// search.
+ void add(const StmtSequence &S, const StmtData &Data);
+
+ typedef std::vector<StmtSequence> CloneGroup;
+
+ /// \brief Searches all translation units in this CloneDetector for clones.
+ ///
+ /// \param MinGroupComplexity Only return clones which have at least this
+ /// complexity value.
+ /// \return a list of found clone groups. Each group contains multiple
+ /// StmtSequences that were identified to be clones of each other.
+ std::vector<CloneGroup> findClones(unsigned MinGroupComplexity = 10);
+
+private:
+ /// Maps each StmtSequence to the search data that was generated for it.
+ std::map<StmtSequence, StmtData> HashedStmts;
+};
+
+} // end namespace clang
+
+#endif // LLVM_CLANG_AST_CLONEDETECTION_H
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits