Re: [PATCH] D18551: Added Fixer implementation and fix() interface in clang-format for removing redundant code.

Eric Liu via cfe-commits Mon, 11 Apr 2016 10:04:08 -0700

ioeric updated this revision to Diff 53268.
ioeric marked 16 inline comments as done.
ioeric added a comment.


- Addressed comments.


http://reviews.llvm.org/D18551

Files:
  include/clang/Format/Format.h
  lib/Format/Format.cpp
  lib/Format/TokenAnnotator.h
  unittests/Format/CMakeLists.txt
  unittests/Format/CleanupTest.cpp
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -11274,6 +11274,35 @@
                           Code, formatReplacements(Code, Replaces, Style)));
 }
 
+TEST_F(ReplacementTest, FixOnlyAffectedCodeAfterReplacements) {
+  std::string Code = "namespace A {\n"
+                     "namespace B {\n"
+                     "  int x;\n"
+                     "} // namespace B\n"
+                     "} // namespace A\n"
+                     "\n"
+                     "namespace C {\n"
+                     "namespace D { int i; }\n"
+                     "inline namespace E { namespace { int y; } }\n"
+                     "int x=     0;"
+                     "}";
+  std::string Expected = "\n\nnamespace C {\n"
+                         "namespace D { int i; }\n\n"
+                         "int x=     0;"
+                         "}";
+  FileID ID = Context.createInMemoryFile("fix.cpp", Code);
+  tooling::Replacements Replaces;
+  Replaces.insert(tooling::Replacement(
+      Context.Sources, Context.getLocation(ID, 3, 3), 6, ""));
+  Replaces.insert(tooling::Replacement(
+      Context.Sources, Context.getLocation(ID, 9, 34), 6, ""));
+
+  format::FormatStyle Style = format::getLLVMStyle();
+  auto FinalReplaces = formatReplacements(
+      Code, cleanupAroundReplacements(Code, Replaces, Style), Style);
+  EXPECT_EQ(Expected, applyAllReplacements(Code, FinalReplaces));
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: unittests/Format/CleanupTest.cpp
===================================================================
--- /dev/null
+++ unittests/Format/CleanupTest.cpp
@@ -0,0 +1,85 @@
+//===- unittest/Format/CleanupTest.cpp - Code cleanup unit tests ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Format/Format.h"
+
+#include "clang/Tooling/Core/Replacement.h"
+
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace format {
+namespace {
+
+class CleanupTest : public ::testing::Test {
+protected:
+  std::string cleanup(llvm::StringRef Code,
+                      const std::vector<tooling::Range> &Ranges,
+                      const FormatStyle &Style = getLLVMStyle()) {
+    tooling::Replacements Replaces = format::cleanup(Style, Code, Ranges);
+
+    std::string Result = applyAllReplacements(Code, Replaces);
+    EXPECT_NE("", Result);
+    return Result;
+  }
+};
+
+TEST_F(CleanupTest, DeleteEmptyNamespaces) {
+  std::string Code = "namespace A {\n"
+                     "namespace B {\n"
+                     "} // namespace B\n"
+                     "} // namespace A\n\n"
+                     "namespace C {\n"
+                     "namespace D { int i; }\n"
+                     "inline namespace E { namespace { } }\n"
+                     "}";
+  std::string Expected = "\n\nnamespace C {\n"
+                         "namespace D { int i; }\n\n"
+                         "}";
+  std::vector<tooling::Range> Ranges;
+  Ranges.push_back(tooling::Range(28, 0));
+  Ranges.push_back(tooling::Range(91, 6));
+  Ranges.push_back(tooling::Range(132, 0));
+  std::string Result = cleanup(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(CleanupTest, NamespaceWithSyntaxError) {
+  std::string Code = "namespace A {\n"
+                     "namespace B {\n" // missing r_brace
+                     "} // namespace A\n\n"
+                     "namespace C {\n"
+                     "namespace D int i; }\n"
+                     "inline namespace E { namespace { } }\n"
+                     "}";
+  std::string Expected = "namespace A {\n"
+                         "\n\nnamespace C {\n"
+                         "namespace D int i; }\n\n"
+                         "}";
+  std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
+  std::string Result = cleanup(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+TEST_F(CleanupTest, EmptyNamespaceNotAffected) {
+  std::string Code = "namespace A {\n\n"
+                     "namespace {\n\n}}";
+  // Even though the namespaces are empty, but the inner most empty namespace
+  // block is not affected by the changed ranges.
+  std::string Expected = "namespace A {\n\n"
+                         "namespace {\n\n}}";
+  // Set the changed range to be the second "\n".
+  std::vector<tooling::Range> Ranges(1, tooling::Range(14, 0));
+  std::string Result = cleanup(Code, Ranges);
+  EXPECT_EQ(Expected, Result);
+}
+
+} // end namespace
+} // end namespace format
+} // end namespace clang
Index: unittests/Format/CMakeLists.txt
===================================================================
--- unittests/Format/CMakeLists.txt
+++ unittests/Format/CMakeLists.txt
@@ -3,6 +3,7 @@
   )
 
 add_clang_unittest(FormatTests
+  CleanupTest.cpp
   FormatTest.cpp
   FormatTestJava.cpp
   FormatTestJS.cpp
Index: lib/Format/TokenAnnotator.h
===================================================================
--- lib/Format/TokenAnnotator.h
+++ lib/Format/TokenAnnotator.h
@@ -118,6 +118,12 @@
   /// \c True if a one of this line's children intersects with an input range.
   bool ChildrenAffected;
 
+  /// \c True if the entire line is has been deleted by fixer.
+  bool Deleted = false;
+
+  // \c True if we have done empty namespace cleanup on the namespace block.
+  bool NamespaceEmptynessChecked = false;
+
 private:
   // Disallow copying.
   AnnotatedLine(const AnnotatedLine &) = delete;
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -1347,7 +1347,8 @@
             Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() ==
                 tok::pp_define) &&
           std::find(ForEachMacros.begin(), ForEachMacros.end(),
-                    FormatTok->Tok.getIdentifierInfo()) != ForEachMacros.end()) {
+                    FormatTok->Tok.getIdentifierInfo()) !=
+              ForEachMacros.end()) {
         FormatTok->Type = TT_ForEachMacro;
       } else if (FormatTok->is(tok::identifier)) {
         if (MacroBlockBeginRegex.match(Text)) {
@@ -1443,86 +1444,12 @@
   }
 }
 
-class Formatter : public UnwrappedLineConsumer {
+class AffectedRangeManager {
 public:
-  Formatter(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
-            ArrayRef<CharSourceRange> Ranges)
-      : Style(Style), ID(ID), SourceMgr(SourceMgr),
-        Whitespaces(SourceMgr, Style,
-                    inputUsesCRLF(SourceMgr.getBufferData(ID))),
-        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
-        Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
-    DEBUG(llvm::dbgs() << "File encoding: "
-                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
-                                                               : "unknown")
-                       << "\n");
-    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
-                       << "\n");
-  }
-
-  tooling::Replacements format(bool *IncompleteFormat) {
-    tooling::Replacements Result;
-    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
-
-    UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
-                               *this);
-    Parser.parse();
-    assert(UnwrappedLines.rbegin()->empty());
-    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
-         ++Run) {
-      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
-      SmallVector<AnnotatedLine *, 16> AnnotatedLines;
-      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
-        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
-      }
-      tooling::Replacements RunResult =
-          format(AnnotatedLines, Tokens, Result, IncompleteFormat);
-      DEBUG({
-        llvm::dbgs() << "Replacements for run " << Run << ":\n";
-        for (tooling::Replacements::iterator I = RunResult.begin(),
-                                             E = RunResult.end();
-             I != E; ++I) {
-          llvm::dbgs() << I->toString() << "\n";
-        }
-      });
-      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
-        delete AnnotatedLines[i];
-      }
-      Result.insert(RunResult.begin(), RunResult.end());
-      Whitespaces.reset();
-    }
-    return Result;
-  }
-
-  tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
-                               FormatTokenLexer &Tokens,
-                               tooling::Replacements &Result,
-                               bool *IncompleteFormat) {
-    TokenAnnotator Annotator(Style, Tokens.getKeywords());
-    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
-      Annotator.annotate(*AnnotatedLines[i]);
-    }
-    deriveLocalStyle(AnnotatedLines);
-    computeAffectedLines(AnnotatedLines.begin(), AnnotatedLines.end());
-    if (Style.Language == FormatStyle::LK_JavaScript &&
-        Style.JavaScriptQuotes != FormatStyle::JSQS_Leave)
-      requoteJSStringLiteral(AnnotatedLines, Result);
-
-    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
-      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
-    }
+  AffectedRangeManager(SourceManager &SourceMgr,
+                       const SmallVector<CharSourceRange, 8> &Ranges)
+      : SourceMgr(SourceMgr), Ranges(Ranges) {}
 
-    Annotator.setCommentLineLevels(AnnotatedLines);
-    ContinuationIndenter Indenter(Style, Tokens.getKeywords(), SourceMgr,
-                                  Whitespaces, Encoding,
-                                  BinPackInconclusiveFunctions);
-    UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, Tokens.getKeywords(),
-                           IncompleteFormat)
-        .format(AnnotatedLines);
-    return Whitespaces.generateReplacements();
-  }
-
-private:
   // Determines which lines are affected by the SourceRanges given as input.
   // Returns \c true if at least one line between I and E or one of their
   // children is affected.
@@ -1561,83 +1488,51 @@
     }
     return SomeLineAffected;
   }
- 
-  // If the last token is a double/single-quoted string literal, generates a
-  // replacement with a single/double quoted string literal, re-escaping the
-  // contents in the process.
-  void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
-                                 tooling::Replacements &Result) {
-    for (AnnotatedLine *Line : Lines) {
-      requoteJSStringLiteral(Line->Children, Result);
-      if (!Line->Affected)
-        continue;
-      for (FormatToken *FormatTok = Line->First; FormatTok;
-           FormatTok = FormatTok->Next) {
-        StringRef Input = FormatTok->TokenText;
-        if (!FormatTok->isStringLiteral() ||
-            // NB: testing for not starting with a double quote to avoid
-            // breaking
-            // `template strings`.
-            (Style.JavaScriptQuotes == FormatStyle::JSQS_Single &&
-             !Input.startswith("\"")) ||
-            (Style.JavaScriptQuotes == FormatStyle::JSQS_Double &&
-             !Input.startswith("\'")))
-          continue;
 
-        // Change start and end quote.
-        bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single;
-        SourceLocation Start = FormatTok->Tok.getLocation();
-        auto Replace = [&](SourceLocation Start, unsigned Length,
-                           StringRef ReplacementText) {
-          Result.insert(
-              tooling::Replacement(SourceMgr, Start, Length, ReplacementText));
-        };
-        Replace(Start, 1, IsSingle ? "'" : "\"");
-        Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
-                IsSingle ? "'" : "\"");
+  // Returns true if 'Range' intersects with one of the input ranges.
+  bool affectsCharSourceRange(const CharSourceRange &Range) {
+    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
+                                                          E = Ranges.end();
+         I != E; ++I) {
+      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
+          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
+        return true;
+    }
+    return false;
+  }
 
-        // Escape internal quotes.
-        size_t ColumnWidth = FormatTok->TokenText.size();
-        bool Escaped = false;
-        for (size_t i = 1; i < Input.size() - 1; i++) {
-          switch (Input[i]) {
-            case '\\':
-              if (!Escaped && i + 1 < Input.size() &&
-                  ((IsSingle && Input[i + 1] == '"') ||
-                   (!IsSingle && Input[i + 1] == '\''))) {
-                // Remove this \, it's escaping a " or ' that no longer needs
-                // escaping
-                ColumnWidth--;
-                Replace(Start.getLocWithOffset(i), 1, "");
-                continue;
-              }
-              Escaped = !Escaped;
-              break;
-            case '\"':
-            case '\'':
-              if (!Escaped && IsSingle == (Input[i] == '\'')) {
-                // Escape the quote.
-                Replace(Start.getLocWithOffset(i), 0, "\\");
-                ColumnWidth++;
-              }
-              Escaped = false;
-              break;
-            default:
-              Escaped = false;
-              break;
-          }
-        }
+private:
+  // Returns true if the range from 'First' to 'Last' intersects with one of the
+  // input ranges.
+  bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
+                         bool IncludeLeadingNewlines) {
+    SourceLocation Start = First.WhitespaceRange.getBegin();
+    if (!IncludeLeadingNewlines)
+      Start = Start.getLocWithOffset(First.LastNewlineOffset);
+    SourceLocation End = Last.getStartOfNonWhitespace();
+    End = End.getLocWithOffset(Last.TokenText.size());
+    CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
+    return affectsCharSourceRange(Range);
+  }
 
-        // For formatting, count the number of non-escaped single quotes in them
-        // and adjust ColumnWidth to take the added escapes into account.
-        // FIXME(martinprobst): this might conflict with code breaking a long string
-        // literal (which clang-format doesn't do, yet). For that to work, this code
-        // would have to modify TokenText directly.
-        FormatTok->ColumnWidth = ColumnWidth;
-      }
-    }
+  // Returns true if one of the input ranges intersect the leading empty lines
+  // before 'Tok'.
+  bool affectsLeadingEmptyLines(const FormatToken &Tok) {
+    CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
+        Tok.WhitespaceRange.getBegin(),
+        Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
+    return affectsCharSourceRange(EmptyLineRange);
   }
 
+  // Marks all lines between I and E as well as all their children as affected.
+  void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
+                         SmallVectorImpl<AnnotatedLine *>::iterator E) {
+    while (I != E) {
+      (*I)->Affected = true;
+      markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
+      ++I;
+    }
+  }
 
   // Determines whether 'Line' is affected by the SourceRanges given as input.
   // Returns \c true if line or one if its children is affected.
@@ -1689,57 +1584,220 @@
     return SomeLineAffected;
   }
 
-  // Marks all lines between I and E as well as all their children as affected.
-  void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I,
-                         SmallVectorImpl<AnnotatedLine *>::iterator E) {
-    while (I != E) {
-      (*I)->Affected = true;
-      markAllAsAffected((*I)->Children.begin(), (*I)->Children.end());
-      ++I;
+  SourceManager &SourceMgr;
+  const SmallVector<CharSourceRange, 8> Ranges;
+};
+
+class CodeProcessor : public UnwrappedLineConsumer {
+public:
+  CodeProcessor(const FormatStyle &Style, SourceManager &SourceMgr, FileID ID,
+                ArrayRef<CharSourceRange> Ranges)
+      : Style(Style), ID(ID), SourceMgr(SourceMgr),
+        AffectedRangeMgr(SourceMgr, SmallVector<CharSourceRange, 8>(
+                                        Ranges.begin(), Ranges.end())),
+        UnwrappedLines(1),
+        Encoding(encoding::detectEncoding(SourceMgr.getBufferData(ID))) {
+    DEBUG(llvm::dbgs() << "File encoding: "
+                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
+                                                               : "unknown")
+                       << "\n");
+    DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
+                       << "\n");
+  }
+
+  template <typename T>
+  tooling::Replacements process(T Callable, bool *IncompleteFormat) {
+    tooling::Replacements Result;
+    FormatTokenLexer Tokens(SourceMgr, ID, Style, Encoding);
+
+    UnwrappedLineParser Parser(Style, Tokens.getKeywords(), Tokens.lex(),
+                               *this);
+    Parser.parse();
+    assert(UnwrappedLines.rbegin()->empty());
+    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
+         ++Run) {
+      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
+      SmallVector<AnnotatedLine *, 16> AnnotatedLines;
+
+      TokenAnnotator Annotator(Style, Tokens.getKeywords());
+      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
+        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
+        Annotator.annotate(*AnnotatedLines.back());
+      }
+
+      tooling::Replacements RunResult =
+          Callable(Annotator, AnnotatedLines, Tokens, Result, IncompleteFormat);
+
+      DEBUG({
+        llvm::dbgs() << "Replacements for run " << Run << ":\n";
+        for (tooling::Replacements::iterator I = RunResult.begin(),
+                                             E = RunResult.end();
+             I != E; ++I) {
+          llvm::dbgs() << I->toString() << "\n";
+        }
+      });
+      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+        delete AnnotatedLines[i];
+      }
+      Result.insert(RunResult.begin(), RunResult.end());
     }
+    return Result;
   }
 
-  // Returns true if the range from 'First' to 'Last' intersects with one of the
-  // input ranges.
-  bool affectsTokenRange(const FormatToken &First, const FormatToken &Last,
-                         bool IncludeLeadingNewlines) {
-    SourceLocation Start = First.WhitespaceRange.getBegin();
-    if (!IncludeLeadingNewlines)
-      Start = Start.getLocWithOffset(First.LastNewlineOffset);
-    SourceLocation End = Last.getStartOfNonWhitespace();
-    End = End.getLocWithOffset(Last.TokenText.size());
-    CharSourceRange Range = CharSourceRange::getCharRange(Start, End);
-    return affectsCharSourceRange(Range);
+  FormatStyle &getFormatStyle() { return Style; }
+
+  FileID getFileID() { return ID; }
+
+  SourceManager &getSourceManager() { return SourceMgr; }
+
+  AffectedRangeManager &getAffectedRangeManager() { return AffectedRangeMgr; }
+
+  encoding::Encoding getEncoding() { return Encoding; }
+
+private:
+  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
+    assert(!UnwrappedLines.empty());
+    UnwrappedLines.back().push_back(TheLine);
   }
 
-  // Returns true if one of the input ranges intersect the leading empty lines
-  // before 'Tok'.
-  bool affectsLeadingEmptyLines(const FormatToken &Tok) {
-    CharSourceRange EmptyLineRange = CharSourceRange::getCharRange(
-        Tok.WhitespaceRange.getBegin(),
-        Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset));
-    return affectsCharSourceRange(EmptyLineRange);
+  void finishRun() override {
+    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
   }
 
-  // Returns true if 'Range' intersects with one of the input ranges.
-  bool affectsCharSourceRange(const CharSourceRange &Range) {
-    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
-                                                          E = Ranges.end();
-         I != E; ++I) {
-      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
-          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
-        return true;
+  FormatStyle Style;
+  FileID ID;
+  SourceManager &SourceMgr;
+  // AffectedRangeMgr stores ranges to be fixed.
+  AffectedRangeManager AffectedRangeMgr;
+  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+
+  encoding::Encoding Encoding;
+};
+
+class Formatter {
+public:
+  Formatter(CodeProcessor &Processor) : Processor(Processor) {}
+
+  tooling::Replacements
+  runFormat(TokenAnnotator &Annotator,
+            SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+            FormatTokenLexer &Tokens, tooling::Replacements &Result,
+            bool *IncompleteFormat) {
+    deriveLocalStyle(AnnotatedLines);
+    Processor.getAffectedRangeManager().computeAffectedLines(
+        AnnotatedLines.begin(), AnnotatedLines.end());
+
+    if (Processor.getFormatStyle().Language == FormatStyle::LK_JavaScript &&
+        Processor.getFormatStyle().JavaScriptQuotes != FormatStyle::JSQS_Leave)
+      requoteJSStringLiteral(AnnotatedLines, Result);
+
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
+    }
+
+    Annotator.setCommentLineLevels(AnnotatedLines);
+
+    WhitespaceManager Whitespaces(
+        Processor.getSourceManager(), Processor.getFormatStyle(),
+        inputUsesCRLF(
+            Processor.getSourceManager().getBufferData(Processor.getFileID())));
+    ContinuationIndenter Indenter(
+        Processor.getFormatStyle(), Tokens.getKeywords(),
+        Processor.getSourceManager(), Whitespaces, Processor.getEncoding(),
+        BinPackInconclusiveFunctions);
+    UnwrappedLineFormatter(&Indenter, &Whitespaces, Processor.getFormatStyle(),
+                           Tokens.getKeywords(), IncompleteFormat)
+        .format(AnnotatedLines);
+    return Whitespaces.generateReplacements();
+  }
+
+private:
+  // If the last token is a double/single-quoted string literal, generates a
+  // replacement with a single/double quoted string literal, re-escaping the
+  // contents in the process.
+  void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines,
+                              tooling::Replacements &Result) {
+    for (AnnotatedLine *Line : Lines) {
+      requoteJSStringLiteral(Line->Children, Result);
+      if (!Line->Affected)
+        continue;
+      for (FormatToken *FormatTok = Line->First; FormatTok;
+           FormatTok = FormatTok->Next) {
+        StringRef Input = FormatTok->TokenText;
+        if (!FormatTok->isStringLiteral() ||
+            // NB: testing for not starting with a double quote to avoid
+            // breaking
+            // `template strings`.
+            (Processor.getFormatStyle().JavaScriptQuotes ==
+                 FormatStyle::JSQS_Single &&
+             !Input.startswith("\"")) ||
+            (Processor.getFormatStyle().JavaScriptQuotes ==
+                 FormatStyle::JSQS_Double &&
+             !Input.startswith("\'")))
+          continue;
+
+        // Change start and end quote.
+        bool IsSingle = Processor.getFormatStyle().JavaScriptQuotes ==
+                        FormatStyle::JSQS_Single;
+        SourceLocation Start = FormatTok->Tok.getLocation();
+        auto Replace = [&](SourceLocation Start, unsigned Length,
+                           StringRef ReplacementText) {
+          Result.insert(tooling::Replacement(Processor.getSourceManager(),
+                                             Start, Length, ReplacementText));
+        };
+        Replace(Start, 1, IsSingle ? "'" : "\"");
+        Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1,
+                IsSingle ? "'" : "\"");
+
+        // Escape internal quotes.
+        size_t ColumnWidth = FormatTok->TokenText.size();
+        bool Escaped = false;
+        for (size_t i = 1; i < Input.size() - 1; i++) {
+          switch (Input[i]) {
+          case '\\':
+            if (!Escaped && i + 1 < Input.size() &&
+                ((IsSingle && Input[i + 1] == '"') ||
+                 (!IsSingle && Input[i + 1] == '\''))) {
+              // Remove this \, it's escaping a " or ' that no longer needs
+              // escaping
+              ColumnWidth--;
+              Replace(Start.getLocWithOffset(i), 1, "");
+              continue;
+            }
+            Escaped = !Escaped;
+            break;
+          case '\"':
+          case '\'':
+            if (!Escaped && IsSingle == (Input[i] == '\'')) {
+              // Escape the quote.
+              Replace(Start.getLocWithOffset(i), 0, "\\");
+              ColumnWidth++;
+            }
+            Escaped = false;
+            break;
+          default:
+            Escaped = false;
+            break;
+          }
+        }
+
+        // For formatting, count the number of non-escaped single quotes in them
+        // and adjust ColumnWidth to take the added escapes into account.
+        // FIXME(martinprobst): this might conflict with code breaking a long
+        // string literal (which clang-format doesn't do, yet). For that to
+        // work, this code would have to modify TokenText directly.
+        FormatTok->ColumnWidth = ColumnWidth;
+      }
     }
-    return false;
   }
 
   static bool inputUsesCRLF(StringRef Text) {
     return Text.count('\r') * 2 > Text.count('\n');
   }
 
   bool
   hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) {
-    for (const AnnotatedLine* Line : Lines) {
+    for (const AnnotatedLine *Line : Lines) {
       if (hasCpp03IncompatibleFormat(Line->Children))
         return true;
       for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
@@ -1757,7 +1815,7 @@
 
   int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) {
     int AlignmentDiff = 0;
-    for (const AnnotatedLine* Line : Lines) {
+    for (const AnnotatedLine *Line : Lines) {
       AlignmentDiff += countVariableAlignments(Line->Children);
       for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
         if (!Tok->is(TT_PointerOrReference))
@@ -1792,36 +1850,185 @@
         Tok = Tok->Next;
       }
     }
-    if (Style.DerivePointerAlignment)
-      Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0
-                                   ? FormatStyle::PAS_Left
-                                   : FormatStyle::PAS_Right;
-    if (Style.Standard == FormatStyle::LS_Auto)
-      Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines)
-                           ? FormatStyle::LS_Cpp11
-                           : FormatStyle::LS_Cpp03;
+    if (Processor.getFormatStyle().DerivePointerAlignment)
+      Processor.getFormatStyle().PointerAlignment =
+          countVariableAlignments(AnnotatedLines) <= 0 ? FormatStyle::PAS_Left
+                                                       : FormatStyle::PAS_Right;
+    if (Processor.getFormatStyle().Standard == FormatStyle::LS_Auto)
+      Processor.getFormatStyle().Standard =
+          hasCpp03IncompatibleFormat(AnnotatedLines) ? FormatStyle::LS_Cpp11
+                                                     : FormatStyle::LS_Cpp03;
     BinPackInconclusiveFunctions =
         HasBinPackedFunction || !HasOnePerLineFunction;
   }
 
-  void consumeUnwrappedLine(const UnwrappedLine &TheLine) override {
-    assert(!UnwrappedLines.empty());
-    UnwrappedLines.back().push_back(TheLine);
+  CodeProcessor &Processor;
+  bool BinPackInconclusiveFunctions;
+};
+
+// This class clean up the erroneous/redundant code around the given ranges in
+// file.
+class Cleaner {
+public:
+  Cleaner(CodeProcessor &Processor,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines)
+      : Processor(Processor), AnnotatedLines(AnnotatedLines),
+        DeletedTokens(FormatTokenLess(Processor.getSourceManager())) {}
+
+  tooling::Replacements runCleanup() {
+    // FIXME: in the current implementation the granularity of affected range
+    // is an annotated line. However, this is not sufficient. Furthermore,
+    // redundant code introduced by replacements does not necessarily
+    // intercept with ranges of replacements that result in the redundancy.
+    // To determine if some redundant code is actually introduced by
+    // replacements(e.g. deletions), we need to come up with a more
+    // sophisticated way of computing affected ranges.
+    Processor.getAffectedRangeManager().computeAffectedLines(
+        AnnotatedLines.begin(), AnnotatedLines.end());
+
+    checkEmptyNamespace();
+
+    return generateFixes();
   }
 
-  void finishRun() override {
-    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
+private:
+  bool containsOnlyComments(const AnnotatedLine &Line) {
+    for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) {
+      if (Tok->isNot(tok::comment))
+        return false;
+    }
+    return true;
   }
 
-  FormatStyle Style;
-  FileID ID;
-  SourceManager &SourceMgr;
-  WhitespaceManager Whitespaces;
-  SmallVector<CharSourceRange, 8> Ranges;
-  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
+  void checkEmptyNamespace() {
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      auto &Line = *AnnotatedLines[i];
+      if (Line.Deleted)
+        continue;
 
-  encoding::Encoding Encoding;
-  bool BinPackInconclusiveFunctions;
+      if (Line.startsWith(tok::kw_namespace) ||
+          Line.startsWith(tok::kw_inline, tok::kw_namespace)) {
+        checkEmptyNamespace(i, i);
+      }
+    }
+  }
+
+  // The function checks if the namespace, which starts from \p CurrentLine, and
+  // its nested namespaces are empty and delete them if they are empty. It also
+  // sets \p NewLine to the last line checked.
+  // Returns true if the current namespace is empty.
+  bool checkEmptyNamespace(unsigned CurrentLine, unsigned &NewLine) {
+    if (!AnnotatedLines[CurrentLine]->Last->is(tok::l_brace)) {
+      NewLine = CurrentLine;
+      return false; // FIXME: error handling.
+    }
+
+    unsigned InitLine = CurrentLine, End = AnnotatedLines.size();
+
+    while (++CurrentLine < End) {
+      if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace))
+        break;
+      if (AnnotatedLines[CurrentLine]->startsWith(tok::kw_namespace) ||
+          AnnotatedLines[CurrentLine]->startsWith(tok::kw_inline,
+                                                  tok::kw_namespace)) {
+        if (!checkEmptyNamespace(CurrentLine, NewLine))
+          return false;
+        CurrentLine = NewLine;
+        continue;
+      }
+
+      if (containsOnlyComments(*AnnotatedLines[CurrentLine]))
+        continue;
+
+      // If there is anything other than comments or nested namespaces in the
+      // current namespace, the namespace cannot be empty.
+      NewLine = CurrentLine;
+      return false;
+    }
+
+    NewLine = CurrentLine;
+    if (CurrentLine >= End)
+      return false;
+
+    // Check if the empty namespace is actually affected by changed ranges.
+    if (!Processor.getAffectedRangeManager().affectsCharSourceRange(
+            CharSourceRange::getCharRange(
+                AnnotatedLines[InitLine]->First->Tok.getLocation(),
+                AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc())))
+      return false;
+
+    for (unsigned i = InitLine; i <= CurrentLine; ++i) {
+      if (AnnotatedLines[i]->Deleted)
+        continue;
+      AnnotatedLines[i]->Deleted = true;
+      FormatToken *Tok = AnnotatedLines[i]->First;
+      while (Tok) {
+        deleteToken(Tok);
+        Tok = Tok->Next;
+      }
+    }
+
+    return true;
+  }
+
+  // Delete the given token.
+  inline void deleteToken(FormatToken *Tok) {
+    DeletedTokens.insert(Tok);
+  }
+
+  tooling::Replacements generateFixes() {
+    tooling::Replacements Fixes;
+    std::vector<FormatToken *> Tokens;
+    std::copy(DeletedTokens.begin(), DeletedTokens.end(),
+              std::back_inserter(Tokens));
+
+    // Link all tokens.
+    for (unsigned Idx = 0, End = AnnotatedLines.size() - 1; Idx < End; ++Idx) {
+      AnnotatedLines[Idx]->Last->Next = AnnotatedLines[Idx + 1]->First;
+    }
+
+    // Merge multiple continuous token deletions into one big deletion so that
+    // the number of replacements can be reduced. When we do `reformat` on the
+    // fixed code, there could be fewer changed code ranges, which makes
+    // computing affected lines more efficient.
+    unsigned Idx = 0;
+    while (Idx < Tokens.size()) {
+      unsigned St = Idx, End = Idx;
+      while ((End + 1) < Tokens.size() &&
+             Tokens[End]->Next == Tokens[End + 1]) {
+        End++;
+      }
+      auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(),
+                                              Tokens[End]->Tok.getEndLoc());
+      Fixes.insert(tooling::Replacement(Processor.getSourceManager(), SR, ""));
+      Idx = End + 1;
+    }
+
+    // Unlink all tokens.
+    for (unsigned Idx = 0, End = AnnotatedLines.size() - 1; Idx < End; ++Idx) {
+      AnnotatedLines[Idx]->Last->Next = nullptr;
+    }
+
+    return Fixes;
+  }
+
+  // Class for less-than inequality comparason for the set `RedundantTokens`.
+  // We store tokens in the order they appear in the translation unit so that
+  // we do not need to sort them in `generateFixes()`.
+  struct FormatTokenLess {
+    FormatTokenLess(SourceManager &SM) : SM(SM) {}
+
+    bool operator()(const FormatToken *LHS, const FormatToken *RHS) {
+      return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(),
+                                          RHS->Tok.getLocation());
+    }
+    SourceManager &SM;
+  };
+
+  CodeProcessor &Processor;
+  SmallVectorImpl<AnnotatedLine *> &AnnotatedLines;
+  // Tokens to be deleted.
+  std::set<FormatToken *, FormatTokenLess> DeletedTokens;
 };
 
 struct IncludeDirective {
@@ -1990,39 +2197,56 @@
   return Replaces;
 }
 
-tooling::Replacements formatReplacements(StringRef Code,
-                                         const tooling::Replacements &Replaces,
-                                         const FormatStyle &Style) {
+template <typename T>
+static tooling::Replacements
+processReplacements(T ProcessFunc, StringRef Code,
+                    const tooling::Replacements &Replaces,
+                    const FormatStyle &Style) {
   if (Replaces.empty())
     return tooling::Replacements();
 
   std::string NewCode = applyAllReplacements(Code, Replaces);
   std::vector<tooling::Range> ChangedRanges =
       tooling::calculateChangedRanges(Replaces);
   StringRef FileName = Replaces.begin()->getFilePath();
+
   tooling::Replacements FormatReplaces =
-      reformat(Style, NewCode, ChangedRanges, FileName);
+      ProcessFunc(Style, NewCode, ChangedRanges, FileName);
 
-  tooling::Replacements MergedReplacements =
-      mergeReplacements(Replaces, FormatReplaces);
+  return mergeReplacements(Replaces, FormatReplaces);
+}
 
-  return MergedReplacements;
+tooling::Replacements formatReplacements(StringRef Code,
+                                         const tooling::Replacements &Replaces,
+                                         const FormatStyle &Style) {
+  // We need to use lambda function here since `reformat` has default parameter
+  // `IncompleteFormat`.
+  auto Reformat = [](const FormatStyle &Style, StringRef Code,
+                     std::vector<tooling::Range> Ranges,
+                     StringRef FileName) -> tooling::Replacements {
+    return reformat(Style, Code, Ranges, FileName);
+  };
+  return processReplacements(Reformat, Code, Replaces, Style);
 }
 
-tooling::Replacements reformat(const FormatStyle &Style,
-                               SourceManager &SourceMgr, FileID ID,
-                               ArrayRef<CharSourceRange> Ranges,
-                               bool *IncompleteFormat) {
-  FormatStyle Expanded = expandPresets(Style);
-  if (Expanded.DisableFormat)
-    return tooling::Replacements();
-  Formatter formatter(Expanded, SourceMgr, ID, Ranges);
-  return formatter.format(IncompleteFormat);
+tooling::Replacements
+cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
+                          const FormatStyle &Style) {
+  // We need to use lambda function here since there are two versions of
+  // `cleanup`.
+  auto Cleanup = [](const FormatStyle &Style, StringRef Code,
+                    std::vector<tooling::Range> Ranges,
+                    StringRef FileName) -> tooling::Replacements {
+    return cleanup(Style, Code, Ranges, FileName);
+  };
+  return processReplacements(Cleanup, Code, Replaces, Style);
 }
 
-tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
-                               ArrayRef<tooling::Range> Ranges,
-                               StringRef FileName, bool *IncompleteFormat) {
+template <typename T>
+static tooling::Replacements
+setupEnvironmentAndProcess(T Callable, const FormatStyle &Style, StringRef Code,
+                           ArrayRef<tooling::Range> Ranges, StringRef FileName,
+                           bool *IncompleteFormat) {
   if (Style.DisableFormat)
     return tooling::Replacements();
 
@@ -2045,7 +2269,73 @@
     SourceLocation End = Start.getLocWithOffset(Range.getLength());
     CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
   }
-  return reformat(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
+
+  return Callable(Style, SourceMgr, ID, CharRanges, IncompleteFormat);
+}
+
+tooling::Replacements reformat(const FormatStyle &Style,
+                               SourceManager &SourceMgr, FileID ID,
+                               ArrayRef<CharSourceRange> Ranges,
+                               bool *IncompleteFormat) {
+  FormatStyle Expanded = expandPresets(Style);
+  if (Expanded.DisableFormat)
+    return tooling::Replacements();
+
+  CodeProcessor Processor(Expanded, SourceMgr, ID, Ranges);
+  auto Callable =
+      [&Processor](TokenAnnotator &Annotator,
+                   SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                   FormatTokenLexer &Tokens, tooling::Replacements &Result,
+                   bool *IncompleteFormat) -> tooling::Replacements {
+    Formatter Format(Processor);
+    return Format.runFormat(Annotator, AnnotatedLines, Tokens, Result,
+                            IncompleteFormat);
+  };
+  return Processor.process(Callable, IncompleteFormat);
+}
+
+tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
+                               ArrayRef<tooling::Range> Ranges,
+                               StringRef FileName, bool *IncompleteFormat) {
+  // We need to use lambda here since there are two versions of `reformat`.
+  auto Format = [](const FormatStyle &Style, SourceManager &SourceMgr,
+                   FileID ID, ArrayRef<CharSourceRange> Ranges,
+                   bool *IncompleteFormat) -> tooling::Replacements {
+    return reformat(Style, SourceMgr, ID, Ranges, IncompleteFormat);
+  };
+  return setupEnvironmentAndProcess(Format, Style, Code, Ranges, FileName,
+                                    IncompleteFormat);
+}
+
+tooling::Replacements cleanup(const FormatStyle &Style,
+                              SourceManager &SourceMgr, FileID ID,
+                              ArrayRef<CharSourceRange> Ranges) {
+  CodeProcessor Processor(Style, SourceMgr, ID, Ranges);
+  // FIXME: find a way to eliminate the unused parameters.
+  auto Callable =
+      [&Processor](TokenAnnotator &Annotator,
+                   SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                   FormatTokenLexer &Tokens, tooling::Replacements &Result,
+                   bool *IncompleteFormat) -> tooling::Replacements {
+    Cleaner Cleaner(Processor, AnnotatedLines);
+    return Cleaner.runCleanup();
+  };
+  return Processor.process(Callable, nullptr);
+}
+
+tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
+                              ArrayRef<tooling::Range> Ranges,
+                              StringRef FileName) {
+  // We need to use lambda here since there are two versions of `cleanup`.
+  auto Cleanup = [](const FormatStyle &Style, SourceManager &SourceMgr,
+                    FileID ID, ArrayRef<CharSourceRange> Ranges,
+                    bool *IncompleteFormat) -> tooling::Replacements {
+
+    return cleanup(Style, SourceMgr, ID, Ranges);
+  };
+
+  return setupEnvironmentAndProcess(Cleanup, Style, Code, Ranges, FileName,
+                                    nullptr);
 }
 
 LangOptions getFormattingLangOpts(const FormatStyle &Style) {
@@ -2059,7 +2349,7 @@
   LangOpts.Bool = 1;
   LangOpts.ObjC1 = 1;
   LangOpts.ObjC2 = 1;
-  LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
+  LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
   LangOpts.DeclSpecKeyword = 1; // To get __declspec.
   return LangOpts;
 }
Index: include/clang/Format/Format.h
===================================================================
--- include/clang/Format/Format.h
+++ include/clang/Format/Format.h
@@ -766,6 +766,12 @@
                                          const tooling::Replacements &Replaces,
                                          const FormatStyle &Style);
 
+/// \brief Returns the replacements corresponding to applying \p Replaces and
+/// cleaning up the code after that.
+tooling::Replacements
+cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
+                          const FormatStyle &Style);
+
 /// \brief Reformats the given \p Ranges in the file \p ID.
 ///
 /// Each range is extended on either end to its next bigger logic unit, i.e.
@@ -791,6 +797,22 @@
                                StringRef FileName = "<stdin>",
                                bool *IncompleteFormat = nullptr);
 
+/// \brief Clean up any erroneous/redundant code in the given \p Ranges in the
+/// file \p ID.
+///
+/// Returns the ``Replacements`` that clean up all \p Ranges in the file \p ID.
+tooling::Replacements cleanup(const FormatStyle &Style,
+                              SourceManager &SourceMgr, FileID ID,
+                              ArrayRef<CharSourceRange> Ranges);
+
+/// \brief Clean up any erroneous/redundant code in the given \p Ranges in \p
+/// Code.
+///
+/// Otherwise identical to the cleanup() function using a file ID.
+tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code,
+                              ArrayRef<tooling::Range> Ranges,
+                              StringRef FileName = "<stdin>");
+
 /// \brief Returns the ``LangOpts`` that the formatter expects you to set.
 ///
 /// \param Style determines specific settings for lexing mode.

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Re: [PATCH] D18551: Added Fixer implementation and fix() interface in clang-format for removing redundant code.

Reply via email to