bkramer created this revision.
bkramer added reviewers: djasper, klimek.
bkramer added a subscriber: cfe-commits.
Herald added a subscriber: klimek.

/* aaaaaaaa a
 * a*/

Now becomes

/* aaaaaaaa
 * a a*/

instead of

/* aaaaaaaa
 * a
 * a*/

This is implemented by glueing the next line on while fixing whitespace
and adding another break if that brings us over the column limit again.
We also have heuristics to avoid making existing comments worse:
  1. Only reflow when the existing comment is already over the column limit
  2. Don't reflow when there's an empty line (to avoid breaking paragraphs)
  3. Don't reflow when there's a non-alphanumeric char at the beginning of
     the next line. This is a weak attempt to avoid mangling ASCII art.

I intend to do the same thing for line comments, but that will require
changes to other parts of clang-format first.

http://reviews.llvm.org/D15147

Files:
  lib/Format/BreakableToken.cpp
  lib/Format/BreakableToken.h
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1568,8 +1568,8 @@
             "     *   a comment\n"
             "* that we break\n"
             " * another comment\n"
-            "* we have to break\n"
-            "* a left comment\n"
+            "* we have to break a\n"
+            "* left comment\n"
             " */",
             format("  /* some comment\n"
                    "       *   a comment that we break\n"
@@ -1647,6 +1647,57 @@
                    getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTest, ReflowsBlockComments) {
+  EXPECT_EQ("/*\n"
+            " * aaaa aaaa\n"
+            " * aaaa\n"
+            " */",
+            format("/*\n"
+                   " * aaaa aaaa\n"
+                   " * aaaa\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/*\n"
+            " * aaaaaaaaaaaaaaaaa\n"
+            " * aaaa\n"
+            " *\n"
+            " * aaaa\n"
+            " */",
+            format("/*\n"
+                   " * aaaaaaaaaaaaaaaaa aaaa\n"
+                   " *\n"
+                   " * aaaa\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/*\n"
+            " * aaaaaaaaaaaaaaaaa\n"
+            " * aaaa\n"
+            " * |--|\n"
+            " * aaaa\n"
+            " */",
+            format("/*\n"
+                   " * aaaaaaaaaaaaaaaaa aaaa\n"
+                   " * |--|\n"
+                   " * aaaa\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/*\n"
+            " * aaaaaaaaaaaaaaaaa\n"
+            " * aaaa aaaa\n"
+            " * aaaaaaaaaaaaaaaaa\n"
+            " * aaaa\n"
+            " */",
+            format("/*\n"
+                   " * aaaaaaaaaaaaaaaaa aaaa\n"
+                   " * aaaa aaaaaaaaaaaaaaaaa\n"
+                   " * aaaa\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+}
+
 TEST_F(FormatTest, CommentsInStaticInitializers) {
   EXPECT_EQ(
       "static SomeType type = {aaaaaaaaaaaaaaaaaaaa, /* comment */\n"
Index: lib/Format/BreakableToken.h
===================================================================
--- lib/Format/BreakableToken.h
+++ lib/Format/BreakableToken.h
@@ -204,6 +204,10 @@
   // present) is also not considered part of the text.
   SmallVector<StringRef, 16> Lines;
 
+  // For each line in a block comment this stores how many characters overflowed
+  // into the next line if it was wrapped. Otherwise the value is 0.
+  SmallVector<unsigned, 16> WrappedLines;
+
   // LeadingWhitespace[i] is the number of characters regarded as whitespace in
   // front of Lines[i]. Note that this can include "* " sequences, which we
   // regard as whitespace when all lines have a "*" prefix.
Index: lib/Format/BreakableToken.cpp
===================================================================
--- lib/Format/BreakableToken.cpp
+++ lib/Format/BreakableToken.cpp
@@ -259,6 +259,7 @@
   TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
 
   int IndentDelta = StartColumn - OriginalStartColumn;
+  WrappedLines.resize(Lines.size());
   LeadingWhitespace.resize(Lines.size());
   StartOfLineColumn.resize(Lines.size());
   StartOfLineColumn[0] = StartColumn + 2;
@@ -400,6 +401,8 @@
   Whitespaces.replaceWhitespaceInToken(
       Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
       IndentLevel, IndentAtLineBreak - Decoration.size());
+
+  WrappedLines[LineIndex] = Lines[LineIndex].size() - Split.first;
 }
 
 void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
@@ -438,13 +441,49 @@
     }
   }
 
-  unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
-                                     Tok.TokenText.data() -
-                                     LeadingWhitespace[LineIndex];
+  const StringRef Line = Lines[LineIndex];
+  unsigned LeadingWhitespaceInToken = LeadingWhitespace[LineIndex];
+  unsigned WhitespaceOffsetInToken =
+      Line.data() - Tok.TokenText.data() - LeadingWhitespaceInToken;
+  unsigned NumNewLines = 1;
+  unsigned Spaces = StartOfLineColumn[LineIndex] - Prefix.size();
+
+  // At this point comment lines violating the column limit will are wrapped
+  // already. Reflow the comments by glueing the next line onto this one. We
+  // only do this if the line is non-empty (to avoid accidental removal of
+  // paragraphs) and starts with an alphanumeric character (to avoid breaking
+  // ASCII art). Otherwise keep the simple wrapping as-is. We also have to make
+  // sure that extending the current line won't push us over the column limit in
+  // the same word and cause a break to be inserted at the very same position.
+  const unsigned LastOverflow = WrappedLines[LineIndex - 1];
+  auto FirstWordLen = std::min(Line.find_first_of(Blanks), Line.size());
+  const bool Reflow =
+      LastOverflow > 0 && !Line.empty() && isAlphanumeric(Line[0]) &&
+      IndentLevel + Spaces + LastOverflow + FirstWordLen <= Style.ColumnLimit;
+  if (Reflow) {
+    Prefix = "";
+    unsigned NewBegin = Line.data() - Lines[LineIndex - 1].end();
+    WhitespaceOffsetInToken += LeadingWhitespaceInToken - NewBegin;
+    LeadingWhitespaceInToken = NewBegin;
+    NumNewLines = 0;
+    Spaces = 1;
+  }
+
   Whitespaces.replaceWhitespaceInToken(
-      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
-      InPPDirective, 1, IndentLevel,
-      StartOfLineColumn[LineIndex] - Prefix.size());
+      Tok, WhitespaceOffsetInToken, LeadingWhitespaceInToken, "", Prefix,
+      InPPDirective, NumNewLines, IndentLevel, Spaces);
+
+  // Reflowing can bring this line over the column limit too. Break if
+  // necessary.
+  auto LineLength = encoding::columnWidthWithTabs(Line, IndentLevel,
+                                                  Style.TabWidth, Encoding);
+  unsigned NewLengthOfLine = IndentLevel + Spaces + LineLength + LastOverflow;
+  if (Reflow && NewLengthOfLine > Style.ColumnLimit &&
+      Line.size() <= Style.ColumnLimit) {
+    auto Split = getCommentSplit(Line, NewLengthOfLine - Line.size(),
+                                 Style.ColumnLimit, Style.TabWidth, Encoding);
+    insertBreak(LineIndex, 0, Split, Whitespaces);
+  }
 }
 
 unsigned
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to