kwk created this revision.
kwk added reviewers: HazardyKnusperkeks, MyDeveloperDay.
Herald added a project: All.
kwk requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Fixes #38995

This is an attempt to modify the regular expression to identify
`@import` and `import` alongside the regular `#include`. The challenging
part was not to support `@` in addition to `#` but how to handle
everything that comes after the `include|import` keywords. Previously
everything that wasn't `"` or `<` was consumed. But as you can see in
this example from the issue #38995, there is no `"` or `<` following the
keyword:

  @import Foundation;

I experimented with a lot of fancy and useful expressions in this
online regex tool <https://regex101.com> only to find out that some
things are simply not supported by the regex implementation in LLVM.

- For example the beginning `[\t\ ]*` should be replacable by the horizontal 
whitespace character `\h*` but this will break the 
`SortIncludesTest.LeadingWhitespace` test.

That's why I've chosen to come back to the basic building blocks.

The essential change in this patch is the change from this regular
expression:

  ^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">])
          ~                              ~~~~~~~~~~~~~~
          ^                              ^
          |                              |
          only support # prefix not @    |
                                         only support "" and <> as delimiters
                                         no support for C++ modules and ;
                                         ending. Also this allows for ">
                                         or <" or "" or <> which all seems
                                         either off or wrong.

to this:

  ^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ 
\\]*("[^"]+"|<[^>]+>|[^"<>;]+;)
          ~~~~~                             ~~~~~~~~~~~ ~~~~~~~ ~~~~~~~ 
~~~~~~~~~
          ^                                 ^           ^       ^       ^
          |                                 |           |       |       |
          |                                 |           Clearly support "" and 
<>
          |                                 |           as well as an include 
name
          |                                 |           without enclosing 
characters.
          |                                 |           Allows for no mixture 
of ">
          |                                 |            or <" or empty include 
names.
          |                                 |
          Now optionally support @ and #.   These are needed to support: 
@import Foundation;

Here is how I've tested this patch:

  ninja clang-Format
  ninja FormatTests
  ./tools/clang/unittests/Format/FormatTests --gtest_filter=SortIncludesTest*

And if that worked I doubled checked that nothing else broke by running
all format checks:

  ./tools/clang/unittests/Format/FormatTests

One side effect of this change is it should partially support
C++20 Module <https://en.cppreference.com/w/cpp/language/modules>
`import` lines without the optional `export` in front. Adding
this can be a change on its own that shouldn't be too hard.

I see an opportunity to optimized the matching to exclude `@include` for
example. But eventually these should be caught by the compiler, so...


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D121370

Files:
  clang/lib/Format/Format.cpp
  clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
  clang/unittests/Format/SortIncludesTest.cpp


Index: clang/unittests/Format/SortIncludesTest.cpp
===================================================================
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -458,6 +458,19 @@
                  "#include \"b.h\"\n"));
 }
 
+TEST_F(SortIncludesTest, SupportAtImportLines) {
+  EXPECT_EQ("#import \"a.h\"\n"
+            "#import \"b.h\"\n"
+            "#import \"c.h\"\n"
+            "#import <d/e.h>\n"
+            "@import Foundation;\n",
+            sort("#import \"b.h\"\n"
+                 "#import \"c.h\"\n"
+                 "#import <d/e.h>\n"
+                 "@import Foundation;\n"
+                 "#import \"a.h\"\n"));
+}
+
 TEST_F(SortIncludesTest, LeavesMainHeaderFirst) {
   Style.IncludeIsMainRegex = "([-_](test|unittest))?$";
   EXPECT_EQ("#include \"llvm/a.h\"\n"
Index: clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
===================================================================
--- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -170,11 +170,11 @@
 }
 
 inline StringRef trimInclude(StringRef IncludeName) {
-  return IncludeName.trim("\"<>");
+  return IncludeName.trim("\"<>;");
 }
 
 const char IncludeRegexPattern[] =
-    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+    R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ 
\\]*("[^"]+"|<[^>]+>|[^"<>;]+;))";
 
 // The filename of Path excluding extension.
 // Used to match implementation with headers, this differs from 
sys::path::stem:
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2682,7 +2682,7 @@
 namespace {
 
 const char CppIncludeRegexPattern[] =
-    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+    R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ 
\\]*("[^"]+"|<[^>]+>|[^"<>;]+;))";
 
 } // anonymous namespace
 
@@ -2752,6 +2752,13 @@
     if (!FormattingOff && !MergeWithNextLine) {
       if (IncludeRegex.match(Line, &Matches)) {
         StringRef IncludeName = Matches[2];
+        // HACK(kkleine): Sort C++ module includes/imports that are not 
enclosed
+        // in "" or <> as if they are enclosed with <.
+        if (!IncludeName.startswith("\"") && !IncludeName.startswith("<")) {
+          IncludeName =
+              StringRef(Twine("<", IncludeName).concat(Twine(">")).str());
+        }
+
         if (Line.contains("/*") && !Line.contains("*/")) {
           // #include with a start of a block comment, but without the end.
           // Need to keep all the lines until the end of the comment together.


Index: clang/unittests/Format/SortIncludesTest.cpp
===================================================================
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -458,6 +458,19 @@
                  "#include \"b.h\"\n"));
 }
 
+TEST_F(SortIncludesTest, SupportAtImportLines) {
+  EXPECT_EQ("#import \"a.h\"\n"
+            "#import \"b.h\"\n"
+            "#import \"c.h\"\n"
+            "#import <d/e.h>\n"
+            "@import Foundation;\n",
+            sort("#import \"b.h\"\n"
+                 "#import \"c.h\"\n"
+                 "#import <d/e.h>\n"
+                 "@import Foundation;\n"
+                 "#import \"a.h\"\n"));
+}
+
 TEST_F(SortIncludesTest, LeavesMainHeaderFirst) {
   Style.IncludeIsMainRegex = "([-_](test|unittest))?$";
   EXPECT_EQ("#include \"llvm/a.h\"\n"
Index: clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
===================================================================
--- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -170,11 +170,11 @@
 }
 
 inline StringRef trimInclude(StringRef IncludeName) {
-  return IncludeName.trim("\"<>");
+  return IncludeName.trim("\"<>;");
 }
 
 const char IncludeRegexPattern[] =
-    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+    R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ \\]*("[^"]+"|<[^>]+>|[^"<>;]+;))";
 
 // The filename of Path excluding extension.
 // Used to match implementation with headers, this differs from sys::path::stem:
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2682,7 +2682,7 @@
 namespace {
 
 const char CppIncludeRegexPattern[] =
-    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+    R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ \\]*("[^"]+"|<[^>]+>|[^"<>;]+;))";
 
 } // anonymous namespace
 
@@ -2752,6 +2752,13 @@
     if (!FormattingOff && !MergeWithNextLine) {
       if (IncludeRegex.match(Line, &Matches)) {
         StringRef IncludeName = Matches[2];
+        // HACK(kkleine): Sort C++ module includes/imports that are not enclosed
+        // in "" or <> as if they are enclosed with <.
+        if (!IncludeName.startswith("\"") && !IncludeName.startswith("<")) {
+          IncludeName =
+              StringRef(Twine("<", IncludeName).concat(Twine(">")).str());
+        }
+
         if (Line.contains("/*") && !Line.contains("*/")) {
           // #include with a start of a block comment, but without the end.
           // Need to keep all the lines until the end of the comment together.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to