kwk created this revision. kwk added reviewers: HazardyKnusperkeks, MyDeveloperDay. Herald added a project: All. kwk requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
Fixes #38995 This is an attempt to modify the regular expression to identify `@import` and `import` alongside the regular `#include`. The challenging part was not to support `@` in addition to `#` but how to handle everything that comes after the `include|import` keywords. Previously everything that wasn't `"` or `<` was consumed. But as you can see in this example from the issue #38995, there is no `"` or `<` following the keyword: @import Foundation; I experimented with a lot of fancy and useful expressions in this online regex tool <https://regex101.com> only to find out that some things are simply not supported by the regex implementation in LLVM. - For example the beginning `[\t\ ]*` should be replacable by the horizontal whitespace character `\h*` but this will break the `SortIncludesTest.LeadingWhitespace` test. That's why I've chosen to come back to the basic building blocks. The essential change in this patch is the change from this regular expression: ^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]) ~ ~~~~~~~~~~~~~~ ^ ^ | | only support # prefix not @ | only support "" and <> as delimiters no support for C++ modules and ; ending. Also this allows for "> or <" or "" or <> which all seems either off or wrong. to this: ^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ \\]*("[^"]+"|<[^>]+>|[^"<>;]+;) ~~~~~ ~~~~~~~~~~~ ~~~~~~~ ~~~~~~~ ~~~~~~~~~ ^ ^ ^ ^ ^ | | | | | | | Clearly support "" and <> | | as well as an include name | | without enclosing characters. | | Allows for no mixture of "> | | or <" or empty include names. | | Now optionally support @ and #. These are needed to support: @import Foundation; Here is how I've tested this patch: ninja clang-Format ninja FormatTests ./tools/clang/unittests/Format/FormatTests --gtest_filter=SortIncludesTest* And if that worked I doubled checked that nothing else broke by running all format checks: ./tools/clang/unittests/Format/FormatTests One side effect of this change is it should partially support C++20 Module <https://en.cppreference.com/w/cpp/language/modules> `import` lines without the optional `export` in front. Adding this can be a change on its own that shouldn't be too hard. I see an opportunity to optimized the matching to exclude `@include` for example. But eventually these should be caught by the compiler, so... Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D121370 Files: clang/lib/Format/Format.cpp clang/lib/Tooling/Inclusions/HeaderIncludes.cpp clang/unittests/Format/SortIncludesTest.cpp Index: clang/unittests/Format/SortIncludesTest.cpp =================================================================== --- clang/unittests/Format/SortIncludesTest.cpp +++ clang/unittests/Format/SortIncludesTest.cpp @@ -458,6 +458,19 @@ "#include \"b.h\"\n")); } +TEST_F(SortIncludesTest, SupportAtImportLines) { + EXPECT_EQ("#import \"a.h\"\n" + "#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import <d/e.h>\n" + "@import Foundation;\n", + sort("#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import <d/e.h>\n" + "@import Foundation;\n" + "#import \"a.h\"\n")); +} + TEST_F(SortIncludesTest, LeavesMainHeaderFirst) { Style.IncludeIsMainRegex = "([-_](test|unittest))?$"; EXPECT_EQ("#include \"llvm/a.h\"\n" Index: clang/lib/Tooling/Inclusions/HeaderIncludes.cpp =================================================================== --- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -170,11 +170,11 @@ } inline StringRef trimInclude(StringRef IncludeName) { - return IncludeName.trim("\"<>"); + return IncludeName.trim("\"<>;"); } const char IncludeRegexPattern[] = - R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; + R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ \\]*("[^"]+"|<[^>]+>|[^"<>;]+;))"; // The filename of Path excluding extension. // Used to match implementation with headers, this differs from sys::path::stem: Index: clang/lib/Format/Format.cpp =================================================================== --- clang/lib/Format/Format.cpp +++ clang/lib/Format/Format.cpp @@ -2682,7 +2682,7 @@ namespace { const char CppIncludeRegexPattern[] = - R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; + R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ \\]*("[^"]+"|<[^>]+>|[^"<>;]+;))"; } // anonymous namespace @@ -2752,6 +2752,13 @@ if (!FormattingOff && !MergeWithNextLine) { if (IncludeRegex.match(Line, &Matches)) { StringRef IncludeName = Matches[2]; + // HACK(kkleine): Sort C++ module includes/imports that are not enclosed + // in "" or <> as if they are enclosed with <. + if (!IncludeName.startswith("\"") && !IncludeName.startswith("<")) { + IncludeName = + StringRef(Twine("<", IncludeName).concat(Twine(">")).str()); + } + if (Line.contains("/*") && !Line.contains("*/")) { // #include with a start of a block comment, but without the end. // Need to keep all the lines until the end of the comment together.
Index: clang/unittests/Format/SortIncludesTest.cpp =================================================================== --- clang/unittests/Format/SortIncludesTest.cpp +++ clang/unittests/Format/SortIncludesTest.cpp @@ -458,6 +458,19 @@ "#include \"b.h\"\n")); } +TEST_F(SortIncludesTest, SupportAtImportLines) { + EXPECT_EQ("#import \"a.h\"\n" + "#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import <d/e.h>\n" + "@import Foundation;\n", + sort("#import \"b.h\"\n" + "#import \"c.h\"\n" + "#import <d/e.h>\n" + "@import Foundation;\n" + "#import \"a.h\"\n")); +} + TEST_F(SortIncludesTest, LeavesMainHeaderFirst) { Style.IncludeIsMainRegex = "([-_](test|unittest))?$"; EXPECT_EQ("#include \"llvm/a.h\"\n" Index: clang/lib/Tooling/Inclusions/HeaderIncludes.cpp =================================================================== --- clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -170,11 +170,11 @@ } inline StringRef trimInclude(StringRef IncludeName) { - return IncludeName.trim("\"<>"); + return IncludeName.trim("\"<>;"); } const char IncludeRegexPattern[] = - R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; + R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ \\]*("[^"]+"|<[^>]+>|[^"<>;]+;))"; // The filename of Path excluding extension. // Used to match implementation with headers, this differs from sys::path::stem: Index: clang/lib/Format/Format.cpp =================================================================== --- clang/lib/Format/Format.cpp +++ clang/lib/Format/Format.cpp @@ -2682,7 +2682,7 @@ namespace { const char CppIncludeRegexPattern[] = - R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; + R"(^[\t\ ]*[@#]?[\t\ ]*(import|include)[^"<]*[\t\n\ \\]*("[^"]+"|<[^>]+>|[^"<>;]+;))"; } // anonymous namespace @@ -2752,6 +2752,13 @@ if (!FormattingOff && !MergeWithNextLine) { if (IncludeRegex.match(Line, &Matches)) { StringRef IncludeName = Matches[2]; + // HACK(kkleine): Sort C++ module includes/imports that are not enclosed + // in "" or <> as if they are enclosed with <. + if (!IncludeName.startswith("\"") && !IncludeName.startswith("<")) { + IncludeName = + StringRef(Twine("<", IncludeName).concat(Twine(">")).str()); + } + if (Line.contains("/*") && !Line.contains("*/")) { // #include with a start of a block comment, but without the end. // Need to keep all the lines until the end of the comment together.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits