Author: Marek Kurdej Date: 2022-02-02T10:25:24+01:00 New Revision: bc40b76b5b95837e27217de6a446eeeace695f34
URL: https://github.com/llvm/llvm-project/commit/bc40b76b5b95837e27217de6a446eeeace695f34 DIFF: https://github.com/llvm/llvm-project/commit/bc40b76b5b95837e27217de6a446eeeace695f34.diff LOG: [clang-format] Correctly parse C99 digraphs: "<:", ":>", "<%", "%>", "%:", "%:%:". Fixes https://github.com/llvm/llvm-project/issues/31592. This commit enables lexing of digraphs in C++11 and onwards. Enabling them in C++03 is error-prone, as it would unconditionally treat sequences like "<:" as digraphs, even if they are followed by a single colon, e.g. "<::" would be treated as "[:" instead of "<" followed by "::". Lexing in C++11 doesn't have this problem as it looks ahead at the following token. The relevant excerpt from Lexer::LexTokenInternal: ``` // C++0x [lex.pptoken]p3: // Otherwise, if the next three characters are <:: and the subsequent // character is neither : nor >, the < is treated as a preprocessor // token by itself and not as the first character of the alternative // token <:. ``` Also, note that both clang and gcc turn on digraphs by default (-fdigraphs), so clang-format should match this behaviour. Reviewed By: MyDeveloperDay, HazardyKnusperkeks, owenpan Differential Revision: https://reviews.llvm.org/D118706 Added: Modified: clang/lib/Format/Format.cpp clang/unittests/Format/FormatTest.cpp Removed: ################################################################################ diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index dd4755c2227e1..d9952f5d5d6d9 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3242,6 +3242,10 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; LangOpts.CPlusPlus20 = LexingStd >= FormatStyle::LS_Cpp20; LangOpts.Char8 = LexingStd >= FormatStyle::LS_Cpp20; + // Turning on digraphs in standards before C++0x is error-prone, because e.g. + // the sequence "<::" will be unconditionally treated as "[:". + // Cf. 
Lexer::LexTokenInternal. + LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11; LangOpts.LineComment = 1; bool AlternativeOperators = Style.isCpp(); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 005e2d6a7b559..866847a531355 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -24219,6 +24219,16 @@ TEST_F(FormatTest, AlignAfterOpenBracketBlockIndentForStatement) { Style); } +TEST_F(FormatTest, UnderstandsDigraphs) { + verifyFormat("int arr<:5:> = {};"); + verifyFormat("int arr[5] = <%%>;"); + verifyFormat("int arr<:::qualified_variable:> = {};"); + verifyFormat("int arr[::qualified_variable] = <%%>;"); + verifyFormat("%:include <header>"); + verifyFormat("%:define A x##y"); + verifyFormat("#define A x%:%:y"); +} + } // namespace } // namespace format } // namespace clang _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits