rjelonek created this revision.
rjelonek added reviewers: rsmith, rnk.
rjelonek created this object with visibility "All Users".
rjelonek created this object with edit policy "Members of Project: clang".
rjelonek added a project: clang-format.
rjelonek requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

If file contain BOM then first instruction (include or clang-format off) is 
ignored


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D94201

Files:
  clang/lib/Format/Format.cpp
  clang/unittests/Format/SortIncludesTest.cpp


Index: clang/unittests/Format/SortIncludesTest.cpp
===================================================================
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -879,6 +879,42 @@
                  "#include \"a.h\""));
 }
 
+TEST_F(SortIncludesTest, skipUTF8ByteOrderMarkMerge) {
+  Style.IncludeBlocks = Style.IBS_Merge;
+  std::string Code = "\xEF\xBB\xBF#include \"d.h\"\r\n"
+                     "#include \"b.h\"\r\n"
+                     "\r\n"
+                     "#include \"c.h\"\r\n"
+                     "#include \"a.h\"\r\n"
+                     "#include \"e.h\"\r\n";
+
+  std::string Expected = "\xEF\xBB\xBF#include \"e.h\"\r\n"
+                         "#include \"a.h\"\r\n"
+                         "#include \"b.h\"\r\n"
+                         "#include \"c.h\"\r\n"
+                         "#include \"d.h\"\r\n";
+
+  EXPECT_EQ(Expected, sort(Code, "e.cpp", 1));
+}
+
+TEST_F(SortIncludesTest, skipUTF8ByteOrderMarkPreserve) {
+  Style.IncludeBlocks = Style.IBS_Preserve;
+  std::string Code = "\xEF\xBB\xBF#include \"d.h\"\r\n"
+                     "#include \"b.h\"\r\n"
+                     "\r\n"
+                     "#include \"c.h\"\r\n"
+                     "#include \"a.h\"\r\n"
+                     "#include \"e.h\"\r\n";
+
+  std::string Expected = "\xEF\xBB\xBF#include \"b.h\"\r\n"
+                         "#include \"d.h\"\r\n"
+                         "\r\n"
+                         "#include \"a.h\"\r\n"
+                         "#include \"c.h\"\r\n"
+                         "#include \"e.h\"\r\n";
+
+  EXPECT_EQ(Expected, sort(Code, "e.cpp", 2));
+}
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2253,7 +2253,9 @@
                                       StringRef FileName,
                                       tooling::Replacements &Replaces,
                                       unsigned *Cursor) {
-  unsigned Prev = 0;
+  unsigned Prev = llvm::StringSwitch<size_t>(Code)
+                      .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
+                      .Default(0);
   unsigned SearchFrom = 0;
   llvm::Regex IncludeRegex(CppIncludeRegexPattern);
   SmallVector<StringRef, 4> Matches;


Index: clang/unittests/Format/SortIncludesTest.cpp
===================================================================
--- clang/unittests/Format/SortIncludesTest.cpp
+++ clang/unittests/Format/SortIncludesTest.cpp
@@ -879,6 +879,42 @@
                  "#include \"a.h\""));
 }
 
+TEST_F(SortIncludesTest, skipUTF8ByteOrderMarkMerge) {
+  Style.IncludeBlocks = Style.IBS_Merge;
+  std::string Code = "\xEF\xBB\xBF#include \"d.h\"\r\n"
+                     "#include \"b.h\"\r\n"
+                     "\r\n"
+                     "#include \"c.h\"\r\n"
+                     "#include \"a.h\"\r\n"
+                     "#include \"e.h\"\r\n";
+
+  std::string Expected = "\xEF\xBB\xBF#include \"e.h\"\r\n"
+                         "#include \"a.h\"\r\n"
+                         "#include \"b.h\"\r\n"
+                         "#include \"c.h\"\r\n"
+                         "#include \"d.h\"\r\n";
+
+  EXPECT_EQ(Expected, sort(Code, "e.cpp", 1));
+}
+
+TEST_F(SortIncludesTest, skipUTF8ByteOrderMarkPreserve) {
+  Style.IncludeBlocks = Style.IBS_Preserve;
+  std::string Code = "\xEF\xBB\xBF#include \"d.h\"\r\n"
+                     "#include \"b.h\"\r\n"
+                     "\r\n"
+                     "#include \"c.h\"\r\n"
+                     "#include \"a.h\"\r\n"
+                     "#include \"e.h\"\r\n";
+
+  std::string Expected = "\xEF\xBB\xBF#include \"b.h\"\r\n"
+                         "#include \"d.h\"\r\n"
+                         "\r\n"
+                         "#include \"a.h\"\r\n"
+                         "#include \"c.h\"\r\n"
+                         "#include \"e.h\"\r\n";
+
+  EXPECT_EQ(Expected, sort(Code, "e.cpp", 2));
+}
 } // end namespace
 } // end namespace format
 } // end namespace clang
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -2253,7 +2253,9 @@
                                       StringRef FileName,
                                       tooling::Replacements &Replaces,
                                       unsigned *Cursor) {
-  unsigned Prev = 0;
+  unsigned Prev = llvm::StringSwitch<size_t>(Code)
+                      .StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
+                      .Default(0);
   unsigned SearchFrom = 0;
   llvm::Regex IncludeRegex(CppIncludeRegexPattern);
   SmallVector<StringRef, 4> Matches;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to