MyDeveloperDay created this revision.
MyDeveloperDay added reviewers: bruno, arphaman, klimek, owenpan, 
mitchell-stellar, dexonsmith.
MyDeveloperDay added projects: clang, clang-format, clang-tools-extra.

Review comments on D68767: [clang-format] NFC - Move functionality into 
functions to help code structure <https://reviews.llvm.org/D68767> asked that 
this duplicated code in clang-format was moved to one central location that 
being SourceManager (where it had originally be copied from I assume)

Moved function into static function  ContentCache::getInvalidBOM(...)  - 
(closest class to where it was defined before)
Updated clang-format to call this static function

Added unit tests for said new function in BasicTests

Sorry not my normal code area so may have the wrong reviewers. (but your names 
were on the recent history)


Repository:
  rC Clang

https://reviews.llvm.org/D68914

Files:
  clang/include/clang/Basic/SourceManager.h
  clang/lib/Basic/SourceManager.cpp
  clang/tools/clang-format/ClangFormat.cpp
  clang/unittests/Basic/SourceManagerTest.cpp

Index: clang/unittests/Basic/SourceManagerTest.cpp
===================================================================
--- clang/unittests/Basic/SourceManagerTest.cpp
+++ clang/unittests/Basic/SourceManagerTest.cpp
@@ -200,6 +200,47 @@
             "</mainFile.cpp:1:1, /test-header.h:1:1>");
 }
 
+TEST_F(SourceManagerTest, getInvalidBOM) {
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM(""), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\x00\x00\x00"), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\xFF\xFF\xFF"), nullptr);
+  ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("#include <iostream>"),
+            nullptr);
+
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFE\xFF#include <iostream>")),
+            "UTF-16 (BE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFF\xFE#include <iostream>")),
+            "UTF-16 (LE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x2B\x2F\x76#include <iostream>")),
+            "UTF-7");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xF7\x64\x4C#include <iostream>")),
+            "UTF-1");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xDD\x73\x66\x73#include <iostream>")),
+            "UTF-EBCDIC");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x0E\xFE\xFF#include <iostream>")),
+            "SCSU");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\xFB\xEE\x28#include <iostream>")),
+            "BOCU-1");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                "\x84\x31\x95\x33#include <iostream>")),
+            "GB-18030");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                llvm::StringLiteral::withInnerNUL(
+                    "\x00\x00\xFE\xFF#include <iostream>"))),
+            "UTF-32 (BE)");
+  ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM(
+                llvm::StringLiteral::withInnerNUL(
+                    "\xFF\xFE\x00\x00#include <iostream>"))),
+            "UTF-32 (LE)");
+}
+
 #if defined(LLVM_ON_UNIX)
 
 TEST_F(SourceManagerTest, getMacroArgExpandedLocation) {
Index: clang/tools/clang-format/ClangFormat.cpp
===================================================================
--- clang/tools/clang-format/ClangFormat.cpp
+++ clang/tools/clang-format/ClangFormat.cpp
@@ -290,31 +290,6 @@
   }
 }
 
-// If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
-// nullptr.
-static const char *getInValidBOM(StringRef BufStr) {
-  // Check to see if the buffer has a UTF Byte Order Mark (BOM).
-  // We only support UTF-8 with and without a BOM right now.  See
-  // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding
-  // for more information.
-  const char *InvalidBOM =
-      llvm::StringSwitch<const char *>(BufStr)
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                      "UTF-32 (BE)")
-          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                      "UTF-32 (LE)")
-          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-          .StartsWith("\x2B\x2F\x76", "UTF-7")
-          .StartsWith("\xF7\x64\x4C", "UTF-1")
-          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-          .StartsWith("\x0E\xFE\xFF", "SCSU")
-          .StartsWith("\xFB\xEE\x28", "BOCU-1")
-          .StartsWith("\x84\x31\x95\x33", "GB-18030")
-          .Default(nullptr);
-  return InvalidBOM;
-}
-
 static bool
 emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName,
                         const std::unique_ptr<llvm::MemoryBuffer> &Code) {
@@ -400,7 +375,7 @@
 
   StringRef BufStr = Code->getBuffer();
 
-  const char *InvalidBOM = getInValidBOM(BufStr);
+  const char *InvalidBOM = SrcMgr::ContentCache::getInvalidBOM(BufStr);
 
   if (InvalidBOM) {
     errs() << "error: encoding with unsupported byte order mark \""
Index: clang/lib/Basic/SourceManager.cpp
===================================================================
--- clang/lib/Basic/SourceManager.cpp
+++ clang/lib/Basic/SourceManager.cpp
@@ -95,6 +95,29 @@
   Buffer.setInt((B && DoNotFree) ? DoNotFreeFlag : 0);
 }
 
+const char *ContentCache::getInvalidBOM(StringRef BufStr) {
+  // If the buffer is valid, check to see if it has a UTF Byte Order Mark
+  // (BOM).  We only support UTF-8 with and without a BOM right now.  See
+  // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
+  const char *InvalidBOM =
+      llvm::StringSwitch<const char *>(BufStr)
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
+                      "UTF-32 (BE)")
+          .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
+                      "UTF-32 (LE)")
+          .StartsWith("\xFE\xFF", "UTF-16 (BE)")
+          .StartsWith("\xFF\xFE", "UTF-16 (LE)")
+          .StartsWith("\x2B\x2F\x76", "UTF-7")
+          .StartsWith("\xF7\x64\x4C", "UTF-1")
+          .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
+          .StartsWith("\x0E\xFE\xFF", "SCSU")
+          .StartsWith("\xFB\xEE\x28", "BOCU-1")
+          .StartsWith("\x84\x31\x95\x33", "GB-18030")
+          .Default(nullptr);
+
+  return InvalidBOM;
+}
+
 const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag,
                                                   FileManager &FM,
                                                   SourceLocation Loc,
@@ -190,20 +213,7 @@
   // (BOM).  We only support UTF-8 with and without a BOM right now.  See
   // http://en.wikipedia.org/wiki/Byte_order_mark for more information.
   StringRef BufStr = Buffer.getPointer()->getBuffer();
-  const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr)
-    .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"),
-                                                  "UTF-32 (BE)")
-    .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"),
-                                                  "UTF-32 (LE)")
-    .StartsWith("\xFE\xFF", "UTF-16 (BE)")
-    .StartsWith("\xFF\xFE", "UTF-16 (LE)")
-    .StartsWith("\x2B\x2F\x76", "UTF-7")
-    .StartsWith("\xF7\x64\x4C", "UTF-1")
-    .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC")
-    .StartsWith("\x0E\xFE\xFF", "SCSU")
-    .StartsWith("\xFB\xEE\x28", "BOCU-1")
-    .StartsWith("\x84\x31\x95\x33", "GB-18030")
-    .Default(nullptr);
+  const char *InvalidBOM = getInvalidBOM(BufStr);
 
   if (InvalidBOM) {
     Diag.Report(Loc, diag::err_unsupported_bom)
Index: clang/include/clang/Basic/SourceManager.h
===================================================================
--- clang/include/clang/Basic/SourceManager.h
+++ clang/include/clang/Basic/SourceManager.h
@@ -226,6 +226,10 @@
     bool shouldFreeBuffer() const {
       return (Buffer.getInt() & DoNotFreeFlag) == 0;
     }
+
+    // If BufStr has an invalid BOM, returns the BOM name; otherwise, returns
+    // nullptr
+    static const char *getInvalidBOM(StringRef BufStr);
   };
 
   // Assert that the \c ContentCache objects will always be 8-byte aligned so
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to