MyDeveloperDay created this revision. MyDeveloperDay added reviewers: bruno, arphaman, klimek, owenpan, mitchell-stellar, dexonsmith. MyDeveloperDay added projects: clang, clang-format, clang-tools-extra.
Review comments on D68767: [clang-format] NFC - Move functionality into functions to help code structure <https://reviews.llvm.org/D68767> asked that this duplicated code in clang-format be moved to one central location, that being SourceManager (where, I assume, it had originally been copied from). Moved the function into the static function ContentCache::getInvalidBOM(...) (the closest class to where it was defined before). Updated clang-format to call this static function. Added unit tests for the new function in BasicTests. Sorry, this is not my normal code area, so I may have the wrong reviewers (but your names were in the recent history). Repository: rC Clang https://reviews.llvm.org/D68914 Files: clang/include/clang/Basic/SourceManager.h clang/lib/Basic/SourceManager.cpp clang/tools/clang-format/ClangFormat.cpp clang/unittests/Basic/SourceManagerTest.cpp
Index: clang/unittests/Basic/SourceManagerTest.cpp =================================================================== --- clang/unittests/Basic/SourceManagerTest.cpp +++ clang/unittests/Basic/SourceManagerTest.cpp @@ -200,6 +200,47 @@ "</mainFile.cpp:1:1, /test-header.h:1:1>"); } +TEST_F(SourceManagerTest, getInvalidBOM) { + ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM(""), nullptr); + ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\x00\x00\x00"), nullptr); + ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("\xFF\xFF\xFF"), nullptr); + ASSERT_EQ(SrcMgr::ContentCache::getInvalidBOM("#include <iostream>"), + nullptr); + + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\xFE\xFF#include <iostream>")), + "UTF-16 (BE)"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\xFF\xFE#include <iostream>")), + "UTF-16 (LE)"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\x2B\x2F\x76#include <iostream>")), + "UTF-7"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\xF7\x64\x4C#include <iostream>")), + "UTF-1"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\xDD\x73\x66\x73#include <iostream>")), + "UTF-EBCDIC"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\x0E\xFE\xFF#include <iostream>")), + "SCSU"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\xFB\xEE\x28#include <iostream>")), + "BOCU-1"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + "\x84\x31\x95\x33#include <iostream>")), + "GB-18030"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + llvm::StringLiteral::withInnerNUL( + "\x00\x00\xFE\xFF#include <iostream>"))), + "UTF-32 (BE)"); + ASSERT_EQ(StringRef(SrcMgr::ContentCache::getInvalidBOM( + llvm::StringLiteral::withInnerNUL( + "\xFF\xFE\x00\x00#include <iostream>"))), + "UTF-32 (LE)"); +} + #if defined(LLVM_ON_UNIX) TEST_F(SourceManagerTest, getMacroArgExpandedLocation) { Index: clang/tools/clang-format/ClangFormat.cpp 
=================================================================== --- clang/tools/clang-format/ClangFormat.cpp +++ clang/tools/clang-format/ClangFormat.cpp @@ -290,31 +290,6 @@ } } -// If BufStr has an invalid BOM, returns the BOM name; otherwise, returns -// nullptr. -static const char *getInValidBOM(StringRef BufStr) { - // Check to see if the buffer has a UTF Byte Order Mark (BOM). - // We only support UTF-8 with and without a BOM right now. See - // https://en.wikipedia.org/wiki/Byte_order_mark#Byte_order_marks_by_encoding - // for more information. - const char *InvalidBOM = - llvm::StringSwitch<const char *>(BufStr) - .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), - "UTF-32 (BE)") - .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), - "UTF-32 (LE)") - .StartsWith("\xFE\xFF", "UTF-16 (BE)") - .StartsWith("\xFF\xFE", "UTF-16 (LE)") - .StartsWith("\x2B\x2F\x76", "UTF-7") - .StartsWith("\xF7\x64\x4C", "UTF-1") - .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") - .StartsWith("\x0E\xFE\xFF", "SCSU") - .StartsWith("\xFB\xEE\x28", "BOCU-1") - .StartsWith("\x84\x31\x95\x33", "GB-18030") - .Default(nullptr); - return InvalidBOM; -} - static bool emitReplacementWarnings(const Replacements &Replaces, StringRef AssumedFileName, const std::unique_ptr<llvm::MemoryBuffer> &Code) { @@ -400,7 +375,7 @@ StringRef BufStr = Code->getBuffer(); - const char *InvalidBOM = getInValidBOM(BufStr); + const char *InvalidBOM = SrcMgr::ContentCache::getInvalidBOM(BufStr); if (InvalidBOM) { errs() << "error: encoding with unsupported byte order mark \"" Index: clang/lib/Basic/SourceManager.cpp =================================================================== --- clang/lib/Basic/SourceManager.cpp +++ clang/lib/Basic/SourceManager.cpp @@ -95,6 +95,29 @@ Buffer.setInt((B && DoNotFree) ? DoNotFreeFlag : 0); } +const char *ContentCache::getInvalidBOM(StringRef BufStr) { + // If the buffer is valid, check to see if it has a UTF Byte Order Mark + // (BOM). 
We only support UTF-8 with and without a BOM right now. See + // http://en.wikipedia.org/wiki/Byte_order_mark for more information. + const char *InvalidBOM = + llvm::StringSwitch<const char *>(BufStr) + .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), + "UTF-32 (BE)") + .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), + "UTF-32 (LE)") + .StartsWith("\xFE\xFF", "UTF-16 (BE)") + .StartsWith("\xFF\xFE", "UTF-16 (LE)") + .StartsWith("\x2B\x2F\x76", "UTF-7") + .StartsWith("\xF7\x64\x4C", "UTF-1") + .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") + .StartsWith("\x0E\xFE\xFF", "SCSU") + .StartsWith("\xFB\xEE\x28", "BOCU-1") + .StartsWith("\x84\x31\x95\x33", "GB-18030") + .Default(nullptr); + + return InvalidBOM; +} + const llvm::MemoryBuffer *ContentCache::getBuffer(DiagnosticsEngine &Diag, FileManager &FM, SourceLocation Loc, @@ -190,20 +213,7 @@ // (BOM). We only support UTF-8 with and without a BOM right now. See // http://en.wikipedia.org/wiki/Byte_order_mark for more information. 
StringRef BufStr = Buffer.getPointer()->getBuffer(); - const char *InvalidBOM = llvm::StringSwitch<const char *>(BufStr) - .StartsWith(llvm::StringLiteral::withInnerNUL("\x00\x00\xFE\xFF"), - "UTF-32 (BE)") - .StartsWith(llvm::StringLiteral::withInnerNUL("\xFF\xFE\x00\x00"), - "UTF-32 (LE)") - .StartsWith("\xFE\xFF", "UTF-16 (BE)") - .StartsWith("\xFF\xFE", "UTF-16 (LE)") - .StartsWith("\x2B\x2F\x76", "UTF-7") - .StartsWith("\xF7\x64\x4C", "UTF-1") - .StartsWith("\xDD\x73\x66\x73", "UTF-EBCDIC") - .StartsWith("\x0E\xFE\xFF", "SCSU") - .StartsWith("\xFB\xEE\x28", "BOCU-1") - .StartsWith("\x84\x31\x95\x33", "GB-18030") - .Default(nullptr); + const char *InvalidBOM = getInvalidBOM(BufStr); if (InvalidBOM) { Diag.Report(Loc, diag::err_unsupported_bom) Index: clang/include/clang/Basic/SourceManager.h =================================================================== --- clang/include/clang/Basic/SourceManager.h +++ clang/include/clang/Basic/SourceManager.h @@ -226,6 +226,10 @@ bool shouldFreeBuffer() const { return (Buffer.getInt() & DoNotFreeFlag) == 0; } + + // If BufStr has an invalid BOM, returns the BOM name; otherwise, returns + // nullptr + static const char *getInvalidBOM(StringRef BufStr); }; // Assert that the \c ContentCache objects will always be 8-byte aligned so
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits