benhamilton created this revision. benhamilton added reviewers: jolesiak, krasimir. Herald added subscribers: cfe-commits, klimek.
This improves upon the previous Objective-C header-guessing heuristic from https://reviews.llvm.org/rC320479. We now run the lexer on C++ header files and look for Objective-C keywords and syntax. We also look for Foundation types.
Test Plan: make -j12 FormatTests && ./tools/clang/unittests/Format/FormatTests
Repository: rC Clang
https://reviews.llvm.org/D42135
Files: lib/Format/Format.cpp unittests/Format/FormatTest.cpp
Index: unittests/Format/FormatTest.cpp =================================================================== --- unittests/Format/FormatTest.cpp +++ unittests/Format/FormatTest.cpp @@ -11680,6 +11680,74 @@ llvm::consumeError(Style7.takeError()); } +TEST(FormatStyle, GetStyle_ObjCHeaderStyleGuesser) { + vfs::InMemoryFileSystem FS; + + // Header files with ObjC syntax should override language to ObjC + auto Style1 = getStyle("{}", "a.h", "none", "@interface Foo\n@end\n", &FS); + ASSERT_TRUE((bool)Style1); + ASSERT_EQ(Style1->Language, FormatStyle::LK_ObjC); + + auto Style1Cpp = + getStyle("{}", "a.h", "none", + "const int interface = 1;\nconst int end = 2\n", &FS); + ASSERT_TRUE((bool)Style1Cpp); + ASSERT_EQ(Style1Cpp->Language, FormatStyle::LK_Cpp); + + auto Style2 = getStyle("{}", "a.h", "none", "@protocol Foo\n@end\n", &FS); + ASSERT_TRUE((bool)Style2); + ASSERT_EQ(Style2->Language, FormatStyle::LK_ObjC); + + auto Style2Cpp = getStyle( + "{}", "a.h", "none", "const int protocol = 1;\nconst int end = 2\n", &FS); + ASSERT_TRUE((bool)Style2Cpp); + ASSERT_EQ(Style2Cpp->Language, FormatStyle::LK_Cpp); + + auto Style3 = getStyle("{}", "a.h", "none", "extern NSString *kFoo;\n", &FS); + ASSERT_TRUE((bool)Style3); + ASSERT_EQ(Style3->Language, FormatStyle::LK_ObjC); + + auto Style4 = getStyle("{}", "a.h", "none", + "typedef NS_ENUM(NSInteger, Foo) {};\n", &FS); + ASSERT_TRUE((bool)Style4); + ASSERT_EQ(Style4->Language, FormatStyle::LK_ObjC); + + auto Style4Cpp = getStyle("{}", "a.h", "none", "enum Foo {};", &FS); + ASSERT_TRUE((bool)Style4Cpp); + ASSERT_EQ(Style4Cpp->Language, FormatStyle::LK_Cpp); + + auto Style5 = getStyle("{}", "a.h", "none", "extern NSInteger Foo();\n", &FS); + ASSERT_TRUE((bool)Style5); + ASSERT_EQ(Style5->Language, FormatStyle::LK_ObjC); + + auto Style6 = getStyle("{}", "a.h", "none", + "inline void Foo() { Log(@\"Foo\"); }\n", &FS); + ASSERT_TRUE((bool)Style6); + ASSERT_EQ(Style6->Language, FormatStyle::LK_ObjC); + + auto Style6Cpp = getStyle("{}", 
"a.h", "none", + "inline void Foo() { Log(\"Foo\"); }\n", &FS); + ASSERT_TRUE((bool)Style6Cpp); + ASSERT_EQ(Style6Cpp->Language, FormatStyle::LK_Cpp); + + auto Style7 = getStyle("{}", "a.h", "none", + "inline void Foo() { id = @[1, 2, 3]; }\n", &FS); + ASSERT_TRUE((bool)Style7); + ASSERT_EQ(Style7->Language, FormatStyle::LK_ObjC); + + auto Style8 = + getStyle("{}", "a.h", "none", + "inline void Foo() { id foo = @{1: 2, 3: 4, 5: 6}; }\n", &FS); + ASSERT_TRUE((bool)Style8); + ASSERT_EQ(Style8->Language, FormatStyle::LK_ObjC); + + auto Style8Cpp = + getStyle("{}", "a.h", "none", + "inline void Foo() { int foo[] = {1, 2, 3}; }\n", &FS); + ASSERT_TRUE((bool)Style8Cpp); + ASSERT_EQ(Style8Cpp->Language, FormatStyle::LK_Cpp); +} + TEST_F(ReplacementTest, FormatCodeAfterReplacements) { // Column limit is 20. std::string Code = "Type *a =\n" Index: lib/Format/Format.cpp =================================================================== --- lib/Format/Format.cpp +++ lib/Format/Format.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/YAMLTraits.h" #include <algorithm> #include <memory> +#include <set> #include <string> #define DEBUG_TYPE "format-formatter" @@ -1381,6 +1382,107 @@ std::set<FormatToken *, FormatTokenLess> DeletedTokens; }; +class ObjCHeaderStyleGuesser : public TokenAnalyzer { +public: + ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style) + : TokenAnalyzer(Env, Style), IsObjC(false) {} + + std::pair<tooling::Replacements, unsigned> + analyze(TokenAnnotator &Annotator, + SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + FormatTokenLexer &Tokens) override { + assert(Style.Language == FormatStyle::LK_Cpp); + guessStyle(AnnotatedLines, Tokens.getKeywords()); + tooling::Replacements Result; + return {Result, 0}; + } + + bool isObjC() { return IsObjC; } + +private: + void guessStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, + const AdditionalKeywords &Keywords) { + static const std::set<StringRef> FoundationIdentifiers = { + "CGFloat", 
+ "NSAffineTransform", + "NSArray", + "NSAttributedString", + "NSCache", + "NSCharacterSet", + "NSCountedSet", + "NSData", + "NSDataDetector", + "NSDecimal", + "NSDecimalNumber", + "NSDictionary", + "NSEdgeInsets", + "NSHashTable", + "NSIndexPath", + "NSIndexSet", + "NSInteger", + "NSLocale", + "NSMapTable", + "NSMutableArray", + "NSMutableAttributedString", + "NSMutableCharacterSet", + "NSMutableData", + "NSMutableDictionary", + "NSMutableIndexSet", + "NSMutableOrderedSet", + "NSMutableSet", + "NSMutableString", + "NSNumber", + "NSNumberFormatter", + "NSOrderedSet", + "NSPoint", + "NSPointerArray", + "NSRange", + "NSRect", + "NSRegularExpression", + "NSSet", + "NSSize", + "NSString", + "NSUInteger", + "NSURL", + "NSURLComponents", + "NSURLQueryItem", + "NSUUID", + }; + + for (auto &Line : AnnotatedLines) { + for (FormatToken *FormatTok = Line->First->Next; FormatTok; + FormatTok = FormatTok->Next) { + if (((FormatTok->isObjCAtKeyword(tok::objc_interface) || + FormatTok->isObjCAtKeyword(tok::objc_implementation) || + FormatTok->isObjCAtKeyword(tok::objc_protocol) || + FormatTok->isObjCAtKeyword(tok::objc_end)) && + FormatTok->Previous->is(tok::at)) || + (FormatTok->Tok.isAnyIdentifier() && + FoundationIdentifiers.find(FormatTok->TokenText) != + FoundationIdentifiers.end()) || + FormatTok->is(TT_ObjCStringLiteral) || + (FormatTok->is(tok::numeric_constant) && + FormatTok->Previous->is(tok::at)) || + (FormatTok->is(tok::l_square) && + FormatTok->Previous->is(tok::at)) || + (FormatTok->is(tok::l_brace) && FormatTok->Previous->is(tok::at)) || + FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS) || + FormatTok->is(TT_ObjCBlockLBrace) || + FormatTok->is(TT_ObjCBlockLParen) || FormatTok->is(TT_ObjCDecl) || + FormatTok->is(TT_ObjCForIn) || FormatTok->is(TT_ObjCMethodExpr) || + FormatTok->is(TT_ObjCMethodSpecifier) || + FormatTok->is(TT_ObjCProperty)) { + IsObjC = true; + return; + } + } + } + } + + std::set<StringRef> FoundationIdentifiers; + bool IsObjC; +}; 
+ struct IncludeDirective { StringRef Filename; StringRef Text; @@ -2166,14 +2268,15 @@ FormatStyle Style = getLLVMStyle(); Style.Language = getLanguageByFileName(FileName); - // This is a very crude detection of whether a header contains ObjC code that - // should be improved over time and probably be done on tokens, not one the - // bare content of the file. - if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") && - (Code.contains("\n- (") || Code.contains("\n+ (") || - Code.contains("\n@end\n") || Code.contains("\n@end ") || - Code.endswith("@end"))) - Style.Language = FormatStyle::LK_ObjC; + if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h")) { + std::unique_ptr<Environment> Env = + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); + ObjCHeaderStyleGuesser Guesser(*Env, Style); + Guesser.process(); + if (Guesser.isObjC()) { + Style.Language = FormatStyle::LK_ObjC; + } + } FormatStyle FallbackStyle = getNoStyle(); if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits