benhamilton created this revision.
benhamilton added reviewers: jolesiak, krasimir.
Herald added subscribers: cfe-commits, klimek.

This improves upon the previous Objective-C header guessing heuristic
from https://reviews.llvm.org/rC320479.

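Now, for `.h` files that are initially detected as C++, we run the lexer
and look for Objective-C keywords and syntax (e.g. `@interface`,
`@implementation`, `@protocol`, `@end`, `@"..."` string literals, and
`@[...]` / `@{...}` / `@<number>` literals). We also look for
`NS_ENUM` / `NS_OPTIONS` and a fixed list of common Foundation type names
such as `NSString` and `NSInteger`.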

Test Plan: make -j12 FormatTests && ./tools/clang/unittests/Format/FormatTests


Repository:
  rC Clang

https://reviews.llvm.org/D42135

Files:
  lib/Format/Format.cpp
  unittests/Format/FormatTest.cpp

Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -11680,6 +11680,74 @@
   llvm::consumeError(Style7.takeError());
 }
 
+TEST(FormatStyle, GetStyle_ObjCHeaderStyleGuesser) {
+  vfs::InMemoryFileSystem FS;
+
+  // Header files with ObjC syntax should override language to ObjC
+  auto Style1 = getStyle("{}", "a.h", "none", "@interface Foo\n@end\n", &FS);
+  ASSERT_TRUE((bool)Style1);
+  ASSERT_EQ(Style1->Language, FormatStyle::LK_ObjC);
+
+  auto Style1Cpp =
+      getStyle("{}", "a.h", "none",
+               "const int interface = 1;\nconst int end = 2\n", &FS);
+  ASSERT_TRUE((bool)Style1Cpp);
+  ASSERT_EQ(Style1Cpp->Language, FormatStyle::LK_Cpp);
+
+  auto Style2 = getStyle("{}", "a.h", "none", "@protocol Foo\n@end\n", &FS);
+  ASSERT_TRUE((bool)Style2);
+  ASSERT_EQ(Style2->Language, FormatStyle::LK_ObjC);
+
+  auto Style2Cpp = getStyle(
+      "{}", "a.h", "none", "const int protocol = 1;\nconst int end = 2\n", &FS);
+  ASSERT_TRUE((bool)Style2Cpp);
+  ASSERT_EQ(Style2Cpp->Language, FormatStyle::LK_Cpp);
+
+  auto Style3 = getStyle("{}", "a.h", "none", "extern NSString *kFoo;\n", &FS);
+  ASSERT_TRUE((bool)Style3);
+  ASSERT_EQ(Style3->Language, FormatStyle::LK_ObjC);
+
+  auto Style4 = getStyle("{}", "a.h", "none",
+                         "typedef NS_ENUM(NSInteger, Foo) {};\n", &FS);
+  ASSERT_TRUE((bool)Style4);
+  ASSERT_EQ(Style4->Language, FormatStyle::LK_ObjC);
+
+  auto Style4Cpp = getStyle("{}", "a.h", "none", "enum Foo {};", &FS);
+  ASSERT_TRUE((bool)Style4Cpp);
+  ASSERT_EQ(Style4Cpp->Language, FormatStyle::LK_Cpp);
+
+  auto Style5 = getStyle("{}", "a.h", "none", "extern NSInteger Foo();\n", &FS);
+  ASSERT_TRUE((bool)Style5);
+  ASSERT_EQ(Style5->Language, FormatStyle::LK_ObjC);
+
+  auto Style6 = getStyle("{}", "a.h", "none",
+                         "inline void Foo() { Log(@\"Foo\"); }\n", &FS);
+  ASSERT_TRUE((bool)Style6);
+  ASSERT_EQ(Style6->Language, FormatStyle::LK_ObjC);
+
+  auto Style6Cpp = getStyle("{}", "a.h", "none",
+                            "inline void Foo() { Log(\"Foo\"); }\n", &FS);
+  ASSERT_TRUE((bool)Style6Cpp);
+  ASSERT_EQ(Style6Cpp->Language, FormatStyle::LK_Cpp);
+
+  auto Style7 = getStyle("{}", "a.h", "none",
+                         "inline void Foo() { id = @[1, 2, 3]; }\n", &FS);
+  ASSERT_TRUE((bool)Style7);
+  ASSERT_EQ(Style7->Language, FormatStyle::LK_ObjC);
+
+  auto Style8 =
+      getStyle("{}", "a.h", "none",
+               "inline void Foo() { id foo = @{1: 2, 3: 4, 5: 6}; }\n", &FS);
+  ASSERT_TRUE((bool)Style8);
+  ASSERT_EQ(Style8->Language, FormatStyle::LK_ObjC);
+
+  auto Style8Cpp =
+      getStyle("{}", "a.h", "none",
+               "inline void Foo() { int foo[] = {1, 2, 3}; }\n", &FS);
+  ASSERT_TRUE((bool)Style8Cpp);
+  ASSERT_EQ(Style8Cpp->Language, FormatStyle::LK_Cpp);
+}
+
 TEST_F(ReplacementTest, FormatCodeAfterReplacements) {
   // Column limit is 20.
   std::string Code = "Type *a =\n"
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -39,6 +39,7 @@
 #include "llvm/Support/YAMLTraits.h"
 #include <algorithm>
 #include <memory>
+#include <set>
 #include <string>
 
 #define DEBUG_TYPE "format-formatter"
@@ -1381,6 +1382,107 @@
   std::set<FormatToken *, FormatTokenLess> DeletedTokens;
 };
 
+class ObjCHeaderStyleGuesser : public TokenAnalyzer {
+public:
+  ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style)
+      : TokenAnalyzer(Env, Style), IsObjC(false) {}
+
+  std::pair<tooling::Replacements, unsigned>
+  analyze(TokenAnnotator &Annotator,
+          SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+          FormatTokenLexer &Tokens) override {
+    assert(Style.Language == FormatStyle::LK_Cpp);
+    guessStyle(AnnotatedLines, Tokens.getKeywords());
+    tooling::Replacements Result;
+    return {Result, 0};
+  }
+
+  bool isObjC() { return IsObjC; }
+
+private:
+  void guessStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
+                  const AdditionalKeywords &Keywords) {
+    static const std::set<StringRef> FoundationIdentifiers = {
+        "CGFloat",
+        "NSAffineTransform",
+        "NSArray",
+        "NSAttributedString",
+        "NSCache",
+        "NSCharacterSet",
+        "NSCountedSet",
+        "NSData",
+        "NSDataDetector",
+        "NSDecimal",
+        "NSDecimalNumber",
+        "NSDictionary",
+        "NSEdgeInsets",
+        "NSHashTable",
+        "NSIndexPath",
+        "NSIndexSet",
+        "NSInteger",
+        "NSLocale",
+        "NSMapTable",
+        "NSMutableArray",
+        "NSMutableAttributedString",
+        "NSMutableCharacterSet",
+        "NSMutableData",
+        "NSMutableDictionary",
+        "NSMutableIndexSet",
+        "NSMutableOrderedSet",
+        "NSMutableSet",
+        "NSMutableString",
+        "NSNumber",
+        "NSNumberFormatter",
+        "NSOrderedSet",
+        "NSPoint",
+        "NSPointerArray",
+        "NSRange",
+        "NSRect",
+        "NSRegularExpression",
+        "NSSet",
+        "NSSize",
+        "NSString",
+        "NSUInteger",
+        "NSURL",
+        "NSURLComponents",
+        "NSURLQueryItem",
+        "NSUUID",
+    };
+
+    for (auto &Line : AnnotatedLines) {
+      for (FormatToken *FormatTok = Line->First->Next; FormatTok;
+           FormatTok = FormatTok->Next) {
+        if (((FormatTok->isObjCAtKeyword(tok::objc_interface) ||
+              FormatTok->isObjCAtKeyword(tok::objc_implementation) ||
+              FormatTok->isObjCAtKeyword(tok::objc_protocol) ||
+              FormatTok->isObjCAtKeyword(tok::objc_end)) &&
+             FormatTok->Previous->is(tok::at)) ||
+            (FormatTok->Tok.isAnyIdentifier() &&
+             FoundationIdentifiers.find(FormatTok->TokenText) !=
+             FoundationIdentifiers.end()) ||
+            FormatTok->is(TT_ObjCStringLiteral) ||
+            (FormatTok->is(tok::numeric_constant) &&
+             FormatTok->Previous->is(tok::at)) ||
+            (FormatTok->is(tok::l_square) &&
+             FormatTok->Previous->is(tok::at)) ||
+            (FormatTok->is(tok::l_brace) && FormatTok->Previous->is(tok::at)) ||
+            FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS) ||
+            FormatTok->is(TT_ObjCBlockLBrace) ||
+            FormatTok->is(TT_ObjCBlockLParen) || FormatTok->is(TT_ObjCDecl) ||
+            FormatTok->is(TT_ObjCForIn) || FormatTok->is(TT_ObjCMethodExpr) ||
+            FormatTok->is(TT_ObjCMethodSpecifier) ||
+            FormatTok->is(TT_ObjCProperty)) {
+          IsObjC = true;
+          return;
+        }
+      }
+    }
+  }
+
+  std::set<StringRef> FoundationIdentifiers;
+  bool IsObjC;
+};
+
 struct IncludeDirective {
   StringRef Filename;
   StringRef Text;
@@ -2166,14 +2268,15 @@
   FormatStyle Style = getLLVMStyle();
   Style.Language = getLanguageByFileName(FileName);
 
-  // This is a very crude detection of whether a header contains ObjC code that
-  // should be improved over time and probably be done on tokens, not one the
-  // bare content of the file.
-  if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h") &&
-      (Code.contains("\n- (") || Code.contains("\n+ (") ||
-       Code.contains("\n@end\n") || Code.contains("\n@end ") ||
-       Code.endswith("@end")))
-    Style.Language = FormatStyle::LK_ObjC;
+  if (Style.Language == FormatStyle::LK_Cpp && FileName.endswith(".h")) {
+    std::unique_ptr<Environment> Env =
+        Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+    ObjCHeaderStyleGuesser Guesser(*Env, Style);
+    Guesser.process();
+    if (Guesser.isObjC()) {
+      Style.Language = FormatStyle::LK_ObjC;
+    }
+  }
 
   FormatStyle FallbackStyle = getNoStyle();
   if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle))
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to