MyDeveloperDay updated this revision to Diff 191649.
MyDeveloperDay added a comment.

Addressing Review Comments

- Remove all non Format only code (i.e. changes to clang/Basic and clang/Lexer 
to support verbatim interpolated strings C# 6)
- For the most part the need to for them is removed by merging of tokens during 
the FormatLexer phase
- There is one obscure failure scenario, but for a first pass I think it can be 
ignored (tests commented out) where a `\` is used unescaped just before a `"` 
(because of paths can be more common than you think)
  - `string s = @"ABC\" + ToString("B");";`

    In this case, the string `@"ABC\"` is seen as `@"ABC\" + ToString("`, 
clang-format then gets confused about where it should put spaces and we end up 
with
  - `string s = @"ABC\" + ToString(" B ");";`

  AnnotatedTokens(L=0):
   M=0 C=0 T=Unknown S=1 B=0 BK=0 P=0 Name=identifier L=6 PPK=2 FakeLParens=2/ 
FakeRParens=0 II=0x252fbe52028 Text='string'
   M=0 C=1 T=StartOfName S=1 B=0 BK=0 P=220 Name=identifier L=8 PPK=2 
FakeLParens= FakeRParens=0 II=0x252fbe522d0 Text='s'
   M=0 C=0 T=BinaryOperator S=1 B=0 BK=0 P=22 Name=equal L=10 PPK=2 
FakeLParens= FakeRParens=0 II=0x0 Text='='
   M=0 C=1 T=Unknown S=1 B=0 BK=0 P=22 Name=string_literal L=31 PPK=2 
FakeLParens= FakeRParens=0 II=0x0 Text='@"ABC\" + ToString("'
   M=0 C=0 T=Unknown S=1 B=0 BK=0 P=23 Name=identifier L=33 PPK=2 FakeLParens= 
FakeRParens=0 II=0x252fbe52300 Text='B'
   M=0 C=1 T=Unknown S=1 B=0 BK=0 P=23 Name=string_literal L=38 PPK=2 
FakeLParens= FakeRParens=1 II=0x0 Text='");"'
   M=0 C=0 T=Unknown S=0 B=0 BK=0 P=23 Name=semi L=39 PPK=2 FakeLParens= 
FakeRParens=0 II=0x0 Text=';'



     


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D58404/new/

https://reviews.llvm.org/D58404

Files:
  clang/docs/ClangFormat.rst
  clang/docs/ClangFormatStyleOptions.rst
  clang/docs/ReleaseNotes.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/Format.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/FormatTokenLexer.cpp
  clang/lib/Format/FormatTokenLexer.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/UnwrappedLineFormatter.cpp
  clang/lib/Format/UnwrappedLineParser.cpp
  clang/tools/clang-format/ClangFormat.cpp
  clang/unittests/Format/CMakeLists.txt
  clang/unittests/Format/FormatTestCSharp.cpp

Index: clang/unittests/Format/FormatTestCSharp.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Format/FormatTestCSharp.cpp
@@ -0,0 +1,184 @@
+//===- unittest/Format/FormatTestCSharp.cpp - Formatting tests for CSharp -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FormatTestUtils.h"
+#include "clang/Format/Format.h"
+#include "llvm/Support/Debug.h"
+#include "gtest/gtest.h"
+
+#define DEBUG_TYPE "format-test"
+
+namespace clang {
+namespace format {
+
+class FormatTestCSharp : public ::testing::Test {
+protected:
+  static std::string format(llvm::StringRef Code, unsigned Offset,
+                            unsigned Length, const FormatStyle &Style) {
+    LLVM_DEBUG(llvm::errs() << "---\n");
+    LLVM_DEBUG(llvm::errs() << Code << "\n\n");
+    std::vector<tooling::Range> Ranges(1, tooling::Range(Offset, Length));
+    tooling::Replacements Replaces = reformat(Style, Code, Ranges);
+    auto Result = applyAllReplacements(Code, Replaces);
+    EXPECT_TRUE(static_cast<bool>(Result));
+    LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
+    return *Result;
+  }
+
+  static std::string
+  format(llvm::StringRef Code,
+         const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_CSharp)) {
+    return format(Code, 0, Code.size(), Style);
+  }
+
+  static FormatStyle getStyleWithColumns(unsigned ColumnLimit) {
+    FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp);
+    Style.ColumnLimit = ColumnLimit;
+    return Style;
+  }
+
+  static void verifyFormat(
+      llvm::StringRef Code,
+      const FormatStyle &Style = getGoogleStyle(FormatStyle::LK_CSharp)) {
+    EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
+    EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
+  }
+};
+
+TEST_F(FormatTestCSharp, CSharpClass) {
+  verifyFormat("public class SomeClass {\n"
+               "  void f() {}\n"
+               "  int g() { return 0; }\n"
+               "  void h() {\n"
+               "    while (true) f();\n"
+               "    for (;;) f();\n"
+               "    if (true) f();\n"
+               "  }\n"
+               "}");
+}
+
+TEST_F(FormatTestCSharp, AccessModifiers) {
+  verifyFormat("public String toString() {}");
+  verifyFormat("private String toString() {}");
+  verifyFormat("protected String toString() {}");
+  verifyFormat("internal String toString() {}");
+
+  verifyFormat("public override String toString() {}");
+  verifyFormat("private override String toString() {}");
+  verifyFormat("protected override String toString() {}");
+  verifyFormat("internal override String toString() {}");
+
+  verifyFormat("internal static String toString() {}");
+}
+
+TEST_F(FormatTestCSharp, NoStringLiteralBreaks) {
+  verifyFormat("foo("
+               "\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+               "aaaaaa\");");
+}
+
+TEST_F(FormatTestCSharp, CSharpVerbatiumStringLiterals) {
+  verifyFormat("foo(@\"aaaaaaaa\\abc\\aaaa\");");
+  // @"ABC\" + ToString("B") - handle embedded \ in literal string at
+  // the end
+  //
+  /*
+   * After removal of Lexer change we are currently not able
+   * To handle these cases
+   verifyFormat("string s = @\"ABC\\\" + ToString(\"B\");");
+   verifyFormat("string s = @\"ABC\"\"DEF\"\"GHI\"");
+   verifyFormat("string s = @\"ABC\"\"DEF\"\"\"");
+   verifyFormat("string s = @\"ABC\"\"DEF\"\"\" + abc");
+  */
+}
+
+TEST_F(FormatTestCSharp, CSharpInterpolatedStringLiterals) {
+  verifyFormat("foo($\"aaaaaaaa{aaa}aaaa\");");
+  verifyFormat("foo($\"aaaa{A}\");");
+  verifyFormat(
+      "foo($\"aaaa{A}"
+      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\");");
+  verifyFormat("Name = $\"{firstName} {lastName}\";");
+
+  // $"ABC\" + ToString("B") - handle embedded \ in literal string at
+  // the end
+  verifyFormat("string s = $\"A{abc}BC\" + ToString(\"B\");");
+  verifyFormat("$\"{domain}\\\\{user}\"");
+  verifyFormat(
+      "var verbatimInterpolated = $@\"C:\\Users\\{userName}\\Documents\\\";");
+}
+
+TEST_F(FormatTestCSharp, CSharpFatArrows) {
+  verifyFormat("Task serverTask = Task.Run(async() => {");
+  verifyFormat("public override string ToString() => \"{Name}\\{Age}\";");
+}
+
+TEST_F(FormatTestCSharp, CSharpNullConditional) {
+  verifyFormat(
+      "public Person(string firstName, string lastName, int? age=null)");
+
+  verifyFormat("switch(args?.Length)");
+
+  verifyFormat("public static void Main(string[] args) { string dirPath "
+               "= args?[0]; }");
+}
+
+TEST_F(FormatTestCSharp, Attributes) {
+  verifyFormat("[STAThread]\n"
+               "static void\n"
+               "Main(string[] args) {}");
+
+  verifyFormat("[TestMethod]\n"
+               "private class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "protected class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "internal class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "[DeploymentItem(\"Test.txt\")]\n"
+               "public class Test {}");
+
+  verifyFormat("[System.AttributeUsage(System.AttributeTargets.Method)]\n"
+               "[System.Runtime.InteropServices.ComVisible(true)]\n"
+               "public sealed class STAThreadAttribute : Attribute {}");
+
+  verifyFormat("[Verb(\"start\", HelpText = \"Starts the server listening on "
+               "provided port\")]\n"
+               "class Test {}");
+
+  verifyFormat("[TestMethod]\n"
+               "public string Host {\n  set;\n  get;\n}");
+
+  verifyFormat("[TestMethod(\"start\", HelpText = \"Starts the server "
+               "listening on provided host\")]\n"
+               "public string Host {\n  set;\n  get;\n}");
+}
+
+TEST_F(FormatTestCSharp, CSharpRegions) {
+  verifyFormat("#region aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa aaaaaaaaaaaaaaaaa "
+               "aaaaaaaaaaaaaaa long region");
+}
+
+TEST_F(FormatTestCSharp, CSharpKeyWordEscaping) {
+  verifyFormat("public enum var { none, @string, bool, @enum }");
+}
+
+TEST_F(FormatTestCSharp, CSharpNullCoalescing) {
+  verifyFormat("var test = ABC ?? DEF");
+  verifyFormat("string myname = name ?? \"ABC\";");
+  verifyFormat("return _name ?? \"DEF\";");
+}
+
+} // namespace format
+} // end namespace clang
Index: clang/unittests/Format/CMakeLists.txt
===================================================================
--- clang/unittests/Format/CMakeLists.txt
+++ clang/unittests/Format/CMakeLists.txt
@@ -6,6 +6,7 @@
   CleanupTest.cpp
   FormatTest.cpp
   FormatTestComments.cpp
+  FormatTestCSharp.cpp
   FormatTestJS.cpp
   FormatTestJava.cpp
   FormatTestObjC.cpp
Index: clang/tools/clang-format/ClangFormat.cpp
===================================================================
--- clang/tools/clang-format/ClangFormat.cpp
+++ clang/tools/clang-format/ClangFormat.cpp
@@ -345,7 +345,7 @@
   cl::SetVersionPrinter(PrintVersion);
   cl::ParseCommandLineOptions(
       argc, argv,
-      "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf code.\n\n"
+      "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n"
       "If no arguments are specified, it formats the code from standard input\n"
       "and writes the result to the standard output.\n"
       "If <file>s are given, it reformats the files. If -i is specified\n"
Index: clang/lib/Format/UnwrappedLineParser.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineParser.cpp
+++ clang/lib/Format/UnwrappedLineParser.cpp
@@ -1000,7 +1000,7 @@
   case tok::kw_protected:
   case tok::kw_private:
     if (Style.Language == FormatStyle::LK_Java ||
-        Style.Language == FormatStyle::LK_JavaScript)
+        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
       nextToken();
     else
       parseAccessSpecifier();
@@ -1214,9 +1214,9 @@
       // parseRecord falls through and does not yet add an unwrapped line as a
       // record declaration or definition can start a structural element.
       parseRecord();
-      // This does not apply for Java and JavaScript.
+      // This does not apply for Java, JavaScript and C#.
       if (Style.Language == FormatStyle::LK_Java ||
-          Style.Language == FormatStyle::LK_JavaScript) {
+          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
         if (FormatTok->is(tok::semi))
           nextToken();
         addUnwrappedLine();
Index: clang/lib/Format/UnwrappedLineFormatter.cpp
===================================================================
--- clang/lib/Format/UnwrappedLineFormatter.cpp
+++ clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -94,7 +94,7 @@
   /// characters to the left from their level.
   int getIndentOffset(const FormatToken &RootToken) {
     if (Style.Language == FormatStyle::LK_Java ||
-        Style.Language == FormatStyle::LK_JavaScript)
+        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
       return 0;
     if (RootToken.isAccessSpecifier(false) ||
         RootToken.isObjCAccessSpecifier() ||
@@ -1073,7 +1073,9 @@
           TheLine.Last->TotalLength + Indent <= ColumnLimit ||
           (TheLine.Type == LT_ImportStatement &&
            (Style.Language != FormatStyle::LK_JavaScript ||
-            !Style.JavaScriptWrapImports));
+            !Style.JavaScriptWrapImports)) ||
+          (Style.isCSharp() &&
+           TheLine.InPPDirective); // don't split #regions in C#
       if (Style.ColumnLimit == 0)
         NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
             .formatLine(TheLine, NextStartColumn + Indent,
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -298,6 +298,8 @@
           CurrentToken->Type = TT_JavaAnnotation;
         if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation))
           CurrentToken->Type = TT_LeadingJavaAnnotation;
+        if (Left->Previous && Left->Previous->is(TT_AttributeSquare))
+          CurrentToken->Type = TT_AttributeSquare;
 
         if (!HasMultipleLines)
           Left->PackingKind = PPK_Inconclusive;
@@ -348,6 +350,40 @@
     return false;
   }
 
+  bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
+    if (!Style.isCSharp())
+      return false;
+
+    const FormatToken *AttrTok = Tok.Next;
+    if (!AttrTok)
+      return false;
+
+    // Just an empty declaration e.g. string [].
+    if (AttrTok->is(tok::r_square))
+      return false;
+
+    // Move along the tokens inbetween the '[' and ']' e.g. [STAThread]
+    while (AttrTok && AttrTok->isNot(tok::r_square)) {
+      AttrTok = AttrTok->Next;
+    }
+
+    if (!AttrTok)
+      return false;
+
+    // move past the end of ']'
+    AttrTok = AttrTok->Next;
+    if (!AttrTok)
+      return false;
+
+    // limit this to being an access modifier that follows
+    if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
+                         tok::kw_class, tok::kw_static, tok::l_square,
+                         Keywords.kw_internal)) {
+      return true;
+    }
+    return false;
+  }
+
   bool isCpp11AttributeSpecifier(const FormatToken &Tok) {
     if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square))
       return false;
@@ -398,6 +434,11 @@
     bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) ||
                                      Contexts.back().InCpp11AttributeSpecifier;
 
+    // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
+    bool IsCSharp11AttributeSpecifier =
+        isCSharpAttributeSpecifier(*Left) ||
+        Contexts.back().InCSharpAttributeSpecifier;
+
     bool InsideInlineASM = Line.startsWith(tok::kw_asm);
     bool StartsObjCMethodExpr =
         !InsideInlineASM && !CppArrayTemplates && Style.isCpp() &&
@@ -478,6 +519,8 @@
                                  // Should only be relevant to JavaScript:
                                  tok::kw_default)) {
         Left->Type = TT_ArrayInitializerLSquare;
+      } else if (IsCSharp11AttributeSpecifier) {
+        Left->Type = TT_AttributeSquare;
       } else {
         BindingIncrease = 10;
         Left->Type = TT_ArraySubscriptLSquare;
@@ -492,11 +535,14 @@
 
     Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
     Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
+    Contexts.back().InCSharpAttributeSpecifier = IsCSharp11AttributeSpecifier;
 
     while (CurrentToken) {
       if (CurrentToken->is(tok::r_square)) {
         if (IsCpp11AttributeSpecifier)
           CurrentToken->Type = TT_AttributeSquare;
+        if (IsCSharp11AttributeSpecifier)
+          CurrentToken->Type = TT_AttributeSquare;
         else if (((CurrentToken->Next &&
                    CurrentToken->Next->is(tok::l_paren)) ||
                   (CurrentToken->Previous &&
@@ -1193,6 +1239,7 @@
     bool CaretFound = false;
     bool IsForEachMacro = false;
     bool InCpp11AttributeSpecifier = false;
+    bool InCSharpAttributeSpecifier = false;
   };
 
   /// Puts a new \c Context onto the stack \c Contexts for the lifetime
@@ -2630,7 +2677,7 @@
     // and "%d %d"
     if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
       return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin();
-  } else if (Style.Language == FormatStyle::LK_JavaScript) {
+  } else if (Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
     if (Left.is(TT_JsFatArrow))
       return true;
     // for await ( ...
@@ -2977,6 +3024,14 @@
   if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine)
     return true;
 
+  // Put multiple C# attributes on a new line.
+  if (Style.isCSharp() &&
+      ((Left.is(TT_AttributeSquare) && Left.is(tok::r_square)) ||
+       (Left.is(tok::r_square) && Right.is(TT_AttributeSquare) &&
+        Right.is(tok::l_square))))
+    return true;
+
+  // Put multiple Java annotation on a new line.
   if ((Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript) &&
       Left.is(TT_LeadingJavaAnnotation) &&
Index: clang/lib/Format/FormatTokenLexer.h
===================================================================
--- clang/lib/Format/FormatTokenLexer.h
+++ clang/lib/Format/FormatTokenLexer.h
@@ -49,6 +49,10 @@
   bool tryMergeLessLess();
   bool tryMergeNSStringLiteral();
   bool tryMergeJSPrivateIdentifier();
+  bool tryMergeCSharpVerbatimStringLiteral();
+  bool tryMergeCSharpKeywordVariables();
+  bool tryMergeCSharpNullConditionals();
+  bool tryMergeCSharpDoubleQuestion();
 
   bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
 
Index: clang/lib/Format/FormatTokenLexer.cpp
===================================================================
--- clang/lib/Format/FormatTokenLexer.cpp
+++ clang/lib/Format/FormatTokenLexer.cpp
@@ -66,6 +66,21 @@
     return;
   if (tryMergeLessLess())
     return;
+
+  if (Style.isCSharp()) {
+    if (tryMergeCSharpKeywordVariables())
+      return;
+    if (tryMergeCSharpVerbatimStringLiteral())
+      return;
+    if (tryMergeCSharpDoubleQuestion())
+      return;
+    if (tryMergeCSharpNullConditionals())
+      return;
+    static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater};
+    if (tryMergeTokens(JSRightArrow, TT_JsFatArrow))
+      return;
+  }
+
   if (tryMergeNSStringLiteral())
     return;
 
@@ -142,6 +157,100 @@
   return true;
 }
 
+// Search for verbatim or interpolated string literals @"ABC" or
+// $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to
+// prevent splitting of @, $ and ".
+bool FormatTokenLexer::tryMergeCSharpVerbatimStringLiteral() {
+  if (Tokens.size() < 2)
+    return false;
+  auto &At = *(Tokens.end() - 2);
+  auto &String = *(Tokens.end() - 1);
+
+  // Look for $"aaaaaa" @"aaaaaa".
+  if (!(At->is(tok::at) || At->TokenText == "$") ||
+      !String->is(tok::string_literal))
+    return false;
+
+  if (Tokens.size() >= 2 && At->is(tok::at)) {
+    auto &Dollar = *(Tokens.end() - 3);
+    if (Dollar->TokenText == "$") {
+      // this looks like $@"aaaaa" so we need to combine all 3
+      Dollar->Tok.setKind(tok::string_literal);
+      Dollar->TokenText =
+          StringRef(Dollar->TokenText.begin(),
+                    String->TokenText.end() - Dollar->TokenText.begin());
+      Dollar->ColumnWidth += (At->ColumnWidth + String->ColumnWidth);
+      Dollar->Type = TT_CSharpStringLiteral;
+      Tokens.erase(Tokens.end() - 2);
+      Tokens.erase(Tokens.end() - 1);
+      return true;
+    }
+  }
+
+  // Convert back into just a string_literal
+  At->Tok.setKind(tok::string_literal);
+  At->TokenText = StringRef(At->TokenText.begin(),
+                            String->TokenText.end() - At->TokenText.begin());
+  At->ColumnWidth += String->ColumnWidth;
+  At->Type = TT_CSharpStringLiteral;
+  Tokens.erase(Tokens.end() - 1);
+  return true;
+}
+
+bool FormatTokenLexer::tryMergeCSharpDoubleQuestion() {
+  if (Tokens.size() < 2)
+    return false;
+  auto &FirstQuestion = *(Tokens.end() - 2);
+  auto &SecondQuestion = *(Tokens.end() - 1);
+  if (!FirstQuestion->is(tok::question) || !SecondQuestion->is(tok::question))
+    return false;
+  FirstQuestion->Tok.setKind(tok::question);
+  FirstQuestion->TokenText = StringRef(FirstQuestion->TokenText.begin(),
+                                       SecondQuestion->TokenText.end() -
+                                           FirstQuestion->TokenText.begin());
+  FirstQuestion->ColumnWidth += SecondQuestion->ColumnWidth;
+  FirstQuestion->Type = TT_CSharpNullCoalescing;
+  Tokens.erase(Tokens.end() - 1);
+  return true;
+}
+
+bool FormatTokenLexer::tryMergeCSharpKeywordVariables() {
+  if (Tokens.size() < 2)
+    return false;
+  auto &At = *(Tokens.end() - 2);
+  auto &Keyword = *(Tokens.end() - 1);
+  if (!At->is(tok::at))
+    return false;
+  if (!Keywords.isCSharpKeyword(*Keyword))
+    return false;
+
+  At->Tok.setKind(tok::identifier);
+  At->TokenText = StringRef(At->TokenText.begin(),
+                            Keyword->TokenText.end() - At->TokenText.begin());
+  At->ColumnWidth += Keyword->ColumnWidth;
+  At->Type = Keyword->Type;
+  Tokens.erase(Tokens.end() - 1);
+  return true;
+}
+
+// in C# merge the Identifier and the ? together  arg?
+bool FormatTokenLexer::tryMergeCSharpNullConditionals() {
+  if (Tokens.size() < 2)
+    return false;
+  auto &Identifier = *(Tokens.end() - 2);
+  auto &Question = *(Tokens.end() - 1);
+  if (!Identifier->isOneOf(tok::r_square, tok::identifier) ||
+      !Question->is(tok::question))
+    return false;
+  Identifier->TokenText =
+      StringRef(Identifier->TokenText.begin(),
+                Question->TokenText.end() - Identifier->TokenText.begin());
+  Identifier->ColumnWidth += Question->ColumnWidth;
+  Identifier->Type = Identifier->Type;
+  Tokens.erase(Tokens.end() - 1);
+  return true;
+}
+
 bool FormatTokenLexer::tryMergeLessLess() {
   // Merge X,less,less,Y into X,lessless,Y unless X or Y is less.
   if (Tokens.size() < 3)
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -96,6 +96,8 @@
   TYPE(TrailingReturnArrow)                                                    \
   TYPE(TrailingUnaryOperator)                                                  \
   TYPE(UnaryOperator)                                                          \
+  TYPE(CSharpStringLiteral)                                                    \
+  TYPE(CSharpNullCoalescing)                                                   \
   TYPE(Unknown)
 
 enum TokenType {
@@ -723,7 +725,36 @@
     kw_slots = &IdentTable.get("slots");
     kw_qslots = &IdentTable.get("Q_SLOTS");
 
-    // Keep this at the end of the constructor to make sure everything here is
+    // C# keywords
+    kw_dollar = &IdentTable.get("dollar");
+    kw_base = &IdentTable.get("base");
+    kw_byte = &IdentTable.get("byte");
+    kw_checked = &IdentTable.get("checked");
+    kw_decimal = &IdentTable.get("decimal");
+    kw_delegate = &IdentTable.get("delegate");
+    kw_event = &IdentTable.get("event");
+    kw_fixed = &IdentTable.get("fixed");
+    kw_foreach = &IdentTable.get("foreach");
+    kw_implicit = &IdentTable.get("implicit");
+    kw_internal = &IdentTable.get("internal");
+    kw_lock = &IdentTable.get("lock");
+    kw_null = &IdentTable.get("null");
+    kw_object = &IdentTable.get("object");
+    kw_out = &IdentTable.get("out");
+    kw_params = &IdentTable.get("params");
+    kw_ref = &IdentTable.get("ref");
+    kw_string = &IdentTable.get("string");
+    kw_stackalloc = &IdentTable.get("stackalloc");
+    kw_sbyte = &IdentTable.get("sbyte");
+    kw_sealed = &IdentTable.get("sealed");
+    kw_uint = &IdentTable.get("uint");
+    kw_ulong = &IdentTable.get("ulong");
+    kw_unchecked = &IdentTable.get("unchecked");
+    kw_unsafe = &IdentTable.get("unsafe");
+    kw_ushort = &IdentTable.get("ushort");
+
+    // Keep this at the end of the constructor to make sure everything here
+    // is
     // already initialized.
     JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
         {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
@@ -731,6 +762,19 @@
          kw_set, kw_type, kw_typeof, kw_var, kw_yield,
          // Keywords from the Java section.
          kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
+
+    CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>(
+        {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event,
+         kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal,
+         kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params,
+         kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed,
+         kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort,
+         // Keywords from the JavaScript section.
+         kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
+         kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
+         kw_set, kw_type, kw_typeof, kw_var, kw_yield,
+         // Keywords from the Java section.
+         kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
   }
 
   // Context sensitive keywords.
@@ -796,6 +840,37 @@
   IdentifierInfo *kw_slots;
   IdentifierInfo *kw_qslots;
 
+  // C# keywords
+  IdentifierInfo *kw_dollar;
+  IdentifierInfo *kw_base;
+  IdentifierInfo *kw_byte;
+  IdentifierInfo *kw_checked;
+  IdentifierInfo *kw_decimal;
+  IdentifierInfo *kw_delegate;
+  IdentifierInfo *kw_event;
+  IdentifierInfo *kw_fixed;
+  IdentifierInfo *kw_foreach;
+  IdentifierInfo *kw_implicit;
+  IdentifierInfo *kw_internal;
+
+  IdentifierInfo *kw_lock;
+  IdentifierInfo *kw_null;
+  IdentifierInfo *kw_object;
+  IdentifierInfo *kw_out;
+
+  IdentifierInfo *kw_params;
+
+  IdentifierInfo *kw_ref;
+  IdentifierInfo *kw_string;
+  IdentifierInfo *kw_stackalloc;
+  IdentifierInfo *kw_sbyte;
+  IdentifierInfo *kw_sealed;
+  IdentifierInfo *kw_uint;
+  IdentifierInfo *kw_ulong;
+  IdentifierInfo *kw_unchecked;
+  IdentifierInfo *kw_unsafe;
+  IdentifierInfo *kw_ushort;
+
   /// Returns \c true if \p Tok is a true JavaScript identifier, returns
   /// \c false if it is a keyword or a pseudo keyword.
   bool IsJavaScriptIdentifier(const FormatToken &Tok) const {
@@ -804,9 +879,68 @@
                JsExtraKeywords.end();
   }
 
+  /// Returns \c true if \p Tok is a C# keyword, returns
+  /// \c false if it is a anything else.
+  bool isCSharpKeyword(const FormatToken &Tok) const {
+    switch (Tok.Tok.getKind()) {
+    case tok::kw_bool:
+    case tok::kw_break:
+    case tok::kw_case:
+    case tok::kw_catch:
+    case tok::kw_char:
+    case tok::kw_class:
+    case tok::kw_const:
+    case tok::kw_continue:
+    case tok::kw_default:
+    case tok::kw_do:
+    case tok::kw_double:
+    case tok::kw_else:
+    case tok::kw_enum:
+    case tok::kw_explicit:
+    case tok::kw_extern:
+    case tok::kw_false:
+    case tok::kw_float:
+    case tok::kw_for:
+    case tok::kw_goto:
+    case tok::kw_if:
+    case tok::kw_int:
+    case tok::kw_long:
+    case tok::kw_namespace:
+    case tok::kw_new:
+    case tok::kw_operator:
+    case tok::kw_private:
+    case tok::kw_protected:
+    case tok::kw_public:
+    case tok::kw_return:
+    case tok::kw_short:
+    case tok::kw_sizeof:
+    case tok::kw_static:
+    case tok::kw_struct:
+    case tok::kw_switch:
+    case tok::kw_this:
+    case tok::kw_throw:
+    case tok::kw_true:
+    case tok::kw_try:
+    case tok::kw_typeof:
+    case tok::kw_using:
+    case tok::kw_virtual:
+    case tok::kw_void:
+    case tok::kw_volatile:
+    case tok::kw_while:
+      return true;
+    default:
+      return Tok.is(tok::identifier) &&
+             CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
+                 CSharpExtraKeywords.end();
+    }
+  }
+
 private:
   /// The JavaScript keywords beyond the C++ keyword set.
   std::unordered_set<IdentifierInfo *> JsExtraKeywords;
+
+  /// The C# keywords beyond the C++ keyword set
+  std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
 };
 
 } // namespace format
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -61,6 +61,7 @@
     IO.enumCase(Value, "Proto", FormatStyle::LK_Proto);
     IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen);
     IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto);
+    IO.enumCase(Value, "CSharp", FormatStyle::LK_CSharp);
   }
 };
 
@@ -287,8 +288,8 @@
     IO.mapOptional("Language", Style.Language);
 
     if (IO.outputting()) {
-      StringRef StylesArray[] = {"LLVM",    "Google", "Chromium",
-                                 "Mozilla", "WebKit", "GNU"};
+      StringRef StylesArray[] = {"LLVM",   "Google", "Chromium", "Mozilla",
+                                 "WebKit", "GNU",    "Microsoft"};
       ArrayRef<StringRef> Styles(StylesArray);
       for (size_t i = 0, e = Styles.size(); i < e; ++i) {
         StringRef StyleName(Styles[i]);
@@ -951,6 +952,32 @@
   return Style;
 }
 
+FormatStyle getMicrosoftStyle(FormatStyle::LanguageKind Language) {
+  FormatStyle Style = getLLVMStyle();
+  Style.ColumnLimit = 120;
+  Style.TabWidth = 4;
+  Style.IndentWidth = 4;
+  Style.UseTab = FormatStyle::UT_Never;
+  Style.BreakBeforeBraces = FormatStyle::BS_Custom;
+  Style.BraceWrapping.AfterClass = true;
+  Style.BraceWrapping.AfterControlStatement = true;
+  Style.BraceWrapping.AfterEnum = true;
+  Style.BraceWrapping.AfterFunction = true;
+  Style.BraceWrapping.AfterNamespace = true;
+  Style.BraceWrapping.AfterObjCDeclaration = true;
+  Style.BraceWrapping.AfterStruct = true;
+  Style.BraceWrapping.AfterExternBlock = true;
+  Style.BraceWrapping.BeforeCatch = true;
+  Style.BraceWrapping.BeforeElse = true;
+  Style.PenaltyReturnTypeOnItsOwnLine = 1000;
+  Style.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None;
+  Style.AllowShortBlocksOnASingleLine = false;
+  Style.AllowShortCaseLabelsOnASingleLine = false;
+  Style.AllowShortIfStatementsOnASingleLine = FormatStyle::SIS_Never;
+  Style.AllowShortLoopsOnASingleLine = false;
+  return Style;
+}
+
 FormatStyle getNoStyle() {
   FormatStyle NoStyle = getLLVMStyle();
   NoStyle.DisableFormat = true;
@@ -973,6 +1000,8 @@
     *Style = getWebKitStyle();
   } else if (Name.equals_lower("gnu")) {
     *Style = getGNUStyle();
+  } else if (Name.equals_lower("microsoft")) {
+    *Style = getMicrosoftStyle(Language);
   } else if (Name.equals_lower("none")) {
     *Style = getNoStyle();
   } else {
@@ -2323,6 +2352,8 @@
     return FormatStyle::LK_TextProto;
   if (FileName.endswith_lower(".td"))
     return FormatStyle::LK_TableGen;
+  if (FileName.endswith_lower(".cs"))
+    return FormatStyle::LK_CSharp;
   return FormatStyle::LK_Cpp;
 }
 
Index: clang/lib/Format/ContinuationIndenter.cpp
===================================================================
--- clang/lib/Format/ContinuationIndenter.cpp
+++ clang/lib/Format/ContinuationIndenter.cpp
@@ -418,7 +418,8 @@
   if (Style.AlwaysBreakBeforeMultilineStrings &&
       (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth ||
        Previous.is(tok::comma) || Current.NestingLevel < 2) &&
-      !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) &&
+      !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at,
+                        Keywords.kw_dollar) &&
       !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) &&
       nextIsMultilineString(State))
     return true;
@@ -1158,6 +1159,8 @@
 
   if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0)
     State.StartOfStringLiteral = State.Column + 1;
+  if (Current.is(TT_CSharpStringLiteral) && State.StartOfStringLiteral == 0)
+    State.StartOfStringLiteral = State.Column + 1;
   else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0)
     State.StartOfStringLiteral = State.Column;
   else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) &&
@@ -1711,10 +1714,11 @@
                                            LineState &State, bool AllowBreak) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
   if (Current.isStringLiteral()) {
-    // FIXME: String literal breaking is currently disabled for Java and JS, as
-    // it requires strings to be merged using "+" which we don't support.
+    // FIXME: String literal breaking is currently disabled for C#,Java and
+    // JavaScript, as it requires strings to be merged using "+" which we
+    // don't support.
     if (Style.Language == FormatStyle::LK_Java ||
-        Style.Language == FormatStyle::LK_JavaScript ||
+        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp() ||
         !Style.BreakStringLiterals || !AllowBreak)
       return nullptr;
 
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -1258,6 +1258,8 @@
     LK_None,
     /// Should be used for C, C++.
     LK_Cpp,
+    /// Should be used for C#.
+    LK_CSharp,
     /// Should be used for Java.
     LK_Java,
     /// Should be used for JavaScript.
@@ -1274,6 +1276,7 @@
     LK_TextProto
   };
   bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; }
+  bool isCSharp() const { return Language == LK_CSharp; }
 
   /// Language, this format style is targeted at.
   LanguageKind Language;
@@ -2090,6 +2093,8 @@
   switch (Language) {
   case FormatStyle::LK_Cpp:
     return "C++";
+  case FormatStyle::LK_CSharp:
+    return "CSharp";
   case FormatStyle::LK_ObjC:
     return "Objective-C";
   case FormatStyle::LK_Java:
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -165,7 +165,8 @@
 AST Matchers
 ------------
 
-- ...
+- Add language support for clang-formatting C# files
+- Add Microsoft coding style to encapsulate default C# formatting style
 
 clang-format
 ------------
Index: clang/docs/ClangFormatStyleOptions.rst
===================================================================
--- clang/docs/ClangFormatStyleOptions.rst
+++ clang/docs/ClangFormatStyleOptions.rst
@@ -7,8 +7,8 @@
 
 When using :program:`clang-format` command line utility or
 ``clang::format::reformat(...)`` functions from code, one can either use one of
-the predefined styles (LLVM, Google, Chromium, Mozilla, WebKit) or create a
-custom style by configuring specific style options.
+the predefined styles (LLVM, Google, Chromium, Mozilla, WebKit, Microsoft) or
+create a custom style by configuring specific style options.
 
 
 Configuring Style with clang-format
@@ -68,6 +68,10 @@
   Language: Proto
   # Don't format .proto files.
   DisableFormat: true
+  ---
+  Language: CSharp
+  # Use 100 columns for C#.
+  ColumnLimit: 100
   ...
 
 An easy way to get a valid ``.clang-format`` file containing all configuration
@@ -144,6 +148,9 @@
   * ``WebKit``
     A style complying with `WebKit's style guide
     <https://www.webkit.org/coding/coding-style.html>`_
+  * ``Microsoft``
+    A style complying with `Microsoft's style guide
+    <https://docs.microsoft.com/en-us/visualstudio/ide/editorconfig-code-style-settings-reference?view=vs-2017>'_
 
 .. START_FORMAT_STYLE_OPTIONS
 
@@ -1552,6 +1559,9 @@
   * ``LK_Cpp`` (in configuration: ``Cpp``)
     Should be used for C, C++.
 
+  * ``LK_CSharp`` (in configuration: ``CSharp``)
+    Should be used for C#.
+
   * ``LK_Java`` (in configuration: ``Java``)
     Should be used for Java.
 
Index: clang/docs/ClangFormat.rst
===================================================================
--- clang/docs/ClangFormat.rst
+++ clang/docs/ClangFormat.rst
@@ -11,12 +11,12 @@
 ===============
 
 :program:`clang-format` is located in `clang/tools/clang-format` and can be used
-to format C/C++/Java/JavaScript/Objective-C/Protobuf code.
+to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.
 
 .. code-block:: console
 
   $ clang-format -help
-  OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf code.
+  OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.
 
   If no arguments are specified, it formats the code from standard input
   and writes the result to the standard output.
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to