[PATCH] D126651: [clang-diff] Fix getStmtValue when dealing with wide chars

2022-05-30 Thread Kaining Zhong via Phabricator via cfe-commits
PRESIDENT810 created this revision.
PRESIDENT810 added reviewers: klimek, arphaman, johannes.
Herald added a project: All.
PRESIDENT810 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This fixes https://github.com/llvm/llvm-project/issues/55771.
Directly using StringLiteral::getString on a wide string is not currently
supported; therefore, in ASTDiff, getStmtValue fails the assertion that the
StringLiteral has a width of 1. This patch converts wide strings to UTF-8
strings instead.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D126651

Files:
  clang/lib/Tooling/ASTDiff/ASTDiff.cpp
  clang/test/Tooling/clang-diff-ast.cpp


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -51,6 +51,12 @@
 return 0;
   }
 
+  // CHECK: CXXMethodDecl: :bar(const wchar_t *()
+  const wchar_t *bar() {
+// CHECK: StringLiteral: bar(
+return L"bar";
+  }
+
   // CHECK: AccessSpecDecl: public(
 public:
   int not_initialized;
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,19 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide()) {
+  unsigned int wsize = String->getByteLength() / String->getCharByteWidth();
+  const wchar_t *temp =
+  reinterpret_cast(String->getBytes().data());
+  std::wstring wstr(temp);
+  std::string str;
+  if (!convertWideToUTF8(wstr.substr(0, wsize), str))
+return "";
+  return str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -51,6 +51,12 @@
 return 0;
   }
 
+  // CHECK: CXXMethodDecl: :bar(const wchar_t *()
+  const wchar_t *bar() {
+// CHECK: StringLiteral: bar(
+return L"bar";
+  }
+
   // CHECK: AccessSpecDecl: public(
 public:
   int not_initialized;
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,19 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide()) {
+  unsigned int wsize = String->getByteLength() / String->getCharByteWidth();
+  const wchar_t *temp =
+  reinterpret_cast(String->getBytes().data());
+  std::wstring wstr(temp);
+  std::string str;
+  if (!convertWideToUTF8(wstr.substr(0, wsize), str))
+return "";
+  return str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126651: [clang-diff] Fix getStmtValue when dealing with wide chars

2022-06-01 Thread Kaining Zhong via Phabricator via cfe-commits
PRESIDENT810 updated this revision to Diff 433375.
PRESIDENT810 added a comment.

Refactored some code and added support for UTF-16 and UTF-32 string literals,
as well as tests for them.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126651/new/

https://reviews.llvm.org/D126651

Files:
  clang/lib/Tooling/ASTDiff/ASTDiff.cpp
  clang/test/Tooling/clang-diff-ast.cpp


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -51,6 +51,30 @@
 return 0;
   }
 
+  const wchar_t *fooWide(int i) {
+if (i == 0)
+  // CHECK: StringLiteral: foo(
+  return L"foo";
+// CHECK-NOT: ImplicitCastExpr
+return 0;
+  }
+
+  const char16_t *fooU16(int i) {
+if (i == 0)
+  // CHECK: StringLiteral: foo(
+  return u"foo";
+// CHECK-NOT: ImplicitCastExpr
+return 0;
+  }
+
+  const char32_t *fooU32(int i) {
+if (i == 0)
+  // CHECK: StringLiteral: foo(
+  return U"foo";
+// CHECK-NOT: ImplicitCastExpr
+return 0;
+  }
+
   // CHECK: AccessSpecDecl: public(
 public:
   int not_initialized;
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,39 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide()) {
+  unsigned int wsize = String->getByteLength() / String->getCharByteWidth();
+  const auto *temp =
+  reinterpret_cast(String->getBytes().data());
+  std::wstring wstr(temp, wsize);
+  std::string str;
+  if (!convertWideToUTF8(wstr, str))
+return "";
+  return str;
+}
+if (String->isUTF16()) {
+  unsigned int usize = String->getByteLength() / String->getCharByteWidth();
+  const auto *temp =
+  reinterpret_cast(String->getBytes().data());
+  ArrayRef u16str(temp, usize);
+  std::string str;
+  if (!convertUTF16ToUTF8String(u16str, str))
+return "";
+  return str;
+}
+if (String->isUTF32()) {
+  unsigned int usize = String->getByteLength() / String->getCharByteWidth();
+  const auto *temp =
+  reinterpret_cast(String->getBytes().data());
+  ArrayRef u32str(temp, usize);
+  std::string str;
+  if (!convertUTF32ToUTF8String(u32str, str))
+return "";
+  return str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -51,6 +51,30 @@
 return 0;
   }
 
+  const wchar_t *fooWide(int i) {
+if (i == 0)
+  // CHECK: StringLiteral: foo(
+  return L"foo";
+// CHECK-NOT: ImplicitCastExpr
+return 0;
+  }
+
+  const char16_t *fooU16(int i) {
+if (i == 0)
+  // CHECK: StringLiteral: foo(
+  return u"foo";
+// CHECK-NOT: ImplicitCastExpr
+return 0;
+  }
+
+  const char32_t *fooU32(int i) {
+if (i == 0)
+  // CHECK: StringLiteral: foo(
+  return U"foo";
+// CHECK-NOT: ImplicitCastExpr
+return 0;
+  }
+
   // CHECK: AccessSpecDecl: public(
 public:
   int not_initialized;
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,39 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide()) {
+  unsigned int wsize = String->getByteLength() / String->getCharByteWidth();
+  const auto *temp =
+  reinterpret_cast(String->getBytes().data());
+  std::wstring wstr(temp, wsize);
+  std::string str;
+  if (!convertWideToUTF8(wstr, str))
+return "";
+  return str;
+}
+if (String->isUTF16()) {
+  unsigned int usize = String->getByteLength() / String->getCharByteWidth();
+  const auto *temp =
+  reinterpret_cast(String->getBytes().data());
+  ArrayRef u16str(temp, usize);
+  std::string str;
+  if (!convertUTF16ToUTF8String

[PATCH] D126651: [clang-diff] Fix getStmtValue when dealing with wide chars

2022-06-06 Thread Kaining Zhong via Phabricator via cfe-commits
PRESIDENT810 updated this revision to Diff 434690.
PRESIDENT810 added a comment.

More refactoring following Johannes's suggestion


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126651/new/

https://reviews.llvm.org/D126651

Files:
  clang/lib/Tooling/ASTDiff/ASTDiff.cpp
  clang/test/Tooling/clang-diff-ast.cpp


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -47,6 +47,12 @@
 if (i == 0)
   // CHECK: StringLiteral: foo(
   return "foo";
+// CHECK: StringLiteral: wide(
+(void)L"wide";
+// CHECK: StringLiteral: utf-16(
+(void)u"utf-16";
+// CHECK: StringLiteral: utf-32(
+(void)U"utf-32";
 // CHECK-NOT: ImplicitCastExpr
 return 0;
   }
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,30 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide() || String->isUTF16() || String->isUTF32()) {
+  std::string UTF8Str;
+  unsigned int NumChars = String->getLength();
+  const char *Bytes = String->getBytes().data();
+  if (String->isWide()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertWideToUTF8({Chars, NumChars}, UTF8Str))
+  return "";
+  } else if (String->isUTF16()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF16ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  } else if (String->isUTF32()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF32ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  }
+  return UTF8Str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -47,6 +47,12 @@
 if (i == 0)
   // CHECK: StringLiteral: foo(
   return "foo";
+// CHECK: StringLiteral: wide(
+(void)L"wide";
+// CHECK: StringLiteral: utf-16(
+(void)u"utf-16";
+// CHECK: StringLiteral: utf-32(
+(void)U"utf-32";
 // CHECK-NOT: ImplicitCastExpr
 return 0;
   }
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,30 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide() || String->isUTF16() || String->isUTF32()) {
+  std::string UTF8Str;
+  unsigned int NumChars = String->getLength();
+  const char *Bytes = String->getBytes().data();
+  if (String->isWide()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertWideToUTF8({Chars, NumChars}, UTF8Str))
+  return "";
+  } else if (String->isUTF16()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF16ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  } else if (String->isUTF32()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF32ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  }
+  return UTF8Str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126651: [clang-diff] Fix getStmtValue when dealing with wide, UTF16 UTF32 chars

2022-06-06 Thread Kaining Zhong via Phabricator via cfe-commits
PRESIDENT810 updated this revision to Diff 434691.
PRESIDENT810 retitled this revision from "[clang-diff] Fix getStmtValue when 
dealing with wide chars" to "[clang-diff] Fix getStmtValue when dealing with 
wide, UTF16 UTF32 chars".
PRESIDENT810 edited the summary of this revision.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126651/new/

https://reviews.llvm.org/D126651

Files:
  clang/lib/Tooling/ASTDiff/ASTDiff.cpp
  clang/test/Tooling/clang-diff-ast.cpp


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -47,6 +47,12 @@
 if (i == 0)
   // CHECK: StringLiteral: foo(
   return "foo";
+// CHECK: StringLiteral: wide(
+(void)L"wide";
+// CHECK: StringLiteral: utf-16(
+(void)u"utf-16";
+// CHECK: StringLiteral: utf-32(
+(void)U"utf-32";
 // CHECK-NOT: ImplicitCastExpr
 return 0;
   }
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,31 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide() || String->isUTF16() || String->isUTF32()) {
+  std::string UTF8Str;
+  unsigned int NumChars = String->getLength();
+  const char *Bytes = String->getBytes().data();
+  if (String->isWide()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertWideToUTF8({Chars, NumChars}, UTF8Str))
+  return "";
+  } else if (String->isUTF16()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF16ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  } else {
+assert(String->isUTF32() && "Unsupported string encoding.");
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF32ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  }
+  return UTF8Str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -47,6 +47,12 @@
 if (i == 0)
   // CHECK: StringLiteral: foo(
   return "foo";
+// CHECK: StringLiteral: wide(
+(void)L"wide";
+// CHECK: StringLiteral: utf-16(
+(void)u"utf-16";
+// CHECK: StringLiteral: utf-32(
+(void)U"utf-32";
 // CHECK-NOT: ImplicitCastExpr
 return 0;
   }
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,31 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide() || String->isUTF16() || String->isUTF32()) {
+  std::string UTF8Str;
+  unsigned int NumChars = String->getLength();
+  const char *Bytes = String->getBytes().data();
+  if (String->isWide()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertWideToUTF8({Chars, NumChars}, UTF8Str))
+  return "";
+  } else if (String->isUTF16()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF16ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  } else {
+assert(String->isUTF32() && "Unsupported string encoding.");
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF32ToUTF8String(ArrayRef(Chars, NumChars),
+  UTF8Str))
+  return "";
+  }
+  return UTF8Str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D126651: [clang-diff] Fix getStmtValue when dealing with wide, UTF16 UTF32 chars

2022-06-07 Thread Kaining Zhong via Phabricator via cfe-commits
PRESIDENT810 updated this revision to Diff 434717.
PRESIDENT810 added a comment.

Sorry! I'm a novice at LLVM and I just didn't realize that those types can be
implicitly converted to ArrayRef. I have changed those, and it should be fine
now!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D126651/new/

https://reviews.llvm.org/D126651

Files:
  clang/lib/Tooling/ASTDiff/ASTDiff.cpp
  clang/test/Tooling/clang-diff-ast.cpp


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -47,6 +47,12 @@
 if (i == 0)
   // CHECK: StringLiteral: foo(
   return "foo";
+// CHECK: StringLiteral: wide(
+(void)L"wide";
+// CHECK: StringLiteral: utf-16(
+(void)u"utf-16";
+// CHECK: StringLiteral: utf-32(
+(void)U"utf-32";
 // CHECK-NOT: ImplicitCastExpr
 return 0;
   }
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,29 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide() || String->isUTF16() || String->isUTF32()) {
+  std::string UTF8Str;
+  unsigned int NumChars = String->getLength();
+  const char *Bytes = String->getBytes().data();
+  if (String->isWide()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertWideToUTF8({Chars, NumChars}, UTF8Str))
+  return "";
+  } else if (String->isUTF16()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF16ToUTF8String({Chars, NumChars}, UTF8Str))
+  return "";
+  } else {
+assert(String->isUTF32() && "Unsupported string encoding.");
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF32ToUTF8String({Chars, NumChars}, UTF8Str))
+  return "";
+  }
+  return UTF8Str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";


Index: clang/test/Tooling/clang-diff-ast.cpp
===
--- clang/test/Tooling/clang-diff-ast.cpp
+++ clang/test/Tooling/clang-diff-ast.cpp
@@ -47,6 +47,12 @@
 if (i == 0)
   // CHECK: StringLiteral: foo(
   return "foo";
+// CHECK: StringLiteral: wide(
+(void)L"wide";
+// CHECK: StringLiteral: utf-16(
+(void)u"utf-16";
+// CHECK: StringLiteral: utf-32(
+(void)U"utf-32";
 // CHECK-NOT: ImplicitCastExpr
 return 0;
   }
Index: clang/lib/Tooling/ASTDiff/ASTDiff.cpp
===
--- clang/lib/Tooling/ASTDiff/ASTDiff.cpp
+++ clang/lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Support/ConvertUTF.h"
 
 #include 
 #include 
@@ -463,8 +464,29 @@
   }
   if (auto *D = dyn_cast(S))
 return getRelativeName(D->getDecl(), getEnclosingDeclContext(AST, S));
-  if (auto *String = dyn_cast(S))
+  if (auto *String = dyn_cast(S)) {
+if (String->isWide() || String->isUTF16() || String->isUTF32()) {
+  std::string UTF8Str;
+  unsigned int NumChars = String->getLength();
+  const char *Bytes = String->getBytes().data();
+  if (String->isWide()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertWideToUTF8({Chars, NumChars}, UTF8Str))
+  return "";
+  } else if (String->isUTF16()) {
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF16ToUTF8String({Chars, NumChars}, UTF8Str))
+  return "";
+  } else {
+assert(String->isUTF32() && "Unsupported string encoding.");
+const auto *Chars = reinterpret_cast(Bytes);
+if (!convertUTF32ToUTF8String({Chars, NumChars}, UTF8Str))
+  return "";
+  }
+  return UTF8Str;
+}
 return std::string(String->getString());
+  }
   if (auto *B = dyn_cast(S))
 return B->getValue() ? "true" : "false";
   return "";
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits