llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Damian Andrei (xTachyon)

<details>
<summary>Changes</summary>

Tries to implement #<!-- -->69749.

>From Discord:
`
In terms of how to solve it... I'm hoping we can extend CXString to be 
length-aware and then add an interface that returns a CXString object instead. 
Perhaps clang_EvalResult_getAsCXString() with a comment explaining that 
getAsStr() is only valid for null-terminated strings?
`


There's some questions I have:

1. Is `size_t` appropriate here? I see some functions using `unsigned`, and 
some using `size_t`.
2. On this new flow to get a string literal, there's at least 2 allocations of 
the string from the 2 `cxstring::createDup`, one when evaluating the result, 
and one when returning it. Is this ok/can it be done better?
3. Is returning a "null" `CXString` on error a good idea?

There's probably more things that I did wrong, so I'm hoping for a review.

---
Full diff: https://github.com/llvm/llvm-project/pull/134551.diff


6 Files Affected:

- (modified) clang/include/clang-c/CXString.h (+3) 
- (modified) clang/include/clang-c/Index.h (+11-4) 
- (modified) clang/tools/libclang/CIndex.cpp (+19-27) 
- (modified) clang/tools/libclang/CXString.cpp (+16-6) 
- (modified) clang/tools/libclang/libclang.map (+6) 
- (modified) clang/unittests/libclang/LibclangTest.cpp (+41) 


``````````diff
diff --git a/clang/include/clang-c/CXString.h b/clang/include/clang-c/CXString.h
index 63dce4d140ce2..a0ad830230338 100644
--- a/clang/include/clang-c/CXString.h
+++ b/clang/include/clang-c/CXString.h
@@ -14,6 +14,7 @@
 #ifndef LLVM_CLANG_C_CXSTRING_H
 #define LLVM_CLANG_C_CXSTRING_H
 
+#include <stddef.h>
 #include "clang-c/ExternC.h"
 #include "clang-c/Platform.h"
 
@@ -36,6 +37,7 @@ LLVM_CLANG_C_EXTERN_C_BEGIN
  */
 typedef struct {
   const void *data;
+  size_t length;
   unsigned private_flags;
 } CXString;
 
@@ -52,6 +54,7 @@ typedef struct {
  * to `std::string::c_str()`.
  */
 CINDEX_LINKAGE const char *clang_getCString(CXString string);
+CINDEX_LINKAGE const char *clang_getCString2(CXString string, size_t *length);
 
 /**
  * Free the given string.
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index 38e2417dcd181..4ef41d56e43ae 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -5918,12 +5918,19 @@ clang_EvalResult_getAsUnsigned(CXEvalResult E);
 CINDEX_LINKAGE double clang_EvalResult_getAsDouble(CXEvalResult E);
 
 /**
- * Returns the evaluation result as a constant string if the
- * kind is other than Int or float. User must not free this pointer,
- * instead call clang_EvalResult_dispose on the CXEvalResult returned
- * by clang_Cursor_Evaluate.
+ * This function behaves the same as clang_EvalResult_getAsCXString, with 2
+ * exceptions:
+ * - the string literal will be truncated if a nul byte is found in the string.
+ * For this reason clang_EvalResult_getAsCXString is recommended.
+ * - User must not free this pointer, instead call clang_EvalResult_dispose on
+ * the CXEvalResult returned by clang_Cursor_Evaluate.
  */
 CINDEX_LINKAGE const char *clang_EvalResult_getAsStr(CXEvalResult E);
+/**
+ * Returns the evaluation result as a constant string if the
+ * kind is other than Int or float. This might include zero bytes.
+ */
+CINDEX_LINKAGE CXString clang_EvalResult_getAsCXString(CXEvalResult E);
 
 /**
  * Disposes the created Eval memory.
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index c8db6c92bb4d4..672e791ad3455 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -4589,13 +4589,13 @@ struct ExprEvalResult {
     unsigned long long unsignedVal;
     long long intVal;
     double floatVal;
-    char *stringVal;
+    CXString stringVal;
   } EvalData;
   bool IsUnsignedInt;
   ~ExprEvalResult() {
     if (EvalType != CXEval_UnExposed && EvalType != CXEval_Float &&
         EvalType != CXEval_Int) {
-      delete[] EvalData.stringVal;
+      clang_disposeString(EvalData.stringVal);
     }
   }
 };
@@ -4651,7 +4651,17 @@ const char *clang_EvalResult_getAsStr(CXEvalResult E) {
   if (!E) {
     return nullptr;
   }
-  return ((ExprEvalResult *)E)->EvalData.stringVal;
+  return clang_getCString(((ExprEvalResult *)E)->EvalData.stringVal);
+}
+
+CXString clang_EvalResult_getAsCXString(CXEvalResult E) {
+  if (!E) {
+    return cxstring::createNull();
+  }
+  size_t length;
+  auto data =
+      clang_getCString2(((ExprEvalResult *)E)->EvalData.stringVal, &length);
+  return cxstring::createDup(StringRef(data, length));
 }
 
 static const ExprEvalResult *evaluateExpr(Expr *expr, CXCursor C) {
@@ -4716,11 +4726,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, 
CXCursor C) {
         result->EvalType = CXEval_StrLiteral;
       }
 
-      std::string strRef(StrE->getString().str());
-      result->EvalData.stringVal = new char[strRef.size() + 1];
-      strncpy((char *)result->EvalData.stringVal, strRef.c_str(),
-              strRef.size());
-      result->EvalData.stringVal[strRef.size()] = '\0';
+      result->EvalData.stringVal = cxstring::createDup(StrE->getString());
       return result.release();
     }
   } else if (expr->getStmtClass() == Stmt::ObjCStringLiteralClass ||
@@ -4737,10 +4743,7 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, 
CXCursor C) {
       result->EvalType = CXEval_StrLiteral;
     }
 
-    std::string strRef(StrE->getString().str());
-    result->EvalData.stringVal = new char[strRef.size() + 1];
-    strncpy((char *)result->EvalData.stringVal, strRef.c_str(), strRef.size());
-    result->EvalData.stringVal[strRef.size()] = '\0';
+    result->EvalData.stringVal = cxstring::createDup(StrE->getString());
     return result.release();
   }
 
@@ -4754,13 +4757,8 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, 
CXCursor C) {
       callExpr = static_cast<CallExpr *>(CC->getSubExpr());
       StringLiteral *S = getCFSTR_value(callExpr);
       if (S) {
-        std::string strLiteral(S->getString().str());
         result->EvalType = CXEval_CFStr;
-
-        result->EvalData.stringVal = new char[strLiteral.size() + 1];
-        strncpy((char *)result->EvalData.stringVal, strLiteral.c_str(),
-                strLiteral.size());
-        result->EvalData.stringVal[strLiteral.size()] = '\0';
+        result->EvalData.stringVal = cxstring::createDup(S->getString());
         return result.release();
       }
     }
@@ -4780,12 +4778,8 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, 
CXCursor C) {
 
       StringLiteral *S = getCFSTR_value(callExpr);
       if (S) {
-        std::string strLiteral(S->getString().str());
         result->EvalType = CXEval_CFStr;
-        result->EvalData.stringVal = new char[strLiteral.size() + 1];
-        strncpy((char *)result->EvalData.stringVal, strLiteral.c_str(),
-                strLiteral.size());
-        result->EvalData.stringVal[strLiteral.size()] = '\0';
+        result->EvalData.stringVal = cxstring::createDup(S->getString());
         return result.release();
       }
     }
@@ -4793,11 +4787,9 @@ static const ExprEvalResult *evaluateExpr(Expr *expr, 
CXCursor C) {
     DeclRefExpr *D = static_cast<DeclRefExpr *>(expr);
     ValueDecl *V = D->getDecl();
     if (V->getKind() == Decl::Function) {
-      std::string strName = V->getNameAsString();
       result->EvalType = CXEval_Other;
-      result->EvalData.stringVal = new char[strName.size() + 1];
-      strncpy(result->EvalData.stringVal, strName.c_str(), strName.size());
-      result->EvalData.stringVal[strName.size()] = '\0';
+      result->EvalData.stringVal =
+          cxstring::createDup(StringRef(V->getNameAsString()));
       return result.release();
     }
   }
diff --git a/clang/tools/libclang/CXString.cpp 
b/clang/tools/libclang/CXString.cpp
index aaa8f8eeb67a1..7cc3e6681c542 100644
--- a/clang/tools/libclang/CXString.cpp
+++ b/clang/tools/libclang/CXString.cpp
@@ -43,6 +43,7 @@ namespace cxstring {
 CXString createEmpty() {
   CXString Str;
   Str.data = "";
+  Str.length = 0;
   Str.private_flags = CXS_Unmanaged;
   return Str;
 }
@@ -50,6 +51,7 @@ CXString createEmpty() {
 CXString createNull() {
   CXString Str;
   Str.data = nullptr;
+  Str.length = 0;
   Str.private_flags = CXS_Unmanaged;
   return Str;
 }
@@ -60,6 +62,7 @@ CXString createRef(const char *String) {
 
   CXString Str;
   Str.data = String;
+  Str.length = -1;
   Str.private_flags = CXS_Unmanaged;
   return Str;
 }
@@ -71,10 +74,7 @@ CXString createDup(const char *String) {
   if (String[0] == '\0')
     return createEmpty();
 
-  CXString Str;
-  Str.data = strdup(String);
-  Str.private_flags = CXS_Malloc;
-  return Str;
+  return createDup(StringRef(String));
 }
 
 CXString createRef(StringRef String) {
@@ -91,11 +91,13 @@ CXString createRef(StringRef String) {
 }
 
 CXString createDup(StringRef String) {
-  CXString Result;
   char *Spelling = static_cast<char *>(llvm::safe_malloc(String.size() + 1));
-  memmove(Spelling, String.data(), String.size());
+  memcpy(Spelling, String.data(), String.size());
   Spelling[String.size()] = 0;
+
+  CXString Result;
   Result.data = Spelling;
+  Result.length = String.size();
   Result.private_flags = (unsigned) CXS_Malloc;
   return Result;
 }
@@ -103,6 +105,7 @@ CXString createDup(StringRef String) {
 CXString createCXString(CXStringBuf *buf) {
   CXString Str;
   Str.data = buf;
+  Str.length = -1;
   Str.private_flags = (unsigned) CXS_StringBuf;
   return Str;
 }
@@ -164,6 +167,13 @@ const char *clang_getCString(CXString string) {
   return static_cast<const char *>(string.data);
 }
 
+const char *clang_getCString2(CXString string, size_t *length) {
+  auto ret = clang_getCString(string);
+  *length =
+      string.length == static_cast<size_t>(-1) ? strlen(ret) : string.length;
+  return ret;
+}
+
 void clang_disposeString(CXString string) {
   switch ((CXStringFlag) string.private_flags) {
     case CXS_Unmanaged:
diff --git a/clang/tools/libclang/libclang.map 
b/clang/tools/libclang/libclang.map
index 07471ca42c97e..85ac50bb59c62 100644
--- a/clang/tools/libclang/libclang.map
+++ b/clang/tools/libclang/libclang.map
@@ -438,6 +438,12 @@ LLVM_20 {
     clang_visitCXXMethods;
 };
 
+LLVM_21 {
+  global:
+    clang_getCString2;
+    clang_EvalResult_getAsCXString;
+};
+
 # Example of how to add a new symbol version entry.  If you do add a new symbol
 # version, please update the example to depend on the version you added.
 # LLVM_X {
diff --git a/clang/unittests/libclang/LibclangTest.cpp 
b/clang/unittests/libclang/LibclangTest.cpp
index 6de4d02bf74f4..c242d194b2c04 100644
--- a/clang/unittests/libclang/LibclangTest.cpp
+++ b/clang/unittests/libclang/LibclangTest.cpp
@@ -623,6 +623,47 @@ TEST_F(LibclangParseTest, EvaluateChildExpression) {
       nullptr);
 }
 
+TEST_F(LibclangParseTest, StringLiteralWithZeros) {
+  const char testSource[] = R"cpp(
+const char str[] = "pika\0chu";
+)cpp";
+  std::string fileName = "main.cpp";
+  WriteFile(fileName, testSource);
+
+  const char *Args[] = {"-xc++"};
+  ClangTU = clang_parseTranslationUnit(Index, fileName.c_str(), Args, 1,
+                                       nullptr, 0, TUFlags);
+
+  int nodes = 0;
+
+  Traverse([&nodes](CXCursor cursor, CXCursor parent) -> CXChildVisitResult {
+    if (cursor.kind == CXCursor_StringLiteral) {
+      CXEvalResult RE = clang_Cursor_Evaluate(cursor);
+      EXPECT_NE(RE, nullptr);
+      EXPECT_EQ(clang_EvalResult_getKind(RE), CXEval_StrLiteral);
+
+      const char expected[] = "pika\0chu";
+      size_t expected_size = sizeof(expected) - 1;
+
+      auto lit = clang_EvalResult_getAsCXString(RE);
+      size_t length;
+      auto str = clang_getCString2(lit, &length);
+
+      EXPECT_TRUE(length == expected_size &&
+                  memcmp(str, expected, length) == 0);
+
+      clang_disposeString(lit);
+      clang_EvalResult_dispose(RE);
+
+      nodes++;
+      return CXChildVisit_Continue;
+    }
+    return CXChildVisit_Recurse;
+  });
+
+  EXPECT_EQ(nodes, 1);
+}
+
 class LibclangReparseTest : public LibclangParseTest {
 public:
   void DisplayDiagnostics() {

``````````

</details>


https://github.com/llvm/llvm-project/pull/134551
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to