elsteveogrande created this revision.
Herald added a subscriber: cfe-commits.

Previous impl would read the byte past the end of a string (a 
`llvm::StringRef`), possibly exceeding the allocation for that memory and 
raising an MSAN issue.

This will instead save the character range specified by the `StringRef` and 
copy it on-demand to a new C string.

Passes existing tests, and now passes with MSAN as well.


Repository:
  rC Clang

https://reviews.llvm.org/D42043

Files:
  include/clang-c/CXString.h
  tools/c-index-test/c-index-test.c
  tools/libclang/CXString.cpp
  tools/libclang/CXString.h

Index: tools/libclang/CXString.h
===================================================================
--- tools/libclang/CXString.h
+++ tools/libclang/CXString.h
@@ -106,4 +106,3 @@
 }
 
 #endif
-
Index: tools/libclang/CXString.cpp
===================================================================
--- tools/libclang/CXString.cpp
+++ tools/libclang/CXString.cpp
@@ -15,99 +15,141 @@
 
 #include "CXString.h"
 #include "CXTranslationUnit.h"
+#include "clang-c/CXString.h"
 #include "clang-c/Index.h"
 #include "clang/Frontend/ASTUnit.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cstring>
 
-using namespace clang;
-
-/// Describes the kind of underlying data in CXString.
-enum CXStringFlag {
-  /// CXString contains a 'const char *' that it doesn't own.
-  CXS_Unmanaged,
+static_assert(sizeof(CXString) <= 16, "");
 
-  /// CXString contains a 'const char *' that it allocated with malloc().
-  CXS_Malloc,
-
-  /// CXString contains a CXStringBuf that needs to be returned to the
-  /// CXStringPool.
-  CXS_StringBuf
-};
+using namespace clang;
 
 namespace clang {
 namespace cxstring {
 
+/**
+ * This is for \b CXString 's which are created with \b CreateRef(StringRef).
+ * We'll store the info from the input \b StringRef: char ptr and size.
+ *
+ * We don't know for sure whether this is null-terminated so, when and if
+ * \b clang_getCString is called for this \b CXString, we'll allocate C string
+ * storage and copy data into the storage.  We'll memo-ize that in the
+ * \b CString member.
+ *
+ * This is refcounted; the \b Count member is initially 1.  When a \b CXString
+ * instance using this object is disposed via \b clang_disposeString, \b Count
+ * is decremented.  When this string is duplicated the \b Count increases.
+ *
+ * When \b Count finally drops to zero, the ptr at \b CString, and this object,
+ * should be freed with \b free .
+ */
+struct RefCountedCharRange {
+  const char *Data;
+  const char *CString;
+  unsigned Size;
+  unsigned Count;
+};
+
 //===----------------------------------------------------------------------===//
 // Basic generation of CXStrings.
 //===----------------------------------------------------------------------===//
 
 CXString createEmpty() {
   CXString Str;
-  Str.data = "";
-  Str.private_flags = CXS_Unmanaged;
+  Str.Contents = (const void *) "";
+  Str.Size = 0;
+  Str.IsNullTerminated = true;
+  Str.IsOwned = false;
+  Str.IsPooled = false;
   return Str;
 }
 
 CXString createNull() {
   CXString Str;
-  Str.data = nullptr;
-  Str.private_flags = CXS_Unmanaged;
+  Str.Contents = nullptr;
+  Str.Size = 0;
+  Str.IsNullTerminated = false;
+  Str.IsOwned = false;
+  Str.IsPooled = false;
   return Str;
 }
 
 CXString createRef(const char *String) {
-  if (String && String[0] == '\0')
+  if (!String) {
+    return createNull();
+  }
+  if (String[0] == '\0') {
     return createEmpty();
+  }
 
   CXString Str;
-  Str.data = String;
-  Str.private_flags = CXS_Unmanaged;
+  Str.Contents = (const void *) String;
+  Str.Size = strlen(String);
+  Str.IsNullTerminated = true;
+  Str.IsOwned = false;
+  Str.IsPooled = false;
   return Str;
 }
 
+inline static const char *copyCharRange(const char *CS, unsigned Size) {
+  char *Spelling = static_cast<char *>(malloc(Size + 1));
+  memcpy(Spelling, CS, Size);
+  Spelling[Size] = 0;
+  return Spelling;
+}
+
 CXString createDup(const char *String) {
-  if (!String)
+  if (!String) {
     return createNull();
-
-  if (String[0] == '\0')
+  }
+  if (String[0] == '\0') {
     return createEmpty();
+  }
 
   CXString Str;
-  Str.data = strdup(String);
-  Str.private_flags = CXS_Malloc;
+  Str.Size = strlen(String);
+  Str.Contents = (const void *) copyCharRange(String, Str.Size);
+  Str.IsNullTerminated = true;
+  Str.IsOwned = true;
+  Str.IsPooled = false;
   return Str;
 }
 
 CXString createRef(StringRef String) {
-  // If the string is not nul-terminated, we have to make a copy.
-
-  // FIXME: This is doing a one past end read, and should be removed! For memory
-  // we don't manage, the API string can become unterminated at any time outside
-  // our control.
-
-  if (!String.empty() && String.data()[String.size()] != 0)
-    return createDup(String);
-
-  CXString Result;
-  Result.data = String.data();
-  Result.private_flags = (unsigned) CXS_Unmanaged;
-  return Result;
+  assert (String.size() <= std::numeric_limits<unsigned>::max());
+  CXString Str;
+  Str.Size = unsigned(String.size());
+  Str.IsNullTerminated = false;
+  Str.IsOwned = false;
+  Str.IsPooled = false;
+  auto *RC = new RefCountedCharRange {
+    /* Data */ String.data(),
+    /* CString */ nullptr,
+    /* Size */ Str.Size,
+    /* Count */ 1,
+  };
+  Str.Contents = (const void *) RC;
+  return Str;
 }
 
 CXString createDup(StringRef String) {
-  CXString Result;
-  char *Spelling = static_cast<char *>(malloc(String.size() + 1));
-  memmove(Spelling, String.data(), String.size());
-  Spelling[String.size()] = 0;
-  Result.data = Spelling;
-  Result.private_flags = (unsigned) CXS_Malloc;
-  return Result;
+  CXString Str;
+  Str.Size = String.size();
+  Str.Contents = (const void *) copyCharRange(String.data(), Str.Size);
+  Str.IsNullTerminated = true;
+  Str.IsOwned = true;
+  Str.IsPooled = false;
+  return Str;
 }
 
 CXString createCXString(CXStringBuf *buf) {
   CXString Str;
-  Str.data = buf;
-  Str.private_flags = (unsigned) CXS_StringBuf;
+  Str.Contents = buf->Data.data();
+  Str.Size = buf->Data.size();
+  Str.IsNullTerminated = true;
+  Str.IsOwned = false;
+  Str.IsPooled = true;
   return Str;
 }
 
@@ -151,7 +193,7 @@
 }
 
 bool isManagedByPool(CXString str) {
-  return ((CXStringFlag) str.private_flags) == CXS_StringBuf;
+  return str.IsPooled;
 }
 
 } // end namespace cxstring
@@ -162,24 +204,63 @@
 //===----------------------------------------------------------------------===//
 
 const char *clang_getCString(CXString string) {
-  if (string.private_flags == (unsigned) CXS_StringBuf) {
-    return static_cast<const cxstring::CXStringBuf *>(string.data)->Data.data();
+  const char *CString = nullptr;
+
+  // If neither `IsOwned` nor `IsNullTerminated`, then this is a reference to
+  // a range of characters owned elsewhere.  We'll need to copy this to a new
+  // C string to ensure it's null-terminated, if we haven't done so already.
+  if (string.IsOwned || string.IsNullTerminated) {
+    CString = (const char *) string.Contents;
+  } else {
+    auto *V = const_cast<void *>(string.Contents);
+    if (V) {
+      auto *R = (clang::cxstring::RefCountedCharRange *) V;
+      if (R) {
+        if (!R->CString) {
+          // Need to make this a new C string.
+          char *NewCString = (char *) malloc(R->Size + 1);
+          memcpy(NewCString, R->Data, R->Size);
+          NewCString[R->Size] = 0;
+          R->CString = (const char *) NewCString;
+        }
+        CString = R->CString;
+      }
+    }
   }
-  return static_cast<const char *>(string.data);
+
+  return CString;
+}
+
+const char *clang_getStringData(CXString string) {
+  if (!(string.IsOwned || string.IsNullTerminated)) {
+    auto *R = (const clang::cxstring::RefCountedCharRange *) string.Contents;
+    return R ? R->Data : nullptr;
+  }
+  return (const char *) string.Contents;
+}
+
+unsigned clang_getStringSize(CXString string) {
+  return string.Size;
 }
 
 void clang_disposeString(CXString string) {
-  switch ((CXStringFlag) string.private_flags) {
-    case CXS_Unmanaged:
-      break;
-    case CXS_Malloc:
-      if (string.data)
-        free(const_cast<void *>(string.data));
-      break;
-    case CXS_StringBuf:
-      static_cast<cxstring::CXStringBuf *>(
-          const_cast<void *>(string.data))->dispose();
-      break;
+  if (string.IsOwned) {
+    // Is a C string that we own; need to free it.
+    free(const_cast<void *>(string.Contents));
+  } else {
+    if (!string.IsNullTerminated) {
+      // Neither null-terminated nor owned.  Is a char range for which we might
+      // have created a C string.
+      auto *CR = (const clang::cxstring::RefCountedCharRange *) string.Contents;
+      auto *R = const_cast<clang::cxstring::RefCountedCharRange *>(CR);
+      if (R) {
+        -- R->Count;
+        if (!R->Count) {
+          free((void *) R->CString);
+          free((void *) R);
+        }
+      }
+    }
   }
 }
 
@@ -189,4 +270,3 @@
   delete[] set->Strings;
   delete set;
 }
-
Index: tools/c-index-test/c-index-test.c
===================================================================
--- tools/c-index-test/c-index-test.c
+++ tools/c-index-test/c-index-test.c
@@ -1040,6 +1040,16 @@
   }
 }
 
+static CXString createCXString(const char *CS) {
+  CXString Str;
+  Str.Contents = (const void *) CS;
+  Str.Size = strlen(CS);
+  Str.IsNullTerminated = true;
+  Str.IsOwned = false;
+  Str.IsPooled = false;
+  return Str;
+}
+
 /******************************************************************************/
 /* Callbacks.                                                                 */
 /******************************************************************************/
@@ -2994,7 +3004,7 @@
   int first_check_printed;
   int fail_for_error;
   int abort;
-  const char *main_filename;
+  CXString main_filename;
   ImportedASTFilesData *importedASTs;
   IndexDataStringList *strings;
   CXTranslationUnit TU;
@@ -3033,6 +3043,7 @@
   const char *cname;
   CXIdxClientFile file;
   unsigned line, column;
+  const char *main_filename;
   int isMainFile;
   
   index_data = (IndexData *)client_data;
@@ -3047,7 +3058,8 @@
   }
   filename = clang_getFileName((CXFile)file);
   cname = clang_getCString(filename);
-  if (strcmp(cname, index_data->main_filename) == 0)
+  main_filename = clang_getCString(index_data->main_filename);
+  if (strcmp(cname, main_filename) == 0)
     isMainFile = 1;
   else
     isMainFile = 0;
@@ -3249,14 +3261,11 @@
 static CXIdxClientFile index_enteredMainFile(CXClientData client_data,
                                        CXFile file, void *reserved) {
   IndexData *index_data;
-  CXString filename;
 
   index_data = (IndexData *)client_data;
   printCheck(index_data);
 
-  filename = clang_getFileName(file);
-  index_data->main_filename = clang_getCString(filename);
-  clang_disposeString(filename);
+  index_data->main_filename = clang_getFileName(file);
 
   printf("[enteredMainFile]: ");
   printCXIndexFile((CXIdxClientFile)file);
@@ -3495,7 +3504,7 @@
   index_data.first_check_printed = 0;
   index_data.fail_for_error = 0;
   index_data.abort = 0;
-  index_data.main_filename = "";
+  index_data.main_filename = createCXString("");
   index_data.importedASTs = importedASTs;
   index_data.strings = NULL;
   index_data.TU = NULL;
@@ -3511,6 +3520,7 @@
   if (index_data.fail_for_error)
     result = -1;
 
+  clang_disposeString(index_data.main_filename);
   free_client_data(&index_data);
   return result;
 }
@@ -3532,7 +3542,7 @@
   index_data.first_check_printed = 0;
   index_data.fail_for_error = 0;
   index_data.abort = 0;
-  index_data.main_filename = "";
+  index_data.main_filename = createCXString("");
   index_data.importedASTs = importedASTs;
   index_data.strings = NULL;
   index_data.TU = TU;
@@ -3545,6 +3555,7 @@
     result = -1;
 
   clang_disposeTranslationUnit(TU);
+  clang_disposeString(index_data.main_filename);
   free_client_data(&index_data);
   return result;
 }
@@ -4037,9 +4048,7 @@
           if (!isUSR(I[2]))
             return not_usr("<class USR>", I[2]);
           else {
-            CXString x;
-            x.data = (void*) I[2];
-            x.private_flags = 0;
+            CXString x = createCXString(I[2]);
             print_usr(clang_constructUSR_ObjCIvar(I[1], x));
           }
 
@@ -4064,9 +4073,7 @@
           if (!isUSR(I[3]))
             return not_usr("<class USR>", I[3]);
           else {
-            CXString x;
-            x.data = (void*) I[3];
-            x.private_flags = 0;
+            CXString x = createCXString(I[3]);
             print_usr(clang_constructUSR_ObjCMethod(I[1], atoi(I[2]), x));
           }
           I += 4;
@@ -4094,9 +4101,7 @@
           if (!isUSR(I[2]))
             return not_usr("<class USR>", I[2]);
           else {
-            CXString x;
-            x.data = (void*) I[2];
-            x.private_flags = 0;
+            CXString x = createCXString(I[2]);
             print_usr(clang_constructUSR_ObjCProperty(I[1], x));
           }
           I += 3;
Index: include/clang-c/CXString.h
===================================================================
--- include/clang-c/CXString.h
+++ include/clang-c/CXString.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_C_CXSTRING_H
 
 #include "clang-c/Platform.h"
+#include <stdbool.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -36,8 +37,11 @@
  * with the string data, call \c clang_disposeString() to free the string.
  */
 typedef struct {
-  const void *data;
-  unsigned private_flags;
+  const void *Contents;
+  unsigned Size;
+  bool IsNullTerminated;
+  bool IsOwned;
+  bool IsPooled;
 } CXString;
 
 typedef struct {
@@ -47,10 +51,33 @@
 
 /**
  * \brief Retrieve the character data associated with the given string.
+ *
+ * This returns a pointer to a C string even if this \b CXString was built
+ * from e.g. a StringRef reference (which is not null-terminated).  In those
+ * cases a string copy is performed on-demand.
+ *
+ * See also \b clang_getStringData and \b clang_getStringSize which could
+ * provide a range or "window" of data, if needed, and a C string is not
+ * strictly needed; this could save a C string allocation, deallocation, and
+ * string copy.
  */
 CINDEX_LINKAGE const char *clang_getCString(CXString string);
 
 /**
+ * \brief Retrieve a pointer to the start of the character data for this string.
+ *
+ * Note that the string is not necessarily null-terminated, so it might not be
+ * suitable for use as a C string.  See \b clang_getCString which guarantees
+ * a C string result.
+ */
+CINDEX_LINKAGE const char *clang_getStringData(CXString string);
+
+/**
+ * \brief Retrieve the string length (not including null-terminator, if any).
+ */
+CINDEX_LINKAGE unsigned clang_getStringSize(CXString string);
+
+/**
  * \brief Free the given string.
  */
 CINDEX_LINKAGE void clang_disposeString(CXString string);
@@ -68,4 +95,3 @@
 }
 #endif
 #endif
-
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to