vsk updated this revision to Diff 242417.
vsk marked 5 inline comments as done.
vsk added a comment.

Address review feedback.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73860/new/

https://reviews.llvm.org/D73860

Files:
  
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
  
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
  lldb/source/DataFormatters/StringPrinter.cpp
  lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp

Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
===================================================================
--- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -527,6 +527,16 @@
     size = (layout == eLibcxxStringLayoutModeDSC)
                ? size_mode_value
                : ((size_mode_value >> 1) % 256);
+
+    // When the small-string optimization takes place, the data must fit in the
+    // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
+    // likely that the string isn't initialized and we're reading garbage.
+    ExecutionContext exe_ctx(location_sp->GetExecutionContextRef());
+    auto max_bytes = location_sp->GetCompilerType().GetByteSize(
+        exe_ctx.GetBestExecutionContextScope());
+    if (size > max_bytes)
+      return false;
+
     return (location_sp.get() != nullptr);
   } else {
     ValueObjectSP l(D->GetChildAtIndex(0, true));
@@ -549,8 +559,10 @@
     const TypeSummaryOptions &summary_options) {
   uint64_t size = 0;
   ValueObjectSP location_sp;
-  if (!ExtractLibcxxStringInfo(valobj, location_sp, size))
-    return false;
+  if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) {
+    stream.Printf("Summary Unavailable");
+    return true;
+  }
   if (size == 0) {
     stream.Printf("L\"\"");
     return true;
@@ -591,29 +603,24 @@
 
   switch (*wchar_t_size) {
   case 1:
-    StringPrinter::ReadBufferAndDumpToStream<
+    return StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF8>(
         options);
-    break;
 
   case 2:
-    lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+    return lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF16>(
         options);
-    break;
 
   case 4:
-    lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+    return lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
         lldb_private::formatters::StringPrinter::StringElementType::UTF32>(
         options);
-    break;
 
   default:
     stream.Printf("size for wchar_t is not valid");
     return true;
   }
-
-  return true;
 }
 
 template <StringPrinter::StringElementType element_type>
@@ -623,8 +630,10 @@
   uint64_t size = 0;
   ValueObjectSP location_sp;
 
-  if (!ExtractLibcxxStringInfo(valobj, location_sp, size))
-    return false;
+  if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) {
+    stream.Printf("Summary Unavailable");
+    return true;
+  }
 
   if (size == 0) {
     stream.Printf("\"\"");
@@ -657,9 +666,7 @@
   options.SetQuote('"');
   options.SetSourceSize(size);
   options.SetBinaryZeroIsTerminator(false);
-  StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
-
-  return true;
+  return StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderASCII(
Index: lldb/source/DataFormatters/StringPrinter.cpp
===================================================================
--- lldb/source/DataFormatters/StringPrinter.cpp
+++ lldb/source/DataFormatters/StringPrinter.cpp
@@ -15,6 +15,7 @@
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/Status.h"
 
+#include "llvm/Support/CheckedArithmetic.h"
 #include "llvm/Support/ConvertUTF.h"
 
 #include <ctype.h>
@@ -133,12 +134,17 @@
 
   unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
 
-  if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) {
-    // I don't have enough bytes - print whatever I have left
-    retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)};
-    next = buffer_end + 1;
+  // If the utf8 encoded length is invalid, or if there aren't enough bytes to
+  // print, this is some kind of corrupted string.
+  if (utf8_encoded_len == 0 || utf8_encoded_len > 4)
+    return retval;
+  if ((buffer_end - buffer) < utf8_encoded_len)
+    // There's no room in the buffer for the utf8 sequence.
+    return retval;
+  if (!llvm::checkedAdd(reinterpret_cast<intptr_t>(buffer),
+                        static_cast<intptr_t>(utf8_encoded_len)))
+    // Accessing the utf8 sequence would cause a pointer overflow.
     return retval;
-  }
 
   char32_t codepoint = 0;
   switch (utf8_encoded_len) {
@@ -160,12 +166,6 @@
         (unsigned char)*buffer, (unsigned char)*(buffer + 1),
         (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
     break;
-  default:
-    // this is probably some bogus non-character thing just print it as-is and
-    // hope to sync up again soon
-    retval = {buffer, 1};
-    next = buffer + 1;
-    return retval;
   }
 
   if (codepoint) {
@@ -215,9 +215,7 @@
     return retval;
   }
 
-  // this should not happen - but just in case.. try to resync at some point
-  retval = {buffer, 1};
-  next = buffer + 1;
+  // We couldn't figure out how to print this string.
   return retval;
 }
 
@@ -227,7 +225,7 @@
 static StringPrinter::StringPrinterBufferPointer<>
 GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
              uint8_t *buffer_end, uint8_t *&next) {
-  if (!buffer)
+  if (!buffer || buffer >= buffer_end)
     return {nullptr};
 
   switch (type) {
@@ -354,13 +352,11 @@
             escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
         auto printable_bytes = printable.GetBytes();
         auto printable_size = printable.GetSize();
-        if (!printable_bytes || !next_data) {
-          // GetPrintable() failed on us - print one byte in a desperate resync
-          // attempt
-          printable_bytes = utf8_data_ptr;
-          printable_size = 1;
-          next_data = utf8_data_ptr + 1;
-        }
+
+        // We failed to figure out how to print this string.
+        if (!printable_bytes || !next_data)
+          return false;
+
         for (unsigned c = 0; c < printable_size; c++)
           stream.Printf("%c", *(printable_bytes + c));
         utf8_data_ptr = (uint8_t *)next_data;
@@ -478,13 +474,11 @@
       auto printable = escaping_callback(data, data_end, next_data);
       auto printable_bytes = printable.GetBytes();
       auto printable_size = printable.GetSize();
-      if (!printable_bytes || !next_data) {
-        // GetPrintable() failed on us - print one byte in a desperate resync
-        // attempt
-        printable_bytes = data;
-        printable_size = 1;
-        next_data = data + 1;
-      }
+
+      // We failed to figure out how to print this string.
+      if (!printable_bytes || !next_data)
+        return false;
+
       for (unsigned c = 0; c < printable_size; c++)
         options.GetStream()->Printf("%c", *(printable_bytes + c));
       data = (uint8_t *)next_data;
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,4 +1,27 @@
 #include <string>
+#include <stdint.h>
+
+// For more information about libc++'s std::string ABI, see:
+//
+//   https://joellaity.com/2020/01/31/string.html
+
+// A corrupt string which hits the SSO code path, but has an invalid size.
+static struct {
+  // Set the size of this short-mode string to 116. Note that in short mode,
+  // the size is encoded as `size << 1`.
+  unsigned char size = 232;
+
+  // 23 garbage bytes for the inline string payload.
+  char inline_buf[23] = {0};
+} garbage_string_short_mode;
+
+// A corrupt libcxx string which points to garbage and has a crazy length.
+static unsigned char garbage_utf8_payload[] = {0 /* illegal utf8 encoding */, 1, 2, 3};
+static struct {
+  uint64_t cap = 100;
+  uint64_t size = 200;
+  unsigned char *data = &garbage_utf8_payload[0];
+} garbage_string_long_mode;
 
 int main()
 {
@@ -17,6 +40,17 @@
     std::u32string u32_string(U"🍄🍅🍆🍌");
     std::u32string u32_empty(U"");
     std::basic_string<unsigned char> uchar(5, 'a');
+    std::string garbage1, garbage2;
+
+#if _LIBCPP_ABI_VERSION == 1
+    if (sizeof(std::string) == sizeof(garbage_string_short_mode))
+      memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long_mode))
+      memcpy((void *)&garbage2, &garbage_string_long_mode, sizeof(std::string));
+#else
+#error "Test potentially needs to be updated for a new std::string ABI."
+#endif
+
     S.assign(L"!!!!!"); // Set break point at this line.
     return 0;
 }
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
@@ -51,6 +51,8 @@
                 "settings set target.max-children-count 256",
                 check=False)
 
+        is_64_bit = self.process().GetAddressByteSize() == 8
+
         # Execute the cleanup function during test case tear down.
         self.addTearDownHook(cleanup)
 
@@ -114,3 +116,7 @@
                 '(%s::basic_string<unsigned char, %s::char_traits<unsigned char>, '
                 '%s::allocator<unsigned char> >) uchar = "aaaaa"'%(ns,ns,ns),
         ])
+
+        if is_64_bit:
+            self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable'])
+            self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable'])
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to