vsk created this revision.
vsk added reviewers: teemperor, shafik, jingham.
Herald added a subscriber: christof.

This patch fixes two distinct but related out-of-bounds read bugs in
string data formatters. Both issues have to do with mishandling of un-
initialized strings. These manifest as ASan exceptions when debugging a
clang binary.

The first issue was that the std::string formatter treated strings in
"short mode" with length greater than the size of the inline buffer as
valid.

The second issue was that the StringPrinter facility did not check that
a full utf8 codepoint sequence can be read from the buffer (i.e. there
are some missing range checks). I took the opportunity here to delete
some untested code that was meant to deal with invalid input and replace
it with fail-on-invalid logic ([1][2][3]). This means we'll give up on
formatting an invalid string instead of guessing our way through it.

I've added regression tests for both issues to
test/functionalities/data-formatter/data-formatter-stl/libcxx/string.

[1]
http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L136
[2]
http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L163
[3]
http://lab.llvm.org:8080/coverage/coverage-reports/coverage/Users/buildslave/jenkins/workspace/coverage/llvm-project/lldb/source/DataFormatters/StringPrinter.cpp.html#L357

rdar://59080026


https://reviews.llvm.org/D73860

Files:
  
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
  lldb/source/DataFormatters/StringPrinter.cpp
  lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp

Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
===================================================================
--- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -527,6 +527,16 @@
     size = (layout == eLibcxxStringLayoutModeDSC)
                ? size_mode_value
                : ((size_mode_value >> 1) % 256);
+
+    // When the small-string optimization takes place, the data must fit in the
+    // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
+    // likely that the string isn't initialized and we're reading garbage.
+    ExecutionContext exe_ctx(location_sp->GetExecutionContextRef());
+    auto max_bytes = location_sp->GetCompilerType().GetByteSize(
+        exe_ctx.GetBestExecutionContextScope());
+    if (size > max_bytes)
+      return false;
+
     return (location_sp.get() != nullptr);
   } else {
     ValueObjectSP l(D->GetChildAtIndex(0, true));
@@ -657,9 +667,7 @@
   options.SetQuote('"');
   options.SetSourceSize(size);
   options.SetBinaryZeroIsTerminator(false);
-  StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
-
-  return true;
+  return StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
 }
 
 bool lldb_private::formatters::LibcxxStringSummaryProviderASCII(
Index: lldb/source/DataFormatters/StringPrinter.cpp
===================================================================
--- lldb/source/DataFormatters/StringPrinter.cpp
+++ lldb/source/DataFormatters/StringPrinter.cpp
@@ -59,11 +59,19 @@
   return true;
 }
 
+/// Return true if \p Needle lies within the range [\p Start, \p End).
+static bool isInHalfOpenRange(uint8_t *Needle, uint8_t *Start, uint8_t *End) {
+  return uintptr_t(Needle) >= uintptr_t(Start) &&
+         uintptr_t(Needle) < uintptr_t(End);
+}
+
 template <>
 StringPrinter::StringPrinterBufferPointer<>
 GetPrintableImpl<StringPrinter::StringElementType::ASCII>(uint8_t *buffer,
                                                           uint8_t *buffer_end,
                                                           uint8_t *&next) {
+  assert(isInHalfOpenRange(buffer, buffer, buffer_end) &&
+         "Cannot read the first byte of ASCII string buffer");
   StringPrinter::StringPrinterBufferPointer<> retval = {nullptr};
 
   switch (*buffer) {
@@ -129,16 +137,17 @@
 GetPrintableImpl<StringPrinter::StringElementType::UTF8>(uint8_t *buffer,
                                                          uint8_t *buffer_end,
                                                          uint8_t *&next) {
+  assert(isInHalfOpenRange(buffer, buffer, buffer_end) &&
+         "Cannot read the first byte of UTF8 string buffer");
   StringPrinter::StringPrinterBufferPointer<> retval{nullptr};
 
   unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
 
-  if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) {
-    // I don't have enough bytes - print whatever I have left
-    retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)};
-    next = buffer_end + 1;
+  // If the utf8 encoded length is invalid, or if there aren't enough bytes to
+  // print, this is some kind of corrupted string.
+  if ((utf8_encoded_len == 0 || utf8_encoded_len > 4) ||
+      !isInHalfOpenRange(buffer + (utf8_encoded_len - 1), buffer, buffer_end))
     return retval;
-  }
 
   char32_t codepoint = 0;
   switch (utf8_encoded_len) {
@@ -160,12 +169,6 @@
         (unsigned char)*buffer, (unsigned char)*(buffer + 1),
         (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
     break;
-  default:
-    // this is probably some bogus non-character thing just print it as-is and
-    // hope to sync up again soon
-    retval = {buffer, 1};
-    next = buffer + 1;
-    return retval;
   }
 
   if (codepoint) {
@@ -215,9 +218,7 @@
     return retval;
   }
 
-  // this should not happen - but just in case.. try to resync at some point
-  retval = {buffer, 1};
-  next = buffer + 1;
+  // We couldn't figure out how to print this string.
   return retval;
 }
 
@@ -354,13 +355,11 @@
             escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
         auto printable_bytes = printable.GetBytes();
         auto printable_size = printable.GetSize();
-        if (!printable_bytes || !next_data) {
-          // GetPrintable() failed on us - print one byte in a desperate resync
-          // attempt
-          printable_bytes = utf8_data_ptr;
-          printable_size = 1;
-          next_data = utf8_data_ptr + 1;
-        }
+
+        // We failed to figure out how to print this string.
+        if (!printable_bytes || !next_data)
+          return false;
+
         for (unsigned c = 0; c < printable_size; c++)
           stream.Printf("%c", *(printable_bytes + c));
         utf8_data_ptr = (uint8_t *)next_data;
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,5 +1,12 @@
 #include <string>
 
+// A corrupt libcxx string which hits the SSO code path, but has an invalid
+// size.
+static unsigned char garbage_string_sso[] = {232, 32, 175, 29, 1, 0, 0, 0, 32, 30, 175, 29, 1, 0, 0, 0, 184, 30, 175, 29, 1, 0, 0, 0};
+
+// A corrupt libcxx string which points to garbage and has a crazy length.
+static unsigned char garbage_string_long[] = {185, 52, 168, 29, 1, 0, 0, 0, 168, 61, 175, 29, 1, 0, 0, 0, 104, 222, 174, 29, 1, 0, 0, 0};
+
 int main()
 {
     std::wstring wempty(L"");
@@ -17,6 +24,11 @@
     std::u32string u32_string(U"🍄🍅🍆🍌");
     std::u32string u32_empty(U"");
     std::basic_string<unsigned char> uchar(5, 'a');
+    std::string garbage1, garbage2;
+    if (sizeof(std::string) == sizeof(garbage_string_sso))
+      memcpy((void *)&garbage1, &garbage_string_sso, sizeof(std::string));
+    if (sizeof(std::string) == sizeof(garbage_string_long))
+      memcpy((void *)&garbage2, &garbage_string_long, sizeof(std::string));
     S.assign(L"!!!!!"); // Set break point at this line.
     return 0;
 }
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to