vsk updated this revision to Diff 242461.
vsk added a reviewer: LLDB.
vsk added a comment.
- Enhance test coverage with several more examples of garbage long-mode
std::strings.
- In several cases, when we detected an invalid string, we would fall back to
spewing the "raw mode" representation of the ValueObject to the user. Use a
ScopeExit to make sure we never do that.
- Detect when the `GetPointeeData` call for the long-mode payload fails, and do
not attempt to read the full payload when this happens.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D73860/new/
https://reviews.llvm.org/D73860
Files:
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
lldb/source/DataFormatters/StringPrinter.cpp
lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
===================================================================
--- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -527,6 +527,16 @@
size = (layout == eLibcxxStringLayoutModeDSC)
? size_mode_value
: ((size_mode_value >> 1) % 256);
+
+ // When the small-string optimization takes place, the data must fit in the
+ // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
+ // likely that the string isn't initialized and we're reading garbage.
+ ExecutionContext exe_ctx(location_sp->GetExecutionContextRef());
+ auto max_bytes = location_sp->GetCompilerType().GetByteSize(
+ exe_ctx.GetBestExecutionContextScope());
+ if (size > max_bytes)
+ return false;
+
return (location_sp.get() != nullptr);
} else {
ValueObjectSP l(D->GetChildAtIndex(0, true));
@@ -537,9 +547,16 @@
? layout_decider
: l->GetChildAtIndex(2, true);
ValueObjectSP size_vo(l->GetChildAtIndex(1, true));
- if (!size_vo || !location_sp)
+ unsigned cap_index = (layout == eLibcxxStringLayoutModeDSC) ? 2 : 0;
+ ValueObjectSP cap_vo(l->GetChildAtIndex(cap_index, true));
+ if (!size_vo || !location_sp || !cap_vo)
+ return false;
+ size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+ if (size == LLDB_INVALID_OFFSET)
+ return false;
+ uint64_t cap = cap_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+ if (cap == LLDB_INVALID_OFFSET || cap < size)
return false;
- size = size_vo->GetValueAsUnsigned(0);
return true;
}
}
@@ -549,14 +566,22 @@
const TypeSummaryOptions &summary_options) {
uint64_t size = 0;
ValueObjectSP location_sp;
+
+ // Unless we specifically disable this scope exit callable (i.e. unless we
+ // successfully read the string), explain that a summary is unavailable
+ // instead of spewing out the raw ValueObject to the user.
+ auto set_summary_unavailable = llvm::make_scope_exit(
+ [&stream] { stream.Printf("Summary Unavailable"); });
+
if (!ExtractLibcxxStringInfo(valobj, location_sp, size))
- return false;
+ return true;
if (size == 0) {
stream.Printf("L\"\"");
+ set_summary_unavailable.release();
return true;
}
if (!location_sp)
- return false;
+ return true;
DataExtractor extractor;
@@ -569,50 +594,54 @@
options.SetIsTruncated(true);
}
}
- location_sp->GetPointeeData(extractor, 0, size);
+ size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+ if (bytes_read < size)
+ return true;
// std::wstring::size() is measured in 'characters', not bytes
TypeSystemClang *ast_context =
TypeSystemClang::GetScratch(*valobj.GetTargetSP());
if (!ast_context)
- return false;
+ return true;
auto wchar_t_size =
ast_context->GetBasicType(lldb::eBasicTypeWChar).GetByteSize(nullptr);
if (!wchar_t_size)
- return false;
+ return true;
+ StreamString scratch_stream;
+ options.SetStream(&scratch_stream);
options.SetData(extractor);
- options.SetStream(&stream);
options.SetPrefixToken("L");
options.SetQuote('"');
options.SetSourceSize(size);
options.SetBinaryZeroIsTerminator(false);
+ bool success = false;
switch (*wchar_t_size) {
case 1:
- StringPrinter::ReadBufferAndDumpToStream<
+ success = StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::UTF8>(
options);
break;
case 2:
- lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+ success = StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::UTF16>(
options);
break;
case 4:
- lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+ success = StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::UTF32>(
options);
break;
-
- default:
- stream.Printf("size for wchar_t is not valid");
- return true;
}
+ if (success) {
+ stream << scratch_stream.GetData();
+ set_summary_unavailable.release();
+ }
return true;
}
@@ -623,16 +652,23 @@
uint64_t size = 0;
ValueObjectSP location_sp;
+ // Unless we specifically disable this scope exit callable (i.e. unless we
+ // successfully read the string), explain that a summary is unavailable
+ // instead of spewing out the raw ValueObject to the user.
+ auto set_summary_unavailable = llvm::make_scope_exit(
+ [&stream] { stream.Printf("Summary Unavailable"); });
+
if (!ExtractLibcxxStringInfo(valobj, location_sp, size))
- return false;
+ return true;
if (size == 0) {
stream.Printf("\"\"");
+ set_summary_unavailable.release();
return true;
}
if (!location_sp)
- return false;
+ return true;
StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj);
@@ -644,10 +680,14 @@
options.SetIsTruncated(true);
}
}
- location_sp->GetPointeeData(extractor, 0, size);
+ size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+ if (bytes_read < size)
+ return true;
options.SetData(extractor);
- options.SetStream(&stream);
+
+ StreamString scratch_stream;
+ options.SetStream(&scratch_stream);
if (prefix_token.empty())
options.SetPrefixToken(nullptr);
@@ -657,8 +697,10 @@
options.SetQuote('"');
options.SetSourceSize(size);
options.SetBinaryZeroIsTerminator(false);
- StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
-
+ if (StringPrinter::ReadBufferAndDumpToStream<element_type>(options)) {
+ stream << scratch_stream.GetData();
+ set_summary_unavailable.release();
+ }
return true;
}
Index: lldb/source/DataFormatters/StringPrinter.cpp
===================================================================
--- lldb/source/DataFormatters/StringPrinter.cpp
+++ lldb/source/DataFormatters/StringPrinter.cpp
@@ -15,6 +15,7 @@
#include "lldb/Target/Target.h"
#include "lldb/Utility/Status.h"
+#include "llvm/Support/CheckedArithmetic.h"
#include "llvm/Support/ConvertUTF.h"
#include <ctype.h>
@@ -133,12 +134,17 @@
unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
- if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) {
- // I don't have enough bytes - print whatever I have left
- retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)};
- next = buffer_end + 1;
+ // If the utf8 encoded length is invalid, or if there aren't enough bytes to
+ // print, this is some kind of corrupted string.
+ if (utf8_encoded_len == 0 || utf8_encoded_len > 4)
+ return retval;
+ if ((buffer_end - buffer) < utf8_encoded_len)
+ // There's no room in the buffer for the utf8 sequence.
+ return retval;
+ if (!llvm::checkedAdd(reinterpret_cast<intptr_t>(buffer),
+ static_cast<intptr_t>(utf8_encoded_len)))
+ // Accessing the utf8 sequence would cause a pointer overflow.
return retval;
- }
char32_t codepoint = 0;
switch (utf8_encoded_len) {
@@ -160,12 +166,6 @@
(unsigned char)*buffer, (unsigned char)*(buffer + 1),
(unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
break;
- default:
- // this is probably some bogus non-character thing just print it as-is and
- // hope to sync up again soon
- retval = {buffer, 1};
- next = buffer + 1;
- return retval;
}
if (codepoint) {
@@ -215,9 +215,7 @@
return retval;
}
- // this should not happen - but just in case.. try to resync at some point
- retval = {buffer, 1};
- next = buffer + 1;
+ // We couldn't figure out how to print this string.
return retval;
}
@@ -227,7 +225,7 @@
static StringPrinter::StringPrinterBufferPointer<>
GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
uint8_t *buffer_end, uint8_t *&next) {
- if (!buffer)
+ if (!buffer || buffer >= buffer_end)
return {nullptr};
switch (type) {
@@ -354,13 +352,11 @@
escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
- if (!printable_bytes || !next_data) {
- // GetPrintable() failed on us - print one byte in a desperate resync
- // attempt
- printable_bytes = utf8_data_ptr;
- printable_size = 1;
- next_data = utf8_data_ptr + 1;
- }
+
+ // We failed to figure out how to print this string.
+ if (!printable_bytes || !next_data)
+ return false;
+
for (unsigned c = 0; c < printable_size; c++)
stream.Printf("%c", *(printable_bytes + c));
utf8_data_ptr = (uint8_t *)next_data;
@@ -478,13 +474,11 @@
auto printable = escaping_callback(data, data_end, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
- if (!printable_bytes || !next_data) {
- // GetPrintable() failed on us - print one byte in a desperate resync
- // attempt
- printable_bytes = data;
- printable_size = 1;
- next_data = data + 1;
- }
+
+ // We failed to figure out how to print this string.
+ if (!printable_bytes || !next_data)
+ return false;
+
for (unsigned c = 0; c < printable_size; c++)
options.GetStream()->Printf("%c", *(printable_bytes + c));
data = (uint8_t *)next_data;
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,4 +1,58 @@
#include <string>
+#include <stdint.h>
+
+// For more information about libc++'s std::string ABI, see:
+//
+// https://joellaity.com/2020/01/31/string.html
+
+// A corrupt string which hits the SSO code path, but has an invalid size.
+static struct {
+ // Set the size of this short-mode string to 116. Note that in short mode,
+ // the size is encoded as `size << 1`.
+ unsigned char size = 232;
+
+ // 23 garbage bytes for the inline string payload.
+ char inline_buf[23] = {0};
+} garbage_string_short_mode;
+
+// A corrupt libcxx string in long mode whose payload contains a utf8 sequence
+// that's inherently too long.
+static unsigned char garbage_utf8_payload1[] = {
+ 250 // This means that we expect a 5-byte sequence, this is invalid.
+};
+static struct {
+ uint64_t cap = 5;
+ uint64_t size = 4;
+ unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode1;
+
+// A corrupt libcxx string in long mode whose payload contains a utf8 sequence
+// that's too long to fit in the buffer.
+static unsigned char garbage_utf8_payload2[] = {
+ 240 // This means that we expect a 4-byte sequence, but the buffer is too
+ // small for this.
+};
+static struct {
+ uint64_t cap = 3;
+ uint64_t size = 2;
+ unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode2;
+
+// A corrupt libcxx string which has an invalid size (i.e. a size greater than
+// the capacity of the string).
+static struct {
+ uint64_t cap = 5;
+ uint64_t size = 7;
+ const char *data = "foo";
+} garbage_string_long_mode3;
+
+// A corrupt libcxx string in long mode whose payload would trigger a buffer
+// overflow.
+static struct {
+ uint64_t cap = 5;
+ uint64_t size = 2;
+ uint64_t data = 0xfffffffffffffffeULL;
+} garbage_string_long_mode4;
int main()
{
@@ -17,6 +71,23 @@
std::u32string u32_string(U"🍄🍅🍆🍌");
std::u32string u32_empty(U"");
std::basic_string<unsigned char> uchar(5, 'a');
+
+#if _LIBCPP_ABI_VERSION == 1
+ std::string garbage1, garbage2, garbage3, garbage4, garbage5;
+ if (sizeof(std::string) == sizeof(garbage_string_short_mode))
+ memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
+ memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
+ memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
+ memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
+ memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
+#else
+#error "Test potentially needs to be updated for a new std::string ABI."
+#endif
+
S.assign(L"!!!!!"); // Set break point at this line.
return 0;
}
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
@@ -51,6 +51,8 @@
"settings set target.max-children-count 256",
check=False)
+ is_64_bit = self.process().GetAddressByteSize() == 8
+
# Execute the cleanup function during test case tear down.
self.addTearDownHook(cleanup)
@@ -114,3 +116,10 @@
'(%s::basic_string<unsigned char, %s::char_traits<unsigned char>, '
'%s::allocator<unsigned char> >) uchar = "aaaaa"'%(ns,ns,ns),
])
+
+ if is_64_bit:
+ self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable'])
+ self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable'])
+ self.expect("frame variable garbage3", substrs=['garbage3 = Summary Unavailable'])
+ self.expect("frame variable garbage4", substrs=['garbage4 = Summary Unavailable'])
+ self.expect("frame variable garbage5", substrs=['garbage5 = Summary Unavailable'])
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits