vsk updated this revision to Diff 242461. vsk added a reviewer: LLDB. vsk added a comment.
- Enhance test coverage with several more examples of garbage long-mode std::strings. - In several cases, when we detected an invalid string, we would fall back to spewing the "raw mode" representation of the ValueObject to the user. Use a ScopeExit to make sure we never do that. - Detect when the `GetPointeeData` call for the long-mode payload fails, and do not attempt to read the full payload when this happens. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D73860/new/ https://reviews.llvm.org/D73860 Files: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp lldb/source/DataFormatters/StringPrinter.cpp lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp =================================================================== --- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp +++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp @@ -527,6 +527,16 @@ size = (layout == eLibcxxStringLayoutModeDSC) ? size_mode_value : ((size_mode_value >> 1) % 256); + + // When the small-string optimization takes place, the data must fit in the + // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's + // likely that the string isn't initialized and we're reading garbage. + ExecutionContext exe_ctx(location_sp->GetExecutionContextRef()); + auto max_bytes = location_sp->GetCompilerType().GetByteSize( + exe_ctx.GetBestExecutionContextScope()); + if (size > max_bytes) + return false; + return (location_sp.get() != nullptr); } else { ValueObjectSP l(D->GetChildAtIndex(0, true)); @@ -537,9 +547,16 @@ ? layout_decider : l->GetChildAtIndex(2, true); ValueObjectSP size_vo(l->GetChildAtIndex(1, true)); - if (!size_vo || !location_sp) + unsigned cap_index = (layout == eLibcxxStringLayoutModeDSC) ? 2 : 0; + ValueObjectSP cap_vo(l->GetChildAtIndex(cap_index, true)); + if (!size_vo || !location_sp || !cap_vo) + return false; + size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); + if (size == LLDB_INVALID_OFFSET) + return false; + uint64_t cap = cap_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET); + if (cap == LLDB_INVALID_OFFSET || cap < size) return false; - size = size_vo->GetValueAsUnsigned(0); return true; } } @@ -549,14 +566,22 @@ const TypeSummaryOptions &summary_options) { uint64_t size = 0; ValueObjectSP location_sp; + + // Unless we specifically disable this scope exit callable (i.e. unless we + // successfully read the string), explain that a summary is unavailable + // instead of spewing out the raw ValueObject to the user. + auto set_summary_unavailable = llvm::make_scope_exit( + [&stream] { stream.Printf("Summary Unavailable"); }); + if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) - return false; + return true; if (size == 0) { stream.Printf("L\"\""); + set_summary_unavailable.release(); return true; } if (!location_sp) - return false; + return true; DataExtractor extractor; @@ -569,50 +594,54 @@ options.SetIsTruncated(true); } } - location_sp->GetPointeeData(extractor, 0, size); + size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size); + if (bytes_read < size) + return true; // std::wstring::size() is measured in 'characters', not bytes TypeSystemClang *ast_context = TypeSystemClang::GetScratch(*valobj.GetTargetSP()); if (!ast_context) - return false; + return true; auto wchar_t_size = ast_context->GetBasicType(lldb::eBasicTypeWChar).GetByteSize(nullptr); if (!wchar_t_size) - return false; + return true; + StreamString scratch_stream; + options.SetStream(&scratch_stream); options.SetData(extractor); - options.SetStream(&stream); options.SetPrefixToken("L"); options.SetQuote('"'); options.SetSourceSize(size); options.SetBinaryZeroIsTerminator(false); + bool success = false; switch (*wchar_t_size) { case 1: - StringPrinter::ReadBufferAndDumpToStream< + success = StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF8>( options); break; case 2: - lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< + success = StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF16>( options); break; case 4: - lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream< + success = StringPrinter::ReadBufferAndDumpToStream< lldb_private::formatters::StringPrinter::StringElementType::UTF32>( options); break; - - default: - stream.Printf("size for wchar_t is not valid"); - return true; } + if (success) { + stream << scratch_stream.GetData(); + set_summary_unavailable.release(); + } return true; } @@ -623,16 +652,23 @@ uint64_t size = 0; ValueObjectSP location_sp; + // Unless we specifically disable this scope exit callable (i.e. unless we + // successfully read the string), explain that a summary is unavailable + // instead of spewing out the raw ValueObject to the user. + auto set_summary_unavailable = llvm::make_scope_exit( + [&stream] { stream.Printf("Summary Unavailable"); }); + if (!ExtractLibcxxStringInfo(valobj, location_sp, size)) - return false; + return true; if (size == 0) { stream.Printf("\"\""); + set_summary_unavailable.release(); return true; } if (!location_sp) - return false; + return true; StringPrinter::ReadBufferAndDumpToStreamOptions options(valobj); @@ -644,10 +680,14 @@ options.SetIsTruncated(true); } } - location_sp->GetPointeeData(extractor, 0, size); + size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size); + if (bytes_read < size) + return true; options.SetData(extractor); - options.SetStream(&stream); + + StreamString scratch_stream; + options.SetStream(&scratch_stream); if (prefix_token.empty()) options.SetPrefixToken(nullptr); @@ -657,8 +697,10 @@ options.SetQuote('"'); options.SetSourceSize(size); options.SetBinaryZeroIsTerminator(false); - StringPrinter::ReadBufferAndDumpToStream<element_type>(options); - + if (StringPrinter::ReadBufferAndDumpToStream<element_type>(options)) { + stream << scratch_stream.GetData(); + set_summary_unavailable.release(); + } return true; } Index: lldb/source/DataFormatters/StringPrinter.cpp =================================================================== --- lldb/source/DataFormatters/StringPrinter.cpp +++ lldb/source/DataFormatters/StringPrinter.cpp @@ -15,6 +15,7 @@ #include "lldb/Target/Target.h" #include "lldb/Utility/Status.h" +#include "llvm/Support/CheckedArithmetic.h" #include "llvm/Support/ConvertUTF.h" #include <ctype.h> @@ -133,12 +134,17 @@ unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer); - if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) { - // I don't have enough bytes - print whatever I have left - retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)}; - next = buffer_end + 1; + // If the utf8 encoded length is invalid, or if there aren't enough bytes to + // print, this is some kind of corrupted string. + if (utf8_encoded_len == 0 || utf8_encoded_len > 4) + return retval; + if ((buffer_end - buffer) < utf8_encoded_len) + // There's no room in the buffer for the utf8 sequence. + return retval; + if (!llvm::checkedAdd(reinterpret_cast<intptr_t>(buffer), + static_cast<intptr_t>(utf8_encoded_len))) + // Accessing the utf8 sequence would cause a pointer overflow. return retval; - } char32_t codepoint = 0; switch (utf8_encoded_len) { @@ -160,12 +166,6 @@ (unsigned char)*buffer, (unsigned char)*(buffer + 1), (unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3)); break; - default: - // this is probably some bogus non-character thing just print it as-is and - // hope to sync up again soon - retval = {buffer, 1}; - next = buffer + 1; - return retval; } if (codepoint) { @@ -215,9 +215,7 @@ return retval; } - // this should not happen - but just in case.. try to resync at some point - retval = {buffer, 1}; - next = buffer + 1; + // We couldn't figure out how to print this string. return retval; } @@ -227,7 +225,7 @@ static StringPrinter::StringPrinterBufferPointer<> GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer, uint8_t *buffer_end, uint8_t *&next) { - if (!buffer) + if (!buffer || buffer >= buffer_end) return {nullptr}; switch (type) { @@ -354,13 +352,11 @@ escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data); auto printable_bytes = printable.GetBytes(); auto printable_size = printable.GetSize(); - if (!printable_bytes || !next_data) { - // GetPrintable() failed on us - print one byte in a desperate resync - // attempt - printable_bytes = utf8_data_ptr; - printable_size = 1; - next_data = utf8_data_ptr + 1; - } + + // We failed to figure out how to print this string. + if (!printable_bytes || !next_data) + return false; + for (unsigned c = 0; c < printable_size; c++) stream.Printf("%c", *(printable_bytes + c)); utf8_data_ptr = (uint8_t *)next_data; @@ -478,13 +474,11 @@ auto printable = escaping_callback(data, data_end, next_data); auto printable_bytes = printable.GetBytes(); auto printable_size = printable.GetSize(); - if (!printable_bytes || !next_data) { - // GetPrintable() failed on us - print one byte in a desperate resync - // attempt - printable_bytes = data; - printable_size = 1; - next_data = data + 1; - } + + // We failed to figure out how to print this string. + if (!printable_bytes || !next_data) + return false; + for (unsigned c = 0; c < printable_size; c++) options.GetStream()->Printf("%c", *(printable_bytes + c)); data = (uint8_t *)next_data; Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp =================================================================== --- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp +++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp @@ -1,4 +1,58 @@ #include <string> +#include <stdint.h> + +// For more information about libc++'s std::string ABI, see: +// +// https://joellaity.com/2020/01/31/string.html + +// A corrupt string which hits the SSO code path, but has an invalid size. +static struct { + // Set the size of this short-mode string to 116. Note that in short mode, + // the size is encoded as `size << 1`. + unsigned char size = 232; + + // 23 garbage bytes for the inline string payload. + char inline_buf[23] = {0}; +} garbage_string_short_mode; + +// A corrupt libcxx string in long mode whose payload contains a utf8 sequence +// that's inherently too long. +static unsigned char garbage_utf8_payload1[] = { + 250 // This means that we expect a 5-byte sequence, this is invalid. +}; +static struct { + uint64_t cap = 5; + uint64_t size = 4; + unsigned char *data = &garbage_utf8_payload1[0]; +} garbage_string_long_mode1; + +// A corrupt libcxx string in long mode whose payload contains a utf8 sequence +// that's too long to fit in the buffer. +static unsigned char garbage_utf8_payload2[] = { + 240 // This means that we expect a 4-byte sequence, but the buffer is too + // small for this. +}; +static struct { + uint64_t cap = 3; + uint64_t size = 2; + unsigned char *data = &garbage_utf8_payload1[0]; +} garbage_string_long_mode2; + +// A corrupt libcxx string which has an invalid size (i.e. a size greater than +// the capacity of the string). +static struct { + uint64_t cap = 5; + uint64_t size = 7; + const char *data = "foo"; +} garbage_string_long_mode3; + +// A corrupt libcxx string in long mode whose payload would trigger a buffer +// overflow. +static struct { + uint64_t cap = 5; + uint64_t size = 2; + uint64_t data = 0xfffffffffffffffeULL; +} garbage_string_long_mode4; int main() { @@ -17,6 +71,23 @@ std::u32string u32_string(U"🍄🍅🍆🍌"); std::u32string u32_empty(U""); std::basic_string<unsigned char> uchar(5, 'a'); + +#if _LIBCPP_ABI_VERSION == 1 + std::string garbage1, garbage2, garbage3, garbage4, garbage5; + if (sizeof(std::string) == sizeof(garbage_string_short_mode)) + memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode1)) + memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode2)) + memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode3)) + memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode4)) + memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string)); +#else +#error "Test potentially needs to be updated for a new std::string ABI." +#endif + S.assign(L"!!!!!"); // Set break point at this line. return 0; } Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py =================================================================== --- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py +++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py @@ -51,6 +51,8 @@ "settings set target.max-children-count 256", check=False) + is_64_bit = self.process().GetAddressByteSize() == 8 + # Execute the cleanup function during test case tear down. self.addTearDownHook(cleanup) @@ -114,3 +116,10 @@ '(%s::basic_string<unsigned char, %s::char_traits<unsigned char>, ' '%s::allocator<unsigned char> >) uchar = "aaaaa"'%(ns,ns,ns), ]) + + if is_64_bit: + self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable']) + self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable']) + self.expect("frame variable garbage3", substrs=['garbage3 = Summary Unavailable']) + self.expect("frame variable garbage4", substrs=['garbage4 = Summary Unavailable']) + self.expect("frame variable garbage5", substrs=['garbage5 = Summary Unavailable'])
_______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits