vsk updated this revision to Diff 242661.
vsk marked 4 inline comments as done.
vsk added a comment.
- `s/cap/capacity/`
- Remove the `checkedAdd` pointer overflow check as it's not necessary.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D73860/new/
https://reviews.llvm.org/D73860
Files:
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
lldb/source/DataFormatters/StringPrinter.cpp
lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
Index: lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
===================================================================
--- lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
+++ lldb/source/Plugins/Language/CPlusPlus/LibCxx.cpp
@@ -8,7 +8,6 @@
#include "LibCxx.h"
-#include "llvm/ADT/ScopeExit.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Core/FormatEntity.h"
#include "lldb/Core/ValueObject.h"
@@ -527,6 +526,17 @@
size = (layout == eLibcxxStringLayoutModeDSC)
? size_mode_value
: ((size_mode_value >> 1) % 256);
+
+ // When the small-string optimization takes place, the data must fit in the
+ // inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
+ // likely that the string isn't initialized and we're reading garbage.
+ ExecutionContext exe_ctx(location_sp->GetExecutionContextRef());
+ const llvm::Optional<uint64_t> max_bytes =
+ location_sp->GetCompilerType().GetByteSize(
+ exe_ctx.GetBestExecutionContextScope());
+ if (!max_bytes || size > *max_bytes)
+ return false;
+
return (location_sp.get() != nullptr);
} else {
ValueObjectSP l(D->GetChildAtIndex(0, true));
@@ -537,9 +547,15 @@
? layout_decider
: l->GetChildAtIndex(2, true);
ValueObjectSP size_vo(l->GetChildAtIndex(1, true));
- if (!size_vo || !location_sp)
+ const unsigned capacity_index =
+ (layout == eLibcxxStringLayoutModeDSC) ? 2 : 0;
+ ValueObjectSP capacity_vo(l->GetChildAtIndex(capacity_index, true));
+ if (!size_vo || !location_sp || !capacity_vo)
+ return false;
+ size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+ const uint64_t cap = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
+ if (size == LLDB_INVALID_OFFSET || cap == LLDB_INVALID_OFFSET || cap < size)
return false;
- size = size_vo->GetValueAsUnsigned(0);
return true;
}
}
@@ -569,7 +585,9 @@
options.SetIsTruncated(true);
}
}
- location_sp->GetPointeeData(extractor, 0, size);
+ const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+ if (bytes_read < size)
+ return false;
// std::wstring::size() is measured in 'characters', not bytes
TypeSystemClang *ast_context =
@@ -591,41 +609,33 @@
switch (*wchar_t_size) {
case 1:
- StringPrinter::ReadBufferAndDumpToStream<
+ return StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::UTF8>(
options);
break;
case 2:
- lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+ return StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::UTF16>(
options);
break;
case 4:
- lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStream<
+ return StringPrinter::ReadBufferAndDumpToStream<
lldb_private::formatters::StringPrinter::StringElementType::UTF32>(
options);
- break;
-
- default:
- stream.Printf("size for wchar_t is not valid");
- return true;
}
-
- return true;
+ return false;
}
template <StringPrinter::StringElementType element_type>
bool LibcxxStringSummaryProvider(ValueObject &valobj, Stream &stream,
const TypeSummaryOptions &summary_options,
- std::string prefix_token = "") {
+ std::string prefix_token) {
uint64_t size = 0;
ValueObjectSP location_sp;
-
if (!ExtractLibcxxStringInfo(valobj, location_sp, size))
return false;
-
if (size == 0) {
stream.Printf("\"\"");
return true;
@@ -644,7 +654,9 @@
options.SetIsTruncated(true);
}
}
- location_sp->GetPointeeData(extractor, 0, size);
+ const size_t bytes_read = location_sp->GetPointeeData(extractor, 0, size);
+ if (bytes_read < size)
+ return false;
options.SetData(extractor);
options.SetStream(&stream);
@@ -657,28 +669,40 @@
options.SetQuote('"');
options.SetSourceSize(size);
options.SetBinaryZeroIsTerminator(false);
- StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
+ return StringPrinter::ReadBufferAndDumpToStream<element_type>(options);
+}
+template <StringPrinter::StringElementType element_type>
+static bool formatStringImpl(ValueObject &valobj, Stream &stream,
+ const TypeSummaryOptions &summary_options,
+ std::string prefix_token) {
+ StreamString scratch_stream;
+ const bool success = LibcxxStringSummaryProvider<element_type>(
+ valobj, scratch_stream, summary_options, prefix_token);
+ if (success)
+ stream << scratch_stream.GetData();
+ else
+ stream << "Summary Unavailable";
return true;
}
bool lldb_private::formatters::LibcxxStringSummaryProviderASCII(
ValueObject &valobj, Stream &stream,
const TypeSummaryOptions &summary_options) {
- return LibcxxStringSummaryProvider<StringPrinter::StringElementType::ASCII>(
- valobj, stream, summary_options);
+ return formatStringImpl<StringPrinter::StringElementType::ASCII>(
+ valobj, stream, summary_options, "");
}
bool lldb_private::formatters::LibcxxStringSummaryProviderUTF16(
ValueObject &valobj, Stream &stream,
const TypeSummaryOptions &summary_options) {
- return LibcxxStringSummaryProvider<StringPrinter::StringElementType::UTF16>(
+ return formatStringImpl<StringPrinter::StringElementType::UTF16>(
valobj, stream, summary_options, "u");
}
bool lldb_private::formatters::LibcxxStringSummaryProviderUTF32(
ValueObject &valobj, Stream &stream,
const TypeSummaryOptions &summary_options) {
- return LibcxxStringSummaryProvider<StringPrinter::StringElementType::UTF32>(
+ return formatStringImpl<StringPrinter::StringElementType::UTF32>(
valobj, stream, summary_options, "U");
}
Index: lldb/source/DataFormatters/StringPrinter.cpp
===================================================================
--- lldb/source/DataFormatters/StringPrinter.cpp
+++ lldb/source/DataFormatters/StringPrinter.cpp
@@ -131,14 +131,15 @@
uint8_t *&next) {
StringPrinter::StringPrinterBufferPointer<> retval{nullptr};
- unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
+ const unsigned utf8_encoded_len = llvm::getNumBytesForUTF8(*buffer);
- if (1u + std::distance(buffer, buffer_end) < utf8_encoded_len) {
- // I don't have enough bytes - print whatever I have left
- retval = {buffer, static_cast<size_t>(1 + buffer_end - buffer)};
- next = buffer_end + 1;
+ // If the utf8 encoded length is invalid, or if there aren't enough bytes to
+ // print, this is some kind of corrupted string.
+ if (utf8_encoded_len == 0 || utf8_encoded_len > 4)
+ return retval;
+ if ((buffer_end - buffer) < utf8_encoded_len)
+ // There's no room in the buffer for the utf8 sequence.
return retval;
- }
char32_t codepoint = 0;
switch (utf8_encoded_len) {
@@ -160,12 +161,6 @@
(unsigned char)*buffer, (unsigned char)*(buffer + 1),
(unsigned char)*(buffer + 2), (unsigned char)*(buffer + 3));
break;
- default:
- // this is probably some bogus non-character thing just print it as-is and
- // hope to sync up again soon
- retval = {buffer, 1};
- next = buffer + 1;
- return retval;
}
if (codepoint) {
@@ -215,9 +210,7 @@
return retval;
}
- // this should not happen - but just in case.. try to resync at some point
- retval = {buffer, 1};
- next = buffer + 1;
+ // We couldn't figure out how to print this string.
return retval;
}
@@ -227,7 +220,7 @@
static StringPrinter::StringPrinterBufferPointer<>
GetPrintable(StringPrinter::StringElementType type, uint8_t *buffer,
uint8_t *buffer_end, uint8_t *&next) {
- if (!buffer)
+ if (!buffer || buffer >= buffer_end)
return {nullptr};
switch (type) {
@@ -354,13 +347,11 @@
escaping_callback(utf8_data_ptr, utf8_data_end_ptr, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
- if (!printable_bytes || !next_data) {
- // GetPrintable() failed on us - print one byte in a desperate resync
- // attempt
- printable_bytes = utf8_data_ptr;
- printable_size = 1;
- next_data = utf8_data_ptr + 1;
- }
+
+ // We failed to figure out how to print this string.
+ if (!printable_bytes || !next_data)
+ return false;
+
for (unsigned c = 0; c < printable_size; c++)
stream.Printf("%c", *(printable_bytes + c));
utf8_data_ptr = (uint8_t *)next_data;
@@ -478,13 +469,11 @@
auto printable = escaping_callback(data, data_end, next_data);
auto printable_bytes = printable.GetBytes();
auto printable_size = printable.GetSize();
- if (!printable_bytes || !next_data) {
- // GetPrintable() failed on us - print one byte in a desperate resync
- // attempt
- printable_bytes = data;
- printable_size = 1;
- next_data = data + 1;
- }
+
+ // We failed to figure out how to print this string.
+ if (!printable_bytes || !next_data)
+ return false;
+
for (unsigned c = 0; c < printable_size; c++)
options.GetStream()->Printf("%c", *(printable_bytes + c));
data = (uint8_t *)next_data;
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,4 +1,58 @@
#include <string>
+#include <stdint.h>
+
+// For more information about libc++'s std::string ABI, see:
+//
+// https://joellaity.com/2020/01/31/string.html
+
+// A corrupt string which hits the SSO code path, but has an invalid size.
+static struct {
+ // Set the size of this short-mode string to 116. Note that in short mode,
+ // the size is encoded as `size << 1`.
+ unsigned char size = 232;
+
+ // 23 garbage bytes for the inline string payload.
+ char inline_buf[23] = {0};
+} garbage_string_short_mode;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's inherently too long.
+static unsigned char garbage_utf8_payload1[] = {
+ 250 // This means that we expect a 5-byte sequence, this is invalid.
+};
+static struct {
+ uint64_t cap = 5;
+ uint64_t size = 4;
+ unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode1;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's too long to fit in the buffer.
+static unsigned char garbage_utf8_payload2[] = {
+ 240 // This means that we expect a 4-byte sequence, but the buffer is too
+ // small for this.
+};
+static struct {
+ uint64_t cap = 3;
+ uint64_t size = 2;
+ unsigned char *data = &garbage_utf8_payload1[0];
+} garbage_string_long_mode2;
+
+// A corrupt libcxx string which has an invalid size (i.e. a size greater than
+// the capacity of the string).
+static struct {
+ uint64_t cap = 5;
+ uint64_t size = 7;
+ const char *data = "foo";
+} garbage_string_long_mode3;
+
+// A corrupt libcxx string in long mode with a payload that would trigger a
+// buffer overflow.
+static struct {
+ uint64_t cap = 5;
+ uint64_t size = 2;
+ uint64_t data = 0xfffffffffffffffeULL;
+} garbage_string_long_mode4;
int main()
{
@@ -17,6 +71,23 @@
std::u32string u32_string(U"🍄🍅🍆🍌");
std::u32string u32_empty(U"");
std::basic_string<unsigned char> uchar(5, 'a');
+
+#if _LIBCPP_ABI_VERSION == 1
+ std::string garbage1, garbage2, garbage3, garbage4, garbage5;
+ if (sizeof(std::string) == sizeof(garbage_string_short_mode))
+ memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
+ memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
+ memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
+ memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
+ if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
+ memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
+#else
+#error "Test potentially needs to be updated for a new std::string ABI."
+#endif
+
S.assign(L"!!!!!"); // Set break point at this line.
return 0;
}
Index: lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
===================================================================
--- lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
+++ lldb/packages/Python/lldbsuite/test/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
@@ -51,6 +51,8 @@
"settings set target.max-children-count 256",
check=False)
+ is_64_bit = self.process().GetAddressByteSize() == 8
+
# Execute the cleanup function during test case tear down.
self.addTearDownHook(cleanup)
@@ -114,3 +116,10 @@
'(%s::basic_string<unsigned char, %s::char_traits<unsigned char>, '
'%s::allocator<unsigned char> >) uchar = "aaaaa"'%(ns,ns,ns),
])
+
+ if is_64_bit:
+ self.expect("frame variable garbage1", substrs=['garbage1 = Summary Unavailable'])
+ self.expect("frame variable garbage2", substrs=['garbage2 = Summary Unavailable'])
+ self.expect("frame variable garbage3", substrs=['garbage3 = Summary Unavailable'])
+ self.expect("frame variable garbage4", substrs=['garbage4 = Summary Unavailable'])
+ self.expect("frame variable garbage5", substrs=['garbage5 = Summary Unavailable'])
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits