Author: Michael Buch Date: 2025-07-07T11:33:33+01:00 New Revision: c19c71b90593dcbb94a9592d7cf75e58c99df6da
URL: https://github.com/llvm/llvm-project/commit/c19c71b90593dcbb94a9592d7cf75e58c99df6da DIFF: https://github.com/llvm/llvm-project/commit/c19c71b90593dcbb94a9592d7cf75e58c99df6da.diff LOG: [lldb][test] Split out libc++ std::string tests that check corrupted strings (#147252) As a prerequisite to combining the libcxx and libstdcxx string formatter tests (see https://github.com/llvm/llvm-project/pull/146740), this patch splits out the libcxx-specific parts into a separate test. These are probably best tested with the libcxx-simulator tests, but for now I just moved them. Added: lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp Modified: lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp Removed: ################################################################################ diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile new file mode 100644 index 0000000000000..c5df567e01a2a --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile @@ -0,0 +1,5 @@ +CXX_SOURCES := main.cpp + +USE_LIBCPP := 1 + +include Makefile.rules diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py new file mode 100644 index 0000000000000..ae8e0ac08c2b0 --- /dev/null +++ 
b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py @@ -0,0 +1,38 @@ +""" +Test lldb behaves sanely when formatting corrupted `std::string`s. +""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class LibcxxInvalidStringDataFormatterTestCase(TestBase): + @add_test_categories(["libc++"]) + @skipIf(oslist=[lldbplatformutil.getDarwinOSTriples()], archs=["arm", "aarch64"]) + def test(self): + self.build() + + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "Set break point at this line.", lldb.SBFileSpec("main.cpp") + ) + frame = thread.frames[0] + + if not self.process().GetAddressByteSize() == 8: + self.skip() + + # The test assumes that std::string is in its cap-size-data layout. + self.expect( + "frame variable garbage1", substrs=["garbage1 = Summary Unavailable"] + ) + self.expect( + "frame variable garbage2", substrs=[r'garbage2 = "\xfa\xfa\xfa\xfa"'] + ) + self.expect("frame variable garbage3", substrs=[r'garbage3 = "\xf0\xf0"']) + self.expect( + "frame variable garbage4", substrs=["garbage4 = Summary Unavailable"] + ) + self.expect( + "frame variable garbage5", substrs=["garbage5 = Summary Unavailable"] + ) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp new file mode 100644 index 0000000000000..eb3efe1bcb7ef --- /dev/null +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp @@ -0,0 +1,110 @@ +#include <cstdio> +#include <cstdlib> +#include <stdint.h> +#include <string> + +// For more information about libc++'s std::string ABI, see: +// +// https://joellaity.com/2020/01/31/string.html + +// A corrupt string which hits the SSO code path, but has an invalid size. 
+static struct { +#if _LIBCPP_ABI_VERSION == 1 + // Set the size of this short-mode string to 116. Note that in short mode, + // the size is encoded as `size << 1`. + unsigned char size = 232; + + // 23 garbage bytes for the inline string payload. + char inline_buf[23] = {0}; +#else // _LIBCPP_ABI_VERSION == 1 + // Like above, but data comes first, and use bitfields to indicate size. + char inline_buf[23] = {0}; + unsigned char size : 7 = 116; + unsigned char is_long : 1 = 0; +#endif // #if _LIBCPP_ABI_VERSION == 1 +} garbage_string_short_mode; + +// A corrupt libcxx string in long mode with a payload that contains a utf8 +// sequence that's inherently too long. +static unsigned char garbage_utf8_payload1[] = { + 250, // This means that we expect a 5-byte sequence, this is invalid. LLDB + // should fall back to ASCII printing. + 250, 250, 250}; +static struct { +#if _LIBCPP_ABI_VERSION == 1 + uint64_t cap = 5; + uint64_t size = 4; + unsigned char *data = &garbage_utf8_payload1[0]; +#else // _LIBCPP_ABI_VERSION == 1 + unsigned char *data = &garbage_utf8_payload1[0]; + uint64_t size = 4; + uint64_t cap : 63 = 4; + uint64_t is_long : 1 = 1; +#endif // #if _LIBCPP_ABI_VERSION == 1 +} garbage_string_long_mode1; + +// A corrupt libcxx string in long mode with a payload that contains a utf8 +// sequence that's too long to fit in the buffer. +static unsigned char garbage_utf8_payload2[] = { + 240, // This means that we expect a 4-byte sequence, but the buffer is too + // small for this. LLDB should fall back to ASCII printing. + 240}; +static struct { +#if _LIBCPP_ABI_VERSION == 1 + uint64_t cap = 3; + uint64_t size = 2; + unsigned char *data = &garbage_utf8_payload2[0]; +#else // _LIBCPP_ABI_VERSION == 1 + unsigned char *data = &garbage_utf8_payload2[0]; + uint64_t size = 2; + uint64_t cap : 63 = 3; + uint64_t is_long : 1 = 1; +#endif // #if _LIBCPP_ABI_VERSION == 1 +} garbage_string_long_mode2; + +// A corrupt libcxx string which has an invalid size (i.e. 
a size greater than +// the capacity of the string). +static struct { +#if _LIBCPP_ABI_VERSION == 1 + uint64_t cap = 5; + uint64_t size = 7; + const char *data = "foo"; +#else // _LIBCPP_ABI_VERSION == 1 + const char *data = "foo"; + uint64_t size = 7; + uint64_t cap : 63 = 5; + uint64_t is_long : 1 = 1; +#endif // #if _LIBCPP_ABI_VERSION == 1 +} garbage_string_long_mode3; + +// A corrupt libcxx string in long mode with a payload that would trigger a +// buffer overflow. +static struct { +#if _LIBCPP_ABI_VERSION == 1 + uint64_t cap = 5; + uint64_t size = 2; + uint64_t data = 0xfffffffffffffffeULL; +#else // _LIBCPP_ABI_VERSION == 1 + uint64_t data = 0xfffffffffffffffeULL; + uint64_t size = 2; + uint64_t cap : 63 = 5; + uint64_t is_long : 1 = 1; +#endif // #if _LIBCPP_ABI_VERSION == 1 +} garbage_string_long_mode4; + +int main() { + std::string garbage1, garbage2, garbage3, garbage4, garbage5; + if (sizeof(std::string) == sizeof(garbage_string_short_mode)) + memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode1)) + memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode2)) + memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode3)) + memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string)); + if (sizeof(std::string) == sizeof(garbage_string_long_mode4)) + memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string)); + + std::puts("// Set break point at this line."); + return 0; +} diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py index 32764629d65a7..2f7fc88e0f449 100644 --- 
a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py @@ -42,8 +42,6 @@ def cleanup(): self.runCmd("type filter clear", check=False) self.runCmd("type synth clear", check=False) - is_64_bit = self.process().GetAddressByteSize() == 8 - # Execute the cleanup function during test case tear down. self.addTearDownHook(cleanup) @@ -126,25 +124,6 @@ def cleanup(): ], ) - # The test assumes that std::string is in its cap-size-data layout. - is_alternate_layout = ( - "arm" in self.getArchitecture() - ) and self.platformIsDarwin() - if is_64_bit and not is_alternate_layout: - self.expect( - "frame variable garbage1", substrs=["garbage1 = Summary Unavailable"] - ) - self.expect( - "frame variable garbage2", substrs=[r'garbage2 = "\xfa\xfa\xfa\xfa"'] - ) - self.expect("frame variable garbage3", substrs=[r'garbage3 = "\xf0\xf0"']) - self.expect( - "frame variable garbage4", substrs=["garbage4 = Summary Unavailable"] - ) - self.expect( - "frame variable garbage5", substrs=["garbage5 = Summary Unavailable"] - ) - # Finally, make sure that if the string is not readable, we give an error: bkpt_2 = target.BreakpointCreateBySourceRegex( "Break here to look at bad string", self.main_spec diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp index f9f1c0802e518..373f817a80f7d 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp @@ -1,97 +1,6 @@ #include <string> #include <stdint.h> -// For more information about libc++'s std::string ABI, see: -// -// https://joellaity.com/2020/01/31/string.html - -// A corrupt string which hits the SSO code 
path, but has an invalid size. -static struct { -#if _LIBCPP_ABI_VERSION == 1 - // Set the size of this short-mode string to 116. Note that in short mode, - // the size is encoded as `size << 1`. - unsigned char size = 232; - - // 23 garbage bytes for the inline string payload. - char inline_buf[23] = {0}; -#else // _LIBCPP_ABI_VERSION == 1 - // Like above, but data comes first, and use bitfields to indicate size. - char inline_buf[23] = {0}; - unsigned char size : 7 = 116; - unsigned char is_long : 1 = 0; -#endif // #if _LIBCPP_ABI_VERSION == 1 -} garbage_string_short_mode; - -// A corrupt libcxx string in long mode with a payload that contains a utf8 -// sequence that's inherently too long. -static unsigned char garbage_utf8_payload1[] = { - 250, // This means that we expect a 5-byte sequence, this is invalid. LLDB - // should fall back to ASCII printing. - 250, 250, 250 -}; -static struct { -#if _LIBCPP_ABI_VERSION == 1 - uint64_t cap = 5; - uint64_t size = 4; - unsigned char *data = &garbage_utf8_payload1[0]; -#else // _LIBCPP_ABI_VERSION == 1 - unsigned char *data = &garbage_utf8_payload1[0]; - uint64_t size = 4; - uint64_t cap : 63 = 4; - uint64_t is_long : 1 = 1; -#endif // #if _LIBCPP_ABI_VERSION == 1 -} garbage_string_long_mode1; - -// A corrupt libcxx string in long mode with a payload that contains a utf8 -// sequence that's too long to fit in the buffer. -static unsigned char garbage_utf8_payload2[] = { - 240, // This means that we expect a 4-byte sequence, but the buffer is too - // small for this. LLDB should fall back to ASCII printing. 
- 240 -}; -static struct { -#if _LIBCPP_ABI_VERSION == 1 - uint64_t cap = 3; - uint64_t size = 2; - unsigned char *data = &garbage_utf8_payload2[0]; -#else // _LIBCPP_ABI_VERSION == 1 - unsigned char *data = &garbage_utf8_payload2[0]; - uint64_t size = 2; - uint64_t cap : 63 = 3; - uint64_t is_long : 1 = 1; -#endif // #if _LIBCPP_ABI_VERSION == 1 -} garbage_string_long_mode2; - -// A corrupt libcxx string which has an invalid size (i.e. a size greater than -// the capacity of the string). -static struct { -#if _LIBCPP_ABI_VERSION == 1 - uint64_t cap = 5; - uint64_t size = 7; - const char *data = "foo"; -#else // _LIBCPP_ABI_VERSION == 1 - const char *data = "foo"; - uint64_t size = 7; - uint64_t cap : 63 = 5; - uint64_t is_long : 1 = 1; -#endif // #if _LIBCPP_ABI_VERSION == 1 -} garbage_string_long_mode3; - -// A corrupt libcxx string in long mode with a payload that would trigger a -// buffer overflow. -static struct { -#if _LIBCPP_ABI_VERSION == 1 - uint64_t cap = 5; - uint64_t size = 2; - uint64_t data = 0xfffffffffffffffeULL; -#else // _LIBCPP_ABI_VERSION == 1 - uint64_t data = 0xfffffffffffffffeULL; - uint64_t size = 2; - uint64_t cap : 63 = 5; - uint64_t is_long : 1 = 1; -#endif // #if _LIBCPP_ABI_VERSION == 1 -} garbage_string_long_mode4; - size_t touch_string(std::string &in_str) { return in_str.size(); // Break here to look at bad string @@ -115,18 +24,6 @@ int main() std::u32string u32_empty(U""); std::string *null_str = nullptr; - std::string garbage1, garbage2, garbage3, garbage4, garbage5; - if (sizeof(std::string) == sizeof(garbage_string_short_mode)) - memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string)); - if (sizeof(std::string) == sizeof(garbage_string_long_mode1)) - memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string)); - if (sizeof(std::string) == sizeof(garbage_string_long_mode2)) - memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string)); - if (sizeof(std::string) == 
sizeof(garbage_string_long_mode3)) - memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string)); - if (sizeof(std::string) == sizeof(garbage_string_long_mode4)) - memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string)); - S.assign(L"!!!!!"); // Set break point at this line. std::string *not_a_string = (std::string *) 0x0; touch_string(*not_a_string); _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits