Author: Michael Buch
Date: 2025-07-07T11:33:33+01:00
New Revision: c19c71b90593dcbb94a9592d7cf75e58c99df6da

URL: https://github.com/llvm/llvm-project/commit/c19c71b90593dcbb94a9592d7cf75e58c99df6da
DIFF: https://github.com/llvm/llvm-project/commit/c19c71b90593dcbb94a9592d7cf75e58c99df6da.diff

LOG: [lldb][test] Split out libc++ std::string tests that check corrupted strings (#147252)

As a pre-requisite to combine the libcxx and libstdcxx string formatter
tests (see https://github.com/llvm/llvm-project/pull/146740) this patch
splits out the libcxx specific parts into a separate test.

These are probably best tested with the libcxx-simulator tests. But for
now I just moved them.

Added:
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp

Modified:
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
    lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp

Removed: 
    


################################################################################
diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile
new file mode 100644
index 0000000000000..c5df567e01a2a
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/Makefile
@@ -0,0 +1,5 @@
+CXX_SOURCES := main.cpp
+
+USE_LIBCPP := 1
+
+include Makefile.rules

diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py
new file mode 100644
index 0000000000000..ae8e0ac08c2b0
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/TestDataFormatterLibcxxInvalidString.py
@@ -0,0 +1,38 @@
+"""
+Test lldb behaves sanely when formatting corrupted `std::string`s.
+"""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class LibcxxInvalidStringDataFormatterTestCase(TestBase):
+    @add_test_categories(["libc++"])
+    @skipIf(oslist=[lldbplatformutil.getDarwinOSTriples()], archs=["arm", "aarch64"])
+    def test(self):
+        self.build()
+
+        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
+            self, "Set break point at this line.", lldb.SBFileSpec("main.cpp")
+        )
+        frame = thread.frames[0]
+
+        if not self.process().GetAddressByteSize() == 8:
+            self.skip()
+
+        # The test assumes that std::string is in its cap-size-data layout.
+        self.expect(
+            "frame variable garbage1", substrs=["garbage1 = Summary Unavailable"]
+        )
+        self.expect(
+            "frame variable garbage2", substrs=[r'garbage2 = "\xfa\xfa\xfa\xfa"']
+        )
+        self.expect("frame variable garbage3", substrs=[r'garbage3 = "\xf0\xf0"'])
+        self.expect(
+            "frame variable garbage4", substrs=["garbage4 = Summary Unavailable"]
+        )
+        self.expect(
+            "frame variable garbage5", substrs=["garbage5 = Summary Unavailable"]
+        )

diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp
new file mode 100644
index 0000000000000..eb3efe1bcb7ef
--- /dev/null
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/invalid-string/main.cpp
@@ -0,0 +1,110 @@
+#include <cstdio>
+#include <cstdlib>
+#include <stdint.h>
+#include <string>
+
+// For more information about libc++'s std::string ABI, see:
+//
+//   https://joellaity.com/2020/01/31/string.html
+
+// A corrupt string which hits the SSO code path, but has an invalid size.
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+  // Set the size of this short-mode string to 116. Note that in short mode,
+  // the size is encoded as `size << 1`.
+  unsigned char size = 232;
+
+  // 23 garbage bytes for the inline string payload.
+  char inline_buf[23] = {0};
+#else  // _LIBCPP_ABI_VERSION == 1
+  // Like above, but data comes first, and use bitfields to indicate size.
+  char inline_buf[23] = {0};
+  unsigned char size : 7 = 116;
+  unsigned char is_long : 1 = 0;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_short_mode;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's inherently too long.
+static unsigned char garbage_utf8_payload1[] = {
+    250, // This means that we expect a 5-byte sequence, this is invalid. LLDB
+         // should fall back to ASCII printing.
+    250, 250, 250};
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+  uint64_t cap = 5;
+  uint64_t size = 4;
+  unsigned char *data = &garbage_utf8_payload1[0];
+#else  // _LIBCPP_ABI_VERSION == 1
+  unsigned char *data = &garbage_utf8_payload1[0];
+  uint64_t size = 4;
+  uint64_t cap : 63 = 4;
+  uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode1;
+
+// A corrupt libcxx string in long mode with a payload that contains a utf8
+// sequence that's too long to fit in the buffer.
+static unsigned char garbage_utf8_payload2[] = {
+    240, // This means that we expect a 4-byte sequence, but the buffer is too
+         // small for this. LLDB should fall back to ASCII printing.
+    240};
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+  uint64_t cap = 3;
+  uint64_t size = 2;
+  unsigned char *data = &garbage_utf8_payload2[0];
+#else  // _LIBCPP_ABI_VERSION == 1
+  unsigned char *data = &garbage_utf8_payload2[0];
+  uint64_t size = 2;
+  uint64_t cap : 63 = 3;
+  uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode2;
+
+// A corrupt libcxx string which has an invalid size (i.e. a size greater than
+// the capacity of the string).
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+  uint64_t cap = 5;
+  uint64_t size = 7;
+  const char *data = "foo";
+#else  // _LIBCPP_ABI_VERSION == 1
+  const char *data = "foo";
+  uint64_t size = 7;
+  uint64_t cap : 63 = 5;
+  uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode3;
+
+// A corrupt libcxx string in long mode with a payload that would trigger a
+// buffer overflow.
+static struct {
+#if _LIBCPP_ABI_VERSION == 1
+  uint64_t cap = 5;
+  uint64_t size = 2;
+  uint64_t data = 0xfffffffffffffffeULL;
+#else  // _LIBCPP_ABI_VERSION == 1
+  uint64_t data = 0xfffffffffffffffeULL;
+  uint64_t size = 2;
+  uint64_t cap : 63 = 5;
+  uint64_t is_long : 1 = 1;
+#endif // #if _LIBCPP_ABI_VERSION == 1
+} garbage_string_long_mode4;
+
+int main() {
+  std::string garbage1, garbage2, garbage3, garbage4, garbage5;
+  if (sizeof(std::string) == sizeof(garbage_string_short_mode))
+    memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
+  if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
+    memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
+  if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
+    memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
+  if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
+    memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
+  if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
+    memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
+
+  std::puts("// Set break point at this line.");
+  return 0;
+}

diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
index 32764629d65a7..2f7fc88e0f449 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/TestDataFormatterLibcxxString.py
@@ -42,8 +42,6 @@ def cleanup():
             self.runCmd("type filter clear", check=False)
             self.runCmd("type synth clear", check=False)
 
-        is_64_bit = self.process().GetAddressByteSize() == 8
-
         # Execute the cleanup function during test case tear down.
         self.addTearDownHook(cleanup)
 
@@ -126,25 +124,6 @@ def cleanup():
             ],
         )
 
-        # The test assumes that std::string is in its cap-size-data layout.
-        is_alternate_layout = (
-            "arm" in self.getArchitecture()
-        ) and self.platformIsDarwin()
-        if is_64_bit and not is_alternate_layout:
-            self.expect(
-                "frame variable garbage1", substrs=["garbage1 = Summary Unavailable"]
-            )
-            self.expect(
-                "frame variable garbage2", substrs=[r'garbage2 = "\xfa\xfa\xfa\xfa"']
-            )
-            self.expect("frame variable garbage3", substrs=[r'garbage3 = "\xf0\xf0"'])
-            self.expect(
-                "frame variable garbage4", substrs=["garbage4 = Summary Unavailable"]
-            )
-            self.expect(
-                "frame variable garbage5", substrs=["garbage5 = Summary Unavailable"]
-            )
-
         # Finally, make sure that if the string is not readable, we give an error:
         bkpt_2 = target.BreakpointCreateBySourceRegex(
             "Break here to look at bad string", self.main_spec

diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
index f9f1c0802e518..373f817a80f7d 100644
--- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
+++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/string/main.cpp
@@ -1,97 +1,6 @@
 #include <string>
 #include <stdint.h>
 
-// For more information about libc++'s std::string ABI, see:
-//
-//   https://joellaity.com/2020/01/31/string.html
-
-// A corrupt string which hits the SSO code path, but has an invalid size.
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
-  // Set the size of this short-mode string to 116. Note that in short mode,
-  // the size is encoded as `size << 1`.
-  unsigned char size = 232;
-
-  // 23 garbage bytes for the inline string payload.
-  char inline_buf[23] = {0};
-#else  // _LIBCPP_ABI_VERSION == 1
-  // Like above, but data comes first, and use bitfields to indicate size.
-  char inline_buf[23] = {0};
-  unsigned char size : 7 = 116;
-  unsigned char is_long : 1 = 0;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_short_mode;
-
-// A corrupt libcxx string in long mode with a payload that contains a utf8
-// sequence that's inherently too long.
-static unsigned char garbage_utf8_payload1[] = {
-  250, // This means that we expect a 5-byte sequence, this is invalid. LLDB
-       // should fall back to ASCII printing.
-  250, 250, 250
-};
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
-  uint64_t cap = 5;
-  uint64_t size = 4;
-  unsigned char *data = &garbage_utf8_payload1[0];
-#else  // _LIBCPP_ABI_VERSION == 1
-  unsigned char *data = &garbage_utf8_payload1[0];
-  uint64_t size = 4;
-  uint64_t cap : 63 = 4;
-  uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode1;
-
-// A corrupt libcxx string in long mode with a payload that contains a utf8
-// sequence that's too long to fit in the buffer.
-static unsigned char garbage_utf8_payload2[] = {
-  240, // This means that we expect a 4-byte sequence, but the buffer is too
-       // small for this. LLDB should fall back to ASCII printing.
-  240
-};
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
-  uint64_t cap = 3;
-  uint64_t size = 2;
-  unsigned char *data = &garbage_utf8_payload2[0];
-#else  // _LIBCPP_ABI_VERSION == 1
-  unsigned char *data = &garbage_utf8_payload2[0];
-  uint64_t size = 2;
-  uint64_t cap : 63 = 3;
-  uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode2;
-
-// A corrupt libcxx string which has an invalid size (i.e. a size greater than
-// the capacity of the string).
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
-  uint64_t cap = 5;
-  uint64_t size = 7;
-  const char *data = "foo";
-#else  // _LIBCPP_ABI_VERSION == 1
-  const char *data = "foo";
-  uint64_t size = 7;
-  uint64_t cap : 63 = 5;
-  uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode3;
-
-// A corrupt libcxx string in long mode with a payload that would trigger a
-// buffer overflow.
-static struct {
-#if _LIBCPP_ABI_VERSION == 1
-  uint64_t cap = 5;
-  uint64_t size = 2;
-  uint64_t data = 0xfffffffffffffffeULL;
-#else  // _LIBCPP_ABI_VERSION == 1
-  uint64_t data = 0xfffffffffffffffeULL;
-  uint64_t size = 2;
-  uint64_t cap : 63 = 5;
-  uint64_t is_long : 1 = 1;
-#endif // #if _LIBCPP_ABI_VERSION == 1
-} garbage_string_long_mode4;
-
 size_t touch_string(std::string &in_str)
 {
   return in_str.size(); // Break here to look at bad string
@@ -115,18 +24,6 @@ int main()
     std::u32string u32_empty(U"");
     std::string *null_str = nullptr;
 
-    std::string garbage1, garbage2, garbage3, garbage4, garbage5;
-    if (sizeof(std::string) == sizeof(garbage_string_short_mode))
-      memcpy((void *)&garbage1, &garbage_string_short_mode, sizeof(std::string));
-    if (sizeof(std::string) == sizeof(garbage_string_long_mode1))
-      memcpy((void *)&garbage2, &garbage_string_long_mode1, sizeof(std::string));
-    if (sizeof(std::string) == sizeof(garbage_string_long_mode2))
-      memcpy((void *)&garbage3, &garbage_string_long_mode2, sizeof(std::string));
-    if (sizeof(std::string) == sizeof(garbage_string_long_mode3))
-      memcpy((void *)&garbage4, &garbage_string_long_mode3, sizeof(std::string));
-    if (sizeof(std::string) == sizeof(garbage_string_long_mode4))
-      memcpy((void *)&garbage5, &garbage_string_long_mode4, sizeof(std::string));
-
     S.assign(L"!!!!!"); // Set break point at this line.
     std::string *not_a_string = (std::string *) 0x0;
     touch_string(*not_a_string);


        
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to