https://github.com/felipepiovezan updated https://github.com/llvm/llvm-project/pull/172026
>From 3ce20b0c116b0a9c59f0d820e9511992d8141961 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan <[email protected]> Date: Thu, 11 Dec 2025 14:08:01 +0000 Subject: [PATCH 1/2] [lldb] Add ReadCStrings API to Process This commit uses Process::ReadMemoryRanges to create an efficient method for reading multiple strings at once. This method works like the single-string version, reading 256 bytes at a time, but does so for _every_ requested string at the same time. --- lldb/include/lldb/API/SBProcess.h | 3 + lldb/include/lldb/Target/Process.h | 3 + lldb/source/API/SBProcess.cpp | 31 ++++++++++ lldb/source/Target/Process.cpp | 57 +++++++++++++++++++ .../process/read_multiple_cstrings/Makefile | 3 + .../TestReadMultipleStrings.py | 46 +++++++++++++++ .../process/read_multiple_cstrings/main.c | 8 +++ 7 files changed, 151 insertions(+) create mode 100644 lldb/test/API/python_api/process/read_multiple_cstrings/Makefile create mode 100644 lldb/test/API/python_api/process/read_multiple_cstrings/TestReadMultipleStrings.py create mode 100644 lldb/test/API/python_api/process/read_multiple_cstrings/main.c diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 882b8bd837131..5f04d3330a1d1 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -205,6 +205,9 @@ class LLDB_API SBProcess { size_t ReadCStringFromMemory(addr_t addr, void *char_buf, size_t size, lldb::SBError &error); + SBStringList ReadCStringsFromMemory(SBValueList string_addresses, + SBError &error); + uint64_t ReadUnsignedFromMemory(addr_t addr, uint32_t byte_size, lldb::SBError &error); diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 8e6c16cbfe0fc..4493e81ce0eae 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1680,6 +1680,9 @@ class Process : public std::enable_shared_from_this<Process>, size_t ReadCStringFromMemory(lldb::addr_t vm_addr, 
std::string &out_str, Status &error); + llvm::SmallVector<std::optional<std::string>> + ReadCStringsFromMemory(llvm::ArrayRef<lldb::addr_t> addresses); + /// Reads an unsigned integer of the specified byte size from process /// memory. /// diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp index 14aa9432eed83..1a83a3d164e53 100644 --- a/lldb/source/API/SBProcess.cpp +++ b/lldb/source/API/SBProcess.cpp @@ -876,6 +876,37 @@ lldb::addr_t SBProcess::FindInMemory(const void *buf, uint64_t size, range.ref(), alignment, error.ref()); } +SBStringList SBProcess::ReadCStringsFromMemory(SBValueList sb_string_addresses, + SBError &error) { + std::vector<lldb::addr_t> string_addresses; + string_addresses.reserve(sb_string_addresses.GetSize()); + + for (size_t idx = 0; idx < sb_string_addresses.GetSize(); idx++) { + SBValue sb_address = sb_string_addresses.GetValueAtIndex(idx); + string_addresses.push_back(sb_address.GetValueAsAddress()); + } + + ProcessSP process_sp(GetSP()); + if (!process_sp) { + error = Status::FromErrorString("SBProcess is invalid"); + return {}; + } + Process::StopLocker stop_locker; + if (!stop_locker.TryLock(&process_sp->GetRunLock())) { + error = Status::FromErrorString("process is running"); + return {}; + } + + SBStringList strings; + llvm::SmallVector<std::optional<std::string>> maybe_strings = + process_sp->ReadCStringsFromMemory(string_addresses); + + for (std::optional<std::string> maybe_str : maybe_strings) + strings.AppendString(maybe_str ? 
maybe_str->c_str() : ""); + + return strings; +} + size_t SBProcess::ReadMemory(addr_t addr, void *dst, size_t dst_len, SBError &sb_error) { LLDB_INSTRUMENT_VA(this, addr, dst, dst_len, sb_error); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 9c8e8fa7041ee..3890a91dc4608 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -2135,6 +2135,63 @@ lldb::addr_t Process::FindInMemory(const uint8_t *buf, uint64_t size, return matches[0].GetBaseAddress().GetLoadAddress(&target); } +llvm::SmallVector<std::optional<std::string>> +Process::ReadCStringsFromMemory(llvm::ArrayRef<lldb::addr_t> addresses) { + // Make the same read width choice as ReadCStringFromMemory. + constexpr auto read_width = 256; + + llvm::SmallVector<std::optional<std::string>> output_strs(addresses.size(), + ""); + llvm::SmallVector<Range<addr_t, size_t>> ranges{ + llvm::map_range(addresses, [=](addr_t ptr) { + return Range<addr_t, size_t>(ptr, read_width); + })}; + + std::vector<uint8_t> buffer(read_width * addresses.size(), 0); + uint64_t num_completed_strings = 0; + + while (num_completed_strings != addresses.size()) { + llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> read_results = + ReadMemoryRanges(ranges, buffer); + + // Each iteration of this loop either increments num_completed_strings or + // updates the base pointer of some range, guaranteeing forward progress of + // the outer loop. + for (auto [range, read_result, output_str] : + llvm::zip(ranges, read_results, output_strs)) { + // A previously completed string. + if (range.GetByteSize() == 0) + continue; + + // The read failed, set the range to 0 to avoid reading it again. + if (read_result.empty()) { + output_str = std::nullopt; + range.SetByteSize(0); + num_completed_strings++; + continue; + } + + // Convert ArrayRef to StringRef so the pointers work with std::string. 
+ auto read_result_str = llvm::toStringRef(read_result); + + const char *null_terminator_pos = llvm::find(read_result_str, '\0'); + output_str->append(read_result_str.begin(), null_terminator_pos); + + // If the terminator was found, this string is complete. + if (null_terminator_pos != read_result_str.end()) { + range.SetByteSize(0); + num_completed_strings++; + } + // Otherwise increment the base pointer for the next read. + else { + range.SetRangeBase(range.GetRangeBase() + read_result.size()); + } + } + } + + return output_strs; +} + size_t Process::ReadCStringFromMemory(addr_t addr, std::string &out_str, Status &error) { char buf[256]; diff --git a/lldb/test/API/python_api/process/read_multiple_cstrings/Makefile b/lldb/test/API/python_api/process/read_multiple_cstrings/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/python_api/process/read_multiple_cstrings/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/python_api/process/read_multiple_cstrings/TestReadMultipleStrings.py b/lldb/test/API/python_api/process/read_multiple_cstrings/TestReadMultipleStrings.py new file mode 100644 index 0000000000000..75ecfb13d8b77 --- /dev/null +++ b/lldb/test/API/python_api/process/read_multiple_cstrings/TestReadMultipleStrings.py @@ -0,0 +1,46 @@ +"""Test reading c-strings from memory via SB API.""" + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class TestReadMultipleStrings(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + def test_read_multiple_strings(self): + """Test corner case behavior of SBProcess::ReadCStringsFromMemory""" + self.build() + + (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint( + self, "breakpoint here", lldb.SBFileSpec("main.c") + ) + + frame = thread.GetFrameAtIndex(0) + err = lldb.SBError() + + empty_str_addr = frame.FindVariable("empty_string") 
self.assertSuccess(err) + str1_addr = frame.FindVariable("str1") + self.assertSuccess(err) + banana_addr = frame.FindVariable("banana") + self.assertSuccess(err) + bad_addr = frame.FindVariable("bad_addr") + self.assertSuccess(err) + + string_addresses = [empty_str_addr, str1_addr, banana_addr, bad_addr] + for addr in string_addresses: + self.assertNotEqual(addr.GetValueAsUnsigned(), lldb.LLDB_INVALID_ADDRESS) + + addresses = lldb.SBValueList() + for addr in string_addresses: + addresses.Append(addr) + + strings = process.ReadCStringsFromMemory(addresses, err) + self.assertSuccess(err) + self.assertEqual(strings.GetStringAtIndex(0), "") + self.assertEqual(strings.GetStringAtIndex(1), "1") + self.assertEqual(strings.GetStringAtIndex(2), "banana") + # invalid address will also return an empty string. + self.assertEqual(strings.GetStringAtIndex(3), "") diff --git a/lldb/test/API/python_api/process/read_multiple_cstrings/main.c b/lldb/test/API/python_api/process/read_multiple_cstrings/main.c new file mode 100644 index 0000000000000..d7affad6734da --- /dev/null +++ b/lldb/test/API/python_api/process/read_multiple_cstrings/main.c @@ -0,0 +1,8 @@ +int main(int argc, char **argv) { + const char *empty_string = ""; + const char *str1 = "1"; + const char *banana = "banana"; + const char *bad_addr = (char *)0x100; + + return 0; // breakpoint here +} >From 04f122c57ab58b0e7259d70f1e9b5fda5d7ca477 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan <[email protected]> Date: Sun, 14 Dec 2025 11:07:15 +0000 Subject: [PATCH 2/2] fixup! 
address review comments --- lldb/include/lldb/API/SBProcess.h | 11 +++++++++++ lldb/source/Target/Process.cpp | 11 +++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/lldb/include/lldb/API/SBProcess.h b/lldb/include/lldb/API/SBProcess.h index 5f04d3330a1d1..49fab59aec041 100644 --- a/lldb/include/lldb/API/SBProcess.h +++ b/lldb/include/lldb/API/SBProcess.h @@ -205,6 +205,17 @@ class LLDB_API SBProcess { size_t ReadCStringFromMemory(addr_t addr, void *char_buf, size_t size, lldb::SBError &error); + /// Given a list of addresses, read one NULL-terminated string per address. + /// + /// \param [in] string_addresses + /// The list of addresses to read from. + /// \param [out] error + /// An error that gets populated if the process is not in a valid state. + /// + /// \return + /// A list of strings read, which is guaranteed to contain as + /// many strings as there are addresses in the input. If reading from an + /// address fails, an empty string is returned for that address. SBStringList ReadCStringsFromMemory(SBValueList string_addresses, SBError &error); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 3890a91dc4608..ab250941b183b 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -117,6 +117,8 @@ static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = { }, }; +static constexpr unsigned g_string_read_width = 256; + #define LLDB_PROPERTIES_process #include "TargetProperties.inc" @@ -2137,17 +2139,14 @@ lldb::addr_t Process::FindInMemory(const uint8_t *buf, uint64_t size, llvm::SmallVector<std::optional<std::string>> Process::ReadCStringsFromMemory(llvm::ArrayRef<lldb::addr_t> addresses) { - // Make the same read width choice as ReadCStringFromMemory. 
- constexpr auto read_width = 256; - llvm::SmallVector<std::optional<std::string>> output_strs(addresses.size(), ""); llvm::SmallVector<Range<addr_t, size_t>> ranges{ llvm::map_range(addresses, [=](addr_t ptr) { - return Range<addr_t, size_t>(ptr, read_width); + return Range<addr_t, size_t>(ptr, g_string_read_width); })}; - std::vector<uint8_t> buffer(read_width * addresses.size(), 0); + std::vector<uint8_t> buffer(g_string_read_width * addresses.size(), 0); uint64_t num_completed_strings = 0; while (num_completed_strings != addresses.size()) { @@ -2194,7 +2193,7 @@ Process::ReadCStringsFromMemory(llvm::ArrayRef<lldb::addr_t> addresses) { size_t Process::ReadCStringFromMemory(addr_t addr, std::string &out_str, Status &error) { - char buf[256]; + char buf[g_string_read_width]; out_str.clear(); addr_t curr_addr = addr; while (true) { _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
