llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-lldb Author: Adrian Prantl (adrian-prantl) <details> <summary>Changes</summary> Compared to the Python version, this also does type checking and error handling, so it's slightly longer; however, it's still comfortably under 500 lines. See https://discourse.llvm.org/t/a-bytecode-for-lldb-data-formatters/82696 for more context! This is currently a draft; I still want to add more tests and also extend the metadata with a (show children) flag and potentially others. --- Patch is 47.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114333.diff 19 Files Affected: - (modified) lldb/include/lldb/DataFormatters/TypeSummary.h (+21-1) - (modified) lldb/include/lldb/lldb-enumerations.h (+2) - (modified) lldb/source/Core/Section.cpp (+4) - (modified) lldb/source/DataFormatters/CMakeLists.txt (+1) - (added) lldb/source/DataFormatters/FormatterBytecode.cpp (+576) - (added) lldb/source/DataFormatters/FormatterBytecode.def (+101) - (added) lldb/source/DataFormatters/FormatterBytecode.h (+63) - (modified) lldb/source/DataFormatters/TypeSummary.cpp (+71-3) - (modified) lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (+2) - (modified) lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp (+8) - (modified) lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp (+2) - (modified) lldb/source/Symbol/ObjectFile.cpp (+1) - (modified) lldb/source/Target/Target.cpp (+151) - (added) lldb/test/API/functionalities/data-formatter/bytecode-summary/Makefile (+2) - (added) lldb/test/API/functionalities/data-formatter/bytecode-summary/TestBytecodeSummary.py (+14) - (added) lldb/test/API/functionalities/data-formatter/bytecode-summary/main.cpp (+36) - (added) lldb/test/API/functionalities/data-formatter/embedded-summary/Makefile (+2) - (added) lldb/test/API/functionalities/data-formatter/embedded-summary/TestEmbeddedTypeSummary.py (+12) - (added) 
lldb/test/API/functionalities/data-formatter/embedded-summary/main.c (+22) ``````````diff diff --git a/lldb/include/lldb/DataFormatters/TypeSummary.h b/lldb/include/lldb/DataFormatters/TypeSummary.h index 382824aa2813da..0d8e46fa0b1598 100644 --- a/lldb/include/lldb/DataFormatters/TypeSummary.h +++ b/lldb/include/lldb/DataFormatters/TypeSummary.h @@ -22,6 +22,10 @@ #include "lldb/Utility/Status.h" #include "lldb/Utility/StructuredData.h" +namespace llvm { +class MemoryBuffer; +} + namespace lldb_private { class TypeSummaryOptions { public: @@ -44,7 +48,7 @@ class TypeSummaryOptions { class TypeSummaryImpl { public: - enum class Kind { eSummaryString, eScript, eCallback, eInternal }; + enum class Kind { eSummaryString, eScript, eBytecode, eCallback, eInternal }; virtual ~TypeSummaryImpl() = default; @@ -409,6 +413,22 @@ struct ScriptSummaryFormat : public TypeSummaryImpl { ScriptSummaryFormat(const ScriptSummaryFormat &) = delete; const ScriptSummaryFormat &operator=(const ScriptSummaryFormat &) = delete; }; + +/// A summary formatter that is defined in LLDB formmater bytecode. 
+class BytecodeSummaryFormat : public TypeSummaryImpl { + std::unique_ptr<llvm::MemoryBuffer> m_bytecode; +public: + BytecodeSummaryFormat(const TypeSummaryImpl::Flags &flags, + std::unique_ptr<llvm::MemoryBuffer> bytecode); + bool FormatObject(ValueObject *valobj, std::string &dest, + const TypeSummaryOptions &options) override; + std::string GetDescription() override; + std::string GetName() override; + static bool classof(const TypeSummaryImpl *S) { + return S->GetKind() == Kind::eBytecode; + } +}; + } // namespace lldb_private #endif // LLDB_DATAFORMATTERS_TYPESUMMARY_H diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index 938f6e3abe8f2a..b2f0943d5a9260 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -761,6 +761,8 @@ enum SectionType { eSectionTypeDWARFDebugLocListsDwo, eSectionTypeDWARFDebugTuIndex, eSectionTypeCTF, + eSectionTypeLLDBTypeSummaries, + eSectionTypeLLDBFormatters, eSectionTypeSwiftModules, }; diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index 0763e88d4608f4..3b5ca2c6785ef0 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -147,6 +147,8 @@ const char *Section::GetTypeAsCString() const { return "dwarf-gnu-debugaltlink"; case eSectionTypeCTF: return "ctf"; + case eSectionTypeLLDBTypeSummaries: + return "lldb-type-summaries"; case eSectionTypeOther: return "regular"; case eSectionTypeSwiftModules: @@ -457,6 +459,8 @@ bool Section::ContainsOnlyDebugInfo() const { case eSectionTypeDWARFAppleObjC: case eSectionTypeDWARFGNUDebugAltLink: case eSectionTypeCTF: + case eSectionTypeLLDBTypeSummaries: + case eSectionTypeLLDBFormatters: case eSectionTypeSwiftModules: return true; } diff --git a/lldb/source/DataFormatters/CMakeLists.txt b/lldb/source/DataFormatters/CMakeLists.txt index 7f48a2785c73f5..17da138227d4f1 100644 --- a/lldb/source/DataFormatters/CMakeLists.txt +++ 
b/lldb/source/DataFormatters/CMakeLists.txt @@ -5,6 +5,7 @@ add_lldb_library(lldbDataFormatters NO_PLUGIN_DEPENDENCIES FormatCache.cpp FormatClasses.cpp FormatManager.cpp + FormatterBytecode.cpp FormattersHelpers.cpp LanguageCategory.cpp StringPrinter.cpp diff --git a/lldb/source/DataFormatters/FormatterBytecode.cpp b/lldb/source/DataFormatters/FormatterBytecode.cpp new file mode 100644 index 00000000000000..7e8bfd3a370ce7 --- /dev/null +++ b/lldb/source/DataFormatters/FormatterBytecode.cpp @@ -0,0 +1,576 @@ +//===-- FormatterBytecode.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FormatterBytecode.h" +#include "lldb/Core/ValueObject.h" +#include "lldb/Utility/LLDBLog.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadicDetails.h" +#include "llvm/Support/FormatProviders.h" +#include "llvm/ADT/StringExtras.h" + +using namespace lldb; +namespace lldb_private { + +std::string toString(FormatterBytecode::OpCodes op) { + switch (op) { +#define DEFINE_OPCODE(OP, MNEMONIC, NAME) \ + case OP: { \ + const char *s = MNEMONIC; \ + return s ? 
s : #NAME; \ + } +#include "FormatterBytecode.def" +#undef DEFINE_SIGNATURE + } + return llvm::utostr(op); +} + +std::string toString(FormatterBytecode::Selectors sel) { + switch (sel) { +#define DEFINE_SELECTOR(ID, NAME) \ + case ID: \ + return "@" #NAME; +#include "FormatterBytecode.def" +#undef DEFINE_SIGNATURE + } + return "@"+llvm::utostr(sel); +} + +std::string toString(FormatterBytecode::Signatures sig) { + switch (sig) { +#define DEFINE_SIGNATURE(ID, NAME) \ + case ID: \ + return "@" #NAME; +#include "FormatterBytecode.def" +#undef DEFINE_SIGNATURE + } + return llvm::utostr(sig); +} + +std::string toString(const FormatterBytecode::DataStack &data) { + std::string s; + llvm::raw_string_ostream os(s); + os << "[ "; + for (auto &d : data) { + if (auto s = std::get_if<std::string>(&d)) + os << '"' << *s << '"'; + else if (auto u = std::get_if<uint64_t>(&d)) + os << *u << 'u'; + else if (auto i = std::get_if<int64_t>(&d)) + os << *i; + else if (auto valobj = std::get_if<ValueObjectSP>(&d)) { + if (!valobj->get()) + os << "null"; + else + os << "object(" << valobj->get()->GetValueAsCString() << ')'; + } else if (auto type = std::get_if<CompilerType>(&d)) { + os << '(' << type->GetTypeName(true) << ')'; + } else if (auto sel = std::get_if<FormatterBytecode::Selectors>(&d)) { + os << toString(*sel); + } + os << ' '; + } + os << ']'; + return s; +} + +namespace FormatterBytecode { + +/// Implement the @format function. 
+static llvm::Error FormatImpl(DataStack &data) { + auto fmt = data.Pop<std::string>(); + auto replacements = + llvm::formatv_object_base::parseFormatString(fmt, 0, false); + std::string s; + llvm::raw_string_ostream os(s); + unsigned num_args = 0; + for (const auto &r : replacements) + if (r.Type == llvm::ReplacementType::Format) + num_args = std::max(num_args, r.Index); + + if (data.size() < num_args) + return llvm::createStringError("not enough arguments"); + + for (const auto &r : replacements) { + if (r.Type == llvm::ReplacementType::Literal) { + os << r.Spec; + continue; + } + using namespace llvm::support::detail; + auto arg = data[data.size() - num_args + r.Index]; + auto format = [&](format_adapter &&adapter) { + llvm::FmtAlign Align(adapter, r.Where, r.Width, r.Pad); + Align.format(os, r.Options); + }; + + if (auto s = std::get_if<std::string>(&arg)) + format(build_format_adapter(s)); + else if (auto u = std::get_if<uint64_t>(&arg)) + format(build_format_adapter(u)); + else if (auto i = std::get_if<int64_t>(&arg)) + format(build_format_adapter(i)); + else if (auto valobj = std::get_if<ValueObjectSP>(&arg)) { + if (!valobj->get()) + format(build_format_adapter("null object")); + else + format(build_format_adapter(valobj->get()->GetValueAsCString())); + } else if (auto type = std::get_if<CompilerType>(&arg)) + format(build_format_adapter(type->GetDisplayTypeName())); + else if (auto sel = std::get_if<FormatterBytecode::Selectors>(&arg)) + format(build_format_adapter(toString(*sel))); + } + data.Push(s); + return llvm::Error::success(); +} + +static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data, + DataType type) { + if (data.size() < 1) + return llvm::createStringError("not enough elements on data stack"); + + auto &elem = data.back(); + switch (type) { + case Any: + break; + case String: + if (!std::holds_alternative<std::string>(elem)) + return llvm::createStringError("expected String"); + break; + case UInt: + if 
(!std::holds_alternative<uint64_t>(elem)) + return llvm::createStringError("expected UInt"); + break; + case Int: + if (!std::holds_alternative<int64_t>(elem)) + return llvm::createStringError("expected Int"); + break; + case Object: + if (!std::holds_alternative<ValueObjectSP>(elem)) + return llvm::createStringError("expected Object"); + break; + case Type: + if (!std::holds_alternative<CompilerType>(elem)) + return llvm::createStringError("expected Type"); + break; + case Selector: + if (!std::holds_alternative<Selectors>(elem)) + return llvm::createStringError("expected Selector"); + break; + } + return llvm::Error::success(); +} + +static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data, + DataType type1, DataType type2) { + if (auto error = TypeCheck(data, type2)) + return error; + return TypeCheck(data.drop_back(), type1); +} + +static llvm::Error TypeCheck(llvm::ArrayRef<DataStackElement> data, + DataType type1, DataType type2, DataType type3) { + if (auto error = TypeCheck(data, type3)) + return error; + return TypeCheck(data.drop_back(1), type2, type1); +} + +llvm::Error Interpret(std::vector<ControlStackElement> &control, + DataStack &data, Selectors sel) { + if (control.empty()) + return llvm::Error::success(); + // Since the only data types are single endian and ULEBs, the + // endianness should not matter. + llvm::DataExtractor cur_block(control.back(), true, 64); + llvm::DataExtractor::Cursor pc(0); + + while (!control.empty()) { + /// Activate the top most block from the control stack. + auto activate_block = [&]() { + // Save the return address. + if (control.size() > 1) + control[control.size() - 2] = cur_block.getData().drop_front(pc.tell()); + cur_block = llvm::DataExtractor(control.back(), true, 64); + if (pc) + pc = llvm::DataExtractor::Cursor(0); + }; + + /// Fetch the next byte in the instruction stream. + auto next_byte = [&]() -> uint8_t { + // At the end of the current block? 
+ while (pc.tell() >= cur_block.size() && !control.empty()) { + if (control.size() == 1) { + control.pop_back(); + return 0; + } + control.pop_back(); + activate_block(); + } + + // Fetch the next instruction. + return cur_block.getU8(pc); + }; + + // Fetch the next opcode. + OpCodes opcode = (OpCodes)next_byte(); + if (control.empty() || !pc) + return pc.takeError(); + + LLDB_LOGV(GetLog(LLDBLog::DataFormatters), + "[eval {0}] opcode={1}, control={2}, data={3}", toString(sel), + toString(opcode), control.size(), toString(data)); + + + // Various shorthands to improve the readability of error handling. +#define TYPE_CHECK(...) \ + if (auto error = TypeCheck(data, __VA_ARGS__)) \ + return error; + + auto error = [&](const char *msg) { + return llvm::createStringError("{0} (opcode={1})", msg, toString(opcode).c_str()); + }; + + switch (opcode) { + // Data stack manipulation. + case op_dup: + TYPE_CHECK(Any); + data.Push(data.back()); + break; + case op_drop: + TYPE_CHECK(Any); + data.pop_back(); + break; + case op_pick: { + TYPE_CHECK(UInt); + uint64_t idx = data.Pop<uint64_t>(); + if (idx >= data.size()) + return error("index out of bounds"); + data.Push(data[idx]); + break; + } + case op_over: + TYPE_CHECK(Any, Any); + data.Push(data[data.size() - 2]); + break; + case op_swap: { + TYPE_CHECK(Any, Any); + auto x = data.PopAny(); + auto y = data.PopAny(); + data.Push(x); + data.Push(y); + break; + } + case op_rot: { + TYPE_CHECK(Any, Any, Any); + auto z = data.PopAny(); + auto y = data.PopAny(); + auto x = data.PopAny(); + data.Push(z); + data.Push(x); + data.Push(y); + break; + } + // Control stack manipulation. 
+ case op_begin: { + uint64_t length = cur_block.getULEB128(pc); + if (!pc) + return pc.takeError(); + llvm::StringRef block = cur_block.getBytes(pc, length); + if (!pc) + return pc.takeError(); + control.push_back(block); + break; + } + case op_if: + TYPE_CHECK(UInt); + if (data.Pop<uint64_t>() != 0) { + if (!cur_block.size()) + return error("empty control stack"); + activate_block(); + } + break; + case op_ifelse: + TYPE_CHECK(UInt); + if (cur_block.size() < 2) + return error("empty control stack"); + if (data.Pop<uint64_t>() == 0) + control[control.size()-2] = control.back(); + control.pop_back(); + activate_block(); + break; + // Literals. + case op_lit_uint: + data.Push(cur_block.getULEB128(pc)); + break; + case op_lit_int: + data.Push(cur_block.getSLEB128(pc)); + break; + case op_lit_selector: + data.Push(Selectors(cur_block.getU8(pc))); + break; + case op_lit_string: { + uint64_t length = cur_block.getULEB128(pc); + llvm::StringRef bytes = cur_block.getBytes(pc, length); + data.Push(bytes.str()); + break; + } + case op_as_uint: { + TYPE_CHECK(Int); + uint64_t casted; + int64_t val = data.Pop<int64_t>(); + memcpy(&casted, &val, sizeof(val)); + data.Push(casted); + break; + } + case op_as_int: { + TYPE_CHECK(UInt); + int64_t casted; + uint64_t val = data.Pop<uint64_t>(); + memcpy(&casted, &val, sizeof(val)); + data.Push(casted); + break; + } + case op_is_null: { + TYPE_CHECK(Object); + data.Push(data.Pop<ValueObjectSP>() ? 0ULL : 1ULL); + break; + } + // Arithmetic, logic, etc. 
+#define BINOP_IMPL(OP, CHECK_ZERO) \ + { \ + TYPE_CHECK(Any, Any); \ + auto y = data.PopAny(); \ + if (std::holds_alternative<uint64_t>(y)) { \ + if (CHECK_ZERO && !std::get<uint64_t>(y)) \ + return error(#OP " by zero"); \ + TYPE_CHECK(UInt); \ + data.Push((uint64_t)(data.Pop<uint64_t>() OP std::get<uint64_t>(y))); \ + } else if (std::holds_alternative<int64_t>(y)) { \ + if (CHECK_ZERO && !std::get<int64_t>(y)) \ + return error(#OP " by zero"); \ + TYPE_CHECK(Int); \ + data.Push((int64_t)(data.Pop<int64_t>() OP std::get<int64_t>(y))); \ + } else \ + return error("unsupported data types"); \ + } +#define BINOP(OP) BINOP_IMPL(OP, false) +#define BINOP_CHECKZERO(OP) BINOP_IMPL(OP, true) + case op_plus: + BINOP(+); + break; + case op_minus: + BINOP(-); + break; + case op_mul: + BINOP(*); + break; + case op_div: + BINOP_CHECKZERO(/); + break; + case op_mod: + BINOP_CHECKZERO(%); + break; + case op_shl: +#define SHIFTOP(OP) \ + { \ + TYPE_CHECK(Any, Any); \ + if (std::holds_alternative<uint64_t>(data.back())) { \ + uint64_t y = data.Pop<uint64_t>(); \ + TYPE_CHECK(UInt); \ + uint64_t x = data.Pop<uint64_t>(); \ + if (y > 64) \ + return error("shift out of bounds"); \ + data.Push(x OP y); \ + } else if (std::holds_alternative<int64_t>(data.back())) { \ + uint64_t y = data.Pop<int64_t>(); \ + TYPE_CHECK(Int); \ + uint64_t x = data.Pop<int64_t>(); \ + if (y > 64) \ + return error("shift out of bounds"); \ + if (y < 0) \ + return error("shift out of bounds"); \ + data.Push(x OP y); \ + } else \ + return error("unsupported data types"); \ + } + SHIFTOP(<<); + break; + case op_shr: + SHIFTOP(<<); + break; + case op_and: + BINOP(&); + break; + case op_or: + BINOP(|); + break; + case op_xor: + BINOP(^); + break; + case op_not: + TYPE_CHECK(UInt); + data.Push(~data.Pop<uint64_t>()); + break; + case op_eq: + BINOP(==); + break; + case op_neq: + BINOP(!=); + break; + case op_lt: + BINOP(<); + break; + case op_gt: + BINOP(>); + break; + case op_le: + BINOP(<=); + break; + case 
op_ge: + BINOP(>=); + break; + case op_call: { + TYPE_CHECK(Selector); + Selectors sel = data.Pop<Selectors>(); + + // Shorthand to improve readability. +#define POP_VALOBJ(VALOBJ) \ + auto VALOBJ = data.Pop<ValueObjectSP>(); \ + if (!VALOBJ) \ + return error("null object"); + + auto sel_error = [&](const char *msg) { + return llvm::createStringError("{0} (opcode={1}, selector={2})", msg, + toString(opcode).c_str(), + toString(sel).c_str()); + }; + + switch (sel) { + case sel_summary: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + const char *summary = valobj->GetSummaryAsCString(); + data.Push(summary ? std::string(valobj->GetSummaryAsCString()) + : std::string()); + break; + } + case sel_get_num_children: { + TYPE_CHECK(Object); + POP_VALOBJ(valobj); + auto result = valobj->GetNumChildren(); + if (!result) + return result.takeError(); + data.Push((uint64_t)*result); + break; + } + case sel_get_child_at_index: { + TYPE_CHECK(Object, UInt); + auto index = data.Pop<uint64_t>(); + POP_VALOBJ(valobj); + data.Push(valobj->GetChildAtIndex(index)); + break; + } + case sel_get_child_with_name: { + TYPE_CHECK(Object, String); + auto name = data.Pop<std::string>(); + POP_VALOBJ(valobj); + data.Push(valobj->GetChildMemberWithName(name)); + break; + } + c... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/114333 _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits