https://github.com/jasonmolenda updated https://github.com/llvm/llvm-project/pull/148105
>From 74995e8f1a982a82bafaa150fb407187fa4b95e4 Mon Sep 17 00:00:00 2001 From: Jason Molenda <jmole...@apple.com> Date: Thu, 10 Jul 2025 22:31:54 -0700 Subject: [PATCH 1/2] [lldb][WIP] dump riscv opcode bytes as blocks of 2/4 bytes Part of the changes by Ted Woodward et al in https://github.com/llvm/llvm-project/pull/145793 to change the riscv disassemble --bytes mode output to match llvm-objdump prints the instruction bytes as sequences of 2/4 bytes for instructions that may be 2/4/6/8/etc bytes long, instead of printing them as an array of UInt8's like x86 as is done today. This PR is doing it by adding a special Dump method to Opcode, but I believe the correct way to accomplish this is to have an Opcode::Type that implies this formatting, and not adding a DumpRISCV() method. I put up this PR to show what I meant by this. The unit test I added fails on the 64-bit example instruction, MCInst seems to be decoding this as a 2-byte instruction. I'm not sure if I copied this hypothetical instruction from the original PR incorrectly, or if they might be working on a downstream branch that can decode this, it seems to work in their test cases. --- lldb/include/lldb/Core/Opcode.h | 39 ++++++++++--- lldb/source/Core/Opcode.cpp | 38 +++++++++++- .../Disassembler/LLVMC/DisassemblerLLVMC.cpp | 58 +++++++++++-------- lldb/source/Utility/ArchSpec.cpp | 4 +- .../RISCV/TestMCDisasmInstanceRISCV.cpp | 46 +++++++++++++++ 5 files changed, 152 insertions(+), 33 deletions(-) diff --git a/lldb/include/lldb/Core/Opcode.h b/lldb/include/lldb/Core/Opcode.h index f72f2687b54fe..91af15c62e6ab 100644 --- a/lldb/include/lldb/Core/Opcode.h +++ b/lldb/include/lldb/Core/Opcode.h @@ -32,7 +32,10 @@ class Opcode { eTypeInvalid, eType8, eType16, - eType16_2, // a 32-bit Thumb instruction, made up of two words + eType16_2, // a 32-bit Thumb instruction, made up of two words + eType16_32Tuples, // RISC-V that can have 2, 4, 6, 8 etc byte long + // instructions which will be printed in combinations of + // 16 & 32-bit words. eType32, eType64, eTypeBytes @@ -60,9 +63,9 @@ class Opcode { m_data.inst64 = inst; } - Opcode(uint8_t *bytes, size_t length) - : m_byte_order(lldb::eByteOrderInvalid) { - SetOpcodeBytes(bytes, length); + Opcode(uint8_t *bytes, size_t length, Opcode::Type type, + lldb::ByteOrder order) { + DoSetOpcodeBytes(bytes, length, type, order); } void Clear() { @@ -82,6 +85,8 @@ class Opcode { break; case Opcode::eType16_2: break; + case Opcode::eType16_32Tuples: + break; case Opcode::eType32: break; case Opcode::eType64: @@ -103,6 +108,8 @@ class Opcode { : m_data.inst16; case Opcode::eType16_2: break; + case Opcode::eType16_32Tuples: + break; case Opcode::eType32: break; case Opcode::eType64: @@ -122,6 +129,8 @@ class Opcode { case Opcode::eType16: return GetEndianSwap() ? llvm::byteswap<uint16_t>(m_data.inst16) : m_data.inst16; + case Opcode::eType16_32Tuples: + break; case Opcode::eType16_2: // passthrough case Opcode::eType32: return GetEndianSwap() ? llvm::byteswap<uint32_t>(m_data.inst32) @@ -143,6 +152,8 @@ class Opcode { case Opcode::eType16: return GetEndianSwap() ? llvm::byteswap<uint16_t>(m_data.inst16) : m_data.inst16; + case Opcode::eType16_32Tuples: + break; case Opcode::eType16_2: // passthrough case Opcode::eType32: return GetEndianSwap() ? llvm::byteswap<uint32_t>(m_data.inst32) @@ -186,20 +197,30 @@ class Opcode { m_byte_order = order; } + void SetOpcode16_32TupleBytes(const void *bytes, size_t length, + lldb::ByteOrder order) { + DoSetOpcodeBytes(bytes, length, eType16_32Tuples, order); + } + void SetOpcodeBytes(const void *bytes, size_t length) { + DoSetOpcodeBytes(bytes, length, eTypeBytes, lldb::eByteOrderInvalid); + } + + void DoSetOpcodeBytes(const void *bytes, size_t length, Opcode::Type type, + lldb::ByteOrder order) { if (bytes != nullptr && length > 0) { - m_type = eTypeBytes; + m_type = type; m_data.inst.length = length; assert(length < sizeof(m_data.inst.bytes)); memcpy(m_data.inst.bytes, bytes, length); - m_byte_order = lldb::eByteOrderInvalid; + m_byte_order = order; } else { m_type = eTypeInvalid; m_data.inst.length = 0; } } - int Dump(Stream *s, uint32_t min_byte_width); + int Dump(Stream *s, uint32_t min_byte_width) const; const void *GetOpcodeBytes() const { return ((m_type == Opcode::eTypeBytes) ? m_data.inst.bytes : nullptr); @@ -213,6 +234,8 @@ class Opcode { return sizeof(m_data.inst8); case Opcode::eType16: return sizeof(m_data.inst16); + case Opcode::eType16_32Tuples: + return m_data.inst.length; case Opcode::eType16_2: // passthrough case Opcode::eType32: return sizeof(m_data.inst32); @@ -238,6 +261,8 @@ class Opcode { return &m_data.inst8; case Opcode::eType16: return &m_data.inst16; + case Opcode::eType16_32Tuples: + return m_data.inst.bytes; case Opcode::eType16_2: // passthrough case Opcode::eType32: return &m_data.inst32; diff --git a/lldb/source/Core/Opcode.cpp b/lldb/source/Core/Opcode.cpp index 3e30d98975d8a..15ae87ea99f9f 100644 --- a/lldb/source/Core/Opcode.cpp +++ b/lldb/source/Core/Opcode.cpp @@ -21,7 +21,7 @@ using namespace lldb; using namespace lldb_private; -int Opcode::Dump(Stream *s, uint32_t min_byte_width) { +int Opcode::Dump(Stream *s, uint32_t min_byte_width) const { const uint32_t previous_bytes = s->GetWrittenBytes(); switch (m_type) { case Opcode::eTypeInvalid: @@ -38,6 +38,38 @@ int Opcode::Dump(Stream *s, uint32_t min_byte_width) { s->Printf("0x%8.8x", m_data.inst32); break; + case Opcode::eType16_32Tuples: { + uint8_t buf[m_data.inst.length]; + // Swap the byte order if lldb and the + // target are different endian. May be + // any of 2/4/6/8/etc bytes long, easiest + // to reverse the bytes manually. + if (GetEndianSwap()) + for (int i = 0; i < m_data.inst.length; i++) + buf[m_data.inst.length - i] = m_data.inst.bytes[i]; + else + memcpy(buf, m_data.inst.bytes, m_data.inst.length); + uint32_t i = 0; + while (i < m_data.inst.length) { + if (i > 0) + s->PutChar(' '); + // if size % 4 == 0, print as 1 or 2 32 bit values (32 or 64 bit inst) + if (!(m_data.inst.length % 4)) { + uint32_t value; + memcpy(&value, &buf[i], 4); + s->Printf("%8.8x", value); + i += 4; + // else if size % 2 == 0, print as 1 or 3 16 bit values (16 or 48 bit + // inst) + } else if (!(m_data.inst.length % 2)) { + uint16_t value; + memcpy(&value, &buf[i], 2); + s->Printf("%4.4x", value); + i += 2; + } + } + } break; + case Opcode::eType64: s->Printf("0x%16.16" PRIx64, m_data.inst64); break; @@ -69,6 +101,7 @@ lldb::ByteOrder Opcode::GetDataByteOrder() const { case Opcode::eType8: case Opcode::eType16: case Opcode::eType16_2: + case Opcode::eType16_32Tuples: case Opcode::eType32: case Opcode::eType64: return endian::InlHostByteOrder(); @@ -113,6 +146,9 @@ uint32_t Opcode::GetData(DataExtractor &data) const { swap_buf[3] = m_data.inst.bytes[2]; buf = swap_buf; break; + case Opcode::eType16_32Tuples: + buf = GetOpcodeDataBytes(); + break; case Opcode::eType32: *(uint32_t *)swap_buf = llvm::byteswap<uint32_t>(m_data.inst32); buf = swap_buf; diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp index ed6047f8f4ef3..0a8caf54ef1a0 100644 --- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp +++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp @@ -61,6 +61,8 @@ class DisassemblerLLVMC::MCDisasmInstance { uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, llvm::MCInst &mc_inst) const; + bool GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, + lldb::addr_t pc, llvm::MCInst &mc_inst, uint64_t &size) const; void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string, std::string &comments_string); void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style); @@ -486,8 +488,13 @@ class InstructionLLVMC : public lldb_private::Instruction { break; default: - m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), - min_op_byte_size); + if (arch.GetTriple().isRISCV()) + m_opcode.SetOpcode16_32TupleBytes( + data.PeekData(data_offset, min_op_byte_size), min_op_byte_size, + byte_order); + else + m_opcode.SetOpcodeBytes( + data.PeekData(data_offset, min_op_byte_size), min_op_byte_size); got_op = true; break; } @@ -524,13 +531,16 @@ class InstructionLLVMC : public lldb_private::Instruction { const addr_t pc = m_address.GetFileAddress(); llvm::MCInst inst; - const size_t inst_size = - mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); - if (inst_size == 0) - m_opcode.Clear(); - else { - m_opcode.SetOpcodeBytes(opcode_data, inst_size); - m_is_valid = true; + uint64_t inst_size = 0; + m_is_valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, + pc, inst, inst_size); + m_opcode.Clear(); + if (inst_size != 0) { + if (arch.GetTriple().isRISCV()) + m_opcode.SetOpcode16_32TupleBytes(opcode_data, inst_size, + byte_order); + else + m_opcode.SetOpcodeBytes(opcode_data, inst_size); } } } @@ -604,10 +614,11 @@ class InstructionLLVMC : public lldb_private::Instruction { const uint8_t *opcode_data = data.GetDataStart(); const size_t opcode_data_len = data.GetByteSize(); llvm::MCInst inst; - size_t inst_size = - mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); + uint64_t inst_size = 0; + bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, + inst, inst_size); - if (inst_size > 0) { + if (valid && inst_size > 0) { mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style); const bool saved_use_color = mc_disasm_ptr->GetUseColor(); @@ -1206,9 +1217,10 @@ class InstructionLLVMC : public lldb_private::Instruction { const uint8_t *opcode_data = data.GetDataStart(); const size_t opcode_data_len = data.GetByteSize(); llvm::MCInst inst; - const size_t inst_size = - mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst); - if (inst_size == 0) + uint64_t inst_size = 0; + const bool valid = mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, + pc, inst, inst_size); + if (!valid) return; m_has_visited_instruction = true; @@ -1337,19 +1349,19 @@ DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance( m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up); } -uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst( - const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, - llvm::MCInst &mc_inst) const { +bool DisassemblerLLVMC::MCDisasmInstance::GetMCInst(const uint8_t *opcode_data, + size_t opcode_data_len, + lldb::addr_t pc, + llvm::MCInst &mc_inst, + uint64_t &size) const { llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len); llvm::MCDisassembler::DecodeStatus status; - uint64_t new_inst_size; - status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc, - llvm::nulls()); + status = m_disasm_up->getInstruction(mc_inst, size, data, pc, llvm::nulls()); if (status == llvm::MCDisassembler::Success) - return new_inst_size; + return true; else - return 0; + return false; } void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst( diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp index 70b9800f4dade..7c71aaae6bcf2 100644 --- a/lldb/source/Utility/ArchSpec.cpp +++ b/lldb/source/Utility/ArchSpec.cpp @@ -228,9 +228,9 @@ static const CoreDefinition g_core_definitions[] = { {eByteOrderLittle, 4, 4, 4, llvm::Triple::hexagon, ArchSpec::eCore_hexagon_hexagonv5, "hexagonv5"}, - {eByteOrderLittle, 4, 2, 4, llvm::Triple::riscv32, ArchSpec::eCore_riscv32, + {eByteOrderLittle, 4, 2, 8, llvm::Triple::riscv32, ArchSpec::eCore_riscv32, "riscv32"}, - {eByteOrderLittle, 8, 2, 4, llvm::Triple::riscv64, ArchSpec::eCore_riscv64, + {eByteOrderLittle, 8, 2, 8, llvm::Triple::riscv64, ArchSpec::eCore_riscv64, "riscv64"}, {eByteOrderLittle, 4, 4, 4, llvm::Triple::loongarch32, diff --git a/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp b/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp index 8ec5d62a99ac5..51487034d89cd 100644 --- a/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp +++ b/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp @@ -14,6 +14,7 @@ #include "lldb/Core/Disassembler.h" #include "lldb/Target/ExecutionContext.h" #include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/StreamString.h" #include "Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h" @@ -90,3 +91,48 @@ TEST_F(TestMCDisasmInstanceRISCV, TestRISCV32Instruction) { EXPECT_FALSE(inst_sp->IsCall()); EXPECT_TRUE(inst_sp->DoesBranch()); } + +TEST_F(TestMCDisasmInstanceRISCV, TestOpcodeBytePrinter) { + ArchSpec arch("riscv32-*-linux"); + + const unsigned num_of_instructions = 7; + uint8_t data[] = { + 0x41, 0x11, // addi sp, sp, -0x10 + 0x06, 0xc6, // sw ra, 0xc(sp) + 0x23, 0x2a, 0xa4, 0xfe, // sw a0, -0xc(s0) + 0x23, 0x28, 0xa4, 0xfe, // sw a0, -0x10(s0) + 0x22, 0x44, // lw s0, 0x8(sp) + 0x20, 0x00, 0x20, 0x09, 0x40, 0x00, 0x3F, // 64 bit custom instruction + 0x10, 0x00, 0x00, 0x00, 0x02, 0x1f // 48 bit xqci.e.li rd=8 imm=0x1000 + }; + + const char *expected_outputs[] = {"1141", "c606", + "fea42a23", "fea42823", + "4422", "0940003f 00200020", + "021f 0000 1000"}; + + EXPECT_EQ(num_of_instructions, sizeof(expected_outputs) / sizeof(char *)); + + DisassemblerSP disass_sp; + Address start_addr(0x100); + disass_sp = Disassembler::DisassembleBytes( + arch, nullptr, nullptr, nullptr, nullptr, start_addr, &data, sizeof(data), + num_of_instructions, false); + + // If we failed to get a disassembler, we can assume it is because + // the llvm we linked against was not built with the riscv target, + // and we should skip these tests without marking anything as failing. + if (!disass_sp) + return; + + const InstructionList inst_list(disass_sp->GetInstructionList()); + EXPECT_EQ(num_of_instructions, inst_list.GetSize()); + + for (size_t i = 0; i < sizeof(expected_outputs) / sizeof(char *); i++) { + InstructionSP inst_sp; + StreamString s; + inst_sp = inst_list.GetInstructionAtIndex(i); + inst_sp->GetOpcode().Dump(&s, 1); + ASSERT_STREQ(s.GetString().str().c_str(), expected_outputs[i]); + } +} >From 8880121940ded4b535284911c23fa387aabd16d1 Mon Sep 17 00:00:00 2001 From: Jason Molenda <jmole...@apple.com> Date: Thu, 10 Jul 2025 22:46:55 -0700 Subject: [PATCH 2/2] un-clang-formatify --- .../RISCV/TestMCDisasmInstanceRISCV.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp b/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp index 51487034d89cd..de4de32145e09 100644 --- a/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp +++ b/lldb/unittests/Disassembler/RISCV/TestMCDisasmInstanceRISCV.cpp @@ -106,10 +106,17 @@ TEST_F(TestMCDisasmInstanceRISCV, TestOpcodeBytePrinter) { 0x10, 0x00, 0x00, 0x00, 0x02, 0x1f // 48 bit xqci.e.li rd=8 imm=0x1000 }; - const char *expected_outputs[] = {"1141", "c606", - "fea42a23", "fea42823", - "4422", "0940003f 00200020", - "021f 0000 1000"}; + // clang-format off + const char *expected_outputs[] = { + "1141", + "c606", + "fea42a23", + "fea42823", + "4422", + "0940003f 00200020", + "021f 0000 1000" + }; + // clang-format on EXPECT_EQ(num_of_instructions, sizeof(expected_outputs) / sizeof(char *)); _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits