RISC-V disassembler
I'll check in the attached patch which implements a disassembler for RISC-V. It also fixes a problem in the x86 disassembler, exposed through the additions needed for RISC-V. Since, aside from rth, who added the BPF disassembler, no one besides me has ever worked on that code, I will push the changes as soon as I can. diff --git a/backends/riscv_init.c b/backends/riscv_init.c index 9aaec9ce..9be5c6f2 100644 --- a/backends/riscv_init.c +++ b/backends/riscv_init.c @@ -58,6 +58,7 @@ riscv_init (Elf *elf, HOOK (eh, reloc_simple_type); HOOK (eh, register_info); HOOK (eh, abi_cfi); + HOOK (eh, disasm); /* gcc/config/ #define DWARF_FRAME_REGISTERS. */ eh->frame_nregs = 66; HOOK (eh, check_special_symbol); diff --git a/lib/color.c b/lib/color.c index 20b9698a..2cb41eba 100644 --- a/lib/color.c +++ b/lib/color.c @@ -72,6 +72,8 @@ char *color_operand = NULL; char *color_operand1 = ""; char *color_operand2 = ""; char *color_operand3 = ""; +char *color_operand4 = ""; +char *color_operand5 = ""; char *color_label = ""; char *color_undef = ""; char *color_undef_tls = ""; @@ -167,8 +169,10 @@ valid arguments are:\n\ E (m, mnemonic), E (o, operand), E (o1, operand1), - E (o1, operand2), - E (o1, operand3), + E (o2, operand2), + E (o3, operand3), + E (o4, operand4), + E (o5, operand5), E (l, label), E (u, undef), E (ut, undef_tls), @@ -205,6 +209,10 @@ valid arguments are:\n\ color_operand2 = color_operand; if (color_operand3[0] == '\0') color_operand3 = color_operand; + if (color_operand4[0] == '\0') + color_operand4 = color_operand; + if (color_operand5[0] == '\0') + color_operand5 = color_operand; } } #if 0 @@ -216,7 +224,7 @@ valid arguments are:\n\ color_mnemonic = xstrdup ("\e[38;5;202;1m"); color_operand1 = xstrdup ("\e[38;5;220m"); color_operand2 = xstrdup ("\e[38;5;48m"); - color_operand3 = xstrdup ("\e[38;5;112m"); + color_operand = xstrdup ("\e[38;5;112m"); color_label = xstrdup ("\e[38;5;21m"); } #endif diff --git a/lib/color.h b/lib/color.h index 3872eb0a..cb241435 100644 --- 
a/lib/color.h +++ b/lib/color.h @@ -50,6 +50,8 @@ extern char *color_mnemonic; extern char *color_operand1; extern char *color_operand2; extern char *color_operand3; +extern char *color_operand4; +extern char *color_operand5; extern char *color_label; extern char *color_undef; extern char *color_undef_tls; diff --git a/libcpu/Makefile.am b/libcpu/Makefile.am index 88717361..03c71ea3 100644 --- a/libcpu/Makefile.am +++ b/libcpu/Makefile.am @@ -42,7 +42,7 @@ noinst_LIBRARIES = libcpu.a libcpu_pic.a noinst_HEADERS = i386_dis.h x86_64_dis.h -libcpu_a_SOURCES = i386_disasm.c x86_64_disasm.c bpf_disasm.c +libcpu_a_SOURCES = i386_disasm.c x86_64_disasm.c bpf_disasm.c riscv_disasm.c libcpu_pic_a_SOURCES = am_libcpu_pic_a_OBJECTS = $(libcpu_a_SOURCES:.c=.os) diff --git a/libcpu/i386_disasm.c b/libcpu/i386_disasm.c index a7e03f95..8a206398 100644 --- a/libcpu/i386_disasm.c +++ b/libcpu/i386_disasm.c @@ -1030,7 +1030,7 @@ i386_disasm (Ebl *ebl __attribute__((unused)), string_end_idx = bufcnt; } else - bufcnt = string_end_idx; + start_idx = bufcnt = string_end_idx; break; case 'e': diff --git a/libcpu/riscv_disasm.c b/libcpu/riscv_disasm.c new file mode 100644 index ..bc4e02e5 --- /dev/null +++ b/libcpu/riscv_disasm.c @@ -0,0 +1,1501 @@ +/* Disassembler for RISC-V. + Copyright (C) 2019 Red Hat, Inc. + This file is part of elfutils. + Written by Ulrich Drepper , 2019. + + This file is free software; you can redistribute it and/or modify + it under the terms of either + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at + your option) any later version + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at + your option) any later version + + or both in parallel, as here. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied wa
Re: RISC-V disassembler
On 9/9/19 11:25 PM, Jim Wilson wrote: > There is a testfile45.expect.bz2 that doesn't look like it is supposed > to be part of the patch set. You probably don't want to commit that one. Yes, I do. The x86-64 disassembler had a tiny whitespace bug exposed through the code. > There is a testcase for riscv64 but not for riscv32, though the code > does look like it correctly handles rv32 versus rv64 decodes. A > testcase for rv32 would be a nice improvement. Well, if someone has the time. > Otherwise, it looks pretty good at a first glance, and I'm not planning > to do a full review. Seems to handle the obvious tricky cases > correctly. It doesn't support rv128 This necessitates the introduction of ELFCLASS128 first. > or the q (quadfloat) extension, Yes, it does. > We have binutils patches for the draft V > (vector) and B (bit manipulation) extensions in branches in the > github.com riscv repos, but these are still changing instruction > mnemonics and encodings, so not ready for official trees yet. Which is why I didn't add any of that. signature.asc Description: OpenPGP digital signature
cannot skip augment string handling
The dwarf_next_cfi function has some clever code which skips over the processing of the augmentation string content if the first character is 'z' (for sized augmentation). This would be OK if it weren't for the fact that the augmentation processing loop produces additional information, namely, it fills in the fde_augmentation_data_size field. That information isn't available elsewhere. In addition, the loop over the augmentation string is incorrect because the interpretation of the P, L, and R entries depends on 'z' being present. In the absence of 'z', when the loop would be executed in the current version, the interpretation of those entries is not the same. In the patch below I've removed the shortcut and fixed the handling of the P, L, and R entries. I've also added an additional test checking that the entries of the augmentation string don't guide the code to consume more data than is indicated in the 'z' data. libdw/ChangeLog 2022-08-09 Ulrich Drepper * dwarf_next_cfi.c (dwarf_next_cfi): Don't skip processing the augmentation string. Be more stringent what to accept. diff --git a/libdw/dwarf_next_cfi.c b/libdw/dwarf_next_cfi.c index fa28d99b..23b16885 100644 --- a/libdw/dwarf_next_cfi.c +++ b/libdw/dwarf_next_cfi.c @@ -193,50 +193,71 @@ dwarf_next_cfi (const unsigned char e_ident[], else /* DWARF 2 */ entry->cie.return_address_register = *bytes++; - /* If we have sized augmentation data, - we don't need to grok it all. 
*/ entry->cie.fde_augmentation_data_size = 0; + entry->cie.augmentation_data = bytes; bool sized_augmentation = *ap == 'z'; if (sized_augmentation) { + ++ap; if (bytes >= limit) goto invalid; get_uleb128 (entry->cie.augmentation_data_size, bytes, limit); if ((Dwarf_Word) (limit - bytes) < entry->cie.augmentation_data_size) goto invalid; entry->cie.augmentation_data = bytes; - bytes += entry->cie.augmentation_data_size; } - else - { - entry->cie.augmentation_data = bytes; - for (; *ap != '\0'; ++ap) + for (; *ap != '\0'; ++ap) + { + uint8_t encoding; + switch (*ap) { - uint8_t encoding; - switch (*ap) + case 'L': + if (sized_augmentation) { - case 'L': /* Skip LSDA pointer encoding byte. */ - case 'R': /* Skip FDE address encoding byte. */ + /* Skip LSDA pointer encoding byte. */ encoding = *bytes++; entry->cie.fde_augmentation_data_size += encoded_value_size (data, e_ident, encoding, NULL); continue; - case 'P': /* Skip encoded personality routine pointer. */ + } + break; + case 'R': + if (sized_augmentation) + { + /* Skip FDE address encoding byte. */ encoding = *bytes++; - bytes += encoded_value_size (data, e_ident, encoding, bytes); continue; - case 'S': /* Skip signal-frame flag. */ + } + break; + case 'P': + if (sized_augmentation) + { + /* Skip encoded personality routine pointer. */ + encoding = *bytes++; + bytes += encoded_value_size (data, e_ident, encoding, bytes); continue; - default: - /* Unknown augmentation string. initial_instructions might - actually start with some augmentation data. */ - break; } break; + case 'S': + if (sized_augmentation) + /* Skip signal-frame flag. */ + continue; + break; + default: + /* Unknown augmentation string. initial_instructions might + actually start with some augmentation data. */ + break; } - entry->cie.augmentation_data_size - = bytes - entry->cie.augmentation_data; + break; + } + if (! 
sized_augmentation) + entry->cie.augmentation_data_size = bytes - entry->cie.augmentation_data; + else + { + if (bytes > entry->cie.augmentation_data + entry->cie.augmentation_data_size) + goto invalid; + bytes = entry->cie.augmentation_data + entry->cie.augmentation_data_size; } entry->cie.initial_instructions = bytes;