PR 26773 points out that some sleb128 values are decoded incorrectly. Looking into this, I found some other unusual cases as well.
In this patch, I chose to try to handle weird leb128 encodings by preserving their values when possible; or returning the maximum value on overflow. It isn't clear to me that this is necessarily the best choice. --- .gitignore | 1 + ChangeLog | 4 + libdw/ChangeLog | 11 +++ libdw/dwarf_getlocation.c | 2 +- libdw/memory-access.h | 153 +++++++++++++++++++----------------- tests/ChangeLog | 7 ++ tests/Makefile.am | 8 +- tests/leb128.c | 161 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 272 insertions(+), 75 deletions(-) create mode 100644 tests/leb128.c diff --git a/.gitignore b/.gitignore index c9790941..d737b14d 100644 --- a/.gitignore +++ b/.gitignore @@ -151,6 +151,7 @@ Makefile.in /tests/get-units-invalid /tests/get-units-split /tests/hash +/tests/leb128 /tests/line2addr /tests/low_high_pc /tests/msg_tst diff --git a/ChangeLog b/ChangeLog index 72e8397c..0b50578d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2020-10-23 Tom Tromey <t...@tromey.com> + + * .gitignore: Add /tests/leb128. + 2020-10-01 Frank Ch. Eigler <f...@redhat.com> PR25461 diff --git a/libdw/ChangeLog b/libdw/ChangeLog index 8b0b583a..23081c12 100644 --- a/libdw/ChangeLog +++ b/libdw/ChangeLog @@ -1,3 +1,14 @@ +2020-10-23 Tom Tromey <t...@tromey.com> + + PR26773 + * dwarf_getlocation.c (store_implicit_value): Use + __libdw_get_uleb128_unchecked. + * memory-access.h (__libdw_max_len_leb128) + (__libdw_max_len_uleb128, __libdw_max_len_sleb128): Remove. + (__libdw_get_uleb128, __libdw_get_uleb128_unchecked): Rewrite. + (get_sleb128_step): Remove. + (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Rewrite. + 2020-09-03 Mark Wielaard <m...@klomp.org> * dwarf.h: Add DW_CFA_AARCH64_negate_ra_state. diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c index 4617f9e9..0ce2e08a 100644 --- a/libdw/dwarf_getlocation.c +++ b/libdw/dwarf_getlocation.c @@ -130,7 +130,7 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op *op) struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s, sizeof (struct loc_block_s), 1); const unsigned char *data = (const unsigned char *) (uintptr_t) op->number2; - uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word)); + uint64_t len = __libdw_get_uleb128_unchecked (&data); if (unlikely (len != op->number)) return -1; block->addr = op; diff --git a/libdw/memory-access.h b/libdw/memory-access.h index a39ad6d2..70317d81 100644 --- a/libdw/memory-access.h +++ b/libdw/memory-access.h @@ -39,29 +39,6 @@ #define len_leb128(var) ((8 * sizeof (var) + 6) / 7) -static inline size_t -__libdw_max_len_leb128 (const size_t type_len, - const unsigned char *addr, const unsigned char *end) -{ - const size_t pointer_len = likely (addr < end) ? end - addr : 0; - return likely (type_len <= pointer_len) ? type_len : pointer_len; -} - -static inline size_t -__libdw_max_len_uleb128 (const unsigned char *addr, const unsigned char *end) -{ - const size_t type_len = len_leb128 (uint64_t); - return __libdw_max_len_leb128 (type_len, addr, end); -} - -static inline size_t -__libdw_max_len_sleb128 (const unsigned char *addr, const unsigned char *end) -{ - /* Subtract one step, so we don't shift into sign bit. */ - const size_t type_len = len_leb128 (int64_t) - 1; - return __libdw_max_len_leb128 (type_len, addr, end); -} - #define get_uleb128_step(var, addr, nth) \ do { \ unsigned char __b = *(addr)++; \ @@ -79,12 +56,39 @@ __libdw_get_uleb128 (const unsigned char **addrp, const unsigned char *end) for the common single-byte case. */ get_uleb128_step (acc, *addrp, 0); - const size_t max = __libdw_max_len_uleb128 (*addrp - 1, end); - for (size_t i = 1; i < max; ++i) + const size_t max = len_leb128 (acc) - 1; + for (size_t i = 1; i < max && *addrp < end; ++i) get_uleb128_step (acc, *addrp, i); - /* Other implementations set VALUE to UINT_MAX in this - case. So we better do this as well. */ - return UINT64_MAX; + + /* A single step to check the final byte, which could cause + overflow. */ + if (*addrp < end) + { + const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1; + if (unlikely ((**addrp & mask) != 0)) + { + /* Overflow. */ + ++*addrp; + return UINT64_MAX; + } + get_uleb128_step (acc, *addrp, max); + } + + /* Now we can read any number of bytes, as long as the payload is 0. + Anything else would overflow. */ + while (*addrp < end) + { + unsigned char b = *(*addrp)++; + if (b == 0x0) + break; + if (b != 0x80) + { + /* Overflow. */ + return UINT64_MAX; + } + } + + return acc; } static inline uint64_t @@ -96,65 +100,72 @@ __libdw_get_uleb128_unchecked (const unsigned char **addrp) for the common single-byte case. */ get_uleb128_step (acc, *addrp, 0); - const size_t max = len_leb128 (uint64_t); + const size_t max = len_leb128 (acc) - 1; for (size_t i = 1; i < max; ++i) get_uleb128_step (acc, *addrp, i); - /* Other implementations set VALUE to UINT_MAX in this - case. So we better do this as well. */ - return UINT64_MAX; + + /* A single step to check the final byte, which could cause + overflow. */ + const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1; + if (unlikely ((**addrp & mask) != 0)) + { + /* Overflow. */ + ++*addrp; + return UINT64_MAX; + } + get_uleb128_step (acc, *addrp, max); + + /* Now we can read any number of bytes, as long as the payload is 0. + Anything else would overflow. */ + while (true) + { + unsigned char b = *(*addrp)++; + if (b == 0x0) + break; + if (b != 0x80) + { + /* Overflow. */ + return UINT64_MAX; + } + } + + return acc; } /* Note, addr needs to me smaller than end. */ #define get_uleb128(var, addr, end) ((var) = __libdw_get_uleb128 (&(addr), end)) #define get_uleb128_unchecked(var, addr) ((var) = __libdw_get_uleb128_unchecked (&(addr))) -/* The signed case is similar, but we sign-extend the result. */ - -#define get_sleb128_step(var, addr, nth) \ - do { \ - unsigned char __b = *(addr)++; \ - if (likely ((__b & 0x80) == 0)) \ - { \ - struct { signed int i:7; } __s = { .i = __b }; \ - (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7)); \ - return (var); \ - } \ - (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7); \ - } while (0) - static inline int64_t __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end) { - int64_t acc = 0; - - /* Unroll the first step to help the compiler optimize - for the common single-byte case. */ - get_sleb128_step (acc, *addrp, 0); - - const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end); - for (size_t i = 1; i < max; ++i) - get_sleb128_step (acc, *addrp, i); - /* Other implementations set VALUE to INT_MAX in this - case. So we better do this as well. */ - return INT64_MAX; + const unsigned char *start = *addrp; + uint64_t value = __libdw_get_uleb128 (addrp, end); + if (unlikely (value == UINT64_MAX)) + return INT64_MAX; + if (((*addrp)[-1] & 0x40) != 0) + { + size_t nbytes = *addrp - start; + if (7 * nbytes < 8 * sizeof (value)) + value |= -(((typeof (value)) 1) << 7 * nbytes); + } + return value; } static inline int64_t __libdw_get_sleb128_unchecked (const unsigned char **addrp) { - int64_t acc = 0; - - /* Unroll the first step to help the compiler optimize - for the common single-byte case. */ - get_sleb128_step (acc, *addrp, 0); - - /* Subtract one step, so we don't shift into sign bit. */ - const size_t max = len_leb128 (int64_t) - 1; - for (size_t i = 1; i < max; ++i) - get_sleb128_step (acc, *addrp, i); - /* Other implementations set VALUE to INT_MAX in this - case. So we better do this as well. */ - return INT64_MAX; + const unsigned char *start = *addrp; + uint64_t value = __libdw_get_uleb128_unchecked (addrp); + if (unlikely (value == UINT64_MAX)) + return INT64_MAX; + if (((*addrp)[-1] & 0x40) != 0) + { + size_t nbytes = *addrp - start; + if (7 * nbytes < 8 * sizeof (value)) + value |= -(((typeof (value)) 1) << 7 * nbytes); + } + return value; } #define get_sleb128(var, addr, end) ((var) = __libdw_get_sleb128 (&(addr), end)) diff --git a/tests/ChangeLog b/tests/ChangeLog index aa68ffd3..5f075df8 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,10 @@ +2020-10-22 Tom Tromey <t...@tromey.com> + + PR26773 + * Makefile.am (check_PROGRAMS, TESTS): Add leb128. + (leb128_LDADD): New variable. + * leb128.c: New file. + 2020-10-20 Frank Ch. Eigler <f...@redhat.com> PR26756: more prometheus metrics diff --git a/tests/Makefile.am b/tests/Makefile.am index 9d0707da..568a9f8d 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,6 +1,6 @@ ## Process this file with automake to create Makefile.in ## -## Copyright (C) 1996-2019 Red Hat, Inc. +## Copyright (C) 1996-2020 Red Hat, Inc. ## This file is part of elfutils. ## ## This file is free software; you can redistribute it and/or modify @@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames sectiondump \ all-dwarf-ranges unit-info next_cfi \ elfcopy addsections xlate_notes elfrdwrnop \ dwelf_elf_e_machine_string \ - getphdrnum + getphdrnum leb128 asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \ asm-tst6 asm-tst7 asm-tst8 asm-tst9 @@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile test-nlist \ run-elfclassify.sh run-elfclassify-self.sh \ run-disasm-riscv64.sh \ run-pt_gnu_prop-tests.sh \ - run-getphdrnum.sh run-test-includes.sh + run-getphdrnum.sh run-test-includes.sh \ + leb128 if !BIARCH export ELFUTILS_DISABLE_BIARCH = 1 @@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf) elfrdwrnop_LDADD = $(libelf) dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw) getphdrnum_LDADD = $(libelf) $(libdw) +leb128_LDADD = $(libelf) $(libdw) # We want to test the libelf header against the system elf.h header. # Don't include any -I CPPFLAGS. Except when we install our own elf.h. diff --git a/tests/leb128.c b/tests/leb128.c new file mode 100644 index 00000000..856a2b89 --- /dev/null +++ b/tests/leb128.c @@ -0,0 +1,161 @@ +/* Test program for leb128 + Copyright (C) 2020 Tom Tromey + This file is part of elfutils. + + This file is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + elfutils is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> +#include <stddef.h> +#include <stdbool.h> +#include <libdw.h> +#include <libdwfl.h> +#include "../libdwelf/libdwelf.h" +#include "../libdw/libdwP.h" +#include "../libdwfl/libdwflP.h" +#include "../libdw/memory-access.h" + +#define OK 0 +#define FAIL 1 + +static const unsigned char v0[] = { 0x0 }; +static const unsigned char v23[] = { 23 }; +static const unsigned char vm_2[] = { 0x7e }; +static const unsigned char v128[] = { 0x80, 0x01 }; +static const unsigned char vm_128[] = { 0x80, 0x7f }; +/* This is pathological but nothing seems to prevent it, and maybe + it would be useful for a (hypothetical) LEB128 relocation. */ +static const unsigned char v0000[] = + { + 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, + 0x0 + }; +static const unsigned char vhuge[] = + { + 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x0 + }; +static const unsigned char most_positive[] = + { + 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x3f + }; +static const unsigned char most_negative[] = + { + 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x40 + }; +static const unsigned char minus_one[] = + { + 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0x7f + }; +static const unsigned char overflow[] = + { + 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xbf, 0x01 + }; + +static int +test_one_sleb (const unsigned char *data, size_t len, int64_t expect) +{ + int64_t value; + const unsigned char *p; + + p = data; + get_sleb128 (value, p, p + len); + if (value != expect || p != data + len) + return FAIL; + + p = data; + get_sleb128_unchecked (value, p); + if (value != expect || p != data + len) + return FAIL; + + return OK; +} + +static int +test_sleb (void) +{ +#define TEST(ARRAY, V) \ + if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \ + return FAIL; + + TEST (v0, 0); + TEST (v23, 23); + TEST (vm_2, -2); + TEST (v128, 128); + TEST (vm_128, -128); + TEST (v0000, 0); + TEST (vhuge, 9223372036854775807ll); + TEST (most_positive, 4611686018427387903ll); + TEST (most_negative, -4611686018427387904ll); + TEST (minus_one, -1); + TEST (overflow, INT64_MAX); + +#undef TEST + + return OK; +} + +static int +test_one_uleb (const unsigned char *data, size_t len, uint64_t expect) +{ + uint64_t value; + const unsigned char *p; + + p = data; + get_uleb128 (value, p, p + len); + if (value != expect || p != data + len) + return FAIL; + + p = data; + get_uleb128_unchecked (value, p); + if (value != expect || p != data + len) + return FAIL; + + return OK; +} + +static int +test_uleb (void) +{ +#define TEST(ARRAY, V) \ + if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \ + return FAIL; + + TEST (v0, 0); + TEST (v23, 23); + TEST (vm_2, 126); + TEST (v128, 128); + TEST (vm_128, 16256); + TEST (v0000, 0); + TEST (vhuge, 9223372036854775807ull); + TEST (most_positive, 4611686018427387903ull); + TEST (most_negative, 4611686018427387904ull); + TEST (minus_one, 9223372036854775807ull); + TEST (overflow, UINT64_MAX); + +#undef TEST + + return OK; +} + +int +main (void) +{ + return test_sleb () || test_uleb (); +} -- 2.17.2