Hi Tom,
On Fri, Oct 23, 2020 at 09:58:34PM -0600, Tom Tromey wrote:
> PR 26773 points out that some sleb128 values are decoded incorrectly.
> Looking into this, I found some other unusual cases as well.
>
> In this patch, I chose to try to handle weird leb128 encodings by
> preserving their values when possible; or returning the maximum value
> on overflow. It isn't clear to me that this is necessarily the best
> choice.
I think it is a good thing to accept "padded" leb128 numbers, but
probably up to a point. The padding will probably be either mod-4 or
mod-8 bytes. So lets at most read up to 16 bytes total.
Note we use a Signed-off-by like as described in the CONTRIBUTING
document.
> ---
> .gitignore| 1 +
> ChangeLog | 4 +
> libdw/ChangeLog | 11 +++
> libdw/dwarf_getlocation.c | 2 +-
> libdw/memory-access.h | 153 +++-
> tests/ChangeLog | 7 ++
> tests/Makefile.am | 8 +-
> tests/leb128.c| 161 ++
> 8 files changed, 272 insertions(+), 75 deletions(-)
> create mode 100644 tests/leb128.c
>
> diff --git a/.gitignore b/.gitignore
> index c9790941..d737b14d 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -151,6 +151,7 @@ Makefile.in
> /tests/get-units-invalid
> /tests/get-units-split
> /tests/hash
> +/tests/leb128
> /tests/line2addr
> /tests/low_high_pc
> /tests/msg_tst
> diff --git a/ChangeLog b/ChangeLog
> index 72e8397c..0b50578d 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,7 @@
> +2020-10-23 Tom Tromey
> +
> + * .gitignore: Add /tests/leb128.
> +
> 2020-10-01 Frank Ch. Eigler
Nice, I always forget that.
> PR25461
> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index 8b0b583a..23081c12 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,3 +1,14 @@
> +2020-10-23 Tom Tromey
> +
> + PR26773
> + * dwarf_getlocation.c (store_implicit_value): Use
> + __libdw_get_uleb128_unchecked.
> + * memory-access.h (__libdw_max_len_leb128)
> + (__libdw_max_len_uleb128, __libdw_max_len_sleb128): Remove.
> + (__libdw_get_uleb128, __libdw_get_uleb128_unchecked): Rewrite.
> + (get_sleb128_step): Remove.
> + (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Rewrite.
> +
> 2020-09-03 Mark Wielaard
>
> * dwarf.h: Add DW_CFA_AARCH64_negate_ra_state.
> diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
> index 4617f9e9..0ce2e08a 100644
> --- a/libdw/dwarf_getlocation.c
> +++ b/libdw/dwarf_getlocation.c
> @@ -130,7 +130,7 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op
> *op)
>struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s,
> sizeof (struct loc_block_s), 1);
>const unsigned char *data = (const unsigned char *) (uintptr_t)
> op->number2;
> - uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word));
> + uint64_t len = __libdw_get_uleb128_unchecked (&data);
>if (unlikely (len != op->number))
> return -1;
>block->addr = op;
Well spotted. So when we read the actual Dwarf_Op we have already read
len as op->number from the same data location. So this time we can
read it "unchecked". Note that we didn't used to do the sanity check
len == op->number before commit commit b7a5bc8aa ("libdw: Detect bad
DWARF in store_implicit_value.") I think we can also simply drop it
again. It really is only an internal sanity check, I don't believe it
can actually catch bad DWARF.
> diff --git a/libdw/memory-access.h b/libdw/memory-access.h
> index a39ad6d2..70317d81 100644
> --- a/libdw/memory-access.h
> +++ b/libdw/memory-access.h
> @@ -39,29 +39,6 @@
>
> #define len_leb128(var) ((8 * sizeof (var) + 6) / 7)
>
> -static inline size_t
> -__libdw_max_len_leb128 (const size_t type_len,
> - const unsigned char *addr, const unsigned char *end)
> -{
> - const size_t pointer_len = likely (addr < end) ? end - addr : 0;
> - return likely (type_len <= pointer_len) ? type_len : pointer_len;
> -}
> -
> -static inline size_t
> -__libdw_max_len_uleb128 (const unsigned char *addr, const unsigned char *end)
> -{
> - const size_t type_len = len_leb128 (uint64_t);
> - return __libdw_max_len_leb128 (type_len, addr, end);
> -}
> -
> -static inline size_t
> -__libdw_max_len_sleb128 (const unsigned char *addr, const unsigned char *end)
> -{
> - /* Subtract one step, so we don't shift into sign bit. */
> - const size_t type_len = len_leb128 (int64_t) - 1;
> - return __libdw_max_len_leb128 (type_len, addr, end);
> -}
OK, we don't need any of these anymore.
> #define get_uleb128_step(var, addr, nth) \
>do {
> \
> unsigned char __b = *(addr)++; \
> @@ -79,12 +56,39 @@ __libdw_get_uleb128 (const uns