[PATCH] Fix leb128 reading

Tom Tromey Fri, 23 Oct 2020 20:59:28 -0700

PR 26773 points out that some sleb128 values are decoded incorrectly.
Looking into this, I found some other unusual cases as well.


In this patch, I chose to try to handle weird leb128 encodings by
preserving their values when possible; or returning the maximum value
on overflow.  It isn't clear to me that this is necessarily the best
choice.
---
 .gitignore                |   1 +
 ChangeLog                 |   4 +
 libdw/ChangeLog           |  11 +++
 libdw/dwarf_getlocation.c |   2 +-
 libdw/memory-access.h     | 153 +++++++++++++++++++-----------------
 tests/ChangeLog           |   7 ++
 tests/Makefile.am         |   8 +-
 tests/leb128.c            | 161 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 272 insertions(+), 75 deletions(-)
 create mode 100644 tests/leb128.c

diff --git a/.gitignore b/.gitignore
index c9790941..d737b14d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -151,6 +151,7 @@ Makefile.in
 /tests/get-units-invalid
 /tests/get-units-split
 /tests/hash
+/tests/leb128
 /tests/line2addr
 /tests/low_high_pc
 /tests/msg_tst
diff --git a/ChangeLog b/ChangeLog
index 72e8397c..0b50578d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2020-10-23  Tom Tromey  <[email protected]>
+
+       * .gitignore: Add /tests/leb128.
+
 2020-10-01  Frank Ch. Eigler  <[email protected]>
 
        PR25461
diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index 8b0b583a..23081c12 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,14 @@
+2020-10-23  Tom Tromey  <[email protected]>
+
+       PR26773
+       * dwarf_getlocation.c (store_implicit_value): Use
+       __libdw_get_uleb128_unchecked.
+       * memory-access.h (__libdw_max_len_leb128)
+       (__libdw_max_len_uleb128, __libdw_max_len_sleb128): Remove.
+       (__libdw_get_uleb128, __libdw_get_uleb128_unchecked): Rewrite.
+       (get_sleb128_step): Remove.
+       (__libdw_get_sleb128, __libdw_get_sleb128_unchecked): Rewrite.
+
 2020-09-03  Mark Wielaard  <[email protected]>
 
        * dwarf.h: Add DW_CFA_AARCH64_negate_ra_state.
diff --git a/libdw/dwarf_getlocation.c b/libdw/dwarf_getlocation.c
index 4617f9e9..0ce2e08a 100644
--- a/libdw/dwarf_getlocation.c
+++ b/libdw/dwarf_getlocation.c
@@ -130,7 +130,7 @@ store_implicit_value (Dwarf *dbg, void **cache, Dwarf_Op 
*op)
   struct loc_block_s *block = libdw_alloc (dbg, struct loc_block_s,
                                           sizeof (struct loc_block_s), 1);
   const unsigned char *data = (const unsigned char *) (uintptr_t) op->number2;
-  uint64_t len = __libdw_get_uleb128 (&data, data + len_leb128 (Dwarf_Word));
+  uint64_t len = __libdw_get_uleb128_unchecked (&data);
   if (unlikely (len != op->number))
     return -1;
   block->addr = op;
diff --git a/libdw/memory-access.h b/libdw/memory-access.h
index a39ad6d2..70317d81 100644
--- a/libdw/memory-access.h
+++ b/libdw/memory-access.h
@@ -39,29 +39,6 @@
 
 #define len_leb128(var) ((8 * sizeof (var) + 6) / 7)
 
-static inline size_t
-__libdw_max_len_leb128 (const size_t type_len,
-                       const unsigned char *addr, const unsigned char *end)
-{
-  const size_t pointer_len = likely (addr < end) ? end - addr : 0;
-  return likely (type_len <= pointer_len) ? type_len : pointer_len;
-}
-
-static inline size_t
-__libdw_max_len_uleb128 (const unsigned char *addr, const unsigned char *end)
-{
-  const size_t type_len = len_leb128 (uint64_t);
-  return __libdw_max_len_leb128 (type_len, addr, end);
-}
-
-static inline size_t
-__libdw_max_len_sleb128 (const unsigned char *addr, const unsigned char *end)
-{
-  /* Subtract one step, so we don't shift into sign bit.  */
-  const size_t type_len = len_leb128 (int64_t) - 1;
-  return __libdw_max_len_leb128 (type_len, addr, end);
-}
-
 #define get_uleb128_step(var, addr, nth)                                     \
   do {                                                                       \
     unsigned char __b = *(addr)++;                                           \
@@ -79,12 +56,39 @@ __libdw_get_uleb128 (const unsigned char **addrp, const 
unsigned char *end)
      for the common single-byte case.  */
   get_uleb128_step (acc, *addrp, 0);
 
-  const size_t max = __libdw_max_len_uleb128 (*addrp - 1, end);
-  for (size_t i = 1; i < max; ++i)
+  const size_t max = len_leb128 (acc) - 1;
+  for (size_t i = 1; i < max && *addrp < end; ++i)
     get_uleb128_step (acc, *addrp, i);
-  /* Other implementations set VALUE to UINT_MAX in this
-     case.  So we better do this as well.  */
-  return UINT64_MAX;
+
+  /* A single step to check the final byte, which could cause
+     overflow.  */
+  if (*addrp < end)
+    {
+      const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1;
+      if (unlikely ((**addrp & mask) != 0))
+       {
+         /* Overflow.  */
+         ++*addrp;
+         return UINT64_MAX;
+       }
+      get_uleb128_step (acc, *addrp, max);
+    }
+
+  /* Now we can read any number of bytes, as long as the payload is 0.
+     Anything else would overflow.  */
+  while (*addrp < end)
+    {
+      unsigned char b = *(*addrp)++;
+      if (b == 0x0)
+       break;
+      if (b != 0x80)
+       {
+         /* Overflow.  */
+         return UINT64_MAX;
+       }
+    }
+
+  return acc;
 }
 
 static inline uint64_t
@@ -96,65 +100,72 @@ __libdw_get_uleb128_unchecked (const unsigned char **addrp)
      for the common single-byte case.  */
   get_uleb128_step (acc, *addrp, 0);
 
-  const size_t max = len_leb128 (uint64_t);
+  const size_t max = len_leb128 (acc) - 1;
   for (size_t i = 1; i < max; ++i)
     get_uleb128_step (acc, *addrp, i);
-  /* Other implementations set VALUE to UINT_MAX in this
-     case.  So we better do this as well.  */
-  return UINT64_MAX;
+
+  /* A single step to check the final byte, which could cause
+     overflow.  */
+  const unsigned mask = (1u << (8 * sizeof (acc) - 7 * max)) - 1;
+  if (unlikely ((**addrp & mask) != 0))
+    {
+      /* Overflow.  */
+      ++*addrp;
+      return UINT64_MAX;
+    }
+  get_uleb128_step (acc, *addrp, max);
+
+  /* Now we can read any number of bytes, as long as the payload is 0.
+     Anything else would overflow.  */
+  while (true)
+    {
+      unsigned char b = *(*addrp)++;
+      if (b == 0x0)
+       break;
+      if (b != 0x80)
+       {
+         /* Overflow.  */
+         return UINT64_MAX;
+       }
+    }
+
+  return acc;
 }
 
 /* Note, addr needs to me smaller than end. */
 #define get_uleb128(var, addr, end) ((var) = __libdw_get_uleb128 (&(addr), 
end))
 #define get_uleb128_unchecked(var, addr) ((var) = 
__libdw_get_uleb128_unchecked (&(addr)))
 
-/* The signed case is similar, but we sign-extend the result.  */
-
-#define get_sleb128_step(var, addr, nth)                                     \
-  do {                                                                       \
-    unsigned char __b = *(addr)++;                                           \
-    if (likely ((__b & 0x80) == 0))                                          \
-      {                                                                        
      \
-       struct { signed int i:7; } __s = { .i = __b };                        \
-       (var) |= (typeof (var)) __s.i * ((typeof (var)) 1 << ((nth) * 7));    \
-       return (var);                                                         \
-      }                                                                        
      \
-    (var) |= (typeof (var)) (__b & 0x7f) << ((nth) * 7);                     \
-  } while (0)
-
 static inline int64_t
 __libdw_get_sleb128 (const unsigned char **addrp, const unsigned char *end)
 {
-  int64_t acc = 0;
-
-  /* Unroll the first step to help the compiler optimize
-     for the common single-byte case.  */
-  get_sleb128_step (acc, *addrp, 0);
-
-  const size_t max = __libdw_max_len_sleb128 (*addrp - 1, end);
-  for (size_t i = 1; i < max; ++i)
-    get_sleb128_step (acc, *addrp, i);
-  /* Other implementations set VALUE to INT_MAX in this
-     case.  So we better do this as well.  */
-  return INT64_MAX;
+  const unsigned char *start = *addrp;
+  uint64_t value = __libdw_get_uleb128 (addrp, end);
+  if (unlikely (value == UINT64_MAX))
+    return INT64_MAX;
+  if (((*addrp)[-1] & 0x40) != 0)
+    {
+      size_t nbytes = *addrp - start;
+      if (7 * nbytes < 8 * sizeof (value))
+       value |= -(((typeof (value)) 1) << 7 * nbytes);
+    }
+  return value;
 }
 
 static inline int64_t
 __libdw_get_sleb128_unchecked (const unsigned char **addrp)
 {
-  int64_t acc = 0;
-
-  /* Unroll the first step to help the compiler optimize
-     for the common single-byte case.  */
-  get_sleb128_step (acc, *addrp, 0);
-
-  /* Subtract one step, so we don't shift into sign bit.  */
-  const size_t max = len_leb128 (int64_t) - 1;
-  for (size_t i = 1; i < max; ++i)
-    get_sleb128_step (acc, *addrp, i);
-  /* Other implementations set VALUE to INT_MAX in this
-     case.  So we better do this as well.  */
-  return INT64_MAX;
+  const unsigned char *start = *addrp;
+  uint64_t value = __libdw_get_uleb128_unchecked (addrp);
+  if (unlikely (value == UINT64_MAX))
+    return INT64_MAX;
+  if (((*addrp)[-1] & 0x40) != 0)
+    {
+      size_t nbytes = *addrp - start;
+      if (7 * nbytes < 8 * sizeof (value))
+       value |= -(((typeof (value)) 1) << 7 * nbytes);
+    }
+  return value;
 }
 
 #define get_sleb128(var, addr, end) ((var) = __libdw_get_sleb128 (&(addr), 
end))
diff --git a/tests/ChangeLog b/tests/ChangeLog
index aa68ffd3..5f075df8 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,10 @@
+2020-10-22  Tom Tromey  <[email protected]>
+
+       PR26773
+       * Makefile.am (check_PROGRAMS, TESTS): Add leb128.
+       (leb128_LDADD): New variable.
+       * leb128.c: New file.
+
 2020-10-20  Frank Ch. Eigler  <[email protected]>
 
        PR26756: more prometheus metrics
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 9d0707da..568a9f8d 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,6 +1,6 @@
 ## Process this file with automake to create Makefile.in
 ##
-## Copyright (C) 1996-2019 Red Hat, Inc.
+## Copyright (C) 1996-2020 Red Hat, Inc.
 ## This file is part of elfutils.
 ##
 ## This file is free software; you can redistribute it and/or modify
@@ -63,7 +63,7 @@ check_PROGRAMS = arextract arsymtest newfile saridx scnnames 
sectiondump \
                  all-dwarf-ranges unit-info next_cfi \
                  elfcopy addsections xlate_notes elfrdwrnop \
                  dwelf_elf_e_machine_string \
-                 getphdrnum
+                 getphdrnum leb128
 
 asm_TESTS = asm-tst1 asm-tst2 asm-tst3 asm-tst4 asm-tst5 \
            asm-tst6 asm-tst7 asm-tst8 asm-tst9
@@ -185,7 +185,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile 
test-nlist \
        run-elfclassify.sh run-elfclassify-self.sh \
        run-disasm-riscv64.sh \
        run-pt_gnu_prop-tests.sh \
-       run-getphdrnum.sh run-test-includes.sh
+       run-getphdrnum.sh run-test-includes.sh \
+       leb128
 
 if !BIARCH
 export ELFUTILS_DISABLE_BIARCH = 1
@@ -694,6 +695,7 @@ xlate_notes_LDADD = $(libelf)
 elfrdwrnop_LDADD = $(libelf)
 dwelf_elf_e_machine_string_LDADD = $(libelf) $(libdw)
 getphdrnum_LDADD = $(libelf) $(libdw)
+leb128_LDADD = $(libelf) $(libdw)
 
 # We want to test the libelf header against the system elf.h header.
 # Don't include any -I CPPFLAGS. Except when we install our own elf.h.
diff --git a/tests/leb128.c b/tests/leb128.c
new file mode 100644
index 00000000..856a2b89
--- /dev/null
+++ b/tests/leb128.c
@@ -0,0 +1,161 @@
+/* Test program for leb128
+   Copyright (C) 2020 Tom Tromey
+   This file is part of elfutils.
+
+   This file is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   elfutils is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <libdw.h>
+#include <libdwfl.h>
+#include "../libdwelf/libdwelf.h"
+#include "../libdw/libdwP.h"
+#include "../libdwfl/libdwflP.h"
+#include "../libdw/memory-access.h"
+
+#define OK 0
+#define FAIL 1
+
+static const unsigned char v0[] = { 0x0 };
+static const unsigned char v23[] = { 23 };
+static const unsigned char vm_2[] = { 0x7e };
+static const unsigned char v128[] = { 0x80, 0x01 };
+static const unsigned char vm_128[] = { 0x80, 0x7f };
+/* This is pathological but nothing seems to prevent it, and maybe
+   it would be useful for a (hypothetical) LEB128 relocation.  */
+static const unsigned char v0000[] =
+  {
+    0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x80, 0x80,
+    0x0
+  };
+static const unsigned char vhuge[] =
+  {
+    0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0x0
+  };
+static const unsigned char most_positive[] =
+  {
+    0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0x3f
+  };
+static const unsigned char most_negative[] =
+  {
+    0x80, 0x80, 0x80, 0x80, 0x80,
+    0x80, 0x80, 0x80, 0x40
+  };
+static const unsigned char minus_one[] =
+  {
+    0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0x7f
+  };
+static const unsigned char overflow[] =
+  {
+    0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xbf, 0x01
+  };
+
+static int
+test_one_sleb (const unsigned char *data, size_t len, int64_t expect)
+{
+  int64_t value;
+  const unsigned char *p;
+
+  p = data;
+  get_sleb128 (value, p, p + len);
+  if (value != expect || p != data + len)
+    return FAIL;
+
+  p = data;
+  get_sleb128_unchecked (value, p);
+  if (value != expect || p != data + len)
+    return FAIL;
+
+  return OK;
+}
+
+static int
+test_sleb (void)
+{
+#define TEST(ARRAY, V)                               \
+  if (test_one_sleb (ARRAY, sizeof (ARRAY), V) != OK) \
+    return FAIL;
+
+  TEST (v0, 0);
+  TEST (v23, 23);
+  TEST (vm_2, -2);
+  TEST (v128, 128);
+  TEST (vm_128, -128);
+  TEST (v0000, 0);
+  TEST (vhuge, 9223372036854775807ll);
+  TEST (most_positive, 4611686018427387903ll);
+  TEST (most_negative, -4611686018427387904ll);
+  TEST (minus_one, -1);
+  TEST (overflow, INT64_MAX);
+
+#undef TEST
+
+  return OK;
+}
+
+static int
+test_one_uleb (const unsigned char *data, size_t len, uint64_t expect)
+{
+  uint64_t value;
+  const unsigned char *p;
+
+  p = data;
+  get_uleb128 (value, p, p + len);
+  if (value != expect || p != data + len)
+    return FAIL;
+
+  p = data;
+  get_uleb128_unchecked (value, p);
+  if (value != expect || p != data + len)
+    return FAIL;
+
+  return OK;
+}
+
+static int
+test_uleb (void)
+{
+#define TEST(ARRAY, V)                               \
+  if (test_one_uleb (ARRAY, sizeof (ARRAY), V) != OK) \
+    return FAIL;
+
+  TEST (v0, 0);
+  TEST (v23, 23);
+  TEST (vm_2, 126);
+  TEST (v128, 128);
+  TEST (vm_128, 16256);
+  TEST (v0000, 0);
+  TEST (vhuge, 9223372036854775807ull);
+  TEST (most_positive, 4611686018427387903ull);
+  TEST (most_negative, 4611686018427387904ull);
+  TEST (minus_one, 9223372036854775807ull);
+  TEST (overflow, UINT64_MAX);
+
+#undef TEST
+
+  return OK;
+}
+
+int
+main (void)
+{
+  return test_sleb () || test_uleb ();
+}
-- 
2.17.2

[PATCH] Fix leb128 reading

Reply via email to