commit:     c8146cfbcd36f9be4a447bf057811fe2f6c543b2
Author:     Sergei Trofimovich <slyfox <AT> gentoo <DOT> org>
AuthorDate: Sat Mar  2 19:15:14 2019 +0000
Commit:     Sergei Trofimovich <slyfox <AT> gentoo <DOT> org>
CommitDate: Sat Mar  2 19:37:11 2019 +0000
URL:        https://gitweb.gentoo.org/proj/sandbox.git/commit/?id=c8146cfb

exec wrapper: add support for GNU_HASH parser

In bug #672918 exec's ELF parser was going out of symbol table
range. It happened to trigger on a binary linked by LLVM's lld.

sandbox has no precise way to determine symbol table size and
uses the heuristic of detecting SYMTAB dynamic section size:
STRTAB (or GNU_LIBLIST for prelinked binaries).

This is a weak assumption and does not hold for lld-linked
binaries where the ordering is SYMTAB->GNU_HASH->STRTAB.
As a result sandbox SIGSEGVs when parses GNU_HASH as SYMTAB
entries.

The change adds GNU_HASH parser to iterate exactly through
exported symbols. That way we avoid heuristic code completely.

While at it repobe GNU_LIBLIST hack as we should not rely
on size heuristics anymore.

This has a nice side-effect of speeding up ELF parsing as
GHU_HASH only iterates through exported symbols. HASH
comparison has all dynamic symbols: both exported and
imported.

Reported-by: Dennis Schridde
Bug: https://bugs.gentoo.org/672918
Signed-off-by: Sergei Trofimovich <slyfox <AT> gentoo.org>

 libsandbox/wrapper-funcs/__wrapper_exec.c | 108 ++++++++++++++++++++----------
 1 file changed, 71 insertions(+), 37 deletions(-)

diff --git a/libsandbox/wrapper-funcs/__wrapper_exec.c 
b/libsandbox/wrapper-funcs/__wrapper_exec.c
index 974156e..766245a 100644
--- a/libsandbox/wrapper-funcs/__wrapper_exec.c
+++ b/libsandbox/wrapper-funcs/__wrapper_exec.c
@@ -83,8 +83,8 @@ static bool sb_check_exec(const char *filename, char *const 
argv[])
 ({ \
        Elf##n##_Ehdr *ehdr = (void *)elf; \
        Elf##n##_Phdr *phdr = (void *)(elf + ehdr->e_phoff); \
-       Elf##n##_Addr vaddr, filesz, vsym = 0, vstr = 0, vhash = 0, vliblist = 
0; \
-       Elf##n##_Off offset, symoff = 0, stroff = 0, hashoff = 0, liblistoff = 
0; \
+       Elf##n##_Addr vaddr, filesz, vsym = 0, vstr = 0, vhash = 0, vgnuhash = 
0; \
+       Elf##n##_Off offset, symoff = 0, stroff = 0, hashoff = 0, gnuhashoff = 
0; \
        Elf##n##_Dyn *dyn; \
        Elf##n##_Sym *sym, *symend; \
        uint##n##_t ent_size = 0, str_size = 0; \
@@ -107,7 +107,7 @@ static bool sb_check_exec(const char *filename, char *const 
argv[])
                                case DT_STRTAB:      vstr = dyn->d_un.d_val; 
break; \
                                case DT_STRSZ:       str_size = 
dyn->d_un.d_val; break; \
                                case DT_HASH:        vhash = dyn->d_un.d_val; 
break; \
-                               case DT_GNU_LIBLIST: vliblist = 
dyn->d_un.d_val; break; \
+                               case DT_GNU_HASH:    vgnuhash = 
dyn->d_un.d_val; break; \
                                } \
                                ++dyn; \
                        } \
@@ -127,8 +127,8 @@ static bool sb_check_exec(const char *filename, char *const 
argv[])
                                stroff = offset + (vstr - vaddr); \
                        if (vhash >= vaddr && vhash < vaddr + filesz) \
                                hashoff = offset + (vhash - vaddr); \
-                       if (vliblist >= vaddr && vliblist < vaddr + filesz) \
-                               liblistoff = offset + (vliblist - vaddr); \
+                       if (vgnuhash >= vaddr && vgnuhash < vaddr + filesz) \
+                               gnuhashoff = offset + (vgnuhash - vaddr); \
                } \
                \
                /* Finally walk the symbol table.  This should generally be 
fast as \
@@ -137,47 +137,81 @@ static bool sb_check_exec(const char *filename, char 
*const argv[])
                 */ \
                if (symoff && stroff) { \
                        /* Nowhere is the # of symbols recorded, or the size of 
the symbol \
-                        * table.  Instead, we do what glibc does: use the sysv 
hash table \
-                        * if it exists, else assume that the string table 
always directly \
+                        * table.  Instead, we do what glibc does: use the gnu 
or sysv hash \
+                        * table if it exists, else assume that the string 
table always directly \
                         * follows the symbol table.  This seems like a poor 
assumption to \
                         * make, but glibc has gotten by this long.  See 
determine_info in \
                         * glibc's elf/dl-addr.c. \
                         * \
-                        * Turns out prelink will violate that assumption.  
Fortunately it \
-                        * will insert its liblist at the same location all the 
time -- it \
-                        * replaces the string table with its liblist table. \
-                        * \
-                        * Long term, we should behave the same as glibc and 
walk the gnu \
-                        * hash table first before falling back to the raw 
symbol table. \
-                        * \
                         * We don't sanity check the ranges here as you aren't 
executing \
                         * corrupt programs in the sandbox. \
                         */ \
                        sym = (void *)(elf + symoff); \
-                       if (vhash) { \
-                               /* Hash entries are always 32-bits. */ \
-                               uint32_t *hashes = (void *)(elf + hashoff); \
-                               symend = sym + hashes[1]; \
-                       } else if (vliblist) \
-                               symend = (void *)(elf + liblistoff); \
-                       else \
-                               symend = (void *)(elf + stroff); \
-                       \
-                       while (sym < symend) { \
-                               char *symname = (void *)(elf + stroff + 
sym->st_name); \
-                               if (ELF##n##_ST_VISIBILITY(sym->st_other) == 
STV_DEFAULT && \
-                                   sym->st_shndx != SHN_UNDEF && sym->st_shndx 
< SHN_LORESERVE && \
-                                   sym->st_name && \
-                                   /* Minor optimization to avoid strcmp. */ \
-                                   symname[0] == '_' && symname[1] == '_') { \
-                                       /* Blacklist internal C library 
symbols. */ \
-                                       for (i = 0; i < 
ARRAY_SIZE(libc_alloc_syms); ++i) \
-                                               if (!strcmp(symname, 
libc_alloc_syms[i])) { \
-                                                       run_in_process = false; 
\
-                                                       goto use_trace; \
-                                               } \
+                       if (vgnuhash) { \
+                               uint32_t *hash32 = (void *)(elf + gnuhashoff); \
+                               /* use glibc's elf/dl-lookup.c:_dl_setup_hash() 
as a reference */ \
+                               /*   DT_GNU_HASH header: */ \
+                               uint32_t nbuckets = *hash32++; \
+                               uint32_t symbias = *hash32++; \
+                               uint32_t bitmask_nwords = *hash32++; \
+                               hash32++; /* gnu_shift */ \
+                               hash32 += n / 32 * bitmask_nwords; /* 
gnu_bitmask */ \
+                               uint32_t *gnu_buckets = hash32; \
+                               hash32 += nbuckets; \
+                               uint32_t *gnu_chain_zero = hash32 - symbias; \
+                               \
+                               uint32_t bucket; \
+                               \
+                               for (bucket = 0; bucket < nbuckets; bucket++) { 
\
+                                       uint32_t symndx = gnu_buckets[bucket]; \
+                                       if (symndx != 0) { \
+                                               const uint32_t *hasharr = 
&gnu_chain_zero[symndx]; \
+                                               do { \
+                                                       Elf##n##_Sym * s = 
&sym[symndx]; \
+                                                       \
+                                                       /* keep in sync with 
'vhash' case */ \
+                                                       char *symname = (void 
*)(elf + stroff + s->st_name); \
+                                                       if 
(ELF##n##_ST_VISIBILITY(s->st_other) == STV_DEFAULT && \
+                                                           s->st_shndx != 
SHN_UNDEF && s->st_shndx < SHN_LORESERVE && \
+                                                           s->st_name && \
+                                                           /* Minor 
optimization to avoid strcmp. */ \
+                                                           symname[0] == '_' 
&& symname[1] == '_') { \
+                                                               /* Blacklist 
internal C library symbols. */ \
+                                                               for (i = 0; i < 
ARRAY_SIZE(libc_alloc_syms); ++i) \
+                                                                       if 
(!strcmp(symname, libc_alloc_syms[i])) { \
+                                                                               
run_in_process = false; \
+                                                                               
goto use_trace; \
+                                                                       } \
+                                                       } \
+                                                       ++symndx; \
+                                               } while ((*hasharr++ & 1u) == 
0); \
+                                       } \
+                               } \
+                       } else { \
+                               if (vhash) { \
+                                       /* Hash entries are always 32-bits. */ \
+                                       uint32_t *hashes = (void *)(elf + 
hashoff); \
+                                       symend = sym + hashes[1]; \
+                               } else \
+                                       symend = (void *)(elf + stroff); \
+                               \
+                               while (sym < symend) { \
+                                       /* keep insync with 'vgnuhash' case */ \
+                                       char *symname = (void *)(elf + stroff + 
sym->st_name); \
+                                       if 
(ELF##n##_ST_VISIBILITY(sym->st_other) == STV_DEFAULT && \
+                                           sym->st_shndx != SHN_UNDEF && 
sym->st_shndx < SHN_LORESERVE && \
+                                           sym->st_name && \
+                                           /* Minor optimization to avoid 
strcmp. */ \
+                                           symname[0] == '_' && symname[1] == 
'_') { \
+                                               /* Blacklist internal C library 
symbols. */ \
+                                               for (i = 0; i < 
ARRAY_SIZE(libc_alloc_syms); ++i) \
+                                                       if (!strcmp(symname, 
libc_alloc_syms[i])) { \
+                                                               run_in_process 
= false; \
+                                                               goto use_trace; 
\
+                                                       } \
+                                       } \
+                                       ++sym; \
                                } \
-                               ++sym; \
                        } \
                } \
                \

Reply via email to