Hi Song, On 03/06/2018 08:42 PM, Song Liu wrote: [...]> +/* > + * Parse build id from the note segment. This logic can be shared between > + * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are > + * identical. > + */ > +static inline int stack_map_parse_build_id(void *vm_start, > + unsigned char *build_id, > + void *note_start, > + Elf32_Word note_size) > +{ > + Elf32_Word note_offs = 0, new_offs; > + > + /* check for overflow */ > + if (note_start < vm_start || note_start + note_size < note_start) > + return -EINVAL; > + > + /* only supports note that fits in the first page */ > + if (note_start + note_size > vm_start + PAGE_SIZE) > + return -EINVAL; > + > + while (note_offs + sizeof(Elf32_Nhdr) < note_size) { > + Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs); > + > + if (nhdr->n_type == BPF_BUILD_ID && > + nhdr->n_namesz == sizeof("GNU") && > + nhdr->n_descsz == BPF_BUILD_ID_SIZE) { > + memcpy(build_id, > + note_start + note_offs + > + ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), > + BPF_BUILD_ID_SIZE); > + return 0; > + } > + new_offs = note_offs + sizeof(Elf32_Nhdr) + > + ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4); > + if (new_offs <= note_offs) /* overflow */ > + break; > + note_offs = new_offs; > + }; > + return -EINVAL; > +} > + > +/* Parse build ID from 32-bit ELF */ > +static int stack_map_get_build_id_32(void *vm_start, > + unsigned char *build_id) > +{ > + Elf32_Ehdr *ehdr = (Elf32_Ehdr *)vm_start; > + Elf32_Phdr *phdr; > + int i; > + > + /* only supports phdr that fits in one page */ > + if (ehdr->e_phnum > > + (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr)) > + return -EINVAL; > + > + phdr = (Elf32_Phdr *)(vm_start + sizeof(Elf32_Ehdr)); > + > + for (i = 0; i < ehdr->e_phnum; ++i) > + if (phdr[i].p_type == PT_NOTE) > + return stack_map_parse_build_id(vm_start, build_id, > + vm_start + phdr[i].p_offset, > + phdr[i].p_filesz); > + return -EINVAL; > +} > + > +/* Parse build ID from 64-bit ELF */ > +static int stack_map_get_build_id_64(void *vm_start, > + unsigned char *build_id) > +{ > + Elf64_Ehdr *ehdr = (Elf64_Ehdr *)vm_start; > + Elf64_Phdr *phdr; > + int i; > + > + /* only supports phdr that fits in one page */ > + if (ehdr->e_phnum > > + (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr)) > + return -EINVAL; > + > + phdr = (Elf64_Phdr *)(vm_start + sizeof(Elf64_Ehdr)); > + > + for (i = 0; i < ehdr->e_phnum; ++i) > + if (phdr[i].p_type == PT_NOTE) > + return stack_map_parse_build_id(vm_start, build_id, > + vm_start + phdr[i].p_offset, > + phdr[i].p_filesz); > + return -EINVAL; > +} > + > +/* Parse build ID of ELF file mapped to vma */ > +static int stack_map_get_build_id(struct vm_area_struct *vma, > + unsigned char *build_id) > +{ > + Elf32_Ehdr *ehdr; > + struct page *page; > + int ret; > + > + /* > + * vm_start is user memory, so we need to be careful with it. > + * We don't want too many copy_from_user to reduce overhead. > + * Most ELF file is at least one page long, and the build_id > + * is stored in the first page. Therefore, we limit the search of > + * build_id to the first page only. This can be made safe with > + * get_user_pages_fast(). If the file is smaller than PAGE_SIZE, > + * or the build_id is not in the first page, the look up fails. > + */ > + if (vma->vm_end - vma->vm_start < PAGE_SIZE || > + vma->vm_start & (PAGE_SIZE - 1)) /* page aligned */ > + return -EINVAL; > + > + if (get_user_pages_fast(vma->vm_start, 1, 0, &page) != 1)
Shouldn't this throw a splat? Implementations of get_user_pages_fast() call down_read() which has might_sleep() and we're under RCU read side here. > + return -EFAULT; > + > + ret = -EINVAL; > + ehdr = (Elf32_Ehdr *)page_address(page); > + > + /* compare magic x7f "ELF" */ > + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) > + goto out; > + > + /* only support executable file and shared object file */ > + if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) > + goto out; > + > + if (ehdr->e_ident[EI_CLASS] == 1) Minor nit: ehdr->e_ident[EI_CLASS] == ELFCLASS32 > + ret = stack_map_get_build_id_32(page_address(page), build_id); > + else if (ehdr->e_ident[EI_CLASS] == 2) Minor nit: ehdr->e_ident[EI_CLASS] == ELFCLASS64 > + ret = stack_map_get_build_id_64(page_address(page), build_id); > +out: > + put_page(page); > + return ret; > +} > + > +static int stack_map_get_build_id_offset(struct bpf_map *map, > + struct stack_map_bucket *bucket, > + u64 *ips, u32 trace_nr) > +{ > + int i; > + struct vm_area_struct *vma; > + struct bpf_stack_build_id *id_offs; > + int err; > + int successful_lookup = 0; > + [...] Thanks, Daniel