On 12/31/18 12:31 PM, Jakub Kicinski wrote: > On Sun, 30 Dec 2018 22:02:10 +0000, Yonghong Song wrote: >> On 12/28/18 7:09 PM, Jakub Kicinski wrote: >>> +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 >>> off, >>> + u32 cnt) >>> +{ >>> + struct bpf_subprog_info *need_first_linfo; >>> + struct bpf_prog *prog = env->prog; >>> + u32 i, l_off, l_cnt, nr_linfo; >>> + struct bpf_line_info *linfo; >>> + >>> + nr_linfo = prog->aux->nr_linfo; >>> + if (!nr_linfo || !cnt) >>> + return 0; >>> + >>> + linfo = prog->aux->linfo; >>> + >>> + /* progs are already adjusted/removed, if program starts on off, it may >>> + * had its start cut off and its line info may need to be preserved >>> + */ >>> + for (i = 0; i < env->subprog_cnt; i++) >>> + if (env->subprog_info[i].start >= off) >>> + break; >>> + if (i < env->subprog_cnt && env->subprog_info[i].start == off) >>> + need_first_linfo = &env->subprog_info[i]; >>> + else >>> + need_first_linfo = NULL; >>> + >>> + /* find first line info to remove */ >>> + for (i = 0; i < nr_linfo; i++) >>> + if (linfo[i].insn_off >= off) >>> + break; >>> + >>> + /* count lines to be removed */ >>> + l_off = i; >>> + l_cnt = 0; >>> + for (; i < nr_linfo; i++) >>> + if (linfo[i].insn_off < off + cnt) >>> + l_cnt++; >>> + else >>> + break; >>> + >>> + /* either we didn't actually cut the start or we can just use line info >>> + * of first instruction if it exists >>> + */ >>> + if (i < nr_linfo && linfo[i].insn_off == off + cnt) >>> + need_first_linfo = NULL; >>> + if (need_first_linfo) { >>> + if (WARN_ONCE(!l_cnt, >>> + "verifier bug - no linfo removed, yet its >>> missing")) >>> + return -EINVAL; >>> + if (WARN_ONCE(need_first_linfo->linfo_idx < l_off || >>> + need_first_linfo->linfo_idx >= l_off + l_cnt, >>> + "verifier bug - removed prog linfo not in removed >>> range")) >>> + return -EINVAL; >>> + /* subprog linfo_idx is not adjusted yet, so just find out >>> + * which line it used to be and swap it >>> + */ >>> + memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx, >>> + sizeof(*linfo)); >>> + need_first_linfo->linfo_idx = l_off; >>> + linfo[l_off].insn_off = off; >>> + >>> + l_off++; >>> + l_cnt--; >>> + } >> >> In this routine, we handle need_first_linfo if the first insn of a >> subprogram is deleted. >> I wonder whether we need to handle the last deleted linfo as well. For >> example: >> func1: >> line_info1: >> insn1 >> insn2 >> func2: >> line_info2: >> insn3 >> insn4: >> >> if insn2 and insn3 are deleted, the above algorithm will produce: >> func1: >> line_info1: >> insn1 >> func2: >> insn4 >> >> func2 is missing the first line info, which should be line_info2 with >> adjusted insn offset to the start of func2. > > Mm.. should not happen, we should end up with: > > func1: > line_info1: > insn1 > func2: > line_info2: > insn4 > > func2 after func adjust will start at off and there is no line info for > off + cnt (insn4), so we will preserve line_info2.
Thanks for verification, I missed that >>> + /* count lines to be removed */ >>> + l_off = i; >>> + l_cnt = 0; >>> + for (; i < nr_linfo; i++) >>> + if (linfo[i].insn_off < off + cnt) >>> + l_cnt++; >>> + else >>> + break; once you reached linfo[i].insn_off >= off + cnt, no more counting. since l_cnt starts from 0, so the last one is actually preserved. > > I've added the following test_btf test, just to be sure: > > { > .descr = "line_info (dead end + subprog start w/ no linfo)", > .raw_types = { > BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] > */ > BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ > BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), > BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ > BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ > BTF_END_RAW, > }, > BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"), > .insns = { > BPF_MOV64_IMM(BPF_REG_0, 0), > BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 2), /* dead */ > BPF_CALL_REL(2), > BPF_EXIT_INSN(), > BPF_EXIT_INSN(), /* dead */ > /* func */ > BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* dead */ > BPF_MOV64_IMM(BPF_REG_0, 0), > BPF_EXIT_INSN(), > }, > .prog_type = BPF_PROG_TYPE_TRACEPOINT, > .func_info_cnt = 2, > .func_info_rec_size = 8, > .func_info = { {0, 3}, {5, 4}, }, > .line_info = { > BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), > BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), > BTF_END_RAW, > }, > .line_info_rec_size = sizeof(struct bpf_line_info), > .nr_jited_ksyms = 2, > }, > > ~# bpftool prog dump xlated id 48 > int main(int x): > ; /* main linfo */ > 0: (b7) r0 = 0 > 1: (85) call pc+1#0xffffffffc041e782 > 2: (95) exit > int func(int x): > ; /* func linfo */ > 3: (b7) r0 = 0 > 4: (95) exit > > ~# bpftool prog dump jited id 48 > int main(int x): > 0xffffffffc041cb2f: > ; /* main linfo */ > 0: push %rbp > 1: mov %rsp,%rbp > 4: sub $0x28,%rsp > b: sub $0x28,%rbp > f: mov %rbx,0x0(%rbp) > 13: mov %r13,0x8(%rbp) > 17: mov %r14,0x10(%rbp) > 1b: mov %r15,0x18(%rbp) > 1f: xor %eax,%eax > 21: mov %rax,0x20(%rbp) > 25: xor %eax,%eax > 27: callq 0x0000000000001c53 > 2c: mov 0x0(%rbp),%rbx > 30: mov 0x8(%rbp),%r13 > 34: mov 0x10(%rbp),%r14 > 38: mov 0x18(%rbp),%r15 > 3c: add $0x28,%rbp > 40: leaveq > 41: retq > > int func(int x): > 0xffffffffc041e782: > ; /* func linfo */ > 0: push %rbp > 1: mov %rsp,%rbp > 4: sub $0x28,%rsp > b: sub $0x28,%rbp > f: mov %rbx,0x0(%rbp) > 13: mov %r13,0x8(%rbp) > 17: mov %r14,0x10(%rbp) > 1b: mov %r15,0x18(%rbp) > 1f: xor %eax,%eax > 21: mov %rax,0x20(%rbp) > 25: xor %eax,%eax > 27: mov 0x0(%rbp),%rbx > 2b: mov 0x8(%rbp),%r13 > 2f: mov 0x10(%rbp),%r14 > 33: mov 0x18(%rbp),%r15 > 37: add $0x28,%rbp > 3b: leaveq > 3c: retq > >> Another example: >> func1: >> line_info1: >> insn1 >> insn2 >> line_info2: >> insn3 >> insn4 >> If insn2 and insn3 are deleted, we will get >> func1: >> line_info1: >> insn1 >> insn4 >> This is not ideal either. We should attribute insn4 to line_info2. > > Cool, I'm a little in the dark on what the definition of line info is, > so your suggestions are very much appreciated! If you're saying that > all instructions below line info are considered part of that "line", > that'd simplify the code. > > So should I always preserve "last" line info if first live instruction > doesn't have one? > > linfo1: > insn1 > insn2 /* dead */ > linfo2: > insn3 /* dead */ > insn4 > linfo3: > insn5 > > || > \/ > > linfo1: > insn1 > linfo2: > insn4 > linfo3: > insn5 Yes, the above is the desirable output. > > > That'd simplify things quite a bit! > >>> + >>> + /* remove the line info which refers to the removed instructions */ >>> + if (l_cnt) { >>> + memmove(linfo + l_off, linfo + i, >>> + sizeof(*linfo) * (nr_linfo - i)); >>> + >>> + prog->aux->nr_linfo -= l_cnt; >>> + nr_linfo = prog->aux->nr_linfo; >>> + } >>> + >>> + /* pull all linfo[i].insn_off >= off + cnt in by cnt */ >>> + for (i = l_off; i < nr_linfo; i++) >>> + linfo[i].insn_off -= cnt; >>> + >>> + /* fix up all subprogs (incl. 'exit') which start >= off */ >>> + for (i = 0; i <= env->subprog_cnt; i++) >>> + if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) >>> + env->subprog_info[i].linfo_idx -= l_cnt; >>> + >>> + return 0; >>> +} >>> + >>> +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, >>> u32 cnt) >>> +{ >>> + struct bpf_insn_aux_data *aux_data = env->insn_aux_data; >>> + unsigned int orig_prog_len = env->prog->len; >>> + int err; >>> + >>> + if (!cnt) >>> + return 0; >> >> This check is probably not needed as all call sites (including >> patch #4) guarantees non-zero cnt. > > Ack, thanks for the review, I was unsure if this is not convention. >