On 12/31/18 12:31 PM, Jakub Kicinski wrote:
> On Sun, 30 Dec 2018 22:02:10 +0000, Yonghong Song wrote:
>> On 12/28/18 7:09 PM, Jakub Kicinski wrote:
>>> +static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 
>>> off,
>>> +                                 u32 cnt)
>>> +{
>>> +   struct bpf_subprog_info *need_first_linfo;
>>> +   struct bpf_prog *prog = env->prog;
>>> +   u32 i, l_off, l_cnt, nr_linfo;
>>> +   struct bpf_line_info *linfo;
>>> +
>>> +   nr_linfo = prog->aux->nr_linfo;
>>> +   if (!nr_linfo || !cnt)
>>> +           return 0;
>>> +
>>> +   linfo = prog->aux->linfo;
>>> +
>>> +   /* progs are already adjusted/removed, if program starts on off, it may
>>> +    * had its start cut off and its line info may need to be preserved
>>> +    */
>>> +   for (i = 0; i < env->subprog_cnt; i++)
>>> +           if (env->subprog_info[i].start >= off)
>>> +                   break;
>>> +   if (i < env->subprog_cnt && env->subprog_info[i].start == off)
>>> +           need_first_linfo = &env->subprog_info[i];
>>> +   else
>>> +           need_first_linfo = NULL;
>>> +
>>> +   /* find first line info to remove */
>>> +   for (i = 0; i < nr_linfo; i++)
>>> +           if (linfo[i].insn_off >= off)
>>> +                   break;
>>> +
>>> +   /* count lines to be removed */
>>> +   l_off = i;
>>> +   l_cnt = 0;
>>> +   for (; i < nr_linfo; i++)
>>> +           if (linfo[i].insn_off < off + cnt)
>>> +                   l_cnt++;
>>> +           else
>>> +                   break;
>>> +
>>> +   /* either we didn't actually cut the start or we can just use line info
>>> +    * of first instruction if it exists
>>> +    */
>>> +   if (i < nr_linfo && linfo[i].insn_off == off + cnt)
>>> +           need_first_linfo = NULL;
>>> +   if (need_first_linfo) {
>>> +           if (WARN_ONCE(!l_cnt,
>>> +                         "verifier bug - no linfo removed, yet its 
>>> missing"))
>>> +                   return -EINVAL;
>>> +           if (WARN_ONCE(need_first_linfo->linfo_idx < l_off ||
>>> +                         need_first_linfo->linfo_idx >= l_off + l_cnt,
>>> +                         "verifier bug - removed prog linfo not in removed 
>>> range"))
>>> +                   return -EINVAL;
>>> +           /* subprog linfo_idx is not adjusted yet, so just find out
>>> +            * which line it used to be and swap it
>>> +            */
>>> +           memmove(linfo + l_off, linfo + need_first_linfo->linfo_idx,
>>> +                   sizeof(*linfo));
>>> +           need_first_linfo->linfo_idx = l_off;
>>> +           linfo[l_off].insn_off = off;
>>> +
>>> +           l_off++;
>>> +           l_cnt--;
>>> +   }
>>
>> In this routine, we handle need_first_linfo if the first insn of a
>> subprogram is deleted.
>> I wonder whether we need to handle the last deleted linfo as well. For
>> example:
>>      func1:
>>        line_info1:
>>           insn1
>>           insn2
>>      func2:
>>        line_info2:
>>           insn3
>>           insn4:
>>
>> if insn2 and insn3 are deleted, the above algorithm will produce:
>>       func1:
>>         line_info1:
>>          insn1
>>       func2:
>>          insn4
>>
>> func2 is missing the first line info, which should be line_info2 with
>> adjusted insn offset to the start of func2.
> 
> Mm.. should not happen, we should end up with:
> 
>       func1:
>         line_info1:
>           insn1
>       func2:
>         line_info2:
>           insn4
> 
> func2 after func adjust will start at off and there is no line info for
> off + cnt (insn4), so we will preserve line_info2.

Thanks for verification, I missed that
 >>> +  /* count lines to be removed */
 >>> +  l_off = i;
 >>> +  l_cnt = 0;
 >>> +  for (; i < nr_linfo; i++)
 >>> +          if (linfo[i].insn_off < off + cnt)
 >>> +                  l_cnt++;
 >>> +          else
 >>> +                  break;
once you reached linfo[i].insn_off >= off + cnt, no more counting.
since l_cnt starts from 0, so the last one is actually preserved.

> 
> I've added the following test_btf test, just to be sure:
> 
> {
>       .descr = "line_info (dead end + subprog start w/ no linfo)",
>       .raw_types = {
>               BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),   /* [1] 
> */
>               BTF_FUNC_PROTO_ENC(1, 1),                       /* [2] */
>                       BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
>               BTF_FUNC_ENC(NAME_TBD, 2),                      /* [3] */
>               BTF_FUNC_ENC(NAME_TBD, 2),                      /* [4] */
>               BTF_END_RAW,
>       },
>       BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"),
>       .insns = {
>               BPF_MOV64_IMM(BPF_REG_0, 0),
>               BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 2), /* dead */
>               BPF_CALL_REL(2),
>               BPF_EXIT_INSN(),
>               BPF_EXIT_INSN(), /* dead */
>               /* func */
>               BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* dead */
>               BPF_MOV64_IMM(BPF_REG_0, 0),
>               BPF_EXIT_INSN(),
>       },
>       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
>       .func_info_cnt = 2,
>       .func_info_rec_size = 8,
>       .func_info = { {0, 3}, {5, 4}, },
>       .line_info = {
>               BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10),
>               BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10),
>               BTF_END_RAW,
>       },
>       .line_info_rec_size = sizeof(struct bpf_line_info),
>       .nr_jited_ksyms = 2,
> },
> 
> ~# bpftool prog dump xlated id 48
> int main(int x):
> ; /* main linfo */
>     0: (b7) r0 = 0
>     1: (85) call pc+1#0xffffffffc041e782
>     2: (95) exit
> int func(int x):
> ; /* func linfo */
>     3: (b7) r0 = 0
>     4: (95) exit
> 
> ~# bpftool prog dump jited id 48
> int main(int x):
> 0xffffffffc041cb2f:
> ; /* main linfo */
>     0:        push   %rbp
>     1:        mov    %rsp,%rbp
>     4:        sub    $0x28,%rsp
>     b:        sub    $0x28,%rbp
>     f:        mov    %rbx,0x0(%rbp)
>    13:        mov    %r13,0x8(%rbp)
>    17:        mov    %r14,0x10(%rbp)
>    1b:        mov    %r15,0x18(%rbp)
>    1f:        xor    %eax,%eax
>    21:        mov    %rax,0x20(%rbp)
>    25:        xor    %eax,%eax
>    27:        callq  0x0000000000001c53
>    2c:        mov    0x0(%rbp),%rbx
>    30:        mov    0x8(%rbp),%r13
>    34:        mov    0x10(%rbp),%r14
>    38:        mov    0x18(%rbp),%r15
>    3c:        add    $0x28,%rbp
>    40:        leaveq
>    41:        retq
> 
> int func(int x):
> 0xffffffffc041e782:
> ; /* func linfo */
>     0:        push   %rbp
>     1:        mov    %rsp,%rbp
>     4:        sub    $0x28,%rsp
>     b:        sub    $0x28,%rbp
>     f:        mov    %rbx,0x0(%rbp)
>    13:        mov    %r13,0x8(%rbp)
>    17:        mov    %r14,0x10(%rbp)
>    1b:        mov    %r15,0x18(%rbp)
>    1f:        xor    %eax,%eax
>    21:        mov    %rax,0x20(%rbp)
>    25:        xor    %eax,%eax
>    27:        mov    0x0(%rbp),%rbx
>    2b:        mov    0x8(%rbp),%r13
>    2f:        mov    0x10(%rbp),%r14
>    33:        mov    0x18(%rbp),%r15
>    37:        add    $0x28,%rbp
>    3b:        leaveq
>    3c:        retq
> 
>> Another example:
>>      func1:
>>         line_info1:
>>            insn1
>>            insn2
>>         line_info2:
>>            insn3
>>            insn4
>> If insn2 and insn3 are deleted, we will get
>>       func1:
>>         line_info1:
>>           insn1
>>           insn4
>> This is not ideal either. We should attribute insn4 to line_info2.
> 
> Cool, I'm a little in the dark on what the definition of line info is,
> so your suggestions are very much appreciated!  If you're saying that
> all instructions below line info are considered part of that "line",
> that'd simplify the code.
> 
> So should I always preserve "last" line info if first live instruction
> doesn't have one?
> 
> linfo1:
>       insn1
>       insn2 /* dead */
> linfo2:
>       insn3 /* dead */
>       insn4
> linfo3:
>       insn5
> 
>    ||
>    \/
> 
> linfo1:
>       insn1
> linfo2:
>       insn4
> linfo3:
>       insn5

Yes, the above is the desirable output.

> 
> 
> That'd simplify things quite a bit!
> 
>>> +
>>> +   /* remove the line info which refers to the removed instructions */
>>> +   if (l_cnt) {
>>> +           memmove(linfo + l_off, linfo + i,
>>> +                   sizeof(*linfo) * (nr_linfo - i));
>>> +
>>> +           prog->aux->nr_linfo -= l_cnt;
>>> +           nr_linfo = prog->aux->nr_linfo;
>>> +   }
>>> +
>>> +   /* pull all linfo[i].insn_off >= off + cnt in by cnt */
>>> +   for (i = l_off; i < nr_linfo; i++)
>>> +           linfo[i].insn_off -= cnt;
>>> +
>>> +   /* fix up all subprogs (incl. 'exit') which start >= off */
>>> +   for (i = 0; i <= env->subprog_cnt; i++)
>>> +           if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
>>> +                   env->subprog_info[i].linfo_idx -= l_cnt;
>>> +
>>> +   return 0;
>>> +}
>>> +
>>> +static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, 
>>> u32 cnt)
>>> +{
>>> +   struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
>>> +   unsigned int orig_prog_len = env->prog->len;
>>> +   int err;
>>> +
>>> +   if (!cnt)
>>> +           return 0;
>>
>> This check is probably not needed as all call sites (including
>> patch #4) guarantees non-zero cnt.
> 
> Ack, thanks for the review, I was unsure if this is not convention.
> 

Reply via email to