On Thu, Feb 14, 2019 at 10:11 AM David Ahern <dsah...@gmail.com> wrote: > > On 2/13/19 11:09 PM, Peter Oskolkov wrote: > > On error the skb should be freed. Tested with diff/steps > > provided by David Ahern. > > > > Reported-by: David Ahern <dsah...@gmail.com> > > Fixes: 3bd0b15281af ("bpf: add handling of BPF_LWT_REROUTE to lwt_bpf.c") > > Signed-off-by: Peter Oskolkov <p...@google.com> > > --- > > net/core/lwt_bpf.c | 24 ++++++++++++++++-------- > > 1 file changed, 16 insertions(+), 8 deletions(-) > > > > diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c > > index 32251f3fcda0..f3273cbb6b22 100644 > > --- a/net/core/lwt_bpf.c > > +++ b/net/core/lwt_bpf.c > > @@ -179,18 +179,19 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > > struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev); > > int oif = l3mdev ? l3mdev->ifindex : 0; > > struct dst_entry *dst = NULL; > > + int err = -EAFNOSUPPORT; > > struct sock *sk; > > struct net *net; > > bool ipv4; > > - int err; > > > > if (skb->protocol == htons(ETH_P_IP)) > > ipv4 = true; > > else if (skb->protocol == htons(ETH_P_IPV6)) > > ipv4 = false; > > else > > - return -EAFNOSUPPORT; > > + goto err; > > > > + err = -EINVAL; > > sk = sk_to_full_sk(skb->sk); > > if (sk) { > > if (sk->sk_bound_dev_if) > > @@ -216,7 +217,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > > > > rt = ip_route_output_key(net, &fl4); > > if (IS_ERR(rt)) > > - return -EINVAL; > > + goto err; > > dst = &rt->dst; > > } else { > > struct ipv6hdr *iph6 = ipv6_hdr(skb); > > @@ -231,12 +232,15 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > > fl6.saddr = iph6->saddr; > > > > err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6); > > - if (err || IS_ERR(dst)) > > - return -EINVAL; > > + if (err || IS_ERR(dst)) { > > + err = -EINVAL; > > + goto err; > > + } > > } > > if (unlikely(dst->error)) { > > dst_release(dst); > > - return -EINVAL; > > + err = -EINVAL; > > + goto err; > > } > > > > /* Although skb header was reserved in bpf_lwt_push_ip_encap(), it > > EINVAL is a confusing return code; it is not an EINVAL problem, it is a > routing problem:
Thanks, David! Sent a v2 of the patch. > > ... > starting egress IPv4 encap test > ping: sendmsg: Invalid argument > FAIL: test_ping: 1 > > > Versus returning the error from the lookup: > ... > starting egress IPv4 encap test > ping: sendmsg: No route to host > FAIL: test_ping: 1 > > > diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c > index f3273cbb6b22..a1901ba319fc 100644 > --- a/net/core/lwt_bpf.c > +++ b/net/core/lwt_bpf.c > @@ -191,7 +191,6 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > else > goto err; > > - err = -EINVAL; > sk = sk_to_full_sk(skb->sk); > if (sk) { > if (sk->sk_bound_dev_if) > @@ -216,8 +215,10 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > fl4.saddr = iph->saddr; > > rt = ip_route_output_key(net, &fl4); > - if (IS_ERR(rt)) > + if (IS_ERR(rt)) { > + err = PTR_ERR(rt); > goto err; > + } > dst = &rt->dst; > } else { > struct ipv6hdr *iph6 = ipv6_hdr(skb); > @@ -232,14 +233,12 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > fl6.saddr = iph6->saddr; > > err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6); > - if (err || IS_ERR(dst)) { > - err = -EINVAL; > + if (err || IS_ERR(dst)) > goto err; > - } > } > if (unlikely(dst->error)) { > dst_release(dst); > - err = -EINVAL; > + err = dst->error; > goto err; > } > > > > > > @@ -246,17 +250,21 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > > */ > > err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); > > if (unlikely(err)) > > - return err; > > + goto err; > > > > skb_dst_drop(skb); > > skb_dst_set(skb, dst); > > > > err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); > > if (unlikely(err)) > > - return err; > > + goto err; > > > > /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ > > return LWTUNNEL_XMIT_DONE; > > + > > +err: > > + kfree_skb(skb); > > + return err; > > } > > > > static int bpf_xmit(struct sk_buff *skb) > > > > I figured it was a leaked skb. > > Also, the test script needs to be updated as well with the negative > tests -- ie., toggle the route from a dev/gateway to a reject > (e.g.,unreachable) and back. > > Also, don't exit on the first failure - run all of them. I'll refactor the test as you suggest here when I add VRF and GRO tests in a couple of weeks, if this is OK. > > Having the result line up is more user friendly. e.g., > > # ./fib_tests.sh > > Single path route test > Start point > TEST: IPv4 fibmatch [ OK ] > TEST: IPv6 fibmatch [ OK ] > Nexthop device deleted > TEST: IPv4 fibmatch - no route [ OK ] > TEST: IPv6 fibmatch - no route [ OK ] > ...