On 5/31/17 2:11 PM, John Fastabend wrote:
>> @@ -2721,14 +2724,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb,
>> struct nlmsghdr *nlh,
>> skb->protocol = htons(ETH_P_IP);
>> skb->dev = dev;
>> skb->mark = mark;
>> - err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
>> + err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
>> + dev, &res);
>>
>> rt = skb_rtable(skb);
>> if (err == 0 && rt->dst.error)
>> err = -rt->dst.error;
>> } else {
>> - rt = ip_route_output_key(net, &fl4);
>> -
>> + rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb);
>> err = 0;
>> if (IS_ERR(rt))
>> err = PTR_ERR(rt);
>> @@ -2737,7 +2740,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb,
>> struct nlmsghdr *nlh,
>> if (err)
>> goto errout_free;
>>
>> - skb_dst_set(skb, &rt->dst);
>
>
> Why did you remove this? Neither ip_route_input() or ip_route_output_key()
> seem to justify this with a quick scan on my side. Feel free to correct me
> here.
>
original patch was done in January. I forget why I took it out. It is
clearly needed to release the dst. Might as well undo the argument
change to rt_fill_info since it is attached to the skb. Something like
this (whitespace damaged on paste - stupid Mac):
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f1f2e5aaa2d6..93cca12a8319 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2547,8 +2547,9 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow);
/* called with rcu_read_lock held */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32
table_id,
struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
- u32 seq, struct rtable *rt)
+ u32 seq)
{
+ struct rtable *rt = skb_rtable(skb);
struct rtmsg *r;
struct nlmsghdr *nlh;
unsigned long expires = 0;
@@ -2750,6 +2751,8 @@ static int inet_rtm_getroute(struct sk_buff
*in_skb, struct nlmsghdr *nlh,
if (err)
goto errout_free;
+ skb_dst_set(skb, &rt->dst);
+
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
@@ -2763,8 +2766,7 @@ static int inet_rtm_getroute(struct sk_buff
*in_skb, struct nlmsghdr *nlh,
fl4.flowi4_tos, res.fi, 0);
else
err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
- NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq,
- rt);
+ NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq);
if (err < 0)
goto errout_free;
Seems to work for me.