On 07/13/2018 04:37 PM, Paolo Abeni wrote:
> On Fri, 2018-07-13 at 16:13 +0200, Daniel Borkmann wrote:
>> On 07/13/2018 11:55 AM, Paolo Abeni wrote:
>>> This patch changes the TC_ACT_REDIRECT code path to allow
>>> providing the redirect parameters via the tcf_result argument.
>>>
>>> Such union is expanded to host the redirect device, the redirect
>>> direction (ingress/egress) and the stats to be updated on error
>>> conditions.
>>>
>>> Actions/classifiers using TC_ACT_REDIRECT can either:
>>> * fill the tcf_result redirect related fields
>>> * clear such fields and use the bpf per cpu redirect info
>>>
>>> skb_do_redirect now tries to fetch the relevant data from tcf_result
>>> and fall back to access redirect info. It also updates the stats
>>> accordingly to the redirect result, if provided by the caller.
>>>
>>> This will allow using the TC_ACT_REDIRECT action in more places in
>>> the next patch.
>>>
>>> Signed-off-by: Paolo Abeni <pab...@redhat.com>
>>> ---
>>>  include/net/sch_generic.h | 15 ++++++++++++++-
>>>  net/core/dev.c            |  4 ++--
>>>  net/core/filter.c         | 29 +++++++++++++++++++++++------
>>>  net/core/lwt_bpf.c        |  5 ++++-
>>>  net/sched/act_bpf.c       |  4 +++-
>>>  net/sched/cls_bpf.c       |  8 +++++---
>>>  6 files changed, 51 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>>> index 056dc1083aa3..dd9e00d017b3 100644
>>> --- a/include/net/sch_generic.h
>>> +++ b/include/net/sch_generic.h
>>> @@ -235,9 +235,22 @@ struct tcf_result {
>>>                     u32             classid;
>>>             };
>>>             const struct tcf_proto *goto_tp;
>>> +
>>> +           /* used by the TC_ACT_REDIRECT action */
>>> +           struct {
>>> +                   /* device and direction, or 0 bpf redirect */
>>> +                   long            dev_ingress;
>>> +                   struct gnet_stats_queue *qstats;
>>> +           };
>>>     };
>>>  };
>>>  
>>> +#define TCF_RESULT_REDIR_DEV(res) \
>>> +   ((struct net_device *)((res)->dev_ingress & ~1))
>>> +#define TCF_RESULT_REDIR_INGRESS(res) ((res)->dev_ingress & 1)
>>> +#define TCF_RESULT_SET_REDIRECT(res, dev, ingress) \
>>> +   ((res)->dev_ingress = (long)(dev) | (!!(ingress)))
>>> +
>>>  struct tcf_proto_ops {
>>>     struct list_head        head;
>>>     char                    kind[IFNAMSIZ];
>>> @@ -543,7 +556,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue 
>>> *dev_queue,
>>>                             struct netlink_ext_ack *extack);
>>>  void __qdisc_calculate_pkt_len(struct sk_buff *skb,
>>>                            const struct qdisc_size_table *stab);
>>> -int skb_do_redirect(struct sk_buff *);
>>> +int skb_do_redirect(struct sk_buff *skb, struct tcf_result *res);
>>>  
>>>  static inline void skb_reset_tc(struct sk_buff *skb)
>>>  {
>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>> index 14a748ee8cc9..a283dbfde30c 100644
>>> --- a/net/core/dev.c
>>> +++ b/net/core/dev.c
>>> @@ -3538,7 +3538,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, 
>>> struct net_device *dev)
>>>             return NULL;
>>>     case TC_ACT_REDIRECT:
>>>             /* No need to push/pop skb's mac_header here on egress! */
>>> -           skb_do_redirect(skb);
>>> +           skb_do_redirect(skb, &cl_res);
>>>             *ret = NET_XMIT_SUCCESS;
>>>             return NULL;
>>>     default:
>>> @@ -4600,7 +4600,7 @@ sch_handle_ingress(struct sk_buff *skb, struct 
>>> packet_type **pt_prev, int *ret,
>>>              * redirecting to another netdev
>>>              */
>>>             __skb_push(skb, skb->mac_len);
>>> -           skb_do_redirect(skb);
>>> +           skb_do_redirect(skb, &cl_res);
>>>             return NULL;
>>>     default:
>>>             break;
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index b9ec916f4e3a..4f64cf5189e6 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -2062,19 +2062,36 @@ BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
>>>     return TC_ACT_REDIRECT;
>>>  }
>>>  
>>> -int skb_do_redirect(struct sk_buff *skb)
>>> +int skb_do_redirect(struct sk_buff *skb, struct tcf_result *res)
>>>  {
>>> -   struct redirect_info *ri = this_cpu_ptr(&redirect_info);
>>> +   struct gnet_stats_queue *stats;
>>>     struct net_device *dev;
>>> +   int ret, flags;
>>>  
>>> -   dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
>>> -   ri->ifindex = 0;
>>> +   if (!res->dev_ingress) {
>>> +           struct redirect_info *ri = this_cpu_ptr(&redirect_info);
>>> +
>>> +           dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
>>> +           flags = ri->flags;
>>> +           ri->ifindex = 0;
>>> +           stats = NULL;
>>> +   } else {
>>> +           dev = TCF_RESULT_REDIR_DEV(res);
>>> +           flags = TCF_RESULT_REDIR_INGRESS(res) ? BPF_F_INGRESS : 0;
>>> +           stats = res->qstats;
>>> +   }
>>>     if (unlikely(!dev)) {
>>>             kfree_skb(skb);
>>> -           return -EINVAL;
>>> +           ret = -EINVAL;
>>> +           goto out;
>>>     }
>>>  
>>> -   return __bpf_redirect(skb, dev, ri->flags);
>>> +   ret = __bpf_redirect(skb, dev, flags);
>>> +
>>> +out:
>>> +   if (ret && stats)
>>> +           qstats_overlimit_inc(res->qstats);
>>> +   return ret;
>>>  }
>>>  
>>>  static const struct bpf_func_proto bpf_redirect_proto = {
>>> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
>>> index e7e626fb87bb..8dde1093994a 100644
>>> --- a/net/core/lwt_bpf.c
>>> +++ b/net/core/lwt_bpf.c
>>> @@ -65,7 +65,10 @@ static int run_lwt_bpf(struct sk_buff *skb, struct 
>>> bpf_lwt_prog *lwt,
>>>                                  lwt->name ? : "<unknown>");
>>>                     ret = BPF_OK;
>>>             } else {
>>> -                   ret = skb_do_redirect(skb);
>>> +                   struct tcf_result res;
>>> +
>>> +                   res.dev_ingress = 0;
>>> +                   ret = skb_do_redirect(skb, &res);
>>>                     if (ret == 0)
>>>                             ret = BPF_REDIRECT;
>>>             }
>>> diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
>>> index ac20266460c0..6fd46b691181 100644
>>> --- a/net/sched/act_bpf.c
>>> +++ b/net/sched/act_bpf.c
>>> @@ -67,10 +67,12 @@ static int tcf_bpf(struct sk_buff *skb, const struct 
>>> tc_action *act,
>>>      * returned.
>>>      */
>>>     switch (filter_res) {
>>> +   case TC_ACT_REDIRECT:
>>> +           res->dev_ingress = 0;
>>> +           /* fall-through */
>>>     case TC_ACT_PIPE:
>>>     case TC_ACT_RECLASSIFY:
>>>     case TC_ACT_OK:
>>> -   case TC_ACT_REDIRECT:
>>>             action = filter_res;
>>>             break;
>>>     case TC_ACT_SHOT:
>>> diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
>>> index 66e0ac9811f9..f0fb7ded8fe2 100644
>>> --- a/net/sched/cls_bpf.c
>>> +++ b/net/sched/cls_bpf.c
>>> @@ -65,14 +65,16 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 
>>> 1] = {
>>>                                 .len = sizeof(struct sock_filter) * 
>>> BPF_MAXINSNS },
>>>  };
>>>  
>>> -static int cls_bpf_exec_opcode(int code)
>>> +static int cls_bpf_exec_opcode(int code, struct tcf_result *res)
>>>  {
>>>     switch (code) {
>>> +   case TC_ACT_REDIRECT:
>>> +           res->dev_ingress = 0;
>>> +           /* fall-through */
>>>     case TC_ACT_OK:
>>>     case TC_ACT_SHOT:
>>>     case TC_ACT_STOLEN:
>>>     case TC_ACT_TRAP:
>>> -   case TC_ACT_REDIRECT:
>>>     case TC_ACT_UNSPEC:
>>>             return code;
>>>     default:
>>> @@ -113,7 +115,7 @@ static int cls_bpf_classify(struct sk_buff *skb, const 
>>> struct tcf_proto *tp,
>>>                     res->classid = TC_H_MAJ(prog->res.classid) |
>>>                                    qdisc_skb_cb(skb)->tc_classid;
>>>  
>>> -                   ret = cls_bpf_exec_opcode(filter_res);
>>> +                   ret = cls_bpf_exec_opcode(filter_res, res);
>>>                     if (ret == TC_ACT_UNSPEC)
>>>                             continue;
>>>                     break;
>>>
>>
>> Can't we just export the struct redirect_info and let others like
>> act_mirred use it, then we wouldn't need all these extra changes in
>> fast path?
> 
> Thank you for the feedback.
> 
> The use of tcf_result allows passing to the redirect helper the stats
> to be updated, and avoids an additional dev lookup for the mirred
> datapath.
> 
> Some changes are necessary to make the skb_do_redirect() function more
> generic, and code duplication could be reduced with some additional
> helper. Do you have so strong opinion against that?

Couldn't the needed pointers be added into struct redirect_info (and/or the
dev lookup be deferred for this specific scenario) thus that we would avoid
all the extra hassle just to transfer this state from yet a second entity?
I'd like to have the fast-path minimal and short, and I think this should be
doable this way for more broader reuse.

Thanks,
Daniel

Reply via email to