On 10/23/2017 11:28 PM, Jiri Pirko wrote:
[...]
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 031dffd..c7ddbdb 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -143,6 +143,36 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
  #endif
  }

+/* Mini Qdisc serves for specific needs of ingress/clsact Qdisc.
+ * The fast path only needs to access filter list and to update stats
+ */
+struct mini_Qdisc {
+       struct tcf_proto __rcu *filter_list;
+       struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+       struct gnet_stats_queue __percpu *cpu_qstats;
+       struct mini_Qdisc __rcu **p_miniq;
+};
+
+static inline void mini_qdisc_init(struct mini_Qdisc *miniq,
+                                  struct Qdisc *qdisc,
+                                  struct mini_Qdisc __rcu **p_miniq)
+{
+       miniq->cpu_bstats = qdisc->cpu_bstats;
+       miniq->cpu_qstats = qdisc->cpu_qstats;
+       miniq->p_miniq = p_miniq;
+}
+
+static inline void mini_qdisc_enable(struct mini_Qdisc *miniq)
+{
+       rcu_assign_pointer(*miniq->p_miniq, miniq);
+}
+
+static inline void mini_qdisc_disable(struct mini_Qdisc *miniq)
+{
+       RCU_INIT_POINTER(*miniq->p_miniq, NULL);
+       rcu_barrier();

Can you add a comment saying which call_rcu() the barrier above
protects against?
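
E.g. something along these lines, if the intent is to flush pending
tcf_proto destruction callbacks queued via call_rcu() before the
caller frees miniq's backing storage (just guessing at the intent
here, hence the question):

static inline void mini_qdisc_disable(struct mini_Qdisc *miniq)
{
	RCU_INIT_POINTER(*miniq->p_miniq, NULL);
	/* Wait for any outstanding call_rcu() callbacks, e.g. from
	 * tcf_proto destruction, to complete before the caller can
	 * free the storage backing miniq. Note that rcu_barrier()
	 * only waits for callbacks, not for readers; a reader still
	 * holding the old *p_miniq value would need synchronize_rcu()
	 * semantics instead.
	 */
	rcu_barrier();
}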

+}
+
  struct Qdisc_class_ops {
        /* Child qdisc manipulation */
        struct netdev_queue *   (*select_queue)(struct Qdisc *, struct tcmsg *);
@@ -259,9 +289,13 @@ struct qdisc_skb_cb {
        unsigned char           data[QDISC_CB_PRIV_LEN];
  };

+typedef void tcf_chain_change_empty_t(struct tcf_proto __rcu **p_filter_chain,
+                                     bool empty);
+
  struct tcf_chain {
        struct tcf_proto __rcu *filter_chain;
        struct tcf_proto __rcu **p_filter_chain;
+       tcf_chain_change_empty_t *chain_change_empty;
        struct list_head list;
        struct tcf_block *block;
        u32 index; /* chain index */
@@ -605,6 +639,12 @@ static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
        bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
  }

+static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
+                                               const struct sk_buff *skb)
+{
+       bstats_cpu_update(this_cpu_ptr(miniq->cpu_bstats), skb);
+}
+
  static inline void qdisc_bstats_update(struct Qdisc *sch,
                                       const struct sk_buff *skb)
  {
@@ -648,6 +688,11 @@ static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
        this_cpu_inc(sch->cpu_qstats->drops);
  }

+static inline void mini_qdisc_qstats_cpu_drop(struct mini_Qdisc *miniq)
+{
+       this_cpu_inc(miniq->cpu_qstats->drops);
+}
+
  static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
  {
        sch->qstats.overlimits++;
diff --git a/net/core/dev.c b/net/core/dev.c
index 24ac908..b4a5812 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3274,14 +3274,16 @@ EXPORT_SYMBOL(dev_loopback_xmit);
  static struct sk_buff *
  sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
  {
-       struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
+       struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_egress);

We already have dev passed here, so let's use it as done previously.
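
I.e. just:

-       struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_egress);
+       struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);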

        struct tcf_result cl_res;
+       struct tcf_proto *cl;

-       if (!cl)
+       if (!miniq)
                return skb;
+       cl = rcu_dereference_bh(miniq->filter_list);

This one still has two RCU dereferences instead of just one. Could
we bind the lifetime of the miniq 1:1 to the filter_list head, such
that we can also get rid of the 2nd rcu_dereference_bh() and
piggy-back on the first one for the filter_list, thus pushing this
into the control slow-path instead?
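
Rough sketch of what I mean, assuming the control path installs a
fresh miniq whenever the chain head changes, so that filter_list is
immutable for the lifetime of a given miniq instance and loses its
__rcu annotation:

	struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
	struct tcf_proto *cl;

	if (!miniq)
		return skb;
	/* filter_list cannot change underneath us for this miniq
	 * instance, so the rcu_dereference_bh() above already covers
	 * it and no second dereference is needed in the fast path.
	 */
	cl = miniq->filter_list;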

        /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-       qdisc_bstats_cpu_update(cl->q, skb);
+       mini_qdisc_bstats_cpu_update(miniq, skb);

        switch (tcf_classify(skb, cl, &cl_res, false)) {
        case TC_ACT_OK:
@@ -3289,7 +3291,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
                skb->tc_index = TC_H_MIN(cl_res.classid);
                break;
        case TC_ACT_SHOT:
-               qdisc_qstats_cpu_drop(cl->q);
+               mini_qdisc_qstats_cpu_drop(miniq);
                *ret = NET_XMIT_DROP;
                kfree_skb(skb);
                return NULL;
@@ -4189,16 +4191,19 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
                   struct net_device *orig_dev)
  {
  #ifdef CONFIG_NET_CLS_ACT
-       struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+       struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
        struct tcf_result cl_res;
+       struct tcf_proto *cl;

        /* If there's at least one ingress present somewhere (so
         * we get here via enabled static key), remaining devices
         * that are not configured with an ingress qdisc will bail
         * out here.
         */
-       if (!cl)
+       if (!miniq)
                return skb;
+       cl = rcu_dereference_bh(miniq->filter_list);
+
        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
@@ -4206,7 +4211,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,

        qdisc_skb_cb(skb)->pkt_len = skb->len;
        skb->tc_at_ingress = 1;
-       qdisc_bstats_cpu_update(cl->q, skb);
+       mini_qdisc_bstats_cpu_update(miniq, skb);

        switch (tcf_classify(skb, cl, &cl_res, false)) {
        case TC_ACT_OK:
@@ -4214,7 +4219,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
                skb->tc_index = TC_H_MIN(cl_res.classid);
                break;
        case TC_ACT_SHOT:
-               qdisc_qstats_cpu_drop(cl->q);
+               mini_qdisc_qstats_cpu_drop(miniq);
                kfree_skb(skb);
                return NULL;
        case TC_ACT_STOLEN:

Thanks,
Daniel
