On Thu, Aug 31, 2017 at 06:26:22PM -0700, Vinicius Costa Gomes wrote:
> This queueing discipline implements the shaper algorithm defined by
> the 802.1Q-2014 Section 8.6.8.2 and detailed in Annex L.
> 
> It's primary usage is to apply some bandwidth reservation to user
> defined traffic classes, which are mapped to different queues via the
> mqprio qdisc.
> 
> Initially, it only supports offloading the traffic shaping work to
> supporting controllers.
> 
> Later, when a software implementation is added, the current dependency
> on being installed "under" mqprio can be lifted.
> 
> Signed-off-by: Vinicius Costa Gomes <vinicius.go...@intel.com>
> Signed-off-by: Jesus Sanchez-Palencia <jesus.sanchez-palen...@intel.com>

So, I started testing this in my VM to make sure I didn't do anything silly 
and it exploded spectacularly as it used the underlying e1000 driver which 
does not have a ndo_setup_tc present.

I commented inline below, but as a tl;dr

The cbs_init() would call into cbs_change() that would correctly detect 
that ndo_setup_tc is missing and abort. However, at this stage it would try 
to desroy the qdisc and in cbs_destroy() there's no such guard leading to a

BUG: unable to handle kernel NULL pointer dereference at 0000000000000010

Fixing that, another NULL would be found when the code walks into 
qdisc_destroy(q->qdisc).

Apart from this, it loaded fine after some wrestling with tc :)

Awesome! :D

> ---
>  include/linux/netdevice.h |   1 +
>  net/sched/Kconfig         |  11 ++
>  net/sched/Makefile        |   1 +
>  net/sched/sch_cbs.c       | 286 
> ++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 299 insertions(+)
>  create mode 100644 net/sched/sch_cbs.c
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 35de8312e0b5..dd9a2ecd0c03 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -775,6 +775,7 @@ enum tc_setup_type {
>       TC_SETUP_CLSFLOWER,
>       TC_SETUP_CLSMATCHALL,
>       TC_SETUP_CLSBPF,
> +     TC_SETUP_CBS,
>  };
>  
>  /* These structures hold the attributes of xdp state that are being passed
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index e70ed26485a2..c03d86a7775e 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -172,6 +172,17 @@ config NET_SCH_TBF
>         To compile this code as a module, choose M here: the
>         module will be called sch_tbf.
>  
> +config NET_SCH_CBS

Shouldn't this depend on NET_SCH_MQPRIO as it is supposed to hook into 
that?

@@ -173,6 +173,7 @@ config NET_SCH_TBF
          module will be called sch_tbf.
 
 config NET_SCH_CBS
+       depends on NET_SCH_MQPRIO
        tristate "Credit Based Shaper (CBS)"
        ---help---
          Say Y here if you want to use the Credit Based Shaper (CBS) packet

> +     tristate "Credit Based Shaper (CBS)"
> +     ---help---
> +       Say Y here if you want to use the Credit Based Shaper (CBS) packet
> +       scheduling algorithm.
> +
> +       See the top of <file:net/sched/sch_cbs.c> for more details.
> +
> +       To compile this code as a module, choose M here: the
> +       module will be called sch_cbs.
> +
>  config NET_SCH_GRED
>       tristate "Generic Random Early Detection (GRED)"
>       ---help---
> diff --git a/net/sched/Makefile b/net/sched/Makefile
> index 7b915d226de7..80c8f92d162d 100644
> --- a/net/sched/Makefile
> +++ b/net/sched/Makefile
> @@ -52,6 +52,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL)      += sch_fq_codel.o
>  obj-$(CONFIG_NET_SCH_FQ)     += sch_fq.o
>  obj-$(CONFIG_NET_SCH_HHF)    += sch_hhf.o
>  obj-$(CONFIG_NET_SCH_PIE)    += sch_pie.o
> +obj-$(CONFIG_NET_SCH_CBS)    += sch_cbs.o
>  
>  obj-$(CONFIG_NET_CLS_U32)    += cls_u32.o
>  obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
> diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
> new file mode 100644
> index 000000000000..1c86a9e14150
> --- /dev/null
> +++ b/net/sched/sch_cbs.c
> @@ -0,0 +1,286 @@
> +/*
> + * net/sched/sch_cbs.c       Credit Based Shaper
> + *
> + *           This program is free software; you can redistribute it and/or
> + *           modify it under the terms of the GNU General Public License
> + *           as published by the Free Software Foundation; either version
> + *           2 of the License, or (at your option) any later version.
> + *
> + * Authors:  Vininicius Costa Gomes <vinicius.go...@intel.com>
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/errno.h>
> +#include <linux/skbuff.h>
> +#include <net/netlink.h>
> +#include <net/sch_generic.h>
> +#include <net/pkt_sched.h>
> +
> +struct cbs_sched_data {
> +     struct Qdisc *qdisc; /* Inner qdisc, default - pfifo queue */
> +     s32 queue;
> +     s32 locredit;
> +     s32 hicredit;
> +     s32 sendslope;
> +     s32 idleslope;
> +};
> +
> +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> +                    struct sk_buff **to_free)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +     int ret;
> +
> +     ret = qdisc_enqueue(skb, q->qdisc, to_free);
> +     if (ret != NET_XMIT_SUCCESS) {
> +             if (net_xmit_drop_count(ret))
> +                     qdisc_qstats_drop(sch);
> +             return ret;
> +     }
> +
> +     qdisc_qstats_backlog_inc(sch, skb);
> +     sch->q.qlen++;
> +     return NET_XMIT_SUCCESS;
> +}
> +
> +static struct sk_buff *cbs_dequeue(struct Qdisc *sch)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +     struct sk_buff *skb;
> +
> +     skb = q->qdisc->ops->peek(q->qdisc);
> +     if (skb) {
> +             skb = qdisc_dequeue_peeked(q->qdisc);
> +             if (unlikely(!skb))
> +                     return NULL;
> +
> +             qdisc_qstats_backlog_dec(sch, skb);
> +             sch->q.qlen--;
> +             qdisc_bstats_update(sch, skb);
> +
> +             return skb;
> +     }
> +     return NULL;
> +}
> +
> +static void cbs_reset(struct Qdisc *sch)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +     qdisc_reset(q->qdisc);
> +}
> +
> +static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = {
> +     [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) },
> +};
> +
> +static int cbs_change(struct Qdisc *sch, struct nlattr *opt)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +     struct tc_cbs_qopt_offload cbs = { };
> +     struct nlattr *tb[TCA_CBS_MAX + 1];
> +     const struct net_device_ops *ops;
> +     struct tc_cbs_qopt *qopt;
> +     struct net_device *dev;
> +     int err;
> +
> +     err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL);
> +     if (err < 0)
> +             return err;
> +
> +     err = -EINVAL;
> +     if (!tb[TCA_CBS_PARMS])
> +             goto done;
> +
> +     qopt = nla_data(tb[TCA_CBS_PARMS]);
> +
> +     dev = qdisc_dev(sch);
> +     ops = dev->netdev_ops;
> +
> +     /* FIXME: this means that we can only install this qdisc
> +      * "under" mqprio. Do we need a more generic way to retrieve
> +      * the queue, or do we pass the netdev_queue to the driver?
> +      */
> +     cbs.queue = TC_H_MIN(sch->parent) - 1 - netdev_get_num_tc(dev);
> +
> +     cbs.enable = 1;
> +     cbs.hicredit = qopt->hicredit;
> +     cbs.locredit = qopt->locredit;
> +     cbs.idleslope = qopt->idleslope;
> +     cbs.sendslope = qopt->sendslope;
> +
> +     err = -ENOTSUPP;
> +     if (!ops->ndo_setup_tc)
> +             goto done;

This confuses tc a bit, and looking at other qdisc schedulers, it seems 
like EOPNOTSUPP is an alternative, this changes the output from

RTNETLINK answers: Unknown error 524
 - to
RTNETLINK answers: Operation not supported

which perhaps is more informative.

> +
> +     err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);
> +     if (err < 0)
> +             goto done;
> +
> +     q->queue = cbs.queue;
> +     q->hicredit = cbs.hicredit;
> +     q->locredit = cbs.locredit;
> +     q->idleslope = cbs.idleslope;
> +     q->sendslope = cbs.sendslope;
> +
> +done:
> +     return err;
> +}
> +
> +static int cbs_init(struct Qdisc *sch, struct nlattr *opt)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +     if (!opt)
> +             return -EINVAL;
> +
> +     q->qdisc = fifo_create_dflt(sch, &pfifo_qdisc_ops, 1024);
> +     qdisc_hash_add(q->qdisc, true);
> +
> +     return cbs_change(sch, opt);
> +}
> +
> +static void cbs_destroy(struct Qdisc *sch)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +     struct tc_cbs_qopt_offload cbs = { };
> +     struct net_device *dev;
> +     int err;
> +
> +     q->hicredit = 0;
> +     q->locredit = 0;
> +     q->idleslope = 0;
> +     q->sendslope = 0;
> +
> +     dev = qdisc_dev(sch);
> +
> +     cbs.queue = q->queue;
> +     cbs.enable = 0;
> +
> +     err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CBS, &cbs);

This can lead to NULL pointer deref if it is not set (tested for in 
cbs_change and error there leads us here, which then explodes).

> +     if (err < 0)
> +             pr_warn("Couldn't reset queue %d to default values\n",
> +                     cbs.queue);
> +
> +     qdisc_destroy(q->qdisc);

Same, q->qdisc was NULL when cbs_init() failed.

> +}
> +
> +static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +     struct nlattr *nest;
> +     struct tc_cbs_qopt opt;
> +
> +     sch->qstats.backlog = q->qdisc->qstats.backlog;
> +     nest = nla_nest_start(skb, TCA_OPTIONS);
> +     if (!nest)
> +             goto nla_put_failure;
> +
> +     opt.hicredit = q->hicredit;
> +     opt.locredit = q->locredit;
> +     opt.sendslope = q->sendslope;
> +     opt.idleslope = q->idleslope;
> +
> +     if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt))
> +             goto nla_put_failure;
> +
> +     return nla_nest_end(skb, nest);
> +
> +nla_put_failure:
> +     nla_nest_cancel(skb, nest);
> +     return -1;
> +}
> +
> +static int cbs_dump_class(struct Qdisc *sch, unsigned long cl,
> +                       struct sk_buff *skb, struct tcmsg *tcm)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +     tcm->tcm_handle |= TC_H_MIN(1);
> +     tcm->tcm_info = q->qdisc->handle;
> +
> +     return 0;
> +}
> +
> +static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
> +                  struct Qdisc **old)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +     if (!new)
> +             new = &noop_qdisc;
> +
> +     *old = qdisc_replace(sch, new, &q->qdisc);
> +     return 0;
> +}
> +
> +static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg)
> +{
> +     struct cbs_sched_data *q = qdisc_priv(sch);
> +
> +     return q->qdisc;
> +}
> +
> +static unsigned long cbs_find(struct Qdisc *sch, u32 classid)
> +{
> +     return 1;
> +}
> +
> +static int cbs_delete(struct Qdisc *sch, unsigned long arg)
> +{
> +     return 0;
> +}
> +
> +static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker)
> +{
> +     if (!walker->stop) {
> +             if (walker->count >= walker->skip)
> +                     if (walker->fn(sch, 1, walker) < 0) {
> +                             walker->stop = 1;
> +                             return;
> +                     }
> +             walker->count++;
> +     }
> +}
> +
> +static const struct Qdisc_class_ops cbs_class_ops = {
> +     .graft          =       cbs_graft,
> +     .leaf           =       cbs_leaf,
> +     .find           =       cbs_find,
> +     .delete         =       cbs_delete,
> +     .walk           =       cbs_walk,
> +     .dump           =       cbs_dump_class,
> +};
> +
> +static struct Qdisc_ops cbs_qdisc_ops __read_mostly = {
> +     .next           =       NULL,
> +     .cl_ops         =       &cbs_class_ops,
> +     .id             =       "cbs",
> +     .priv_size      =       sizeof(struct cbs_sched_data),
> +     .enqueue        =       cbs_enqueue,
> +     .dequeue        =       cbs_dequeue,
> +     .peek           =       qdisc_peek_dequeued,
> +     .init           =       cbs_init,
> +     .reset          =       cbs_reset,
> +     .destroy        =       cbs_destroy,
> +     .change         =       cbs_change,
> +     .dump           =       cbs_dump,
> +     .owner          =       THIS_MODULE,
> +};
> +
> +static int __init cbs_module_init(void)
> +{
> +     return register_qdisc(&cbs_qdisc_ops);
> +}
> +
> +static void __exit cbs_module_exit(void)
> +{
> +     unregister_qdisc(&cbs_qdisc_ops);
> +}
> +module_init(cbs_module_init)
> +module_exit(cbs_module_exit)
> +MODULE_LICENSE("GPL");
> -- 
> 2.14.1
> 

-- 
Henrik Austad

Attachment: signature.asc
Description: PGP signature

Reply via email to