FQ-PIE runs an independent PIE controller for each flow but exposes no
per-flow AQM state. Without class-level statistics there is no way to
observe the per-flow drop probability, queue delay, deficit, or
dequeue rate from userspace.

Extend tc_fq_pie_xstats with a discriminated union, following the
same pattern established by FQ-CoDel (TCA_FQ_CODEL_XSTATS_CLASS):

  - Rename the existing flat struct to tc_fq_pie_qd_stats.
  - Add tc_fq_pie_cl_stats with per-flow prob, delay, deficit,
    avg_dq_rate, and dq_rate_estimating fields.
  - Wrap both in tc_fq_pie_xstats with a type discriminator
    (TCA_FQ_PIE_XSTATS_QDISC / TCA_FQ_PIE_XSTATS_CLASS).

Wire up fq_pie_class_ops (.walk, .dump, .dump_stats) so that
'tc -s class show' against an fq_pie qdisc reports per-flow state:

  prob               per-flow PIE drop probability
  delay              per-flow queue sojourn time (microseconds)
  deficit            remaining DRR byte credits (signed integer)
  avg_dq_rate        dequeue rate estimate in bytes/second
                     (dq_rate_estimator mode only)
  dq_rate_estimating flag set when dq_rate_estimator mode is enabled

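Note: this changes the on-wire layout of tc_fq_pie_xstats. The type
field now occupies the first four bytes, displacing the former
packets_in field. Because tc_fq_pie_cl_stats contains a __u64, on ABIs
where __u64 is 8-byte aligned the union is preceded by four bytes of
padding, so packets_in moves to offset 8 rather than 4. Userspace must
be updated concurrently; the companion iproute2 patch handles this.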

Fix the 'delay' field comment in struct tc_pie_xstats from "in ms" to
"in microseconds" to match the kernel's
PSCHED_TICKS2NS / NSEC_PER_USEC conversion.

Also correct the avg_dq_rate comment in tc_pie_xstats from
"bits/pie_time" to "bytes/second" to match the actual kernel
conversion (avg_dq_rate * PSCHED_TICKS_PER_SEC >> PIE_SCALE).

Signed-off-by: Hemendra M. Naik <[email protected]>
Signed-off-by: Vishal Kamath <[email protected]>
Signed-off-by: Mohit P. Tahiliani <[email protected]>
---
 include/uapi/linux/pkt_sched.h       |  29 +++++-
 net/sched/sch_fq_pie.c               | 131 ++++++++++++++++++++++++---
 tools/include/uapi/linux/pkt_sched.h |   4 +-
 3 files changed, 146 insertions(+), 18 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 66e8072f44df..0ad895edcb07 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -910,9 +910,9 @@ enum {
 
 struct tc_pie_xstats {
        __u64 prob;                     /* current probability */
-       __u32 delay;                    /* current delay in ms */
+       __u32 delay;                    /* current delay in microseconds */
        __u32 avg_dq_rate;              /* current average dq_rate in
-                                        * bits/pie_time
+                                        * bytes/second
                                         */
        __u32 dq_rate_estimating;       /* is avg_dq_rate being calculated? */
        __u32 packets_in;               /* total number of packets enqueued */
@@ -943,7 +943,12 @@ enum {
 };
 #define TCA_FQ_PIE_MAX   (__TCA_FQ_PIE_MAX - 1)
 
-struct tc_fq_pie_xstats {
+enum {
+       TCA_FQ_PIE_XSTATS_QDISC,
+       TCA_FQ_PIE_XSTATS_CLASS,
+};
+
+struct tc_fq_pie_qd_stats {
        __u32 packets_in;       /* total number of packets enqueued */
        __u32 dropped;          /* packets dropped due to fq_pie_action */
        __u32 overlimit;        /* dropped due to lack of space in queue */
@@ -955,6 +960,24 @@ struct tc_fq_pie_xstats {
        __u32 memory_usage;     /* total memory across all queues */
 };
 
+struct tc_fq_pie_cl_stats {
+       __u64 prob;                     /* current probability */
+       __u32 delay;                    /* current delay in microseconds */
+       __s32 deficit;          /* number of remaining byte credits */
+       __u32 avg_dq_rate;              /* current average dq_rate in
+                                        * bytes/second
+                                        */
+       __u32 dq_rate_estimating;       /* is avg_dq_rate being calculated? */
+};
+
+struct tc_fq_pie_xstats {
+       __u32   type;
+       union {
+               struct tc_fq_pie_qd_stats qdisc_stats;
+               struct tc_fq_pie_cl_stats class_stats;
+       };
+};
+
 /* CBS */
 struct tc_cbs_qopt {
        __u8 offload;
diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 7becbf5362b3..d22c4e02d2d9 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -330,7 +330,7 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
        /* tupdate is in jiffies */
        if (tb[TCA_FQ_PIE_TUPDATE])
                WRITE_ONCE(q->p_params.tupdate,
-                       usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])));
+                          usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE])));
 
        if (tb[TCA_FQ_PIE_ALPHA])
                WRITE_ONCE(q->p_params.alpha,
@@ -509,24 +509,25 @@ static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
 static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
        struct fq_pie_sched_data *q = qdisc_priv(sch);
-       struct tc_fq_pie_xstats st = { 0 };
+       struct tc_fq_pie_xstats st = {
+               .type   = TCA_FQ_PIE_XSTATS_QDISC,
+       };
        struct list_head *pos;
 
-       sch_tree_lock(sch);
-
-       st.packets_in   = q->stats.packets_in;
-       st.overlimit    = q->stats.overlimit;
-       st.overmemory   = q->overmemory;
-       st.dropped      = q->stats.dropped;
-       st.ecn_mark     = q->stats.ecn_mark;
-       st.new_flow_count = q->new_flow_count;
-       st.memory_usage   = q->memory_usage;
+       st.qdisc_stats.packets_in       = q->stats.packets_in;
+       st.qdisc_stats.overlimit        = q->stats.overlimit;
+       st.qdisc_stats.overmemory       = q->overmemory;
+       st.qdisc_stats.dropped          = q->stats.dropped;
+       st.qdisc_stats.ecn_mark         = q->stats.ecn_mark;
+       st.qdisc_stats.new_flow_count   = q->new_flow_count;
+       st.qdisc_stats.memory_usage     = q->memory_usage;
 
+       sch_tree_lock(sch);
        list_for_each(pos, &q->new_flows)
-               st.new_flows_len++;
+               st.qdisc_stats.new_flows_len++;
 
        list_for_each(pos, &q->old_flows)
-               st.old_flows_len++;
+               st.qdisc_stats.old_flows_len++;
        sch_tree_unlock(sch);
 
        return gnet_stats_copy_app(d, &st, sizeof(st));
@@ -561,7 +562,111 @@ static void fq_pie_destroy(struct Qdisc *sch)
        kvfree(q->flows);
 }
 
+static struct Qdisc *fq_pie_leaf(struct Qdisc *sch, unsigned long arg)
+{
+       return NULL;
+}
+
+static unsigned long fq_pie_find(struct Qdisc *sch, u32 classid)
+{
+       return 0;
+}
+
+static unsigned long fq_pie_bind(struct Qdisc *sch, unsigned long parent,
+                                u32 classid)
+{
+       return 0;
+}
+
+static void fq_pie_unbind(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static struct tcf_block *fq_pie_tcf_block(struct Qdisc *sch, unsigned long cl,
+                                         struct netlink_ext_ack *extack)
+{
+       struct fq_pie_sched_data *q = qdisc_priv(sch);
+
+       if (cl)
+               return NULL;
+       return q->block;
+}
+
+static int fq_pie_dump_class(struct Qdisc *sch, unsigned long cl,
+                            struct sk_buff *skb, struct tcmsg *tcm)
+{
+       tcm->tcm_handle |= TC_H_MIN(cl);
+       return 0;
+}
+
+static int fq_pie_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+                                  struct gnet_dump *d)
+{
+       struct fq_pie_sched_data *q = qdisc_priv(sch);
+       struct gnet_stats_queue qs = { 0 };
+       struct tc_fq_pie_xstats xstats;
+       u32 idx = cl - 1;
+
+       if (idx < q->flows_cnt) {
+               const struct fq_pie_flow *flow = &q->flows[idx];
+
+               memset(&xstats, 0, sizeof(xstats));
+               xstats.type = TCA_FQ_PIE_XSTATS_CLASS;
+               xstats.class_stats.prob = READ_ONCE(flow->vars.prob) << BITS_PER_BYTE;
+               xstats.class_stats.delay =
+                       ((u32)PSCHED_TICKS2NS(READ_ONCE(flow->vars.qdelay))) /
+                       NSEC_PER_USEC;
+               xstats.class_stats.deficit = READ_ONCE(flow->deficit);
+               xstats.class_stats.dq_rate_estimating =
+                       READ_ONCE(q->p_params.dq_rate_estimator);
+
+               if (xstats.class_stats.dq_rate_estimating) {
+                       xstats.class_stats.avg_dq_rate =
+                               READ_ONCE(flow->vars.avg_dq_rate) *
+                               (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
+               }
+
+               qs.qlen    = READ_ONCE(flow->qlen);
+               qs.backlog = READ_ONCE(flow->backlog);
+       }
+       if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
+               return -1;
+       if (idx < q->flows_cnt)
+               return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
+       return 0;
+}
+
+static void fq_pie_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+       struct fq_pie_sched_data *q = qdisc_priv(sch);
+       unsigned int i;
+
+       if (arg->stop)
+               return;
+
+       for (i = 0; i < q->flows_cnt; i++) {
+               if (list_empty(&q->flows[i].flowchain)) {
+                       arg->count++;
+                       continue;
+               }
+               if (!tc_qdisc_stats_dump(sch, i + 1, arg))
+                       break;
+       }
+}
+
+static const struct Qdisc_class_ops fq_pie_class_ops = {
+       .leaf           =       fq_pie_leaf,
+       .find           =       fq_pie_find,
+       .tcf_block      =       fq_pie_tcf_block,
+       .bind_tcf       =       fq_pie_bind,
+       .unbind_tcf     =       fq_pie_unbind,
+       .dump           =       fq_pie_dump_class,
+       .dump_stats     =       fq_pie_dump_class_stats,
+       .walk           =       fq_pie_walk,
+};
+
 static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = {
+       .cl_ops         =       &fq_pie_class_ops,
        .id             = "fq_pie",
        .priv_size      = sizeof(struct fq_pie_sched_data),
        .enqueue        = fq_pie_qdisc_enqueue,
diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h
index 587481a19433..9bc190f0b3cf 100644
--- a/tools/include/uapi/linux/pkt_sched.h
+++ b/tools/include/uapi/linux/pkt_sched.h
@@ -847,8 +847,8 @@ enum {
 
 struct tc_pie_xstats {
        __u32 prob;             /* current probability */
-       __u32 delay;            /* current delay in ms */
-       __u32 avg_dq_rate;      /* current average dq_rate in bits/pie_time */
+       __u32 delay;            /* current delay in microseconds */
+       __u32 avg_dq_rate;      /* current average dq_rate in bytes/second */
        __u32 packets_in;       /* total number of packets enqueued */
        __u32 dropped;          /* packets dropped due to pie_action */
        __u32 overlimit;        /* dropped due to lack of space in queue */
-- 
2.34.1


Reply via email to