Some changes:
1. select CONFIGFS_FS from the NETEM Kconfig entry
2. don't add declarations after code
3. use unsigned, not int, for counters and masks
4. don't return a structure by value (i.e. pkt_delay); see the sketch below
5. use an enum for magic values
6. don't use GFP_ATOMIC unless you have to
7. check error values from configfs_init
8. map initialization is unneeded; statics are always zero-initialized
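
As an illustration of point 4: get_next_delay() below returns the scalar
delay and hands the drop/dup/corrupt decision back through an enum
out-parameter instead of returning a pkt_delay structure by value. A minimal
userspace sketch of the same pattern follows; the enum ordering and the
helper name are illustrative only, since the real enum tcn_flow lives in
net/flowseed.h, which is not part of this diff:

	#include <stdio.h>

	/* assumed ordering; the real enum tcn_flow is in net/flowseed.h */
	enum tcn_flow { FLOW_NORMAL, FLOW_DROP, FLOW_DUP, FLOW_MANGLE };

	#define MASK_BITS  29
	#define MASK_DELAY ((1u << MASK_BITS) - 1)
	#define MASK_HEAD  (~MASK_DELAY)

	/* return the delay; report the action through an out-parameter
	 * instead of returning a two-field struct by value */
	static unsigned int decode_trace_word(unsigned int word,
					      enum tcn_flow *action)
	{
		*action = (word & MASK_HEAD) >> MASK_BITS;
		return word & MASK_DELAY;
	}

	int main(void)
	{
		enum tcn_flow action;
		unsigned int delay;

		delay = decode_trace_word((FLOW_DUP << MASK_BITS) | 1500u,
					  &action);
		printf("action=%d delay=%uus\n", action, delay);
		return 0;
	}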
------------------
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d10f353..a51de64 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -430,6 +430,8 @@ enum
TCA_NETEM_DELAY_DIST,
TCA_NETEM_REORDER,
TCA_NETEM_CORRUPT,
+ TCA_NETEM_TRACE,
+ TCA_NETEM_STATS,
__TCA_NETEM_MAX,
};
@@ -445,6 +447,35 @@ struct tc_netem_qopt
__u32 jitter; /* random jitter in latency (us) */
};
+struct tc_netem_stats
+{
+ __u32 packetcount;
+ __u32 packetok;
+ __u32 normaldelay;
+ __u32 drops;
+ __u32 dupl;
+ __u32 corrupt;
+ __u32 novaliddata;
+ __u32 uninitialized;
+ __u32 bufferunderrun;
+ __u32 bufferinuseempty;
+ __u32 noemptybuffer;
+ __u32 readbehindbuffer;
+ __u32 buffer1_reloads;
+ __u32 buffer2_reloads;
+ __u32 tobuffer1_switch;
+ __u32 tobuffer2_switch;
+ __u32 switch_to_emptybuffer1;
+ __u32 switch_to_emptybuffer2;
+};
+
+struct tc_netem_trace
+{
+ __u32 fid; /* flow id */
+ __u32 def; /* default action: 0 = no delay, 1 = drop */
+ __u32 ticks; /* number of ticks corresponding to 1ms */
+};
+
struct tc_netem_corr
{
__u32 delay_corr; /* delay correlation */
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 8298ea9..aee4bc6 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -232,6 +232,7 @@ config NET_SCH_DSMARK
config NET_SCH_NETEM
tristate "Network emulator (NETEM)"
+ select CONFIGFS_FS
---help---
Say Y if you want to emulate network delay, loss, and packet
re-ordering. This is often useful to simulate networks when
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 45939ba..521b9e3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -11,6 +11,9 @@
*
* Authors: Stephen Hemminger <[EMAIL PROTECTED]>
* Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
+ * netem trace enhancement: Ariane Keller <[EMAIL PROTECTED]> ETH Zurich
+ * Rainer Baumann <[EMAIL PROTECTED]> ETH Zurich
+ * Ulrich Fiedler <[EMAIL PROTECTED]> ETH Zurich
*/
#include <linux/module.h>
@@ -21,10 +24,16 @@ #include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/configfs.h>
+#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
-#define VERSION "1.2"
+#include "net/flowseed.h"
+
+#define VERSION "1.3"
/* Network Emulation Queuing algorithm.
====================================
@@ -50,6 +59,11 @@ #define VERSION "1.2"
The simulator is limited by the Linux timer resolution
and will create packet bursts on the HZ boundary (1ms).
+
+ The trace option allows us to read the values for packet delay,
+ duplication, loss and corruption from a tracefile. This permits
+ the emulation of statistical properties such as long-range
+ dependence. See http://tcn.hypert.net.
*/
struct netem_sched_data {
@@ -65,6 +79,11 @@ struct netem_sched_data {
u32 duplicate;
u32 reorder;
u32 corrupt;
+ u32 tcnstop;
+ u32 trace;
+ u32 ticks;
+ u32 def;
+ u32 newdataneeded;
struct crndstate {
unsigned long last;
@@ -72,9 +91,13 @@ struct netem_sched_data {
} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
struct disttable {
- u32 size;
+ u32 size;
s16 table[0];
} *delay_dist;
+
+ struct tcn_statistic *statistic;
+ struct tcn_control *flowbuffer;
+ wait_queue_head_t my_event;
};
/* Time stamp put into socket buffer control block */
@@ -82,6 +105,18 @@ struct netem_skb_cb {
psched_time_t time_to_send;
};
+
+struct confdata {
+ int fid;
+ struct netem_sched_data * sched_data;
+};
+
+static struct confdata map[MAX_FLOWS];
+
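+/* each 32-bit trace word carries the action in the upper 32 - MASK_BITS
+ * bits and the packet delay in microseconds in the lower MASK_BITS bits */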
+#define MASK_BITS 29
+#define MASK_DELAY ((1<<MASK_BITS)-1)
+#define MASK_HEAD ~MASK_DELAY
+
/* init_crandom - initialize correlated random number generator
* Use entropy source for initial seed.
*/
@@ -139,6 +174,103 @@ static long tabledist(unsigned long mu,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
+/* don't call this function directly; it is called after the last
+ * packet has been taken out of a buffer.
+ */
+static int reload_flowbuffer(struct netem_sched_data *q)
+{
+ struct tcn_control *flow = q->flowbuffer;
+
+ if (flow->buffer_in_use == flow->buffer1) {
+ flow->buffer1_empty = flow->buffer1;
+ if (flow->buffer2_empty) {
+ q->statistic->switch_to_emptybuffer2++;
+ return -EFAULT;
+ }
+
+ q->statistic->tobuffer2_switch++;
+
+ flow->buffer_in_use = flow->buffer2;
+ flow->offsetpos = flow->buffer2;
+
+ } else {
+ flow->buffer2_empty = flow->buffer2;
+
+ if (flow->buffer1_empty) {
+ q->statistic->switch_to_emptybuffer1++;
+ return -EFAULT;
+ }
+
+ q->statistic->tobuffer1_switch++;
+
+ flow->buffer_in_use = flow->buffer1;
+ flow->offsetpos = flow->buffer1;
+
+ }
+ /* the flowseed process can send more data */
+ q->tcnstop = 0;
+ q->newdataneeded = 1;
+ wake_up(&q->my_event);
+ return 0;
+}
+
+/* return the packet delay; the drop/dup/corrupt action is reported via *head */
+static int get_next_delay(struct netem_sched_data *q, enum tcn_flow *head)
+{
+ struct tcn_control *flow = q->flowbuffer;
+ u32 variout;
+
+ /* default action: whether to drop or zero-delay packets */
+ *head = q->def;
+
+ if (!flow) {
+ printk(KERN_ERR "netem: read from an uninitialized flow.\n");
+ q->statistic->uninitialized++;
+ return 0;
+ }
+
+ q->statistic->packetcount++;
+
+ /* check if we have to reload a buffer */
+ if (flow->offsetpos - flow->buffer_in_use == DATA_PACKAGE)
+ reload_flowbuffer(q);
+
+ /* sanity checks */
+ if ((flow->buffer_in_use == flow->buffer1 && flow->validdataB1)
+ || ( flow->buffer_in_use == flow->buffer2 && flow->validdataB2)) {
+
+ if (flow->buffer1_empty && flow->buffer2_empty) {
+ q->statistic->bufferunderrun++;
+ return 0;
+ }
+
+ if (flow->buffer1_empty == flow->buffer_in_use ||
+ flow->buffer2_empty == flow->buffer_in_use) {
+ q->statistic->bufferinuseempty++;
+ return 0;
+ }
+
+ if (flow->offsetpos - flow->buffer_in_use >=
+ DATA_PACKAGE) {
+ q->statistic->readbehindbuffer++;
+ return 0;
+ }
+ } else {
+ /* no valid data: end of tracefile reached */
+ q->statistic->novaliddata++;
+ return 0;
+ }
+
+ /* now it's safe to read */
+ variout = *flow->offsetpos++;
+ *head = (variout & MASK_HEAD) >> MASK_BITS;
+
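+ /* assumes the counters normaldelay, drops, dupl and corrupt are
+ * consecutive in tcn_statistic, so the action can index them */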
+ (&q->statistic->normaldelay)[*head] += 1;
+ q->statistic->packetok++;
+
+ return ((variout & MASK_DELAY) * q->ticks) / 1000;
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -148,20 +280,25 @@ static long tabledist(unsigned long mu,
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- /* We don't fill cb now as skb_unshare() may invalidate it */
struct netem_skb_cb *cb;
struct sk_buff *skb2;
- int ret;
- int count = 1;
+ enum tcn_flow action = FLOW_NORMAL;
+ psched_tdiff_t delay;
+ int ret, count = 1;
pr_debug("netem_enqueue skb=%p\n", skb);
- /* Random duplication */
- if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ if (q->trace)
+ action = get_next_delay(q, &delay);
+
+ /* Random duplication */
+ if (q->trace ? action == FLOW_DUP :
+ (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)))
++count;
/* Random packet drop 0 => none, ~0 => all */
- if (q->loss && q->loss >= get_crandom(&q->loss_cor))
+ if (q->trace ? action == FLOW_DROP :
+ (q->loss && q->loss >= get_crandom(&q->loss_cor)))
--count;
if (count == 0) {
@@ -190,7 +327,8 @@ static int netem_enqueue(struct sk_buff
* If packet is going to be hardware checksummed, then
* do it now in software before we mangle it.
*/
- if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (q->trace ? action == FLOW_MANGLE :
+ (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor))) {
if (!(skb = skb_unshare(skb, GFP_ATOMIC))
|| (skb->ip_summed == CHECKSUM_PARTIAL
&& skb_checksum_help(skb))) {
@@ -206,10 +344,10 @@ static int netem_enqueue(struct sk_buff
|| q->counter < q->gap /* inside last reordering gap */
|| q->reorder < get_crandom(&q->reorder_cor)) {
psched_time_t now;
- psched_tdiff_t delay;
- delay = tabledist(q->latency, q->jitter,
- &q->delay_cor, q->delay_dist);
+ if (!q->trace)
+ delay = tabledist(q->latency, q->jitter,
+ &q->delay_cor, q->delay_dist);
PSCHED_GET_TIME(now);
PSCHED_TADD2(now, delay, cb->time_to_send);
@@ -343,6 +481,65 @@ static int set_fifo_limit(struct Qdisc *
return ret;
}
+static void reset_stats(struct netem_sched_data *q)
+{
+ memset(q->statistic, 0, sizeof(*q->statistic));
+}
+
+static void free_flowbuffer(struct netem_sched_data *q)
+{
+ if (q->flowbuffer != NULL) {
+ q->tcnstop = 1;
+ q->newdataneeded = 1;
+ wake_up(&q->my_event);
+
+ kfree(q->flowbuffer->buffer1);
+ kfree(q->flowbuffer->buffer2);
+ kfree(q->flowbuffer);
+ kfree(q->statistic);
+ q->flowbuffer = NULL;
+ q->statistic = NULL;
+ }
+}
+
+static int init_flowbuffer(unsigned int fid, struct netem_sched_data * q)
+{
+ int i, flowid = -1;
+
+ q->statistic = kzalloc(sizeof(*q->statistic), GFP_KERNEL);
+ if (!q->statistic)
+ return -ENOMEM;
+ init_waitqueue_head(&q->my_event);
+
+ for (i = 0; i < MAX_FLOWS; i++) {
+ if (map[i].fid == 0) {
+ flowid = i;
+ map[i].fid = fid;
+ map[i].sched_data = q;
+ break;
+ }
+ }
+
+ if (flowid != -1) {
+ q->flowbuffer = kmalloc(sizeof(*q->flowbuffer), GFP_KERNEL);
+ if (!q->flowbuffer)
+ goto err;
+ q->flowbuffer->buffer1 = kmalloc(DATA_PACKAGE, GFP_KERNEL);
+ q->flowbuffer->buffer2 = kmalloc(DATA_PACKAGE, GFP_KERNEL);
+ if (!q->flowbuffer->buffer1 || !q->flowbuffer->buffer2)
+ goto err;
+
+ q->flowbuffer->buffer_in_use = q->flowbuffer->buffer1;
+ q->flowbuffer->offsetpos = q->flowbuffer->buffer1;
+ q->flowbuffer->buffer1_empty = q->flowbuffer->buffer1;
+ q->flowbuffer->buffer2_empty = q->flowbuffer->buffer2;
+ q->flowbuffer->flowid = flowid;
+ q->flowbuffer->validdataB1 = 0;
+ q->flowbuffer->validdataB2 = 0;
+ }
+
+ return flowid;
+
+err:
+ map[flowid].fid = 0;
+ if (q->flowbuffer) {
+ kfree(q->flowbuffer->buffer1);
+ kfree(q->flowbuffer->buffer2);
+ kfree(q->flowbuffer);
+ q->flowbuffer = NULL;
+ }
+ kfree(q->statistic);
+ q->statistic = NULL;
+ return -ENOMEM;
+}
+
/*
* Distribution data is a variable size payload containing
* signed 16 bit values.
@@ -414,6 +611,32 @@ static int get_corrupt(struct Qdisc *sch
return 0;
}
+static int get_trace(struct Qdisc *sch, const struct rtattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_trace *traceopt = RTA_DATA(attr);
+
+ if (RTA_PAYLOAD(attr) != sizeof(*traceopt))
+ return -EINVAL;
+
+ if (traceopt->fid) {
+ int ind;
+
+ /* correction us -> ticks */
+ q->ticks = traceopt->ticks;
+ ind = init_flowbuffer(traceopt->fid, q);
+ if (ind == -ENOMEM)
+ return -ENOMEM;
+ if (ind < 0) {
+ printk(KERN_ERR "netem: maximum number of traces: %d;"
+ " change MAX_FLOWS in net/flowseed.h\n", MAX_FLOWS);
+ return -EINVAL;
+ }
+ q->trace = ind + 1;
+ } else
+ q->trace = 0;
+ q->def = traceopt->def;
+ return 0;
+}
+
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
@@ -431,6 +654,14 @@ static int netem_change(struct Qdisc *sc
return ret;
}
+ if (q->trace) {
+ int temp = q->trace - 1;
+ q->trace = 0;
+ map[temp].fid = 0;
+ reset_stats(q);
+ free_flowbuffer(q);
+ }
+
q->latency = qopt->latency;
q->jitter = qopt->jitter;
q->limit = qopt->limit;
@@ -477,6 +708,11 @@ static int netem_change(struct Qdisc *sc
if (ret)
return ret;
}
+ if (tb[TCA_NETEM_TRACE-1]) {
+ ret = get_trace(sch, tb[TCA_NETEM_TRACE-1]);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -572,6 +808,7 @@ static int netem_init(struct Qdisc *sch,
q->timer.function = netem_watchdog;
q->timer.data = (unsigned long) sch;
+ q->trace = 0;
q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
if (!q->qdisc) {
pr_debug("netem: qdisc create failed\n");
@@ -590,6 +827,12 @@ static void netem_destroy(struct Qdisc *
{
struct netem_sched_data *q = qdisc_priv(sch);
+ if (q->trace) {
+ int temp = q->trace - 1;
+ q->trace = 0;
+ map[temp].fid = 0;
+ free_flowbuffer(q);
+ }
del_timer_sync(&q->timer);
qdisc_destroy(q->qdisc);
kfree(q->delay_dist);
@@ -604,6 +847,7 @@ static int netem_dump(struct Qdisc *sch,
struct tc_netem_corr cor;
struct tc_netem_reorder reorder;
struct tc_netem_corrupt corrupt;
+ struct tc_netem_trace traceopt;
qopt.latency = q->latency;
qopt.jitter = q->jitter;
@@ -626,6 +870,35 @@ static int netem_dump(struct Qdisc *sch,
corrupt.correlation = q->corrupt_cor.rho;
RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+ traceopt.fid = q->trace ? map[q->trace - 1].fid : 0;
+ traceopt.def = q->def;
+ traceopt.ticks = q->ticks;
+ RTA_PUT(skb, TCA_NETEM_TRACE, sizeof(traceopt), &traceopt);
+
+ if (q->trace) {
+ struct tc_netem_stats tstats;
+
+ tstats.packetcount = q->statistic->packetcount;
+ tstats.packetok = q->statistic->packetok;
+ tstats.normaldelay = q->statistic->normaldelay;
+ tstats.drops = q->statistic->drops;
+ tstats.dupl = q->statistic->dupl;
+ tstats.corrupt = q->statistic->corrupt;
+ tstats.novaliddata = q->statistic->novaliddata;
+ tstats.uninitialized = q->statistic->uninitialized;
+ tstats.bufferunderrun = q->statistic->bufferunderrun;
+ tstats.bufferinuseempty = q->statistic->bufferinuseempty;
+ tstats.noemptybuffer = q->statistic->noemptybuffer;
+ tstats.readbehindbuffer = q->statistic->readbehindbuffer;
+ tstats.buffer1_reloads = q->statistic->buffer1_reloads;
+ tstats.buffer2_reloads = q->statistic->buffer2_reloads;
+ tstats.tobuffer1_switch = q->statistic->tobuffer1_switch;
+ tstats.tobuffer2_switch = q->statistic->tobuffer2_switch;
+ tstats.switch_to_emptybuffer1 = q->statistic->switch_to_emptybuffer1;
+ tstats.switch_to_emptybuffer2 = q->statistic->switch_to_emptybuffer2;
+ RTA_PUT(skb, TCA_NETEM_STATS, sizeof(tstats), &tstats);
+ }
+
rta->rta_len = skb->tail - b;
return skb->len;
@@ -709,6 +982,173 @@ static struct tcf_proto **netem_find_tcf
return NULL;
}
+/* configfs interface to read tcn delay values from userspace.
+ * Named tcn_flow_item to avoid a tag clash with enum tcn_flow
+ * from net/flowseed.h (struct and enum tags share one namespace). */
+struct tcn_flow_item {
+ struct config_item item;
+};
+
+static struct tcn_flow_item *to_tcn_flow_item(struct config_item *item)
+{
+ return item ? container_of(item, struct tcn_flow_item, item) : NULL;
+}
+
+static struct configfs_attribute tcn_flow_attr_storeme = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "delayvalue",
+ .ca_mode = S_IRUGO | S_IWUSR,
+};
+
+static struct configfs_attribute *tcn_flow_attrs[] = {
+ &tcn_flow_attr_storeme,
+ NULL,
+};
+
+static ssize_t tcn_flow_attr_store(struct config_item *item,
+ struct configfs_attribute *attr,
+ const char *page, size_t count)
+{
+ const char *p = page;
+ int fid, i, validData = 0;
+ int flowid = -1;
+ struct tcn_control *checkbuf;
+
+ if (count != DATA_PACKAGE_ID) {
+ printk("netem: Unexpected data received. %d\n", count);
+ return -EMSGSIZE;
+ }
+
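+ /* the payload is DATA_PACKAGE delay words followed by a trailer
+ * holding the flow id and a valid-data flag */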
+ memcpy(&fid, p + DATA_PACKAGE, sizeof(int));
+ memcpy(&validData, p + DATA_PACKAGE + sizeof(int), sizeof(int));
+
+ /* check whether this flow is registered */
+ for (i = 0; i < MAX_FLOWS; i++) {
+ if (map[i].fid == fid) {
+ flowid = i;
+ break;
+ }
+ }
+ /* exit if flow is not registered */
+ if (flowid < 0) {
+ printk("netem: Invalid FID received. Killing process.\n");
+ return -EINVAL;
+ }
+
+ checkbuf = map[flowid].sched_data->flowbuffer;
+ if (checkbuf == NULL) {
+ printk("netem: no flow registered");
+ return -ENOBUFS;
+ }
+
+ /* check if flowbuffer has empty buffer and copy data into it */
+ if (checkbuf->buffer1_empty != NULL) {
+ memcpy(checkbuf->buffer1, p, DATA_PACKAGE);
+ checkbuf->buffer1_empty = NULL;
+ checkbuf->validdataB1 = validData;
+ map[flowid].sched_data->statistic->buffer1_reloads++;
+
+ } else if (checkbuf->buffer2_empty != NULL) {
+ memcpy(checkbuf->buffer2, p, DATA_PACKAGE);
+ checkbuf->buffer2_empty = NULL;
+ checkbuf->validdataB2 = validData;
+ map[flowid].sched_data->statistic->buffer2_reloads++;
+
+ } else {
+ printk("netem: flow %d: no empty buffer. data loss.\n", flowid);
+ map[flowid].sched_data->statistic->noemptybuffer++;
+ }
+
+ if (validData) {
+ /* on initialization both buffers need data */
+ if (checkbuf->buffer2_empty != NULL) {
+ return DATA_PACKAGE_ID;
+ }
+ /* wait until new data is needed */
+ wait_event(map[flowid].sched_data->my_event,
+ map[flowid].sched_data->newdataneeded);
+ map[flowid].sched_data->newdataneeded = 0;
+
+ }
+
+ if (map[flowid].sched_data->tcnstop) {
+ return -ECANCELED;
+ }
+
+ return DATA_PACKAGE_ID;
+}
+
+static void tcn_flow_release(struct config_item *item)
+{
+ kfree(to_tcn_flow_item(item));
+}
+
+static struct configfs_item_operations tcn_flow_item_ops = {
+ .release = tcn_flow_release,
+ .store_attribute = tcn_flow_attr_store,
+};
+
+static struct config_item_type tcn_flow_type = {
+ .ct_item_ops = &tcn_flow_item_ops,
+ .ct_attrs = tcn_flow_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct config_item *tcn_make_item(struct config_group *group,
+ const char *name)
+{
+ struct tcn_flow_item *flow;
+
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+ if (!flow)
+ return NULL;
+
+ config_item_init_type_name(&flow->item, name, &tcn_flow_type);
+ return &flow->item;
+}
+
+static struct configfs_group_operations tcn_group_ops = {
+ .make_item = tcn_make_item,
+};
+
+static struct config_item_type tcn_type = {
+ .ct_group_ops = &tcn_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem tcn_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "tcn",
+ .ci_type = &tcn_type,
+ },
+ },
+};
+
+static int __init configfs_init(void)
+{
+ int ret;
+ struct configfs_subsystem *subsys = &tcn_subsys;
+
+ config_group_init(&subsys->su_group);
+ init_MUTEX(&subsys->su_sem);
+ ret = configfs_register_subsystem(subsys);
+ if (ret)
+ printk(KERN_ERR "netem: error %d registering configfs subsystem %s\n",
+ ret, subsys->su_group.cg_item.ci_namebuf);
+ return ret;
+}
+
+static void configfs_exit(void)
+{
+ configfs_unregister_subsystem(&tcn_subsys);
+}
+
static struct Qdisc_class_ops netem_class_ops = {
.graft = netem_graft,
.leaf = netem_leaf,
@@ -740,11 +1180,17 @@ static struct Qdisc_ops netem_qdisc_ops
static int __init netem_module_init(void)
{
+ int err;
+
pr_info("netem: version " VERSION "\n");
+ err = configfs_init();
+ if (err)
+ return err;
- return register_qdisc(&netem_qdisc_ops);
+ err = register_qdisc(&netem_qdisc_ops);
+ if (err)
+ configfs_exit();
+ return err;
}
static void __exit netem_module_exit(void)
{
+ configfs_exit();
unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
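
For completeness, here is a rough sketch of the userspace side that feeds
one buffer of trace words to a flow through configfs. The mount point and
the DATA_PACKAGE/DATA_PACKAGE_ID sizes come from the environment and from
net/flowseed.h, which is not part of this diff, so the path and constants
below are placeholders rather than the real values:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	/* placeholder sizes; the real values come from net/flowseed.h, and
	 * the write must be exactly DATA_PACKAGE_ID bytes or the store
	 * returns -EMSGSIZE */
	#define DATA_PACKAGE    (500 * sizeof(uint32_t))
	#define DATA_PACKAGE_ID (DATA_PACKAGE + 2 * sizeof(int))

	int main(void)
	{
		char buf[DATA_PACKAGE_ID];
		int fid = 1, valid = 1;	/* fid must match the one passed to tc */
		int fd;

		/* assumes configfs is mounted on /config and the flow item
		 * was created with mkdir, which triggers tcn_make_item() */
		fd = open("/config/tcn/flow1/delayvalue", O_WRONLY);
		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* all-zero trace words: zero delay, default action */
		memset(buf, 0, sizeof(buf));
		memcpy(buf + DATA_PACKAGE, &fid, sizeof(fid));
		memcpy(buf + DATA_PACKAGE + sizeof(fid), &valid, sizeof(valid));

		/* the kernel side blocks until netem drains the current buffer */
		if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
			perror("write");

		close(fd);
		return 0;
	}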