On Sat, 2007-07-07 at 17:10 +0200, Patrick McHardy wrote:
> On Sat, 7 Jul 2007, Ranko Zivojnovic wrote:
> > Maybe the appropriate way to fix this would be to call gen_kill_estimator,
> > with the appropriate lock order, before the call to qdisc_destroy, so
> > when dev->queue_lock is taken for qdisc_destroy - the structure is
> > already off the list.
>
> Probably easier to just kill est_lock and use rcu lists.
> I'm currently travelling, I'll look into it tomorrow.
Patrick, I've taken the liberty of trying to implement this myself. Attached
is the whole new gen_estimator-fix-locking-and-timer-related-bugs.patch,
which is RCU-list based. Please be kind enough to review it.
I've just compiled it against 2.6.22 together with sch_htb patch and I
am currently testing it. I will let it run until tomorrow or until it
crashes.
R.
--- a/net/core/gen_estimator.c 2007-06-25 02:21:48.000000000 +0300
+++ b/net/core/gen_estimator.c 2007-07-09 14:27:12.053336875 +0300
@@ -79,7 +79,7 @@
struct gen_estimator
{
- struct gen_estimator *next;
+ struct list_head list;
struct gnet_stats_basic *bstats;
struct gnet_stats_rate_est *rate_est;
spinlock_t *stats_lock;
@@ -89,26 +89,27 @@
u32 last_packets;
u32 avpps;
u32 avbps;
+ struct rcu_head e_rcu;
};
struct gen_estimator_head
{
struct timer_list timer;
- struct gen_estimator *list;
+ struct list_head list;
};
static struct gen_estimator_head elist[EST_MAX_INTERVAL+1];
/* Estimator array lock */
-static DEFINE_RWLOCK(est_lock);
+static DEFINE_SPINLOCK(est_lock);
static void est_timer(unsigned long arg)
{
int idx = (int)arg;
struct gen_estimator *e;
- read_lock(&est_lock);
- for (e = elist[idx].list; e; e = e->next) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &elist[idx].list, list) {
u64 nbytes;
u32 npackets;
u32 rate;
@@ -127,9 +128,9 @@
e->rate_est->pps = (e->avpps+0x1FF)>>10;
spin_unlock(e->stats_lock);
}
-
- mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
- read_unlock(&est_lock);
+ if (!list_empty(&elist[idx].list))
+ mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+ rcu_read_unlock();
}
/**
@@ -152,6 +153,7 @@
{
struct gen_estimator *est;
struct gnet_estimator *parm = RTA_DATA(opt);
+ int idx;
if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL;
@@ -163,7 +165,8 @@
if (est == NULL)
return -ENOBUFS;
- est->interval = parm->interval + 2;
+ INIT_LIST_HEAD(&est->list);
+ est->interval = idx = parm->interval + 2;
est->bstats = bstats;
est->rate_est = rate_est;
est->stats_lock = stats_lock;
@@ -173,20 +176,26 @@
est->last_packets = bstats->packets;
est->avpps = rate_est->pps<<10;
- est->next = elist[est->interval].list;
- if (est->next == NULL) {
- init_timer(&elist[est->interval].timer);
- elist[est->interval].timer.data = est->interval;
- elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
- elist[est->interval].timer.function = est_timer;
- add_timer(&elist[est->interval].timer);
+ spin_lock_bh(&est_lock);
+ if (!elist[idx].timer.function) {
+ INIT_LIST_HEAD(&elist[idx].list);
+ setup_timer(&elist[idx].timer, est_timer, est->interval);
}
- write_lock_bh(&est_lock);
- elist[est->interval].list = est;
- write_unlock_bh(&est_lock);
+
+ if (list_empty(&elist[est->interval].list))
+ mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
+
+ list_add_rcu(&est->list, &elist[idx].list);
+ spin_unlock_bh(&est_lock);
return 0;
}
+static void __gen_kill_estimator(struct rcu_head *head)
+{
+ struct gen_estimator *e = container_of(head, struct gen_estimator, e_rcu);
+ kfree(e);
+}
+
/**
* gen_kill_estimator - remove a rate estimator
* @bstats: basic statistics
@@ -199,26 +208,23 @@
struct gnet_stats_rate_est *rate_est)
{
int idx;
- struct gen_estimator *est, **pest;
+ struct gen_estimator *e, *n;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
- int killed = 0;
- pest = &elist[idx].list;
- while ((est=*pest) != NULL) {
- if (est->rate_est != rate_est || est->bstats != bstats) {
- pest = &est->next;
- continue;
- }
- write_lock_bh(&est_lock);
- *pest = est->next;
- write_unlock_bh(&est_lock);
+ /* Skip non initialized indexes */
+ if (!elist[idx].timer.function)
+ continue;
+
+ list_for_each_entry_safe(e, n, &elist[idx].list, list) {
+ if (e->rate_est != rate_est || e->bstats != bstats)
+ continue;
- kfree(est);
- killed++;
+ spin_lock_bh(&est_lock);
+ list_del_rcu(&e->list);
+ spin_unlock_bh(&est_lock);
+ call_rcu(&e->e_rcu, __gen_kill_estimator);
}
- if (killed && elist[idx].list == NULL)
- del_timer(&elist[idx].timer);
}
}