From: Krzysztof Oledzki <[EMAIL PROTECTED]>
Date: Fri, 16 Dec 2005 13:15:58 +0100 (CET)

> Thank you! Will test ASAP. Need day or two, I need to reassemble my
> IPSec netlab. ;)

Please let me know if it works as soon as you know.

I think for now it's more important to have things working than to
have the fastest implementation.  So if Krzysztof's tests go good I'll
push that minimal fix into 2.6.15 and 2.6.14-stable.

Meanwhile I have been brainstorming on how to do this efficiently.
If we could have a mapping from xfrm_state to bundles using that
state, it would help a lot.  Let's assume that we had that via
a list hung off of the struct xfrm_state, then at add time all
we'd need to do is something like the patch attached to the end
of this email.

In fact, with this list we can accurately say at delete time
whether "some xdst refers to this xfrm_state" and flush the
bundles precisely.  Subsequently xfrm_flush_bundles() moves
privately into xfrm_policy.c and it is only used on device
down events, and even that can be simplified further with this
new information.

The socket end of the deal is taken care of transparently, since
__sk_dst_check() will check dst->obsolete and thusly invoke
dst->ops->check().  The list addition code will add the top-level
bundle dst to the list hung off of xfrm_state, and that makes it
all work out.

After this, the only major hole remaining right now is wrt.  new
policies being inserted.  Sockets having cached IPSEC routes don't
notice that currently.  We will probably need to do a forced bundle
flush on all existing policies in order to cure that, and given the
frequency of policy insertions that probably isn't a big deal.

I didn't add the list insertion code yet to the patch below, that
needs a little bit more thinking.  I will work on that over the
weekend.  That makes this patch non-functional at this time, it is
just for comments and review therefore :-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 6c196a5..16a33a9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -65,6 +65,7 @@ struct dst_entry
        struct neighbour        *neighbour;
        struct hh_cache         *hh;
        struct xfrm_state       *xfrm;
+       struct list_head        xdst;
 
        int                     (*input)(struct sk_buff*);
        int                     (*output)(struct sk_buff*);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5beae1c..6ecffb3 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -95,6 +95,8 @@ struct xfrm_state
        struct xfrm_id          id;
        struct xfrm_selector    sel;
 
+       struct list_head        dst_list;
+
        /* Key manger bits */
        struct {
                u8              state;
@@ -889,7 +891,6 @@ struct xfrm_state * xfrm_find_acq(u8 mod
                                  int create, unsigned short family);
 extern void xfrm_policy_flush(void);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy 
*pol);
-extern int xfrm_flush_bundles(void);
 extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family);
 extern void xfrm_init_pmtu(struct dst_entry *dst);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 0db9e57..d25a9a6 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1098,10 +1098,9 @@ static void __xfrm_garbage_collect(void)
        xfrm_prune_bundles(unused_bundle);
 }
 
-int xfrm_flush_bundles(void)
+static void xfrm_flush_bundles(void)
 {
        xfrm_prune_bundles(stale_bundle);
-       return 0;
 }
 
 void xfrm_init_pmtu(struct dst_entry *dst)
@@ -1146,6 +1145,8 @@ int xfrm_bundle_ok(struct xfrm_dst *firs
        do {
                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
+               if (dst->obsolete)
+                       return 0;
                if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
                        return 0;
                if (dst->xfrm->km.state != XFRM_STATE_VALID)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 7cf48aa..33b2042 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -48,8 +48,6 @@ static struct work_struct xfrm_state_gc_
 static struct list_head xfrm_state_gc_list = 
LIST_HEAD_INIT(xfrm_state_gc_list);
 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
 
-static int xfrm_state_gc_flush_bundles;
-
 static int __xfrm_state_delete(struct xfrm_state *x);
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
@@ -79,11 +77,6 @@ static void xfrm_state_gc_task(void *dat
        struct list_head *entry, *tmp;
        struct list_head gc_list = LIST_HEAD_INIT(gc_list);
 
-       if (xfrm_state_gc_flush_bundles) {
-               xfrm_state_gc_flush_bundles = 0;
-               xfrm_flush_bundles();
-       }
-
        spin_lock_bh(&xfrm_state_gc_lock);
        list_splice_init(&xfrm_state_gc_list, &gc_list);
        spin_unlock_bh(&xfrm_state_gc_lock);
@@ -211,6 +204,46 @@ void __xfrm_state_destroy(struct xfrm_st
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
+/* Mark all xfrm_dst's referring to X as obsolete.  */
+static void xfrm_state_mark_obsolete(struct xfrm_state *x)
+{
+       struct dst_entry *dst;
+
+       list_for_each_entry(dst, &x->dst_list, xdst)
+               dst->obsolete = 2;
+}
+
+/* Mark as obsolete every xfrm_state with the same DADDR/SADDR/PROTO
+ * as X0.
+ */
+static void __xfrm_state_invalidate(struct xfrm_state *x0)
+{
+       unsigned short family = x0->props.family;
+       xfrm_address_t *daddr = &x0->id.daddr;
+       xfrm_address_t *saddr = &x0->props.saddr;
+       __u8 proto = x0->id.proto;
+       struct xfrm_state *x;
+       unsigned int h;
+
+       h = xfrm_dst_hash(daddr, family);
+
+       list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
+               if (x == x0)
+                       continue;
+               if (x->props.family == family &&
+                   x->id.proto == proto &&
+                   xfrm_state_addr_check(x, daddr, saddr, family))
+                       xfrm_state_mark_obsolete(x);
+       }
+}
+
+static inline void xfrm_state_invalidate(struct xfrm_state *x0)
+{
+       spin_lock_bh(&xfrm_state_lock);
+       __xfrm_state_invalidate(x0);
+       spin_unlock_bh(&xfrm_state_lock);
+}
+
 static int __xfrm_state_delete(struct xfrm_state *x)
 {
        int err = -ESRCH;
@@ -228,15 +261,7 @@ static int __xfrm_state_delete(struct xf
                if (del_timer(&x->timer))
                        atomic_dec(&x->refcnt);
 
-               /* The number two in this test is the reference
-                * mentioned in the comment below plus the reference
-                * our caller holds.  A larger value means that
-                * there are DSTs attached to this xfrm_state.
-                */
-               if (atomic_read(&x->refcnt) > 2) {
-                       xfrm_state_gc_flush_bundles = 1;
-                       schedule_work(&xfrm_state_gc_work);
-               }
+               xfrm_state_mark_obsolete(x);
 
                /* All xfrm_state objects are created by xfrm_state_alloc.
                 * The xfrm_state_alloc call gives a reference, and that
@@ -430,6 +455,7 @@ void xfrm_state_insert(struct xfrm_state
 {
        spin_lock_bh(&xfrm_state_lock);
        __xfrm_state_insert(x);
+       __xfrm_state_invalidate(x);
        spin_unlock_bh(&xfrm_state_lock);
 }
 EXPORT_SYMBOL(xfrm_state_insert);
@@ -475,6 +501,8 @@ int xfrm_state_add(struct xfrm_state *x)
        err = 0;
 
 out:
+       if (!err)
+               __xfrm_state_invalidate(x);
        spin_unlock_bh(&xfrm_state_lock);
        xfrm_state_put_afinfo(afinfo);
 
@@ -512,6 +540,7 @@ int xfrm_state_update(struct xfrm_state 
 
        if (x1->km.state == XFRM_STATE_ACQ) {
                __xfrm_state_insert(x);
+               __xfrm_state_invalidate(x);
                x = NULL;
        }
        err = 0;
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to