On Sun, Jul 31, 2005 at 05:17:56PM +1000, herbert wrote:
>
> However, this is even better :) Except that I'm confused by what you
> said here that we need to key on the ID.  Why do we need to key on
> the ID? I would've thought that the saddr is sufficient.

Anyway, here is a patch to use inetpeer instead of that icky ipc
structure.  It sure cuts down the size of the patch :)

I've only compile-tested this one so please don't apply it.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -351,6 +351,7 @@ enum
        NET_TCP_BIC_BETA=108,
        NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
        NET_TCP_CONG_CONTROL=110,
+       NET_IPV4_IPFRAG_MAX_DIST=111,
 };
 
 enum {
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -25,6 +25,7 @@ struct inet_peer
        __u32                   v4daddr;        /* peer's address */
        __u16                   avl_height;
        __u16                   ip_id_count;    /* IP ID for the next packet */
+       atomic_t                rid;            /* Frag reception counter */
        __u32                   tcp_ts;
        unsigned long           tcp_ts_stamp;
 };
diff --git a/include/net/ip.h b/include/net/ip.h
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -45,6 +45,7 @@ struct inet_skb_parm
 #define IPSKB_TRANSLATED       2
 #define IPSKB_FORWARDED                4
 #define IPSKB_XFRM_TUNNEL_SIZE 8
+#define IPSKB_FRAG_COMPLETE    16
 };
 
 struct ipcm_cookie
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 dad
                return NULL;
        n->v4daddr = daddr;
        atomic_set(&n->refcnt, 1);
+       atomic_set(&n->rid, 0);
        n->ip_id_count = secure_ip_id(daddr);
        n->tcp_ts_stamp = 0;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -22,6 +22,7 @@
  *             Patrick McHardy :       LRU queue of frag heads for evictor.
  */
 
+#include <linux/compiler.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -38,6 +39,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/checksum.h>
+#include <net/inetpeer.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -56,6 +58,8 @@
 int sysctl_ipfrag_high_thresh = 256*1024;
 int sysctl_ipfrag_low_thresh = 192*1024;
 
+int sysctl_ipfrag_max_dist;
+
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
@@ -90,8 +94,11 @@ struct ipq {
        atomic_t        refcnt;
        struct timer_list timer;        /* when will this queue expire?         */
        struct ipq      **pprev;
-       int             iif;
        struct timeval  stamp;
+       int             iif;
+
+       unsigned int    rid;
+       struct inet_peer *peer;
 };
 
 /* Hash table. */
@@ -207,6 +214,9 @@ static void ip_frag_destroy(struct ipq *
        BUG_TRAP(qp->last_in&COMPLETE);
        BUG_TRAP(del_timer(&qp->timer) == 0);
 
+       if (qp->peer)
+               inet_putpeer(qp->peer);
+
        /* Release all fragment data. */
        fp = qp->fragments;
        while (fp) {
@@ -366,6 +376,7 @@ static struct ipq *ip_frag_create(unsign
        qp->meat = 0;
        qp->fragments = NULL;
        qp->iif = 0;
+       qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
        /* Initialize a timer for this entry. */
        init_timer(&qp->timer);
@@ -410,6 +421,48 @@ static inline struct ipq *ip_find(struct
        return ip_frag_create(hash, iph, user);
 }
 
+/* Is the fragment too far ahead to be part of ipq? */
+static inline int ip_frag_too_far(struct ipq *qp)
+{      /* Returns nonzero when the queue should be treated as stale, 0 otherwise. */
+       struct inet_peer *peer = qp->peer;
+       unsigned int max = sysctl_ipfrag_max_dist;
+       unsigned int start, end;
+
+       if (!peer || !max)      /* no peer attached to the queue, or the sysctl is 0 */
+               return 0;
+
+       start = qp->rid;        /* value stored by the previous call for this queue */
+       end = atomic_inc_return(&peer->rid);    /* side effect: the per-peer counter advances on every call */
+       qp->rid = end;
+
+       return qp->fragments && (unsigned int)(end - start) >= max;     /* empty queue is never too far; unsigned subtraction tolerates counter wrap */
+}
+
+static int ip_frag_reinit(struct ipq *qp)
+{      /* Re-arm qp's timer, free every queued fragment and reset the queue fields; 0 on success, -ETIMEDOUT otherwise. */
+       struct sk_buff *fp;
+
+       if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {     /* a 0 return from mod_timer() is treated as failure */
+               atomic_inc(&qp->refcnt);        /* NOTE(review): presumably the reference owned by the just re-armed timer - confirm */
+               return -ETIMEDOUT;
+       }
+
+       fp = qp->fragments;     /* dereferenced before being tested below; the visible caller only gets here after ip_frag_too_far() saw qp->fragments non-NULL */
+       do {
+               struct sk_buff *xp = fp->next;  /* fetch the successor before fp is released */
+               frag_kfree_skb(fp, NULL);
+               fp = xp;
+       } while (fp);
+
+       qp->last_in = 0;        /* from here on: put the queue bookkeeping back to an empty state */
+       qp->len = 0;
+       qp->meat = 0;
+       qp->fragments = NULL;
+       qp->iif = 0;
+
+       return 0;
+}
+
 /* Add new segment to existing queue. */
 static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
@@ -420,6 +473,12 @@ static void ip_frag_queue(struct ipq *qp
        if (qp->last_in & COMPLETE)
                goto err;
 
+       if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+           unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+               ipq_kill(qp);
+               goto err;
+       }
+
        offset = ntohs(skb->nh.iph->frag_off);
        flags = offset & ~IP_OFFSET;
        offset &= IP_OFFSET;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -447,6 +447,7 @@ int ip_fragment(struct sk_buff *skb, int
 
        hlen = iph->ihl * 4;
        mtu = dst_mtu(&rt->u.dst) - hlen;       /* Size of data space */
+       IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
        /* When frag_list is given, use it. First, check its validity:
         * some transformers could create wrong frag_list or break existing
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -30,6 +30,7 @@ extern int sysctl_ipfrag_low_thresh;
 extern int sysctl_ipfrag_high_thresh; 
 extern int sysctl_ipfrag_time;
 extern int sysctl_ipfrag_secret_interval;
+extern int sysctl_ipfrag_max_dist;
 
 /* From ip_output.c */
 extern int sysctl_ip_dynaddr;
@@ -643,6 +644,14 @@ ctl_table ipv4_table[] = {
                .strategy       = &sysctl_jiffies
        },
        {
+               .ctl_name       = NET_IPV4_IPFRAG_MAX_DIST,
+               .procname       = "ipfrag_max_dist",
+               .data           = &sysctl_ipfrag_max_dist,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec,
+       },
+       {
                .ctl_name       = NET_TCP_NO_METRICS_SAVE,
                .procname       = "tcp_no_metrics_save",
                .data           = &sysctl_tcp_nometrics_save,

Reply via email to