On Sun, Jul 31, 2005 at 05:17:56PM +1000, herbert wrote: > > However, this is even better :) Except that I'm confused by what you > said here that we need to key on the ID. Why do we need to key on > the ID? I would've thought that the saddr is sufficient.
Anyway, here is a patch to use inetpeer instead of that icky ipc structure. It sure cuts down the size of the patch :) I've only compile-tested this one so please don't apply it. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmV>HI~} <[EMAIL PROTECTED]> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -351,6 +351,7 @@ enum NET_TCP_BIC_BETA=108, NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, NET_TCP_CONG_CONTROL=110, + NET_IPV4_IPFRAG_MAX_DIST=111, }; enum { diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -25,6 +25,7 @@ struct inet_peer __u32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ + atomic_t rid; /* Frag reception counter */ __u32 tcp_ts; unsigned long tcp_ts_stamp; }; diff --git a/include/net/ip.h b/include/net/ip.h --- a/include/net/ip.h +++ b/include/net/ip.h @@ -45,6 +45,7 @@ struct inet_skb_parm #define IPSKB_TRANSLATED 2 #define IPSKB_FORWARDED 4 #define IPSKB_XFRM_TUNNEL_SIZE 8 +#define IPSKB_FRAG_COMPLETE 16 }; struct ipcm_cookie diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 dad return NULL; n->v4daddr = daddr; atomic_set(&n->refcnt, 1); + atomic_set(&n->rid, 0); n->ip_id_count = secure_ip_id(daddr); n->tcp_ts_stamp = 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -22,6 +22,7 @@ * Patrick McHardy : LRU queue of frag heads for evictor. */ +#include <linux/compiler.h> #include <linux/config.h> #include <linux/module.h> #include <linux/types.h> @@ -38,6 +39,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/checksum.h> +#include <net/inetpeer.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/inet.h> @@ -56,6 +58,8 @@ int sysctl_ipfrag_high_thresh = 256*1024; int sysctl_ipfrag_low_thresh = 192*1024; +int sysctl_ipfrag_max_dist; + /* Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ @@ -90,8 +94,11 @@ struct ipq { atomic_t refcnt; struct timer_list timer; /* when will this queue expire? */ struct ipq **pprev; - int iif; struct timeval stamp; + int iif; + + unsigned int rid; + struct inet_peer *peer; }; /* Hash table. */ @@ -207,6 +214,9 @@ static void ip_frag_destroy(struct ipq * BUG_TRAP(qp->last_in&COMPLETE); BUG_TRAP(del_timer(&qp->timer) == 0); + if (qp->peer) + inet_putpeer(qp->peer); + /* Release all fragment data. */ fp = qp->fragments; while (fp) { @@ -366,6 +376,7 @@ static struct ipq *ip_frag_create(unsign qp->meat = 0; qp->fragments = NULL; qp->iif = 0; + qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -410,6 +421,48 @@ static inline struct ipq *ip_find(struct return ip_frag_create(hash, iph, user); } +/* Is the fragment too far ahead to be part of ipq? */ +static inline int ip_frag_too_far(struct ipq *qp) +{ + struct inet_peer *peer = qp->peer; + unsigned int max = sysctl_ipfrag_max_dist; + unsigned int start, end; + + if (!peer || !max) + return 0; + + start = qp->rid; + end = atomic_inc_return(&peer->rid); + qp->rid = end; + + return qp->fragments && (unsigned int)(end - start) >= max; +} + +static int ip_frag_reinit(struct ipq *qp) +{ + struct sk_buff *fp; + + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + atomic_inc(&qp->refcnt); + return -ETIMEDOUT; + } + + fp = qp->fragments; + do { + struct sk_buff *xp = fp->next; + frag_kfree_skb(fp, NULL); + fp = xp; + } while (fp); + + qp->last_in = 0; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; + qp->iif = 0; + + return 0; +} + /* Add new segment to existing queue. */ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { @@ -420,6 +473,12 @@ static void ip_frag_queue(struct ipq *qp if (qp->last_in & COMPLETE) goto err; + if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && + unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { + ipq_kill(qp); + goto err; + } + offset = ntohs(skb->nh.iph->frag_off); flags = offset & ~IP_OFFSET; offset &= IP_OFFSET; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -447,6 +447,7 @@ int ip_fragment(struct sk_buff *skb, int hlen = iph->ihl * 4; mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */ + IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE; /* When frag_list is given, use it. First, check its validity: * some transformers could create wrong frag_list or break existing diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -30,6 +30,7 @@ extern int sysctl_ipfrag_low_thresh; extern int sysctl_ipfrag_high_thresh; extern int sysctl_ipfrag_time; extern int sysctl_ipfrag_secret_interval; +extern int sysctl_ipfrag_max_dist; /* From ip_output.c */ extern int sysctl_ip_dynaddr; @@ -643,6 +644,14 @@ ctl_table ipv4_table[] = { .strategy = &sysctl_jiffies }, { + .ctl_name = NET_IPV4_IPFRAG_MAX_DIST, + .procname = "ipfrag_max_dist", + .data = &sysctl_ipfrag_max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = NET_TCP_NO_METRICS_SAVE, .procname = "tcp_no_metrics_save", .data = &sysctl_tcp_nometrics_save,