On Sun, 31 Jul 2005, Herbert Xu wrote:
>
> Anyway, here is a patch to use inetpeer instead of that icky ipc
> structure. It sure cuts down the size of the patch :)
> ....
Herbert,
I've been using the updated patch, and I like it. struct inet_peer
is the right place to do this accounting.
I made a few bug fixes and have tested it, tried to break it, etc.
Seems to do the trick. The latest iteration is attached. Please have
a look.
Regards.
--
Arthur
diff -pur linux.orig/include/linux/sysctl.h linux.new/include/linux/sysctl.h
--- linux.orig/include/linux/sysctl.h 2005-08-03 11:43:40.923892254 -0700
+++ linux.new/include/linux/sysctl.h 2005-08-04 16:58:17.901171101 -0700
@@ -352,6 +352,7 @@ enum
NET_TCP_BIC_BETA=108,
NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
NET_TCP_CONG_CONTROL=110,
+ NET_IPV4_IPFRAG_MAX_DIST=111,
};
enum {
diff -pur linux.orig/include/net/inetpeer.h linux.new/include/net/inetpeer.h
--- linux.orig/include/net/inetpeer.h 2005-08-03 11:44:01.778605212 -0700
+++ linux.new/include/net/inetpeer.h 2005-08-04 16:58:38.694360121 -0700
@@ -25,6 +25,7 @@ struct inet_peer
__u32 v4daddr; /* peer's address */
__u16 avl_height;
__u16 ip_id_count; /* IP ID for the next packet */
+ atomic_t rid; /* Frag reception counter */
__u32 tcp_ts;
unsigned long tcp_ts_stamp;
};
diff -pur linux.orig/include/net/ip.h linux.new/include/net/ip.h
--- linux.orig/include/net/ip.h 2005-08-03 11:44:08.654654565 -0700
+++ linux.new/include/net/ip.h 2005-08-04 16:58:50.460109760 -0700
@@ -45,6 +45,7 @@ struct inet_skb_parm
#define IPSKB_TRANSLATED 2
#define IPSKB_FORWARDED 4
#define IPSKB_XFRM_TUNNEL_SIZE 8
+#define IPSKB_FRAG_COMPLETE 16
};
struct ipcm_cookie
diff -pur linux.orig/net/ipv4/inetpeer.c linux.new/net/ipv4/inetpeer.c
--- linux.orig/net/ipv4/inetpeer.c 2005-08-03 11:44:40.086627938 -0700
+++ linux.new/net/ipv4/inetpeer.c 2005-08-04 16:59:20.251440976 -0700
@@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 dad
return NULL;
n->v4daddr = daddr;
atomic_set(&n->refcnt, 1);
+ atomic_set(&n->rid, 0);
n->ip_id_count = secure_ip_id(daddr);
n->tcp_ts_stamp = 0;
diff -pur linux.orig/net/ipv4/ip_fragment.c linux.new/net/ipv4/ip_fragment.c
--- linux.orig/net/ipv4/ip_fragment.c 2005-08-03 11:44:48.086712630 -0700
+++ linux.new/net/ipv4/ip_fragment.c 2005-08-04 17:03:02.162971536 -0700
@@ -22,6 +22,7 @@
* Patrick McHardy : LRU queue of frag heads for evictor.
*/
+#include <linux/compiler.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
@@ -38,6 +39,7 @@
#include <net/ip.h>
#include <net/icmp.h>
#include <net/checksum.h>
+#include <net/inetpeer.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/inet.h>
@@ -56,6 +58,8 @@
int sysctl_ipfrag_high_thresh = 256*1024;
int sysctl_ipfrag_low_thresh = 192*1024;
+int sysctl_ipfrag_max_dist = 64;
+
/* Important NOTE! Fragment queue must be destroyed before MSL expires.
* RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
*/
@@ -90,8 +94,11 @@ struct ipq {
atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
struct ipq **pprev;
- int iif;
struct timeval stamp;
+ int iif;
+
+ unsigned int rid;
+ struct inet_peer *peer;
};
/* Hash table. */
@@ -207,6 +214,9 @@ static void ip_frag_destroy(struct ipq *
BUG_TRAP(qp->last_in&COMPLETE);
BUG_TRAP(del_timer(&qp->timer) == 0);
+ if (qp->peer)
+ inet_putpeer(qp->peer);
+
/* Release all fragment data. */
fp = qp->fragments;
while (fp) {
@@ -366,6 +376,9 @@ static struct ipq *ip_frag_create(unsign
qp->meat = 0;
qp->fragments = NULL;
qp->iif = 0;
+ qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
+ if (qp->peer)
+ qp->rid = atomic_read(&qp->peer->rid);
/* Initialize a timer for this entry. */
init_timer(&qp->timer);
@@ -410,6 +423,63 @@ static inline struct ipq *ip_find(struct
return ip_frag_create(hash, iph, user);
}
+/* Is the fragment too far ahead to be part of ipq?
+ *
+ * Returns 0 straight away when qp has no peer or sysctl_ipfrag_max_dist
+ * is 0. Otherwise advances both qp->rid and the shared peer->rid and
+ * returns non-zero when qp already holds fragments and the two counters
+ * are at least sysctl_ipfrag_max_dist apart; in that case
+ * IPSTATS_MIB_REASMFAILS is bumped as well.
+ *
+ * Both counters are advanced even when the result is 0, so calling this
+ * function is not free of side effects.
+ */
+static inline int ip_frag_too_far(struct ipq *qp)
+{
+ struct inet_peer *peer = qp->peer;
+ unsigned int max = sysctl_ipfrag_max_dist;
+ unsigned int start, end;
+
+ int rc;
+
+ if (!peer || !max)
+ return 0;
+
+ start = ++qp->rid; /* this queue's private count */
+ end = atomic_inc_return(&peer->rid); /* count shared through the peer */
+
+ rc = qp->fragments && (end - start) >= max; /* unsigned subtraction, so the distance survives counter wrap */
+
+ if (rc) {
+ IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+ }
+
+ return rc;
+}
+/* Empty qp so that it can be reused for a fresh reassembly attempt.
+ *
+ * Pushes qp->timer out to jiffies + sysctl_ipfrag_time, frees every skb
+ * on qp->fragments, clears last_in/len/meat/fragments/iif and, while
+ * sysctl_ipfrag_max_dist is non-zero, looks up the peer if qp has none
+ * and re-reads qp->rid from peer->rid.
+ *
+ * Returns 0 on success. Returns -ETIMEDOUT, with qp->refcnt raised by one
+ * and the queued fragments left untouched, when mod_timer() returns 0.
+ * NOTE(review): a 0 return presumably means the timer was no longer
+ * pending (already fired or deleted) - confirm against the timer API.
+ *
+ * qp->fragments must be non-NULL on entry: the free loop dereferences
+ * the list head before testing it.
+ */
+static int ip_frag_reinit(struct ipq *qp)
+{
+ struct sk_buff *fp;
+
+ if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
+ atomic_inc(&qp->refcnt); /* NOTE(review): presumably pairs with the timer just re-armed - confirm */
+ return -ETIMEDOUT;
+ }
+
+ fp = qp->fragments;
+ do { /* do/while: the head is used before it is checked */
+ struct sk_buff *xp = fp->next;
+ frag_kfree_skb(fp, NULL);
+ fp = xp;
+ } while (fp);
+
+ qp->last_in = 0;
+ qp->len = 0;
+ qp->meat = 0;
+ qp->fragments = NULL;
+ qp->iif = 0;
+ if (sysctl_ipfrag_max_dist) {
+ if (qp->peer == NULL) { /* no peer was attached earlier; try the lookup again */
+ qp->peer = inet_getpeer(qp->saddr, 1);
+ }
+ if (qp->peer) {
+ qp->rid = atomic_read(&qp->peer->rid); /* restart distance counting from the peer's current value */
+ }
+ }
+
+ return 0;
+}
+
/* Add new segment to existing queue. */
static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
@@ -420,6 +490,12 @@ static void ip_frag_queue(struct ipq *qp
if (qp->last_in & COMPLETE)
goto err;
+ if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+ unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+ ipq_kill(qp);
+ goto err;
+ }
+
offset = ntohs(skb->nh.iph->frag_off);
flags = offset & ~IP_OFFSET;
offset &= IP_OFFSET;
diff -pur linux.orig/net/ipv4/ip_output.c linux.new/net/ipv4/ip_output.c
--- linux.orig/net/ipv4/ip_output.c 2005-08-03 11:44:53.139500496 -0700
+++ linux.new/net/ipv4/ip_output.c 2005-08-04 16:59:52.609205635 -0700
@@ -447,6 +447,7 @@ int ip_fragment(struct sk_buff *skb, int
hlen = iph->ihl * 4;
mtu = dst_mtu(&rt->u.dst) - hlen; /* Size of data space */
+ IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
/* When frag_list is given, use it. First, check its validity:
* some transformers could create wrong frag_list or break existing
diff -pur linux.orig/net/ipv4/sysctl_net_ipv4.c linux.new/net/ipv4/sysctl_net_ipv4.c
--- linux.orig/net/ipv4/sysctl_net_ipv4.c 2005-08-03 11:45:01.530214323 -0700
+++ linux.new/net/ipv4/sysctl_net_ipv4.c 2005-08-04 17:00:04.807577047 -0700
@@ -30,6 +30,7 @@ extern int sysctl_ipfrag_low_thresh;
extern int sysctl_ipfrag_high_thresh;
extern int sysctl_ipfrag_time;
extern int sysctl_ipfrag_secret_interval;
+extern int sysctl_ipfrag_max_dist;
/* From ip_output.c */
extern int sysctl_ip_dynaddr;
@@ -50,6 +51,7 @@ extern int inet_peer_gc_mintime;
extern int inet_peer_gc_maxtime;
#ifdef CONFIG_SYSCTL
+static int zero;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -643,6 +645,15 @@ ctl_table ipv4_table[] = {
.strategy = &sysctl_jiffies
},
{
+ .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
+ .procname = "ipfrag_max_dist",
+ .data = &sysctl_ipfrag_max_dist,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &zero
+ },
+ {
.ctl_name = NET_TCP_NO_METRICS_SAVE,
.procname = "tcp_no_metrics_save",
.data = &sysctl_tcp_nometrics_save,