Attached is the patch for kernel version 2.6.13-rc3.
diff -Naur linux-2.6.13-rc3/include/linux/tcp.h linux-2.6.13-rc3.patched/include/linux/tcp.h --- linux-2.6.13-rc3/include/linux/tcp.h 2005-07-26 10:16:10.000000000 -0700 +++ linux-2.6.13-rc3.patched/include/linux/tcp.h 2005-07-26 12:23:53.795497200 -0700 @@ -235,6 +235,8 @@ return (struct tcp_request_sock *)req; } +struct toe_funcs; + struct tcp_sock { /* inet_sock has to be the first member of tcp_sock */ struct inet_sock inet; @@ -342,6 +344,8 @@ struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ + struct toe_funcs *toe_specific; /* Operations overridden by TOEs */ + __u32 rcv_wnd; /* Current receiver window */ __u32 rcv_wup; /* rcv_nxt on last window update sent */ __u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ diff -Naur linux-2.6.13-rc3/include/linux/toedev.h linux-2.6.13-rc3.patched/include/linux/toedev.h --- linux-2.6.13-rc3/include/linux/toedev.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.13-rc3.patched/include/linux/toedev.h 2005-07-26 12:23:53.796497048 -0700 @@ -0,0 +1,129 @@ +/***************************************************************************** + * * + * File: * + * toedev.h * + * * + * Description: * + * TOE device definitions. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: [EMAIL PROTECTED] * + * * + * Authors: Dimitrios Michailidis <[EMAIL PROTECTED]> * + * * + * History: * + * * + ****************************************************************************/ +/* $Date: 2005/07/09 00:52:28 $ $RCSfile: toedev.h,v $ $Revision: 1.14 $ */ + +#ifndef _TOEDEV_H_ +#define _TOEDEV_H_ + +#include <linux/list.h> +#include <asm/atomic.h> + +#define TOENAMSIZ 16 + +/* belongs in linux/netdevice.h */ +#define NETIF_F_TCPIP_OFFLOAD (1 << 16) + +/* Get the toedev associated with a net_device */ +#define TOEDEV(netdev) ((struct toedev *)(netdev)->ec_ptr) + +/* TOE type ids */ +enum { + TOE_ID_CHELSIO_T1 = 1, + TOE_ID_CHELSIO_T1C, + TOE_ID_CHELSIO_T3, +}; + +struct toe_id { + unsigned int id; + unsigned long data; +}; + +#define END_OF_TOE_ID_TABLE { 0, 0UL } + +struct net_device; +struct neighbour; +struct tom_info; +struct proc_dir_entry; +struct sock; +struct sk_buff; + +struct toedev { + char name[TOENAMSIZ]; /* TOE device name */ + struct list_head toe_list; /* for list linking */ + int toe_index; /* unique TOE device index */ + unsigned int ttid; /* TOE type id */ + unsigned long flags; /* device flags */ + unsigned int mtu; /* max size of TX offloaded data */ + unsigned int nconn; /* max # of offloaded connections */ + struct net_device *lldev; /* LL device associated with TOE messages */ + const struct tom_info *offload_mod; /* attached TCP offload module */ + struct proc_dir_entry *proc_dir; /* root of proc dir for this TOE */ + int (*open)(struct toedev *dev); + int (*close)(struct toedev *dev); + int (*can_offload)(struct toedev *dev, struct sock *sk); + int (*connect)(struct toedev *dev, struct sock *sk); + int (*send)(struct toedev *dev, struct sk_buff *skb); + int (*recv)(struct toedev *dev, struct sk_buff **skb, int n); + int (*ctl)(struct toedev *dev, unsigned int req, void *data); + void (*neigh_update)(struct net_device *lldev, struct toedev *dev, struct neighbour *neigh, int fl); + void *priv; /* driver 
private data */ + void *l2opt; /* optional layer 2 data */ + void *l3opt; /* optional layer 3 data */ + void *l4opt; /* optional layer 4 data */ + void *ulp; /* ulp stuff */ + atomic_t refcnt; /* reference count */ +}; + +struct tom_info { + int (*attach)(struct toedev *dev, const struct toe_id *entry); + int (*detach)(struct toedev *dev); + const char *name; + struct toe_id *id_table; + struct list_head list_node; +}; + +/* Flags for toe_neigh_update() */ +enum { + NEIGH_ADDR_CHANGED = 1 +}; + +static inline void toedev_hold(struct toedev *dev) +{ + atomic_inc(&dev->refcnt); +} + +static inline void toedev_put(struct toedev *dev) +{ + atomic_dec(&dev->refcnt); +} + +int register_tom(struct tom_info *t); +int unregister_tom(struct tom_info *t); +int register_toedev(struct toedev *dev, const char *name); +int activate_toedev(struct toedev *dev); +struct toedev *alloc_toedev(void); +void toe_set_lldev(struct toedev *dev, struct net_device *lldev); +int toe_send(struct toedev *dev, struct sk_buff *skb); +int toe_receive_skb(struct toedev *dev, struct sk_buff **skb, int n); +void toe_neigh_update(struct neighbour *neigh, int flags); +#endif diff -Naur linux-2.6.13-rc3/include/net/offload.h linux-2.6.13-rc3.patched/include/net/offload.h --- linux-2.6.13-rc3/include/net/offload.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.13-rc3.patched/include/net/offload.h 2005-07-26 12:23:53.796497048 -0700 @@ -0,0 +1,88 @@ +/***************************************************************************** + * * + * File: * + * offload.h * + * * + * Description: * + * TCP offload support. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. 
* + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. * + * * + * Maintainers: [EMAIL PROTECTED] * + * * + * Authors: Dimitrios Michailidis <[EMAIL PROTECTED]> * + * * + * History: * + * * + ****************************************************************************/ +/* $Date: 2005/07/09 00:52:28 $ $RCSfile: offload.h,v $ $Revision: 1.11 $ */ + +#ifndef _NET_OFFLOAD_H +#define _NET_OFFLOAD_H + +enum { + OFFLOAD_LISTEN_START, + OFFLOAD_LISTEN_STOP +}; + +/* Returns true if sk is an offloaded IPv4 TCP socket. */ +#define IS_OFFLOADED(sk) (((sk)->sk_family == AF_INET && (sk)->sk_prot != &tcp_prot)) + +struct toedev; +struct sk_buff; +struct sock; + +/* Per-skb backlog handler. Run when a socket's backlog is processed. */ +struct blog_skb_cb { + void (*backlog_rcv)(struct sock *sk, struct sk_buff *skb); + struct toedev *dev; +}; + +#define BLOG_SKB_CB(skb) ((struct blog_skb_cb *)(skb)->cb) + +/* belongs in linux/tcp_diag.h */ +#define TCPDIAG_OFFLOAD 5 + +/* so does this */ +struct tcpdiag_offload { + unsigned int offload_dev_idx; + unsigned int offload_cookie; + unsigned int mem; +}; + +struct notifier_block; + +/* + * TCP operations that a TOE wants to override but cannot through existing + * means. 
+ */ +struct toe_funcs { + void (*rcv_consumed)(struct sock *sk, int consumed); + void (*pmtu_changed)(struct sock *sk); + void (*set_keepalive)(struct sock *sk, int on_off); + void (*tcpdiag_offload_info)(const struct sock *sk, + struct tcpdiag_offload *oinfo); + int (*sendskb)(struct sock *sk, struct sk_buff *skb, int flags); +}; + +extern int register_listen_offload_notifier(struct notifier_block *nb); +extern int unregister_listen_offload_notifier(struct notifier_block *nb); +extern int tcp_listen_offload_stop(struct sock *sk); +extern int tcp_listen_offload(struct sock *sk); +extern int tcp_connect_offload(struct sock *sk); +#endif diff -Naur linux-2.6.13-rc3/net/core/Makefile linux-2.6.13-rc3.patched/net/core/Makefile --- linux-2.6.13-rc3/net/core/Makefile 2005-07-26 10:16:10.000000000 -0700 +++ linux-2.6.13-rc3.patched/net/core/Makefile 2005-07-26 12:23:53.797496896 -0700 @@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NET_RADIO) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o +obj-y += toedev.o diff -Naur linux-2.6.13-rc3/net/core/neighbour.c linux-2.6.13-rc3.patched/net/core/neighbour.c --- linux-2.6.13-rc3/net/core/neighbour.c 2005-06-30 10:48:51.000000000 -0700 +++ linux-2.6.13-rc3.patched/net/core/neighbour.c 2005-07-26 15:36:56.174707920 -0700 @@ -32,6 +32,7 @@ #include <net/sock.h> #include <linux/rtnetlink.h> #include <linux/random.h> +#include <linux/toedev.h> #include <linux/string.h> #define NEIGH_DEBUG 1 @@ -763,6 +764,7 @@ NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); neigh->nud_state = NUD_STALE; neigh_suspect(neigh); + toe_neigh_update(neigh, 0); } } else if (state & NUD_DELAY) { if (time_before_eq(now, @@ -770,6 +772,7 @@ NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); neigh->nud_state = NUD_REACHABLE; neigh_connect(neigh); + toe_neigh_update(neigh, 0); next = neigh->confirmed + neigh->parms->reachable_time; } else { NEIGH_PRINTK2("neigh %p is probed.\n", neigh); @@ -788,6 +791,7 @@ neigh->nud_state = 
NUD_FAILED; notify = 1; + toe_neigh_update(neigh, 0); NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); NEIGH_PRINTK2("neigh %p is failed.\n", neigh); @@ -952,6 +956,7 @@ if (old & NUD_CONNECTED) neigh_suspect(neigh); neigh->nud_state = new; + toe_neigh_update(neigh, 0); err = 0; #ifdef CONFIG_ARPD notify = old & NUD_VALID; @@ -1031,6 +1036,7 @@ notify = 1; #endif } + toe_neigh_update(neigh, lladdr != neigh->ha ? NEIGH_ADDR_CHANGED : 0); if (new == old) goto out; if (new & NUD_CONNECTED) diff -Naur linux-2.6.13-rc3/net/core/toedev.c linux-2.6.13-rc3.patched/net/core/toedev.c --- linux-2.6.13-rc3/net/core/toedev.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.13-rc3.patched/net/core/toedev.c 2005-07-26 12:23:53.799496592 -0700 @@ -0,0 +1,475 @@ +/***************************************************************************** + * * + * File: * + * toedev.c * + * * + * Description: * + * TOE device support infrastructure. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: [EMAIL PROTECTED] * + * * + * Authors: Dimitrios Michailidis <[EMAIL PROTECTED]> * + * * + * History: * + * * + ****************************************************************************/ +/* $Date: 2005/07/09 00:52:28 $ $RCSfile: toedev.c,v $ $Revision: 1.22 $ */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/toedev.h> +#include <net/neighbour.h> +#include <asm/semaphore.h> + +/* 2.4 compatibility */ +#ifndef subsys_initcall +#define subsys_initcall(fn) module_init(fn) + +static int boot_phase = 1; +#else +#define boot_phase 0 +#endif + +#ifndef __raise_softirq_irqoff +#define __raise_softirq_irqoff(nr) __cpu_raise_softirq(smp_processor_id(), nr) +#endif + +static DECLARE_MUTEX(toedev_db_lock); +static LIST_HEAD(toedev_list); +static LIST_HEAD(tom_list); + +static int toedev_init(void); + +/* + * Returns the entry in the TOE id table 'table' that has a given id, or NULL + * if the id is not found. + */ +static const struct toe_id *id_find(unsigned int id, + const struct toe_id *table) +{ + const struct toe_id *p; + + for (p = table; p->id; ++p) + if (p->id == id) return p; + return NULL; +} + +/* + * Returns true if a TOE device is presently attached to an offload module. + */ +static inline int is_attached(const struct toedev *dev) +{ + return dev->offload_mod != NULL; +} + +/* + * Try to attach a new TOE device to an existing TCP offload module that can + * handle the device's TOE id. Returns 0 if it succeeds. + * + * Must be called with the toedev_db_lock held. 
+ */ +static int toedev_attach(struct toedev *dev) +{ + struct tom_info *t; + + list_for_each_entry(t, &tom_list, list_node) { + const struct toe_id *entry; + + entry = id_find(dev->ttid, t->id_table); + if (entry && t->attach(dev, entry) == 0) { + dev->offload_mod = t; + return 0; + } + } + return -ENOPROTOOPT; +} + +/* + * Register a TCP Offload Module (TOM). + */ +int register_tom(struct tom_info *t) +{ + down(&toedev_db_lock); + list_add(&t->list_node, &tom_list); + up(&toedev_db_lock); + return 0; +} + +/* + * Unregister a TCP Offload Module (TOM). Note that this does not affect any + * TOE devices to which the TOM is already attached. + */ +int unregister_tom(struct tom_info *t) +{ + down(&toedev_db_lock); + list_del(&t->list_node); + up(&toedev_db_lock); + return 0; +} + +/* + * Find a TOE device by name. Must be called with toedev_db_lock held. + */ +static struct toedev *__find_toedev_by_name(const char *name) +{ + struct toedev *dev; + + list_for_each_entry(dev, &toedev_list, toe_list) { + if (!strncmp(dev->name, name, TOENAMSIZ)) return dev; + } + return NULL; +} + +#if 0 +/* + * Find a TOE device by name. + */ +static struct toedev *find_toedev_by_name(const char *name) +{ + struct toedev *dev; + + down(&toedev_db_lock); + dev = __find_toedev_by_name(name); + if (dev) toedev_hold(dev); + up(&toedev_db_lock); + return dev; +} +#endif + +/* + * Find a TOE device by index. Must be called with toedev_db_lock held. + */ +static struct toedev *__find_toedev_by_index(int index) +{ + struct toedev *dev; + + list_for_each_entry(dev, &toedev_list, toe_list) { + if (dev->toe_index == index) return dev; + } + return NULL; +} + +/* + * Return true if a TOE device is already registered. + * Must be called with the toedev_db_lock held. 
+ */ +static int toedev_registered(const struct toedev *dev) +{ + struct toedev *d; + + list_for_each_entry(d, &toedev_list, toe_list) { + if (d == dev) return 1; + } + return 0; +} + +/* + * Finalize the name of a TOE device by assigning values to any format strings + * in its name. + */ +static int toedev_assign_name(struct toedev *dev, const char *name, int limit) +{ + int i; + + for (i = 0; i < limit; ++i) { + char s[TOENAMSIZ]; + + snprintf(s, sizeof(s), name, i); + if (!__find_toedev_by_name(s)) { + strcpy(dev->name, s); + return 0; + } + } + return -1; +} + +/* + * Allocate a unique index for a TOE device. We keep the index within 30 bits + * to allow it to be used as a sysctl index, which uses signed IDs. + * + * We don't handle index exhaustion. Guess why. + */ +static int toedev_new_index(void) +{ + static int toe_index; + + for (;;) { + if (++toe_index & 0xc0000000) toe_index = 1; + if (!__find_toedev_by_index(toe_index)) + return toe_index; + } +} + +#ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> + +static struct proc_dir_entry *toedev_proc_root; + +/* XXX This doesn't handle module unloading properly. Do we need to? */ + +static int devices_read_proc(char *buf, char **start, off_t offset, + int length, int *eof, void *data) +{ + int len; + struct toedev *dev; + struct net_device *ndev; + + len = sprintf(buf, "Device Offload Module Interfaces\n"); + + down(&toedev_db_lock); + list_for_each_entry(dev, &toedev_list, toe_list) { + len += sprintf(buf + len, "%-16s %-20s", dev->name, + is_attached(dev) ? 
dev->offload_mod->name : "<None>"); + read_lock(&dev_base_lock); + for (ndev = dev_base; ndev; ndev = ndev->next) { + if (TOEDEV(ndev) == dev) + len += sprintf(buf + len, " %s", ndev->name); + } + read_unlock(&dev_base_lock); + len += sprintf(buf + len, "\n"); + if (len >= length) break; + } + up(&toedev_db_lock); + + if (len > length) len = length; + *eof = 1; + return len; +} + +static void toe_proc_cleanup(void) +{ + remove_proc_entry("devices", toedev_proc_root); + remove_proc_entry("net/toe", NULL); + toedev_proc_root = NULL; +} + +static struct proc_dir_entry *create_toe_proc_dir(const char *name) +{ + struct proc_dir_entry *d; + + if (!toedev_proc_root) return NULL; + + d = proc_mkdir(name, toedev_proc_root); + if (d) + d->owner = THIS_MODULE; + return d; +} + +#if 0 +static void delete_toe_proc_dir(struct toedev *dev) +{ + if (dev->proc_dir) { + remove_proc_entry(dev->name, toedev_proc_root); + dev->proc_dir = NULL; + } +} +#endif + +static int __init toe_proc_init(void) +{ + struct proc_dir_entry *d; + + toedev_proc_root = proc_mkdir("net/toe", NULL); + if (!toedev_proc_root) return -ENOMEM; + toedev_proc_root->owner = THIS_MODULE; + + d = create_proc_read_entry("devices", 0, toedev_proc_root, + devices_read_proc, NULL); + if (!d) goto cleanup; + d->owner = THIS_MODULE; + return 0; + + cleanup: + toe_proc_cleanup(); + return -ENOMEM; +} +#else +#define toe_proc_init() 0 +#define create_toe_proc_dir(name) NULL +#define delete_toe_proc_dir(dev) +#endif + +/* + * Register a TOE device and try to attach an appropriate TCP offload module + * to it. 'name' is a template that may contain at most one %d format + * specifier. + */ +int register_toedev(struct toedev *dev, const char *name) +{ + int ret; + char *p; + + if (boot_phase) toedev_init(); + + /* Validate the name template. Only one %d allowed. 
*/ + if (!name || !*name) return -EINVAL; + p = strchr(name, '%'); + if (p && (p[1] != 'd' || strchr(p + 2, '%'))) return -EINVAL; + + down(&toedev_db_lock); + if (toedev_registered(dev)) { // device already registered + ret = -EEXIST; + goto out; + } + + if ((ret = toedev_assign_name(dev, name, 32)) != 0) goto out; + + dev->proc_dir = create_toe_proc_dir(dev->name); + dev->toe_index = toedev_new_index(); + dev->offload_mod = NULL; + list_add_tail(&dev->toe_list, &toedev_list); + toedev_hold(dev); + out: + up(&toedev_db_lock); + return ret; +} + +/* + * Allocate and initialize a toedev structure. + */ +struct toedev *alloc_toedev(void) +{ + struct toedev *dev = kmalloc(sizeof(struct toedev), GFP_KERNEL); + + if (dev) { + memset(dev, 0, sizeof(struct toedev)); + atomic_set(&dev->refcnt, 0); + INIT_LIST_HEAD(&dev->toe_list); + } + return dev; +} + +/* + * Activate a TOE device. + */ +int activate_toedev(struct toedev *dev) +{ + int ret = 0; + + down(&toedev_db_lock); + if (!toedev_registered(dev)) + ret = -ENODEV; + else if (!is_attached(dev)) + ret = toedev_attach(dev); + up(&toedev_db_lock); + return ret; +} + +/* + * Set the link-layer device associated with a TOE. For sniffing purposes any + * messages sent to/received from the TOE will be associated with this device. + */ +void toe_set_lldev(struct toedev *dev, struct net_device *lldev) +{ + struct net_device *olddev = dev->lldev; + + if (lldev) dev_hold(lldev); + dev->lldev = lldev; + if (olddev) dev_put(olddev); +} + +/* + * Sends an sk_buff to a TOE driver after dealing with any active network taps. 
+ */ +int toe_send(struct toedev *dev, struct sk_buff *skb) +{ + int r; + + local_bh_disable(); + if (unlikely(netdev_nit)) { /* deal with active taps */ + skb->nh.raw = skb->data; + skb->dev = dev->lldev; + dev_queue_xmit_nit(skb, skb->dev); + } + r = dev->send(dev, skb); + local_bh_enable(); + return r; +} + +/** + * toe_receive_skb - process n received TOE packets + * @dev: the toe device + * @skb: an array of offload packets + * @n: the number of offload packets + * + * Process an array of ingress offload packets. Each packet is forwarded + * to any active network taps and then passed to the toe device's receive + * method. We optimize passing packets to the receive method by passing + * it the whole array at once except when there are active taps. + */ +int toe_receive_skb(struct toedev *dev, struct sk_buff **skb, int n) +{ + int i; + + if (likely(!netdev_nit)) + return dev->recv(dev, skb, n); + + for (i = 0; i < n; ++i) { + struct sk_buff *p = *skb++; + + p->dev = dev->lldev; + skb_get(p); + netif_receive_skb(p); + p->dev = NULL; + dev->recv(dev, &p, 1); + } + return 0; +} + +void toe_neigh_update(struct neighbour *neigh, int flags) +{ + struct net_device *dev = neigh->dev; + + if (dev && (dev->features & NETIF_F_TCPIP_OFFLOAD)) { + struct toedev *tdev = TOEDEV(dev); + + if (tdev && tdev->neigh_update) + tdev->neigh_update(neigh->dev, tdev, neigh, flags); + } +} + +static int __init toedev_init(void) +{ +#ifndef boot_phase + if (!boot_phase) return 0; // duplicate call --- 2.4 only + boot_phase = 0; +#endif + + /* We tolerate proc failures */ + if (toe_proc_init()) + printk(KERN_WARNING "Unable to create /proc/net/toe entries\n"); + + return 0; +} + +subsys_initcall(toedev_init); + +EXPORT_SYMBOL(register_tom); +EXPORT_SYMBOL(unregister_tom); +EXPORT_SYMBOL(register_toedev); +EXPORT_SYMBOL(alloc_toedev); +EXPORT_SYMBOL(activate_toedev); +EXPORT_SYMBOL(toe_set_lldev); +EXPORT_SYMBOL(toe_send); +EXPORT_SYMBOL(toe_receive_skb); diff -Naur 
linux-2.6.13-rc3/net/ipv4/Makefile linux-2.6.13-rc3.patched/net/ipv4/Makefile --- linux-2.6.13-rc3/net/ipv4/Makefile 2005-06-30 10:48:51.000000000 -0700 +++ linux-2.6.13-rc3.patched/net/ipv4/Makefile 2005-07-26 12:31:39.075763848 -0700 @@ -8,7 +8,7 @@ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o offload.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o diff -Naur linux-2.6.13-rc3/net/ipv4/offload.c linux-2.6.13-rc3.patched/net/ipv4/offload.c --- linux-2.6.13-rc3/net/ipv4/offload.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.13-rc3.patched/net/ipv4/offload.c 2005-07-26 12:23:53.800496440 -0700 @@ -0,0 +1,164 @@ +/***************************************************************************** + * * + * File: * + * offload.c * + * * + * Description: * + * TCP offload support. * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License, version 2, as * + * published by the Free Software Foundation. * + * * + * You should have received a copy of the GNU General Public License along * + * with this program; if not, write to the Free Software Foundation, Inc., * + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * + * * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * + * * + * http://www.chelsio.com * + * * + * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * + * All rights reserved. 
* + * * + * Maintainers: [EMAIL PROTECTED] * + * * + * Authors: Dimitrios Michailidis <[EMAIL PROTECTED]> * + * * + * History: * + * * + ****************************************************************************/ +/* $Date: 2005/07/09 00:52:28 $ $RCSfile: offload.c,v $ $Revision: 1.15 $ */ + +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/notifier.h> +#include <linux/toedev.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <net/offload.h> +#include <asm/semaphore.h> + +#ifdef sk_state // 2.6 +# define inet_addr_info(sk) inet_sk(sk) +#else // 2.4 +# define sk_route_caps route_caps +# define sk_family family +# define sk_backlog_rcv backlog_rcv +# define inet_addr_info(sk) sk +#endif + +/* Replace this with a R/W semaphore someday. See kernel/profile.c */ +static DECLARE_MUTEX(notify_mutex); +static struct notifier_block *listen_offload_notify_list; + +int register_listen_offload_notifier(struct notifier_block *nb) +{ + int err; + + down(&notify_mutex); + err = notifier_chain_register(&listen_offload_notify_list, nb); + up(&notify_mutex); + return err; +} + +int unregister_listen_offload_notifier(struct notifier_block *nb) +{ + int err; + + down(&notify_mutex); + err = notifier_chain_unregister(&listen_offload_notify_list, nb); + up(&notify_mutex); + return err; +} + +/* + * Called when an active open has been requested through connect(2). Decides + * if the connection may be offloaded based on the system's offload policies + * and the capabilities of the egress interface. + * + * Returns 1 if the connection is offloaded and 0 otherwise. + */ +int tcp_connect_offload(struct sock *sk) +{ + if (sk->sk_route_caps & NETIF_F_TCPIP_OFFLOAD) { + struct toedev *dev = TOEDEV(__sk_dst_get(sk)->dev); + + if (!dev || !dev->can_offload(dev, sk)) return 0; + /* XXX check offload policies */ + if (dev->connect(dev, sk) == 0) return 1; + } + return 0; +} + +/* + * TOE capable backlog handler. 
This is used for offloaded listening sockets + * so they can deal with non-IP (TOE) packets queued in their backlogs. We + * distinguish TOE from IP packets easily as the former lack network headers. + * Such TOE packets are fed to a TOE-specific backlog handler. + */ +static int listen_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + if (likely(skb->h.raw != skb->nh.raw)) + return tcp_v4_do_rcv(sk, skb); + BLOG_SKB_CB(skb)->backlog_rcv(sk, skb); + return 0; +} + +/* + * Called when the SW stack has transitioned a socket to listen state. + * We check if the socket should be offloaded according to the current + * offloading policies, and if so, publish an OFFLOAD_LISTEN_START event. + */ +int tcp_listen_offload(struct sock *sk) +{ + // IPv4 only for now + if (sk->sk_family != PF_INET) return 0; + + // filter out loopback listens + if (LOOPBACK(inet_addr_info(sk)->rcv_saddr)) return 0; + +// if (nf_sock_hook(PF_INET, NF_IP_OFFLOAD, sk) != NF_ACCEPT) return 0; + + // Install a TOE capable backlog handler + sk->sk_backlog_rcv = listen_backlog_rcv; + + down(&notify_mutex); + notifier_call_chain(&listen_offload_notify_list, OFFLOAD_LISTEN_START, sk); + up(&notify_mutex); + return 1; +} + +/* + * Called through a netfilter hook when a socket starts listening. + * Publishes an OFFLOAD_LISTEN_START event. + */ +static int tcp_listen_offload_start(struct sk_buff *skb) +{ + down(&notify_mutex); + notifier_call_chain(&listen_offload_notify_list, OFFLOAD_LISTEN_START, + skb->sk); + up(&notify_mutex); + return 0; +} + +/* + * Called when the SW stack is preparing to close an existing listening socket. + * We publish an OFFLOAD_LISTEN_STOP event. 
+ */ +int tcp_listen_offload_stop(struct sock *sk) +{ + down(&notify_mutex); + notifier_call_chain(&listen_offload_notify_list, OFFLOAD_LISTEN_STOP, sk); + up(&notify_mutex); + return 0; +} + +EXPORT_SYMBOL(register_listen_offload_notifier); +EXPORT_SYMBOL(unregister_listen_offload_notifier); + +// Local Variables: +// c-basic-offset: 4 +// End: diff -Naur linux-2.6.13-rc3/net/ipv4/tcp.c linux-2.6.13-rc3.patched/net/ipv4/tcp.c --- linux-2.6.13-rc3/net/ipv4/tcp.c 2005-07-26 10:16:10.000000000 -0700 +++ linux-2.6.13-rc3.patched/net/ipv4/tcp.c 2005-07-26 12:23:53.801496288 -0700 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $ + * Version: $Id: tcp.c,v 1.1 2005/06/20 19:32:40 dm Exp $ * * Authors: Ross Biro * Fred N. van Kempen, <[EMAIL PROTECTED]> @@ -262,6 +262,7 @@ #include <net/tcp.h> #include <net/xfrm.h> #include <net/ip.h> +#include <net/offload.h> #include <asm/uaccess.h> @@ -483,6 +484,7 @@ sk_dst_reset(sk); sk->sk_prot->hash(sk); + tcp_listen_offload(sk); return 0; } @@ -735,6 +737,9 @@ ssize_t res; struct sock *sk = sock->sk; + if (sk->sk_prot->sendpage) + return sk->sk_prot->sendpage(sk, page, offset, size, flags); + #define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) if (!(sk->sk_route_caps & NETIF_F_SG) || @@ -994,7 +999,7 @@ * this, no blocking and very strange errors 8) */ -static int tcp_recv_urg(struct sock *sk, long timeo, +/* static */ int tcp_recv_urg(struct sock *sk, long timeo, struct msghdr *msg, int len, int flags, int *addr_len) { @@ -1193,8 +1198,12 @@ tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. 
*/ - if (copied) - cleanup_rbuf(sk, copied); + if (copied) { + if (tp->toe_specific) + tp->toe_specific->rcv_consumed(sk, copied); + else + cleanup_rbuf(sk, copied); + } return copied; } @@ -1615,6 +1624,7 @@ sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TCP_LISTEN) { + tcp_listen_offload_stop(sk); tcp_set_state(sk, TCP_CLOSE); /* Special case. */ diff -Naur linux-2.6.13-rc3/net/ipv4/tcp_diag.c linux-2.6.13-rc3.patched/net/ipv4/tcp_diag.c --- linux-2.6.13-rc3/net/ipv4/tcp_diag.c 2005-06-30 10:48:54.000000000 -0700 +++ linux-2.6.13-rc3.patched/net/ipv4/tcp_diag.c 2005-07-26 12:35:21.637929256 -0700 @@ -1,7 +1,7 @@ /* * tcp_diag.c Module for monitoring TCP sockets. * - * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ + * Version: $Id: tcp_diag.c,v 1.1 2005/06/20 19:32:40 dm Exp $ * * Authors: Alexey Kuznetsov, <[EMAIL PROTECTED]> * @@ -24,6 +24,7 @@ #include <net/tcp.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/offload.h> #include <linux/inet.h> #include <linux/stddef.h> @@ -54,6 +55,7 @@ struct nlmsghdr *nlh; struct tcp_info *info = NULL; struct tcpdiag_meminfo *minfo = NULL; + struct tcpdiag_offload *oinfo = NULL; unsigned char *b = skb->tail; nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r)); @@ -70,6 +72,9 @@ strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), tp->ca_ops->name); } + if ((ext & (1 << (TCPDIAG_OFFLOAD - 1))) && + tp->toe_specific && tp->toe_specific->tcpdiag_offload_info) + oinfo = TCPDIAG_PUT(skb, TCPDIAG_OFFLOAD, sizeof(*oinfo)); } r->tcpdiag_family = sk->sk_family; r->tcpdiag_state = sk->sk_state; @@ -163,6 +168,9 @@ if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) tp->ca_ops->get_info(tp, ext, skb); + if (oinfo) + tp->toe_specific->tcpdiag_offload_info(sk, oinfo); + nlh->nlmsg_len = skb->tail - b; return skb->len; diff -Naur linux-2.6.13-rc3/net/ipv4/tcp_ipv4.c linux-2.6.13-rc3.patched/net/ipv4/tcp_ipv4.c --- linux-2.6.13-rc3/net/ipv4/tcp_ipv4.c 2005-07-26 10:16:10.000000000 -0700 +++ 
linux-2.6.13-rc3.patched/net/ipv4/tcp_ipv4.c 2005-07-26 12:23:53.803495984 -0700 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ + * Version: $Id: tcp_ipv4.c,v 1.1 2005/06/20 19:32:40 dm Exp $ * * IPv4 specific functions * @@ -68,6 +68,7 @@ #include <net/ipv6.h> #include <net/inet_common.h> #include <net/xfrm.h> +#include <net/offload.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -151,7 +152,7 @@ } /* Caller must disable local BH processing. */ -static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) +/* static */ __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) { struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; @@ -351,7 +352,7 @@ } } -static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) +/* static */ __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible) { struct hlist_head *list; rwlock_t *lock; @@ -835,6 +836,9 @@ __sk_dst_set(sk, &rt->u.dst); tcp_v4_setup_caps(sk, &rt->u.dst); + if (tcp_connect_offload(sk)) + return 0; + if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(inet->saddr, inet->daddr, diff -Naur linux-2.6.13-rc3/net/ipv4/tcp_timer.c linux-2.6.13-rc3.patched/net/ipv4/tcp_timer.c --- linux-2.6.13-rc3/net/ipv4/tcp_timer.c 2005-07-26 10:16:10.000000000 -0700 +++ linux-2.6.13-rc3.patched/net/ipv4/tcp_timer.c 2005-07-26 12:23:53.803495984 -0700 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $ + * Version: $Id: tcp_timer.c,v 1.1 2005/06/20 19:32:40 dm Exp $ * * Authors: Ross Biro * Fred N. 
van Kempen, <[EMAIL PROTECTED]> @@ -22,6 +22,7 @@ #include <linux/module.h> #include <net/tcp.h> +#include <net/offload.h> int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; @@ -559,6 +560,11 @@ if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return; + if (tcp_sk(sk)->toe_specific) { + tcp_sk(sk)->toe_specific->set_keepalive(sk, val); + return; + } + if (val && !sock_flag(sk, SOCK_KEEPOPEN)) tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val)