Hi,
I'm working on a new feature which will allow the timing of TCP
connections to be controlled by the ethernet hardware driver,
specifically the mlxen driver. The main missing piece in the kernel is a
way to overwrite the mbuf's flowid value in "struct inpcb" once the
connection is established and to have a callback once the TCP connection
is gone so that the assigned "flowid" can be freed by the ethernet
hardware driver.
The "flowid" will be used to assign the outgoing data traffic of a
specific TCP connection to a hardware controlled queue, which is
configured in advance with certain parameters about the timing of the
transmitted packets.
To be able to set the flowid I'm using existing functions in the kernel
TCP code to look up the "inpcb" structure based on the 4-tuple, via the
"ifp->if_ioctl()" callback of the network adapter. I'm also registering
a function method table so that I get a callback when the TCP connection
is gone.
At this point in development I would like to get some feedback from the
FreeBSD network guys about my attached patch proposal.
The motivation for this work is to have more reliable TCP
transmissions, typically for fixed-rate media content going some
distance. To illustrate this I will give you an example from the world
of VoIP, which is using UDP. When doing long-distance VoIP calls through
various unknown networks and routers it makes a very big difference if
you are sending data 20ms apart or 40ms apart, even at the exact same
rate. In the one case you might experience a bunch of packet drops, and
in the other case, everything is fine. Why? Because both the number of
packets you send per second and their timing are important. The goal is to
apply some timing rules for TCP, to increase the rate of successful
transmission, and to reduce the amount of data loss. For high throughput
applications we want to do this by means of hardware.
While at it I would like to "typedef" the flowid used by mbufs, "struct
inpcb" and many more places. Where would the right place be to put such
a definition? In "sys/mbuf.h"?
Comments are appreciated!
--HPS
=== sys/netinet/in_pcb.c
==================================================================
--- sys/netinet/in_pcb.c (revision 268358)
+++ sys/netinet/in_pcb.c (local)
@@ -1173,6 +1173,100 @@
}
/*
+ * in_pcb_handle_ratectlreq - look up the inpcb in V_tcbinfo matching the
+ * IPv4 addresses/ports in "req", allocate a hardware flow ID through
+ * "mtod" if the inpcb has none yet, then apply the requested baudrate.
+ * Return values:
+ * 0: Success
+ * Non-zero: EINVAL (bad arguments), ENOENT (no inpcb) or callback error
+ */
+int
+in_pcb_handle_ratectlreq(struct ifnet *ifp, struct in_ratectlreq *req,
+ const struct in_flowid_methods *mtod, void *arg)
+{
+ struct inpcb *inp;
+ int error;
+
+ if (ifp == NULL || req == NULL || mtod == NULL ||
+ mtod->inf_alloc == NULL || mtod->inf_rateset == NULL ||
+ mtod->inf_free == NULL)
+ return (EINVAL); /* all three methods are mandatory */
+
+ inp = in_pcblookup(&V_tcbinfo,
+ req->ifreq_dst.sin_addr, req->ifreq_dst.sin_port,
+ req->ifreq_src.sin_addr, req->ifreq_src.sin_port,
+ INPLOOKUP_WLOCKPCB, ifp);
+ if (inp == NULL)
+ return (ENOENT);
+
+ INP_WLOCK_ASSERT(inp); /* released at "done" below */
+
+ if (inp->inp_flowid_mtod == NULL) { /* no methods registered yet */
+ error = mtod->inf_alloc(arg, &inp->inp_flowid);
+ if (error != 0)
+ goto done; /* inp_flowid_mtod stays unset */
+ inp->inp_flowid_mtod = mtod;
+ inp->inp_flowid_arg = arg;
+ /* ensure that the flow ID is not overwritten */
+ inp->inp_flags |= INP_HW_FLOWID;
+ inp->inp_flags &= ~INP_SW_FLOWID;
+ inp->inp_flowtype = M_HASHTYPE_NONE;
+ }
+ error = inp->inp_flowid_mtod->inf_rateset(inp->inp_flowid_arg,
+ inp->inp_flowid, req->ifreq_baudrate); /* stored methods/arg, possibly not "mtod"/"arg" */
+done:
+ INP_WUNLOCK(inp);
+ return (error);
+}
+
+/*
+ * in6_pcb_handle_ratectlreq - look up the inpcb in V_tcbinfo matching the
+ * IPv6 addresses/ports in "req", allocate a hardware flow ID through
+ * "mtod" if the inpcb has none yet, then apply the requested baudrate.
+ * Return values:
+ * 0: Success
+ * Non-zero: EINVAL (bad arguments), ENOENT (no inpcb) or callback error
+ */
+int
+in6_pcb_handle_ratectlreq(struct ifnet *ifp, struct in6_ratectlreq *req,
+ const struct in_flowid_methods *mtod, void *arg)
+{
+ struct inpcb *inp;
+ int error;
+
+ if (ifp == NULL || req == NULL || mtod == NULL ||
+ mtod->inf_alloc == NULL || mtod->inf_rateset == NULL ||
+ mtod->inf_free == NULL)
+ return (EINVAL); /* all three methods are mandatory */
+
+ inp = in6_pcblookup(&V_tcbinfo,
+ &req->ifreq_dst.sin6_addr, req->ifreq_dst.sin6_port,
+ &req->ifreq_src.sin6_addr, req->ifreq_src.sin6_port,
+ INPLOOKUP_WLOCKPCB, ifp);
+ if (inp == NULL)
+ return (ENOENT);
+
+ INP_WLOCK_ASSERT(inp); /* released at "done" below */
+
+ if (inp->inp_flowid_mtod == NULL) { /* no methods registered yet */
+ error = mtod->inf_alloc(arg, &inp->inp_flowid);
+ if (error != 0)
+ goto done; /* inp_flowid_mtod stays unset */
+ inp->inp_flowid_mtod = mtod;
+ inp->inp_flowid_arg = arg;
+ /* ensure that the flow ID is not overwritten */
+ inp->inp_flags |= INP_HW_FLOWID;
+ inp->inp_flags &= ~INP_SW_FLOWID;
+ inp->inp_flowtype = M_HASHTYPE_NONE;
+ }
+ error = inp->inp_flowid_mtod->inf_rateset(inp->inp_flowid_arg,
+ inp->inp_flowid, req->ifreq_baudrate); /* stored methods/arg, possibly not "mtod"/"arg" */
+done:
+ INP_WUNLOCK(inp);
+ return (error);
+}
+
+/*
* Unconditionally schedule an inpcb to be freed by decrementing its
* reference count, which should occur only after the inpcb has been detached
* from its socket. If another thread holds a temporary reference (acquired
@@ -1192,6 +1286,12 @@
INP_WLOCK_ASSERT(inp);
/* XXXRW: Do as much as possible here. */
+
+ /* Release flow ID, if any */
+ if (inp->inp_flowid_mtod != NULL) {
+ inp->inp_flowid_mtod->inf_free(
+ inp->inp_flowid_arg, inp->inp_flowid);
+ }
#ifdef IPSEC
if (inp->inp_sp != NULL)
ipsec_delete_pcbpolicy(inp);
=== sys/netinet/in_pcb.h
==================================================================
--- sys/netinet/in_pcb.h (revision 268358)
+++ sys/netinet/in_pcb.h (local)
@@ -39,6 +39,7 @@
#define _NETINET_IN_PCB_H_
#include <sys/queue.h>
+#include <sys/mbuf.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_rwlock.h>
@@ -127,6 +128,23 @@
struct icmp6_filter;
+/*
+ * Flow ID callback types, supplied by the caller of the ratectlreq
+ * handlers.  They must be non-blocking, because they are executing in
+ * the fast-path and are invoked with the inpcb write lock held:
+ */
+typedef int (in_flowid_alloc_t)(void *, m_flowid_t *); /* (arg, out: new flow ID); non-zero on failure */
+typedef int (in_flowid_rateset_t)(void *, m_flowid_t, uint64_t); /* (arg, flow ID, baudrate) */
+typedef void (in_flowid_free_t)(void *, m_flowid_t); /* (arg, flow ID) */
+
+struct in_ratectlreq;
+struct in6_ratectlreq;
+
+struct in_flowid_methods {
+ in_flowid_alloc_t *inf_alloc; /* called when an inpcb has no methods registered yet */
+ in_flowid_rateset_t *inf_rateset; /* called on every rate control request */
+ in_flowid_free_t *inf_free; /* called when the inpcb is being freed */
+};
+
/*-
* struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
* and IPv6 sockets. In the case of TCP, further per-connection state is
@@ -177,7 +195,9 @@
u_char inp_ip_ttl; /* (i) time to live proto */
u_char inp_ip_p; /* (c) protocol proto */
u_char inp_ip_minttl; /* (i) minimum TTL or drop */
- uint32_t inp_flowid; /* (x) flow id / queue id */
+ m_flowid_t inp_flowid; /* (x) flow ID / queue ID */
+ const struct in_flowid_methods *inp_flowid_mtod; /* (x) flow ID callback methods */
+ void *inp_flowid_arg; /* (x) argument for flow ID methods */
u_int inp_refcount; /* (i) refcount */
void *inp_pspare[5]; /* (x) route caching / general use */
uint32_t inp_flowtype; /* (x) M_HASHTYPE value */
@@ -635,6 +655,10 @@
void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
+int in_pcb_handle_ratectlreq(struct ifnet *, struct in_ratectlreq *,
+ const struct in_flowid_methods *, void *);
+int in6_pcb_handle_ratectlreq(struct ifnet *, struct in6_ratectlreq *,
+ const struct in_flowid_methods *, void *);
int in_pcbinshash(struct inpcb *);
int in_pcbinshash_nopcbgroup(struct inpcb *);
int in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
=== sys/netinet/in_var.h
==================================================================
--- sys/netinet/in_var.h (revision 268358)
+++ sys/netinet/in_var.h (local)
@@ -81,6 +81,14 @@
struct sockaddr_in ifra_mask;
int ifra_vhid;
};
+
+struct in_ratectlreq {
+ char ifreq_name[IFNAMSIZ]; /* NOTE(review): presumably the interface name; not read by in_pcb_handle_ratectlreq() */
+ struct sockaddr_in ifreq_dst; /* first address/port pair passed to in_pcblookup() */
+ struct sockaddr_in ifreq_src; /* second address/port pair passed to in_pcblookup() */
+ uint64_t ifreq_baudrate; /* bits per second */
+};
+
/*
* Given a pointer to an in_ifaddr (ifaddr),
* return a pointer to the addr as a sockaddr_in.
=== sys/netinet6/in6_var.h
==================================================================
--- sys/netinet6/in6_var.h (revision 268358)
+++ sys/netinet6/in6_var.h (local)
@@ -307,6 +307,13 @@
struct in6_addrlifetime ifra_lifetime;
};
+struct in6_ratectlreq {
+ char ifreq_name[IFNAMSIZ]; /* NOTE(review): presumably the interface name; not read by in6_pcb_handle_ratectlreq() */
+ struct sockaddr_in6 ifreq_dst; /* first address/port pair passed to in6_pcblookup() */
+ struct sockaddr_in6 ifreq_src; /* second address/port pair passed to in6_pcblookup() */
+ uint64_t ifreq_baudrate; /* bits per second */
+};
+
/* prefix type macro */
#define IN6_PREFIX_ND 1
#define IN6_PREFIX_RR 2
@@ -435,6 +442,7 @@
#define SIOCDIFADDR_IN6 _IOW('i', 25, struct in6_ifreq)
#define OSIOCAIFADDR_IN6 _IOW('i', 26, struct oin6_aliasreq)
#define SIOCAIFADDR_IN6 _IOW('i', 27, struct in6_aliasreq)
+#define SIOCSRATECTL_IN6 _IOW('i',110, struct in6_ratectlreq) /* set per-connection rate (IPv6) */
#define SIOCSIFPHYADDR_IN6 _IOW('i', 70, struct in6_aliasreq)
#define SIOCGIFPSRCADDR_IN6 _IOWR('i', 71, struct in6_ifreq)
=== sys/sys/mbuf.h
==================================================================
--- sys/sys/mbuf.h (revision 268358)
+++ sys/sys/mbuf.h (local)
@@ -114,6 +114,8 @@
void (*m_tag_free)(struct m_tag *);
};
+typedef uint32_t m_flowid_t; /* flow ID type shared by the mbuf packet header and struct inpcb */
+
/*
* Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set.
* Size ILP32: 48
@@ -125,7 +127,7 @@
int32_t len; /* total packet length */
/* Layer crossing persistent information. */
- uint32_t flowid; /* packet's 4-tuple system */
+ m_flowid_t flowid; /* packet's 4-tuple system */
uint64_t csum_flags; /* checksum and offload features */
uint16_t fibnum; /* this packet should use this fib */
uint8_t cosqos; /* class/quality of service */
=== sys/sys/sockio.h
==================================================================
--- sys/sys/sockio.h (revision 268358)
+++ sys/sys/sockio.h (local)
@@ -128,4 +128,6 @@
#define SIOCDIFGROUP _IOW('i', 137, struct ifgroupreq) /* delete ifgroup */
#define SIOCGIFGMEMB _IOWR('i', 138, struct ifgroupreq) /* get members */
+#define SIOCSRATECTL _IOW('i', 139, struct in_ratectlreq) /* set per-connection rate (IPv4) */
+
#endif /* !_SYS_SOCKIO_H_ */
_______________________________________________
freebsd-current@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"