On Mon, Oct 01, 2007 at 08:50:50PM -0700, David Miller wrote:
> From: [EMAIL PROTECTED] (Larry McVoy)
> Date: Mon, 1 Oct 2007 19:20:59 -0700
>
> > A short summary is "can someone please post a test program that sources
> > and sinks data at the wire speed?" because apparently I'm too old and
> > clueless to write such a thing.
>
> You're not showing us your test program so there is no way we
> can help you out.
Attached. Drop it into an lmbench tree and build it.
> My initial inclination, even without that critical information,
> is to ask whether you are setting any socket options in any way?
The only one I was playing with was SO_RCVBUF/SO_SNDBUF and I tried
disabling that and I tried playing with the read/write size. Didn't
help.
> In particular, SO_RCVLOWAT can have a large effect here, if you're
> setting it to something, that would explain why dd is doing better. A
> lot of people link to "helper libraries" with interfaces to setup
> sockets with all sorts of socket option settings by default, try not
> using such things if possible.
Agreed. That was my first thought as well, I must have been doing
something that messed up the defaults. But you did get the strace
output, there wasn't anything weird there.
> You also shouldn't dork at all with the receive and send buffer sizes.
> They are adjusted dynamically by the kernel as the window grows. But
> if you set them to specific values, this dynamic logic is turned off.
Yeah, dorking with those is left over from the bad old days of '95
when lmbench was first shipped. But I turned that all off and no
difference.
So feel free to show me where I'm an idiot in the code, but if you
can't, then what would rock would be a little send.c / recv.c that
demonstrated filling the pipe.
--
---
Larry McVoy lm at bitmover.com http://www.bitkeeper.com
/*
* bytes_tcp.c - simple TCP bandwidth source/sink
*
* server usage: bytes_tcp -s
* client usage: bytes_tcp hostname [msgsize]
*
* Copyright (c) 1994 Larry McVoy.
* Copyright (c) 2002 Carl Staelin. Distributed under the FSF GPL with
* additional restriction that results may published only if
* (1) the benchmark is unmodified, and
* (2) the version in the sccsid below is included in the report.
* Support for this development by Sun Microsystems is gratefully acknowledged.
*/
/* Identification string; "$Id$" is presumably a revision-control keyword placeholder. */
char *id = "$Id$\n";
#include "bench.h"
/* Size in bytes (1MB) of the I/O buffers and of every read() request in this file. */
#define XFER (1024*1024)
/* Forward declarations for functions defined further down in this file. */
int server_main(int ac, char **av);
int client_main(int ac, char **av);
void source(int data);
/*
 * Read from descriptor "server" into "buf" until at least "get" bytes
 * have arrived, end-of-file is seen, or a read fails.
 *
 * get    - number of bytes wanted; nothing is read when it is <= 0
 * server - descriptor to read from
 * buf    - scratch buffer; must have room for XFER bytes
 *
 * Every read() asks for a full XFER bytes no matter how much is still
 * wanted, so the total consumed can exceed "get".  A failed read prints
 * a diagnostic and exits with status 4.
 */
void
transfer(int get, int server, char *buf)
{
	/*
	 * Start at 0: when get <= 0 the loop never calls read(), and the
	 * error check below must not inspect an uninitialized value.
	 */
	int	c = 0;

	while ((get > 0) && (c = read(server, buf, XFER)) > 0) {
		get -= c;
	}
	if (c < 0) {
		perror("bytes_tcp: transfer: read failed");
		exit(4);
	}
}
/*
 * Client side: connect to the server named by av[1] and read data from it.
 *
 * ac, av - argument vector as received by main().  av[1] is the server
 *          host; the optional av[2] is the amount to read, converted
 *          with bytes().  Without av[2], 256MB are read.
 *
 * Never returns: exits 0 after the transfer, 1 on a usage error and 2
 * when no connection could be made.
 */
/* ARGSUSED */
int
client_main(int ac, char **av)
{
	int	server;
	int	get = 256 << 20;
	char	buf[XFER];
	char	*usage = "usage: %s -remotehost OR %s remotehost [msgsize]\n";

	if (ac != 2 && ac != 3) {
		(void)fprintf(stderr, usage, av[0], av[0]);
		/* A usage error is a failure; report it as one, like main() does. */
		exit(1);
	}
	if (ac == 3) {
		get = bytes(av[2]);
	}
	server = tcp_connect(av[1], TCP_DATA+1, SOCKOPT_READ|SOCKOPT_REUSE);
	if (server < 0) {
		perror("bytes_tcp: could not open socket to server");
		exit(2);
	}
	transfer(get, server, buf);
	close(server);
	exit(0);
	/*NOTREACHED*/
}
/*
 * SIGCHLD handler: reap every child that has already exited.
 *
 * signo - signal number supplied by the system; unused.
 *
 * Several children can exit before the handler runs, and pending
 * SIGCHLDs are not queued, so one wait() per signal can leave zombies
 * behind.  Looping on a non-blocking waitpid() collects all of them
 * without ever blocking.  errno is saved and restored so that code
 * interrupted just before an errno check still sees its own value.
 */
void
child(int signo)
{
	int	saved_errno = errno;

	(void)signo;
	while (waitpid(-1, NULL, WNOHANG) > 0) {
		continue;
	}
	/* Re-arm, for systems where signal() resets the handler on delivery. */
	signal(SIGCHLD, child);
	errno = saved_errno;
}
/*
 * Server side: accept connections forever and fork one child per
 * connection; each child streams data to its client with source().
 *
 * ac, av - unused.
 *
 * Does not return.  Exited children are collected by the SIGCHLD
 * handler installed here.
 */
/* ARGSUSED */
int
server_main(int ac, char **av)
{
	int	data, newdata;

	signal(SIGCHLD, child);
	data = tcp_server(TCP_DATA+1, SOCKOPT_READ|SOCKOPT_WRITE|SOCKOPT_REUSE);
	for ( ;; ) {
		newdata = tcp_accept(data, SOCKOPT_WRITE|SOCKOPT_READ);
		switch (fork()) {
		case -1:
			perror("fork");
			/* Nobody will serve this client; don't leak its socket. */
			close(newdata);
			break;
		case 0:
			/* The child only talks to its client, not the listener. */
			close(data);
			source(newdata);
			exit(0);
		default:
			/* The child owns the connection now. */
			close(newdata);
			break;
		}
	}
}
/*
 * Write XFER-sized blocks to descriptor "data" until a write() fails or
 * returns 0.
 *
 * The buffer is zero-filled so that the bytes sent to the peer are well
 * defined rather than whatever happened to be on the stack.
 */
void
source(int data)
{
	char	buf[XFER] = { 0 };

	while (write(data, buf, sizeof(buf)) > 0) {
		continue;
	}
}
/*
 * Entry point.
 *
 * "-s" as the only argument starts the server in a background child
 * while the parent exits 0.  Anything else is handed to client_main().
 * A wrong argument count or a failed fork() exits with status 1.
 */
int
main(int ac, char **av)
{
	char	*usage = "Usage: %s -s OR %s -serverhost OR %s serverhost [msgsize]\n";

	if (ac < 2 || 3 < ac) {
		fprintf(stderr, usage, av[0], av[0], av[0]);
		exit(1);
	}
	if (ac == 2 && !strcmp(av[1], "-s")) {
		switch (fork()) {
		case -1:
			/* No server was started; don't claim success. */
			perror("fork");
			exit(1);
		case 0:
			server_main(ac, av);
			exit(0);
		default:
			exit(0);
		}
	}
	client_main(ac, av);
	return (0);
}
/*
* tcp_lib.c - routines for managing TCP connections.
*
* Positive port/program numbers are RPC ports, negative ones are TCP ports.
*
* Copyright (c) 1994-1996 Larry McVoy.
*/
#define _LIB /* bench.h needs this */
#include "bench.h"
/*
 * Get a TCP socket, bind it, figure out the port,
 * and advertise the port as program "prog".
 *
 * prog - positive: bind() picks the port, which is then registered with
 *        pmap_set() as program "prog", version 1;
 *        negative: bind to TCP port -prog, nothing is registered;
 *        zero: bind() picks the port, nothing is registered.
 * rdwr - SOCKOPT_* flags handed to sock_optimize().
 *
 * Returns the listening socket.  Every failure prints a diagnostic and
 * exits (socket: 1, bind: 2, listen: 4, pmap_set: 5).
 *
 * XXX - it would be nice if you could advertise ascii strings.
 */
int
tcp_server(int prog, int rdwr)
{
	int	sock;
	struct sockaddr_in s;

#ifdef	LIBTCP_VERBOSE
	/* %d, not %u: both are ints and prog is negative for plain TCP ports. */
	fprintf(stderr, "tcp_server(%d, %d)\n", prog, rdwr);
#endif
	if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
		perror("socket");
		exit(1);
	}
	sock_optimize(sock, rdwr);
	bzero((void*)&s, sizeof(s));
	s.sin_family = AF_INET;
	if (prog < 0) {
		s.sin_port = htons(-prog);
	}
	if (bind(sock, (struct sockaddr*)&s, sizeof(s)) < 0) {
		perror("bind");
		exit(2);
	}
	if (listen(sock, 100) < 0) {
		perror("listen");
		exit(4);
	}
	if (prog > 0) {
#ifdef	LIBTCP_VERBOSE
		fprintf(stderr, "Server port %d\n", sockport(sock));
#endif
		/* Drop any stale registration before adding ours. */
		(void)pmap_unset((u_long)prog, (u_long)1);
		if (!pmap_set((u_long)prog, (u_long)1, (u_long)IPPROTO_TCP,
		    (unsigned short)sockport(sock))) {
			perror("pmap_set");
			exit(5);
		}
	}
	return (sock);
}
/*
 * Withdraw the advertisement for program "prog", version 1.
 *
 * Only positive program numbers are unregistered; for any other value
 * nothing is done.  Always returns 0.
 */
int
tcp_done(int prog)
{
	if (prog <= 0) {
		return (0);
	}
	pmap_unset((u_long)prog, (u_long)1);
	return (0);
}
/*
 * Accept a connection and return it
 *
 * sock - listening socket
 * rdwr - SOCKOPT_* flags handed to sock_optimize() for the new socket
 *
 * An accept() interrupted by a signal (EINTR) is retried; any other
 * failure prints a diagnostic and exits with status 6.
 */
int
tcp_accept(int sock, int rdwr)
{
	struct sockaddr_in s;
	socklen_t	namelen;	/* accept() takes a socklen_t *, not an int * */
	int	newsock;

	bzero((void*)&s, sizeof(s));
	for ( ;; ) {
		/* Value-result argument: set it afresh for every attempt. */
		namelen = sizeof(s);
		newsock = accept(sock, (struct sockaddr*)&s, &namelen);
		if (newsock >= 0) {
			break;
		}
		if (errno != EINTR) {
			perror("accept");
			exit(6);
		}
	}
#ifdef	LIBTCP_VERBOSE
	fprintf(stderr, "Server newsock port %d\n", sockport(newsock));
#endif
	sock_optimize(newsock, rdwr);
	return (newsock);
}
/*
 * Connect to the TCP socket advertised as "prog" on "host" and
 * return the connected socket.
 *
 * host - server host name, resolved with gethostbyname()
 * prog - positive: program number looked up with pmap_getport();
 *        otherwise the TCP port is -prog
 * rdwr - SOCKOPT_* flags; SOCKOPT_PID binds the local end to a port
 *        derived from the process id, and all flags go to sock_optimize()
 *
 * A refused or reset connection is retried on a fresh socket; on the
 * 11th consecutive such failure -1 is returned.  Every other failure
 * prints a diagnostic and exits.
 *
 * Hacked Thu Oct 27 1994 to cache pmap_getport calls.  This saves
 * about 4000 usecs in loopback lat_connect calls.  I suppose we
 * should time gethostbyname() & pmap_getport(), huh?
 */
int
tcp_connect(char *host, int prog, int rdwr)
{
	static struct hostent *h;
	static struct sockaddr_in s;
	static u_short	save_port;
	static u_long	save_prog;
	static char	*save_host;
	static int	tries = 0;
	int	sock;

	if ((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0) {
		perror("socket");
		exit(1);
	}
	if (rdwr & SOCKOPT_PID) {
		static unsigned short port;
		struct sockaddr_in sin;

		if (!port) {
			port = (unsigned short)(getpid() << 4);
			if (port < 1024) {
				port += 1024;
			}
		}
		/* Step upward from the last port tried until bind() accepts one. */
		do {
			port++;
			bzero((void*)&sin, sizeof(sin));
			sin.sin_family = AF_INET;
			sin.sin_port = htons(port);
		} while (bind(sock, (struct sockaddr*)&sin, sizeof(sin)) == -1);
	}
#ifdef	LIBTCP_VERBOSE
	else {
		struct sockaddr_in sin;

		bzero((void*)&sin, sizeof(sin));
		sin.sin_family = AF_INET;
		if (bind(sock, (struct sockaddr*)&sin, sizeof(sin)) < 0) {
			perror("bind");
			exit(2);
		}
	}
	fprintf(stderr, "Client port %d\n", sockport(sock));
#endif
	sock_optimize(sock, rdwr);
	/* Resolve the destination only when it differs from the cached one. */
	if (!h || host != save_host || prog != save_prog) {
		save_host = host;	/* XXX - counting on them not
					 * changing it - benchmark only.
					 */
		save_prog = prog;
		if (!(h = gethostbyname(host))) {
			/*
			 * NOTE: gethostbyname() reports through h_errno, so
			 * the text perror() appends may be unrelated.
			 */
			perror(host);
			exit(2);
		}
		bzero((void *) &s, sizeof(s));
		s.sin_family = AF_INET;
		bcopy((void*)h->h_addr, (void *)&s.sin_addr, h->h_length);
		if (prog > 0) {
			save_port = pmap_getport(&s, prog,
			    (u_long)1, IPPROTO_TCP);
			if (!save_port) {
				perror("lib TCP: No port found");
				exit(3);
			}
#ifdef	LIBTCP_VERBOSE
			fprintf(stderr, "Server port %d\n", save_port);
#endif
			s.sin_port = htons(save_port);
		} else {
			s.sin_port = htons(-prog);
		}
	}
	if (connect(sock, (struct sockaddr*)&s, sizeof(s)) < 0) {
		if (errno == ECONNRESET || errno == ECONNREFUSED) {
			close(sock);
			if (++tries > 10) {
				/*
				 * Give up, and clear the counter so that a
				 * later call gets its full set of retries
				 * instead of failing on its first refusal.
				 */
				tries = 0;
				return (-1);
			}
			return (tcp_connect(host, prog, rdwr));
		}
		perror("connect");
		exit(4);
	}
	tries = 0;
	return (sock);
}
#define LIBTCP_VERBOSE
/*
 * Apply the socket options selected by "flags" to "sock".
 *
 * NOTE: the function currently returns before doing anything, so none
 * of the code after the first statement runs.  That code would:
 *   SOCKOPT_READ  - request a receive buffer of SOCKBUF bytes, halving
 *                   the request for as long as setsockopt() fails
 *   SOCKOPT_WRITE - the same for the send buffer
 *   SOCKOPT_REUSE - turn on SO_REUSEADDR, reporting a failure
 */
void
sock_optimize(int sock, int flags)
{
	return;

	if (flags & SOCKOPT_READ) {
		int	rcvsize;

		for (rcvsize = SOCKBUF;
		    setsockopt(sock, SOL_SOCKET, SO_RCVBUF,
			&rcvsize, sizeof(int)) != 0;
		    rcvsize >>= 1) {
			continue;
		}
#ifdef	LIBTCP_VERBOSE
		fprintf(stderr, "sockopt %d: RCV: %dK\n", sock, rcvsize>>10);
#endif
	}
	if (flags & SOCKOPT_WRITE) {
		int	sndsize;

		for (sndsize = SOCKBUF;
		    setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
			&sndsize, sizeof(int)) != 0;
		    sndsize >>= 1) {
			continue;
		}
#ifdef	LIBTCP_VERBOSE
		fprintf(stderr, "sockopt %d: SND: %dK\n", sock, sndsize>>10);
#endif
	}
	if (flags & SOCKOPT_REUSE) {
		int	on = 1;
		int	rc;

		rc = setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
		    &on, sizeof(on));
		if (rc == -1) {
			perror("SO_REUSEADDR");
		}
	}
}
/*
 * Return the local port number, in host byte order, that socket "s" is
 * bound to.  If getsockname() fails a diagnostic is printed and -1 is
 * returned.
 */
int
sockport(int s)
{
	struct sockaddr_in sin;
	socklen_t	namelen = sizeof(sin);	/* getsockname() takes a socklen_t * */

	if (getsockname(s, (struct sockaddr *)&sin, &namelen) < 0) {
		perror("getsockname");
		return (-1);
	}
	return ((int)ntohs(sin.sin_port));
}