I've spent the last week or so trying to track down a recurring
problem I'm seeing with UDP datagram handling.  I'm new to the
internals of the Linux network stack, but it appears to me that
there's a substantial error in recent kernels' handling of UDP
checksum errors.

The behavior I'm seeing:  I have a UDP server that receives lots of
datagrams from many devices on a single port. A small number of those
devices occasionally send packets with bad UDP checksums.  After I
receive one of these bad packets, the next recvmsg() made on the
socket returns data from the bad-checksum packet, but the
source-address and length of the next (good) packet that arrived at
the port.

I believe this issue was introduced between linux 3.18 and 3.19, by a
set of changes to net/core/datagram.c that made
skb_copy_and_csum_datagram_msg() and related functions use the
iov_iter structure to copy data to user buffers.  In the case where
those functions copy a datagram and then conclude that the checksum is
invalid, they don't remove the already-copied data from the user's
iovec; when udp_recvmsg() calls skb_copy_and_csum_datagram_msg() for a
second time, looking at the next datagram on the queue, that second
datagram's data is appended to the first datagram's.  So when
recvmsg() returns to the user, the return value and msg_name reflect
the second datagram, but the first bytes in the user's iovec come from
the first (bad) datagram.

(I've attached a test program that demonstrates the problem.  Note
that it sees correct behavior unless the bad-checksum packet has > 68
bytes of UDP data; otherwise, the packet doesn't make it past the
CHECKSUM_BREAK test, and never enters
skp_copy_and_csum_datagram_msg().)

The fix for UDP seems pretty simple; the iov_iter's iov_offset member
just needs to be set back to zero on a checksum failure.  But it looks
like skb_copy_and_csum_datagram_msg() is also called from tcp_input.c,
where I assume that multiple sk_buffs can be copied-and-csum'd into
the same iov -- if that's right, it seems like iov_iter needs some
additional state to support rolling-back the most recent copy without
losing previous ones.

Any thoughts?
#include <string.h>
#include <unistd.h>
#include <netdb.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/uio.h>
#include <arpa/inet.h>

#include <netinet/udp.h>


int main(int argc, char **argv)
{
  int ret;

  if (argc < 2) {
    fprintf(stderr, "Usage: csumtest <bytes-in-bad-packet>\n");
    exit(1);
  }

  struct sockaddr_in addr;
  socklen_t addrLen = sizeof(addr);  
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = inet_addr("127.0.0.1");
  addr.sin_port = htons(0);

  int listen = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
  if (listen < 0)
  {
    fprintf(stderr, "failed to create listening socket (err=%d: %s)\n", errno, strerror(errno));
    exit(1);
  }

  ret = bind(listen, (struct sockaddr *) &addr, addrLen);
  if (ret != 0)
  {
    fprintf(stderr, "failed to bind listening socket socket (err=%d: %s)\n", errno, strerror(errno));
    exit(1);
  }

  ret = getsockname(listen, (struct sockaddr *) &addr, &addrLen);
  if (ret != 0)
  {
    fprintf(stderr, "getsockname failed for listening socket socket (err=%d: %s)\n", errno, strerror(errno));
    exit(1);
  }
  else
  {
    printf("listening on port %d\n", ntohs(addr.sin_port));
  }


  
  // Send a packet with deliberately bad checksum
  char rawBuf[4096];
  memset(rawBuf, 0, 4096);
  struct udphdr *udph = (struct udphdr *) rawBuf;

  // Send as UDP data the string "BAD DATA", repeated as necessary
  // to fill the number of bytes requested on the command-line
  char *data = rawBuf + sizeof(struct udphdr);
  int badBytes = atoi(argv[1]);
  int i;
  for (i = 0; i < badBytes; i += 8)
  {
    strncpy(data + i, "BAD DATA", 8);
  }

  // UDP headers contain a bogus checksum, but are otherwise reasonable
  udph->check = htons(0xbadd);
  udph->source = htons(18);
  udph->dest = addr.sin_port;
  udph->len = htons(sizeof(struct udphdr) + badBytes);

  int raw = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
  ret = sendto(raw, rawBuf,
	       sizeof(struct udphdr) + badBytes, 0,
	       (struct sockaddr *) &addr, addrLen);
  if (ret < 0)
  {
    fprintf(stderr, "raw send failed (err=%d: %s)\n", errno, strerror(errno));
    exit(1);
  }
  
  
  // Send a second, legitimate UDP packet
  int cooked = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
  char *second = "Good data";
  ret = sendto(cooked, second, strlen(second), 0,
	       (struct sockaddr *) &addr, addrLen);
  if (ret < 0)
  {
    fprintf(stderr, "second cooked send failed (err=%d: %s)\n", errno, strerror(errno));
    exit(1);
  }

  // Read what's queued up for us.
  while (1)
  {
    struct msghdr msg;
    struct iovec  iov;
    char readBuf[4096];
    ssize_t bytes;
    
    memset(readBuf, 0, 4096);
    
    iov.iov_base = readBuf;
    iov.iov_len = sizeof(readBuf);

    msg.msg_name = NULL;
    msg.msg_namelen = 0;
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;
    msg.msg_control = NULL;
    msg.msg_controllen = 0;
    msg.msg_flags = 0;
    
    bytes = recvmsg(listen, &msg, 0);
    if (bytes < 0)
    {
      fprintf(stderr, "getsockname failed for listening socket socket (err=%d: %s)\n", errno, strerror(errno));
      exit(1);
    }

    printf("recvmsg returned %ld bytes: %s\n", bytes, readBuf);

    if (bytes == 9)
      break;
  }
}

Reply via email to