/*
 * traceroute implementation that does not require root privilege.
 *
 * This requires a Linux 2.4 kernel.
 *
 * Copyright (C) 2000, Olaf Kirch <okir@caldera.de>
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <netinet/ip_icmp.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include "utils.h"

/*
 * The kernel wants these: the kernel header included just below is
 * expected to refer to the __u32/__u8 type names, so define them first
 * in terms of the libc fixed-width types.
 */
typedef u_int32_t	__u32;
typedef u_int8_t	__u8;
#include <linux/errqueue.h>

/* Number of entries in the hops[] table (and in main's pollfd array) */
#define MAXHOPS		255
/* Number of probe[] slots kept per hop in struct hop */
#define MAXPROBES	6
/* Number of entries in opt_gateway[] (gateways given with -g) */
#define MAXGATEWAYS	9

/*
 * Per-hop state. Each hop owns one UDP socket (created in hop_init)
 * and a small array of probe records.
 */
struct hop {
	/* Socket used for every probe of this hop */
	int		fd;
	/* Number of probes transmitted so far (incremented by hop_xmit) */
	unsigned int	sent;
	/* Number of probe slots accounted for: incremented by hop_recv for
	 * each error received, and bumped up to `sent' by hop_xmit so that
	 * unanswered probes are printed as asterisks */
	unsigned int	recvd;
	/* Time at which hop_xmit should run next for this hop;
	 * 0 once the hop is complete */
	time_t		nextsend;
	/* Set by hop_recv when a response other than ICMP time exceeded
	 * was received on this hop */
	unsigned int	final;

	/* Previously printed address; hop_print only prints a responder
	 * address when it differs from this one */
	struct in_addr	prev_addr;

	/* One record per probe, indexed by probe number */
	struct probe {
	    /* Copy of the extended error delivered with the response */
	    struct sock_extended_err ee;
	    /* When the probe was sent (filled in by hop_xmit) */
	    struct timeval	sent_time;
	    /* When the response was received (filled in by hop_recverr) */
	    struct timeval	recvd_time;
	    /* Address of the host that sent the error; 0 if none recorded */
	    struct in_addr	responder;
	    /* Short annotation such as "H!" printed with the response,
	     * or NULL if there is none */
	    const char *	err_ind;
	    /* Non-zero once hop_print has output this probe */
	    unsigned int	printed;
	}		probe[MAXPROBES];
};

/* Hop table; hop_next() probes the entry at index i with TTL i + 1 */
static struct hop	hops[MAXHOPS];
/* hops[first_hop] .. hops[last_hop - 1] are the hops currently in
 * progress; first_hop advances as hops complete, last_hop as new hops
 * are started */
static unsigned int	first_hop, last_hop;
/* probe_timeout: seconds to wait for a response to a probe (-w)
 * probe_retries: number of probes sent per hop (-q)
 * max_hops:      upper limit for last_hop (-m) */
static unsigned int	probe_timeout = 5,
			probe_retries = 3,
			max_hops = 30;
/* Set by hop_recv once any hop got a final response; main then stops
 * starting new hops */
static unsigned int	have_final_response;

/* Next local port to try in hop_init; incremented on every bind attempt */
static u_int16_t	src_port = 64000;
/* Destination address and the name it was given as on the command line */
static struct in_addr	dst_addr;
static const char *	dst_name;
/* Destination port for the next hop's socket (-p); incremented by
 * hop_init for every hop */
static u_int16_t	dst_port = 33434;
/* Value set via IP_TOS on each socket (-t) */
static u_int8_t		dst_tos;

/* -n: passed (negated) to printaddr as its second argument */
static int		opt_noresolver = 0;
/* Value for the IP_MTU_DISCOVER socket option; -F selects IP_PMTUDISC_DO */
static int		opt_dontfrag = IP_PMTUDISC_DONT;
/* -R: build a record-route option in ipopt_init */
static int		opt_rr = 0;
/* -g: gateways for the loose source route option, and their count */
static struct in_addr	opt_gateway[MAXGATEWAYS];
static unsigned int	ngateways;

/* Probe payload; the first packetsize bytes are sent with each probe */
static unsigned char	packet[65536];
static unsigned int	packetsize = 40;
/* Raw IP options built by ipopt_init; ipoptions_len == 0 means none */
static unsigned char	ipoptions[40];
static unsigned int	ipoptions_len = 0;

/* Forward declarations for the file-local helpers defined below main() */
static void		hop_next(void);
static void		hop_xmit(struct hop *);
static int		hop_sendmsg(int);
static void		hop_recv(unsigned int);
static int		hop_recverr(int, struct probe *);
static void		hop_print(struct hop *);
static int		hop_init(struct hop *, unsigned int);
static void		ipopt_init(void);
static void		usage(int exval);

int
main(int argc, char **argv)
{
	unsigned int	n;
	int		c;

	while ((c = getopt(argc, argv, "Ff:g:m:np:t:w:q:R")) != -1) {
		switch (c) {
		case 'F':
			opt_dontfrag = IP_PMTUDISC_DO;
			break;

		case 'f':
			first_hop = getnum("hop", optarg, 1, 255);
			break;

		case 'g':
			fatal("option -g not yet supported");
			if (ngateways >= MAXGATEWAYS)
				fatal("too many gateways");
			if (!getaddr(optarg, opt_gateway + ngateways))
				return 1;
			ngateways++;
			break;

		case 'm':
			max_hops = getnum("hop", optarg, 1, 255);
			break;

		case 'n':
			opt_noresolver = 1;
			break;

		case 'p':
			dst_port = getnum("port", optarg, 1, 65535);
			break;

		case 't':
			dst_tos = getnum("TOS", optarg, 0, 255);
			break;

		case 'w':
			probe_timeout = getnum("timeout", optarg, 1, 256);
			break;

		case 'q':
			probe_retries = getnum("retry", optarg, 1, 1024);
			break;

		case 'R':
			fatal("option -R not yet supported");
			opt_rr = 1;
			break;

		default:
			usage(1);
		}
	}

	if (optind == argc - 2) {
		packetsize = getnum("packet length", argv[optind+1],
					1, 65536);
	} else
	if (optind != argc - 1)
		usage(1);

	dst_name = argv[optind];
	if (!getaddr(dst_name, &dst_addr))
		return 1;

	if (first_hop >= max_hops) {
		fprintf(stderr, "first hop %u larger than max hops %u\n",
			first_hop, max_hops);
		return 1;
	}

	/* Initialize packet */
	for (n = 0; n < packetsize; n++)
		packet[n] = 0x40 + (n & 0x3f);

	/* Initialize IP options */
	ipopt_init();

	/* Start by sending the first packet */
	last_hop = first_hop;
	hop_next();

	printf("traceroute to %s (%s), %u hops max, %u byte packets\n",
		dst_name, inet_ntoa(dst_addr),
		max_hops, packetsize);

	while (1) {
		struct pollfd	pfd[MAXHOPS];
		time_t		now, timeout;
		struct hop	*hop;
		unsigned int	m, n;

		now = time(NULL);
		timeout = now + 5;

		memset(pfd, 0, sizeof(pfd));
		for (m = 0, hop = hops + first_hop; hop < hops + last_hop; m++, hop++) {
			pfd[m].events = POLLERR;
			pfd[m].fd = hop->fd;
			if (hop->nextsend <= now)
				hop_xmit(hop);
			if (hop->nextsend && hop->nextsend < timeout)
				timeout = hop->nextsend;
		}

		poll(pfd, m, timeout - now);

		/* Receive any pending ICMP errors */
		for (n = 0; n < m; n++) {
			if (!(pfd[n].revents & POLLERR))
				continue;
			hop_recv(first_hop + n);
		}

		/* Now loop over all hop structures and see whether we can
		 * add more probes, or wrap up the timed out ones */
		while (first_hop < last_hop) {
			hop = hops + first_hop;
			hop_print(hop);
			if (hop->nextsend)
				break;

			/* This one is complete */
			printf("\n");
			close(hop->fd);
			first_hop++;
			if (hop->final)
				goto done;
		}

		/* Did we receive a response for the last hop? */
		if (last_hop > first_hop
		 && !have_final_response
		 && last_hop < max_hops) {
			hop = hops + last_hop - 1;
			if (hop->recvd == 1)
				hop_next();
		}
	}

done:	return 0;
}

/*
 * Print the command line synopsis to stderr and terminate the process
 * with exit status `exval'.  Does not return.
 */
static void
usage(int exval)
{
	fprintf(stderr,
	"usage: traceroute [-nF] [-f first_ttl] [-m max_hops] [-p port]\n"
	"           [-t tos] [-w timeout] [-q nqueries] host [packetlen]\n");
	exit(exval);
}

/*
 * Start probing one more hop: claim the slot at index last_hop, grow
 * the active window by one, set the slot up with a TTL equal to the
 * new last_hop, and schedule its first transmission for right now.
 */
static void
hop_next(void)
{
	unsigned int	slot = last_hop;
	struct hop	*fresh = &hops[slot];

	last_hop = slot + 1;
	hop_init(fresh, last_hop);
	fresh->nextsend = time(NULL);
}

static void
hop_xmit(struct hop *hop)
{
	/* Adjust the counter of received packets so that
	 * asterisks for dropped packets get printed */
	hop->recvd = hop->sent;

	/* The final packet has timed out, or was received */
	if (hop->sent >= probe_retries) {
		hop->nextsend = 0;
		return;
	}

	/* Send packet */
	if (hop_sendmsg(hop->fd) < 0)
		perror("send failed");
	gettimeofday(&(hop->probe + hop->sent)->sent_time, NULL);

	/* Set timeout */
	hop->nextsend = time(NULL) + probe_timeout;
	hop->sent++;
}

static void
hop_recv(unsigned int num)
{
	struct hop	*hop = hops + num;
	struct probe	*p;
	const char	*errstring = 0;

	p = hop->probe + hop->recvd;

	/* recv errmsg */
	if (hop_recverr(hop->fd, p) < 0)
		return;

	/* Trigger next transmit */
	hop->nextsend = time(NULL);
	hop->recvd++;

	if (p->ee.ee_origin != SO_EE_ORIGIN_ICMP) {
		errstring = "??";
		goto out;
	}

	/* Handle error codes */
	switch (p->ee.ee_type) {
	case ICMP_TIME_EXCEEDED:
		return;

	case ICMP_DEST_UNREACH:
		switch (p->ee.ee_code) {
		case ICMP_UNREACH_NET:
		case ICMP_UNREACH_NET_UNKNOWN:
		case ICMP_UNREACH_ISOLATED:
		case ICMP_UNREACH_NET_PROHIB:
		case ICMP_UNREACH_TOSNET:
		case ICMP_UNREACH_FILTER_PROHIB:
			errstring = "N!";
			break;

		case ICMP_UNREACH_HOST:
		case ICMP_UNREACH_HOST_UNKNOWN:
		case ICMP_UNREACH_HOST_PROHIB:
		case ICMP_UNREACH_TOSHOST:
		case ICMP_UNREACH_HOST_PRECEDENCE:
			errstring = "H!";
			break;

		case ICMP_UNREACH_PORT:
			/* we've reached the destintation host */
			break;

		case ICMP_UNREACH_PROTOCOL:
			errstring = "P!";
			break;

		case ICMP_UNREACH_NEEDFRAG:
			errstring = "F!";
			break;

		case ICMP_UNREACH_SRCFAIL:
			errstring = "S!";
			break;

		default:
			errstring = "!!";
			break;
		}
		break;
	
	default:
		errstring = "??";
	}

out:
	p->err_ind = errstring;
	have_final_response = 1;
	hop->final = 1;
}

static void
hop_print(struct hop *hop)
{
	struct probe	*p = hop->probe;
	struct timeval	delta;
	unsigned int	nr;

	for (nr = 0; nr < hop->recvd; nr++, p++) {
		if (p->printed)
			continue;
		if (nr == 0)
			printf("%2u ", (hop - hops));
		if (p->responder.s_addr == 0) {
			printf(" *");
		} else {
			printf(" ");
			timersub(&p->recvd_time, &p->sent_time, &delta);
			if (delta.tv_sec < 0)
				timerclear(&delta);
			if (p->responder.s_addr != hop->prev_addr.s_addr)
				printaddr(p->responder, !opt_noresolver);
			hop->prev_addr = p->responder;
			if (p->err_ind)
				printf("(%s)", p->err_ind);
			printf("  %lu.%03lu ms",
				1000 * delta.tv_sec + delta.tv_usec / 1000,
				delta.tv_usec % 1000);
		}
		p->printed = 1;
	}
	fflush(stdout);
}

/*
 * Prepare `hop' for probing with the given TTL.
 *
 * The structure is zeroed and a fresh UDP socket is created for it.
 * The socket has error queue delivery and timestamps enabled, gets the
 * TTL, TOS and path MTU discovery settings applied, is bound to the
 * next usable local port (starting at src_port) and connected to the
 * destination on dst_port.  Both port counters advance as a side
 * effect.  Any failure is reported through fatal().
 *
 * Always returns 0.
 */
static int
hop_init(struct hop *hop, unsigned int ttl)
{
	struct sockaddr_in	addr;
	int			sock, enable, optval, bound;

	memset(hop, 0, sizeof(*hop));

	sock = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		fatal("unable to create UDP socket: %m");

	/* Boolean options: error queue and receive timestamps */
	enable = 1;
	if (setsockopt(sock, SOL_IP, IP_RECVERR, &enable, sizeof(enable)) < 0)
		fatal("unable to set SO_RECVERR: %m");
	if (setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &enable, sizeof(enable)) < 0)
		fatal("unable to set SO_TIMESTAMP: %m");

	/* Per-packet IP header settings */
	optval = ttl;
	if (setsockopt(sock, SOL_IP, IP_TTL, &optval, sizeof(optval)) < 0)
		fatal("unable to set TTL: %m");

	optval = dst_tos;
	if (setsockopt(sock, SOL_IP, IP_TOS, &optval, sizeof(optval)) < 0)
		fatal("unable to set TOS: %m");

	optval = opt_dontfrag;
	if (setsockopt(sock, SOL_IP, IP_MTU_DISCOVER, &optval, sizeof(optval)) < 0)
		fatal("unable to set MTU_DISCOVER: %m");

	/* XXX: Set SO_PKTINFO if output device and/or source addr is
	 * specified */

	memset(&addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;

	/* Walk the local port numbers until one can be bound; only
	 * "address in use" is worth another attempt */
	do {
		addr.sin_port = htons(src_port);
		src_port++;

		bound = (bind(sock, (struct sockaddr *) &addr, sizeof(addr)) >= 0);
		if (!bound && errno != EADDRINUSE)
			fatal("unable to bind socket: %m");
	} while (!bound);

	/* Reuse the same sockaddr for the remote end */
	addr.sin_addr = dst_addr;
	addr.sin_port = htons(dst_port);
	dst_port++;

	if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
		fatal("Unable to connect to %s: %m", inet_ntoa(dst_addr));

	hop->fd = sock;
	return 0;
}

/*
 * Send one probe on `fd': the first packetsize bytes of packet[], plus
 * an IP_RETOPTS control message carrying ipoptions[] when any IP
 * options were configured.
 *
 * Returns the result of sendmsg(), i.e. a negative value on failure.
 */
static int
hop_sendmsg(int fd)
{
	/* The union gives the buffer the alignment struct cmsghdr
	 * needs; a plain char array does not guarantee it */
	union {
		struct cmsghdr	hdr;
		unsigned char	buf[1024];
	}		control;
	struct msghdr	msg;
	struct cmsghdr	*cm;
	struct iovec	iov;

	memset(&msg, 0, sizeof(msg));
	iov.iov_base	= packet;
	iov.iov_len	= packetsize;

	msg.msg_iov	= &iov;
	msg.msg_iovlen	= 1;

	/* Copy IP options, if specified.
	 * Ick ick ick! This control message wants a
	 * struct ip_options with the compiled options
	 * in them. The struct is surrounded by an
	 * #ifdef __KERNEL__ though...
	 */
	if (ipoptions_len) {
		memset(&control, 0, sizeof(control));
		msg.msg_control	= control.buf;
		msg.msg_controllen = sizeof(control.buf);

		cm = CMSG_FIRSTHDR(&msg);
		cm->cmsg_level = SOL_IP;
		cm->cmsg_type  = IP_RETOPTS;
		cm->cmsg_len   = CMSG_LEN(ipoptions_len);
		memcpy(CMSG_DATA(cm), ipoptions, ipoptions_len);

		/* Only this one control message is passed on */
		msg.msg_controllen = cm->cmsg_len;
	}

	return sendmsg(fd, &msg, 0);
}

static int
hop_recverr(int fd, struct probe *p)
{
	struct sockaddr_in sin;
	struct msghdr	msg;
	unsigned char	control[1024];
	struct cmsghdr	*cm;

	memset(&msg, 0, sizeof(msg));
	memset(&sin, 0, sizeof(sin));
	memset(&p->ee, 0, sizeof(p->ee));
	gettimeofday(&p->recvd_time, NULL);
	p->responder.s_addr = 0;

	msg.msg_name	= &sin;
	msg.msg_namelen	= sizeof(sin);
	msg.msg_control	= control;
	msg.msg_controllen = sizeof(control);

	if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
		return -1;

	/* CMSG_* macros are broken in current glibc */
	for (cm = (struct cmsghdr *) control;
	     (caddr_t) cm < (caddr_t) (control + msg.msg_controllen);
	     cm = (struct cmsghdr *) ((caddr_t) cm + cm->cmsg_len)) {
		struct sock_extended_err *ep;
		struct sockaddr_in	*offender;

		if (cm->cmsg_level == SOL_SOCKET
		 && cm->cmsg_type == SO_TIMESTAMP) {
			memcpy(&p->recvd_time, CMSG_DATA(cm),
					sizeof(struct timeval));
			continue;
		}

		if (cm->cmsg_level != SOL_IP
		 || cm->cmsg_type != IP_RECVERR)
		 	continue;

		ep = (struct sock_extended_err *) CMSG_DATA(cm);
		memcpy(&p->ee, ep, sizeof(p->ee));

		if (SO_EE_OFFENDER(ep)->sa_family == AF_INET) {
			offender = (struct sockaddr_in *) SO_EE_OFFENDER(ep);
			p->responder = offender->sin_addr;
		}
	}

	return 0;
}

/*
 * Build the raw IP option bytes in ipoptions[].
 *
 * With gateways configured a loose source route option is produced,
 * otherwise, if record route was requested, a record route option.
 * In both cases the option is preceded by one NOP pad byte and
 * ipoptions_len is set to 40.  If neither applies nothing is touched,
 * so ipoptions_len keeps its value.
 */
static void
ipopt_init(void)
{
	unsigned char	opt_type;
	unsigned int	i;

	if (ngateways)
		opt_type = IPOPT_LSRR;
	else if (opt_rr)
		opt_type = IPOPT_RR;
	else
		return;

	ipoptions[0] = IPOPT_NOP; /* pad */
	ipoptions[1] = opt_type;
	ipoptions[2] = 39;	/* option length */
	ipoptions[3] = 4;	/* pointer */

	/* Gateway addresses go into consecutive 4-byte slots starting
	 * at offset 4; there are none in the record route case */
	for (i = 0; i < ngateways; i++)
		memcpy(&ipoptions[4 * (i + 1)], &opt_gateway[i], 4);

	ipoptions_len = 40;
}
