Hello,
</snip>
> > @@ -790,6 +795,7 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int
> > *phlen)
> > memcpy(ip + 1, p->ipopt_list, optlen);
> > *phlen = sizeof(struct ip) + optlen;
> > ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
> > + ip->ip_hl += (optlen >> 2);
> > return (m);
> > }
>
> I think ip_insertoptions() is not intended to insert additional
> options, but to set them. The *phlen is set to htons(ntohs(ip->ip_len)
> + optlen), so the option length is optlen. The ip->ip_hl += makes
> no sense. I am not sure if ip->ip_hl is always initialized.
>
> ip_insertoptions() returns the option length in &hlen. I think it
> is the caller's job to write it into the header.
>
> Instead of touching ip_insertoptions() I would set ip->ip_hl = hlen
> >> 2 in this block, like it is done in ip_output().
>
> > + if (opts != NULL) {
> > m = ip_insertoptions(m, opts, &hlen);
> > + ip->ip_v = IPVERSION;
> > + ip->ip_off &= htons(IP_DF);
> > + ip->ip_id = htons(ip_randomid());
> > + ipstat_inc(ips_localout);
> > + ip_send_raw(m);
> > + } else
Yes, you are right. Below is the updated diff I would like to commit.
>
> Apart from that, adding a special task seems the way to go.
I think so too. An alternative would be to pass the send flags via m_pkthdr
in the mbuf; however, there is no space left there. We would have to add a
new member to m_pkthdr. I see such a change as too intrusive (given that we
are addressing a true corner case).
thanks and
regards
sashan
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index a007aa6c2b3..eb03bbee0b3 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -846,10 +846,20 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
printf("icmp_send dst %s src %s\n", dst, src);
}
#endif
- if (opts != NULL)
+ /*
+ * ip_send() cannot handle IP options properly. So in case we have
+ * options fill out the IP header here and use ip_send_raw() instead.
+ */
+ if (opts != NULL) {
m = ip_insertoptions(m, opts, &hlen);
-
- ip_send(m);
+ ip->ip_hl = (hlen >> 2);
+ ip->ip_v = IPVERSION;
+ ip->ip_off &= htons(IP_DF);
+ ip->ip_id = htons(ip_randomid());
+ ipstat_inc(ips_localout);
+ ip_send_raw(m);
+ } else
+ ip_send(m);
}
u_int32_t
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 0ec3f723be4..6c935cc88c2 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -139,6 +139,7 @@ struct cpumem *ipcounters;
int ip_sysctl_ipstat(void *, size_t *, void *);
static struct mbuf_queue ipsend_mq;
+static struct mbuf_queue ipsendraw_mq;
extern struct niqueue arpinq;
@@ -147,7 +148,11 @@ int ip_dooptions(struct mbuf *, struct ifnet *);
int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **);
static void ip_send_dispatch(void *);
+static void ip_sendraw_dispatch(void *);
static struct task ipsend_task = TASK_INITIALIZER(ip_send_dispatch,
&ipsend_mq);
+static struct task ipsendraw_task =
+ TASK_INITIALIZER(ip_sendraw_dispatch, &ipsendraw_mq);
+
/*
* Used to save the IP options in case a protocol wants to respond
* to an incoming packet over the same route if the packet got here
@@ -217,6 +222,7 @@ ip_init(void)
DP_SET(rootonlyports.udp, defrootonlyports_udp[i]);
mq_init(&ipsend_mq, 64, IPL_SOFTNET);
+ mq_init(&ipsendraw_mq, 64, IPL_SOFTNET);
#ifdef IPSEC
ipsec_init();
@@ -1777,7 +1783,7 @@ ip_savecontrol(struct inpcb *inp, struct mbuf **mp,
struct ip *ip,
}
void
-ip_send_dispatch(void *xmq)
+ip_send_do_dispatch(void *xmq, int flags)
{
struct mbuf_queue *mq = xmq;
struct mbuf *m;
@@ -1789,14 +1795,33 @@ ip_send_dispatch(void *xmq)
NET_LOCK();
while ((m = ml_dequeue(&ml)) != NULL) {
- ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
+ ip_output(m, NULL, NULL, flags, NULL, NULL, 0);
}
NET_UNLOCK();
}
+void
+ip_sendraw_dispatch(void *xmq)
+{
+ ip_send_do_dispatch(xmq, IP_RAWOUTPUT);
+}
+
+void
+ip_send_dispatch(void *xmq)
+{
+ ip_send_do_dispatch(xmq, 0);
+}
+
void
ip_send(struct mbuf *m)
{
mq_enqueue(&ipsend_mq, m);
task_add(net_tq(0), &ipsend_task);
}
+
+void
+ip_send_raw(struct mbuf *m)
+{
+ mq_enqueue(&ipsendraw_mq, m);
+ task_add(net_tq(0), &ipsendraw_task);
+}
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index c01a3e7803c..e19b744fdf3 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -765,6 +765,13 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int
*phlen)
optlen = opt->m_len - sizeof(p->ipopt_dst);
if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
return (m); /* XXX should fail */
+
+ /* check if options will fit to IP header */
+ if ((optlen + (ip->ip_hl << 2)) > (0x0f << 2)) {
+ *phlen = sizeof (struct ip);
+ return (m);
+ }
+
if (p->ipopt_dst.s_addr)
ip->ip_dst = p->ipopt_dst;
if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index 7ede24ce922..1a43675a7ac 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -240,6 +240,7 @@ struct mbuf *
u_int16_t
ip_randomid(void);
void ip_send(struct mbuf *);
+void ip_send_raw(struct mbuf *);
void ip_slowtimo(void);
struct mbuf *
ip_srcroute(struct mbuf *);