Since cipso_v4_error might be called from different network stack layers, we can't safely use icmp_send there. icmp_send copies IP options with ip_option_echo, which uses IPCB to take access to IP header compiled data. But after commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses"), IPCB can't be used above IP layer. This patch fixes the problem by creating in cipso_v4_error a local copy of compiled IP options and using it with introduced __icmp_send function. This looks some overloaded, but in quite rare error conditions only.
The original discussion is here: https://lore.kernel.org/linux-security-module/16659801547571...@sas1-890ba5c2334a.qloud-c.yandex.net/ Signed-off-by: Sergey Nazarov <s-naza...@yandex.ru> --- include/net/icmp.h | 9 ++++++++- net/ipv4/cipso_ipv4.c | 18 ++++++++++++++++-- net/ipv4/icmp.c | 7 ++++--- 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/include/net/icmp.h b/include/net/icmp.h index 6ac3a5b..e0f709d 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -22,6 +22,7 @@ #include <net/inet_sock.h> #include <net/snmp.h> +#include <net/ip.h> struct icmp_err { int errno; @@ -39,7 +40,13 @@ struct icmp_err { struct sk_buff; struct net; -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt); +static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); +} + int icmp_rcv(struct sk_buff *skb); int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 777fa3b..234d12e 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1735,13 +1735,27 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) */ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { + unsigned char optbuf[sizeof(struct ip_options) + 40]; + struct ip_options *opt = (struct ip_options *)optbuf; + if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; + /* + * We might be called above the IP layer, + * so we can not use icmp_send and IPCB here. + */ + + memset(opt, 0, sizeof(struct ip_options)); + opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); + memcpy(opt->__data, (unsigned char *)&(ip_hdr(skb)[1]), opt->optlen); + if (ip_options_compile(dev_net(skb->dev), opt, NULL)) + return; + if (gateway) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt); else - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); + __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt); } /** diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 065997f..3f24414 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -570,7 +570,8 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) +void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, + const struct ip_options *opt) { struct iphdr *iph; int room; @@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) + if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt)) goto out_unlock; @@ -742,7 +743,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) local_bh_enable(); out:; } -EXPORT_SYMBOL(icmp_send); +EXPORT_SYMBOL(__icmp_send); static void icmp_socket_deliver(struct sk_buff *skb, u32 info) --