Certain systems process a significant unconnected UDP workload.
It would be preferable to disable UDP early demux for those systems
and enable it for TCP only.

v1->v2: Change function pointer instead of adding conditional as
suggested by Stephen.

Signed-off-by: Subash Abhinov Kasiviswanathan <subas...@codeaurora.org>
Suggested-by: Eric Dumazet <eduma...@google.com>
Cc: Stephen Hemminger <step...@networkplumber.org>
---
 include/net/netns/ipv4.h   |  2 ++
 include/net/tcp.h          |  2 ++
 include/net/udp.h          |  2 ++
 net/ipv4/af_inet.c         | 22 ++++++++++++++++++++--
 net/ipv4/sysctl_net_ipv4.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/tcp_ipv6.c        | 10 +++++++++-
 6 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 0378e88..1e74da23 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -86,6 +86,8 @@ struct netns_ipv4 {
        /* Shall we try to damage output packets if routing dev changes? */
        int sysctl_ip_dynaddr;
        int sysctl_ip_early_demux;
+       int sysctl_tcp_early_demux;
+       int sysctl_udp_early_demux;
 
        int sysctl_fwmark_reflect;
        int sysctl_tcp_fwmark_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6061963..3b6446d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1953,4 +1953,6 @@ static inline void tcp_listendrop(const struct sock *sk)
        __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
 }
 
+void tcp_v4_early_demux_configure(int enable);
+void tcp_v6_early_demux_configure(int enable);
 #endif /* _TCP_H */
diff --git a/include/net/udp.h b/include/net/udp.h
index 1661791..7de31d5 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -373,4 +373,6 @@ struct udp_iter_state {
 #if IS_ENABLED(CONFIG_IPV6)
 void udpv6_encap_enable(void);
 #endif
+
+void udp_v4_early_demux_configure(int enable);
 #endif /* _UDP_H */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f750698..3e11d74 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1579,7 +1579,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
 };
 #endif
 
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
        .early_demux    =       tcp_v4_early_demux,
        .handler        =       tcp_v4_rcv,
        .err_handler    =       tcp_v4_err,
@@ -1588,7 +1588,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
        .icmp_strict_tag_validation = 1,
 };
 
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
        .early_demux =  udp_v4_early_demux,
        .handler =      udp_rcv,
        .err_handler =  udp_err,
@@ -1596,6 +1596,22 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
        .netns_ok =     1,
 };
 
+void tcp_v4_early_demux_configure(int enable)
+{
+       if (enable)
+               tcp_protocol.early_demux = tcp_v4_early_demux;
+       else
+               tcp_protocol.early_demux = NULL;
+}
+
+void udp_v4_early_demux_configure(int enable)
+{
+       if (enable)
+               udp_protocol.early_demux = udp_v4_early_demux;
+       else
+               udp_protocol.early_demux = NULL;
+}
+
 static const struct net_protocol icmp_protocol = {
        .handler =      icmp_rcv,
        .err_handler =  icmp_err,
@@ -1700,6 +1716,8 @@ static __net_init int inet_init_net(struct net *net)
        net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
        net->ipv4.sysctl_ip_dynaddr = 0;
        net->ipv4.sysctl_ip_early_demux = 1;
+       net->ipv4.sysctl_udp_early_demux = 1;
+       net->ipv4.sysctl_tcp_early_demux = 1;
 
        return 0;
 }
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b2fa498..c61383b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -253,6 +253,39 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
        return ret;
 }
 
+static int proc_tcp_early_demux(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret = 0;
+
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+       if (write && !ret) {
+               int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+
+               tcp_v4_early_demux_configure(enabled);
+               tcp_v6_early_demux_configure(enabled);
+       }
+
+       return ret;
+}
+
+static int proc_udp_early_demux(struct ctl_table *table, int write,
+                               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret = 0;
+
+       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+       if (write && !ret) {
+               int enabled = init_net.ipv4.sysctl_udp_early_demux;
+
+               udp_v4_early_demux_configure(enabled);
+       }
+
+       return ret;
+}
+
 static struct ctl_table ipv4_table[] = {
        {
                .procname       = "tcp_timestamps",
@@ -737,6 +770,20 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
                .proc_handler   = proc_dointvec
        },
        {
+               .procname       = "udp_early_demux",
+               .data           = &init_net.ipv4.sysctl_udp_early_demux,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_udp_early_demux
+       },
+       {
+               .procname       = "tcp_early_demux",
+               .data           = &init_net.ipv4.sysctl_tcp_early_demux,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_tcp_early_demux
+       },
+       {
                .procname       = "ip_default_ttl",
                .data           = &init_net.ipv4.sysctl_ip_default_ttl,
                .maxlen         = sizeof(int),
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4c60c6f..0dd761c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1926,13 +1926,21 @@ struct proto tcpv6_prot = {
        .diag_destroy           = tcp_abort,
 };
 
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
        .early_demux    =       tcp_v6_early_demux,
        .handler        =       tcp_v6_rcv,
        .err_handler    =       tcp_v6_err,
        .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
+void tcp_v6_early_demux_configure(int enable)
+{
+       if (enable)
+               tcpv6_protocol.early_demux = tcp_v6_early_demux;
+       else
+               tcpv6_protocol.early_demux = NULL;
+}
+
 static struct inet_protosw tcpv6_protosw = {
        .type           =       SOCK_STREAM,
        .protocol       =       IPPROTO_TCP,
-- 
1.9.1

Reply via email to