On Tue, 2016-12-06 at 11:34 +0100, Paolo Abeni wrote:
> On Mon, 2016-12-05 at 09:57 -0800, Eric Dumazet wrote:
> > From: Eric Dumazet <eduma...@google.com>
> > 
> > In the UDP recvmsg() path we currently access 3 cache lines from an
> > skb while holding the receive queue lock, plus another one if the
> > packet is dequeued, since we need to change skb->next->prev:
> > 
> > 1st cache line (contains ->next/prev pointers, offsets 0x00 and 0x08)
> > 2nd cache line (skb->len & skb->peeked, offsets 0x80 and 0x8e)
> > 3rd cache line (skb->truesize/users, offsets 0xe0 and 0xe4)
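
(Side note, in case it helps readers following along: with 64-byte
cache lines, the quoted offsets do land on three distinct lines. A
trivial user-space check, using nothing but the offsets above:)

#include <stdio.h>

int main(void)
{
	/* skb field offsets quoted above; 64-byte cache lines assumed */
	unsigned int off[] = { 0x00, 0x08, 0x80, 0x8e, 0xe0, 0xe4 };

	for (unsigned int i = 0; i < 6; i++)
		printf("offset 0x%02x -> cache line %u\n",
		       off[i], off[i] / 64);
	return 0;
}
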
> > 
> > skb->peeked is only needed to make sure 0-length packets are properly
> > handled while MSG_PEEK is in use.
> > 
> > I initially intended to remove skb->peeked, but the "MSG_PEEK at
> > non-zero offset" support added by Sam Kumar makes this impossible.
> > 
> > This patch avoids one cache line miss during the locked section, when
> > skb->len and skb->peeked do not have to be read.
> > 
> > It also avoids the skb_set_peeked() cost for non-empty UDP datagrams.
> > 
> > Signed-off-by: Eric Dumazet <eduma...@google.com>
> 
> Thank you for all the good work.
> 
> After all your improvements, I see the cacheline miss in inet_recvmsg()
> as a major perf offender for the user-space process in the UDP flood
> scenario, due to skc_rxhash sharing the same cacheline as sk_drops.
> 
> Using a UDP-specific drop counter (and an sk_drops accessor to wrap
> sk_drops access where needed), we could avoid that cache miss. With that
> change (patch for udp.h only, below) I get a 3% improvement on top of
> all the pending UDP patches, and the gain should be even more relevant
> after the two-queue rework. What do you think?
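
To put a number on that false sharing, it is easy to reproduce from
user space. Here is a quick toy (plain pthreads; the field names only
echo skc_rxhash/sk_drops, this is not kernel code) timing a reader
that hammers a read-mostly field while a second thread bumps a counter
sitting either on the same 64-byte line or on its own line:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

struct shared {
	unsigned int rxhash;		/* read-mostly, like skc_rxhash */
	atomic_int near_drops;		/* shares rxhash's cache line */
	atomic_int far_drops __attribute__((aligned(64))); /* own line */
};

static struct shared s;
static atomic_int *counter;
static volatile int stop;

/* Increment the selected counter as fast as possible. */
static void *writer(void *unused)
{
	(void)unused;
	while (!stop)
		atomic_fetch_add_explicit(counter, 1, memory_order_relaxed);
	return NULL;
}

/* Time reads of s.rxhash while the writer dirties 'c'. */
static double time_reads(atomic_int *c)
{
	struct timespec a, b;
	unsigned int sink = 0;
	pthread_t t;

	stop = 0;
	counter = c;
	pthread_create(&t, NULL, writer, NULL);
	clock_gettime(CLOCK_MONOTONIC, &a);
	for (unsigned long i = 0; i < (1UL << 27); i++)
		sink += *(volatile unsigned int *)&s.rxhash;
	clock_gettime(CLOCK_MONOTONIC, &b);
	stop = 1;
	pthread_join(t, NULL);
	(void)sink;
	return (b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1e9;
}

int main(void)
{
	printf("writer on same line: %.3fs\n", time_reads(&s.near_drops));
	printf("writer on own line : %.3fs\n", time_reads(&s.far_drops));
	return 0;
}

The same-line run should be measurably slower on SMP hardware; that
penalty is exactly what a dedicated-cacheline counter avoids.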

Here follows what I'm experimenting with.

The 'pcflag' change is not strictly needed, but it shrinks the udp_sock
struct a bit, so that the newly added cacheline does not create
additional holes (with my kconfig, at least). I can split that chunk
out into a separate patch.
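
For reference, the kind of shrink I mean can be sketched in user space
(mock structs with illustrative sizes only; the real layout should be
checked with pahole against the actual kconfig):

#include <stddef.h>
#include <stdio.h>

/* Before: pcflag lives in its own byte, plus explicit padding. */
struct before {
	unsigned int  corkflag;
	unsigned char encap_type;
	unsigned char no_check6_tx:1,
		      no_check6_rx:1;	/* 6 bits of this byte wasted */
	unsigned char pcflag;		/* separate byte */
	unsigned char unused[3];
	long	      rest[4];		/* stand-in for the remaining fields */
};

/* After: pcflag reuses the 6 spare bits of the bitfield byte. */
struct after {
	unsigned int  corkflag;
	unsigned char encap_type;
	unsigned char no_check6_tx:1,
		      no_check6_rx:1,
		      pcflag:6;
	long	      rest[4];
};

int main(void)
{
	printf("before: %zu bytes\n", sizeof(struct before)); /* 48 on x86_64 */
	printf("after : %zu bytes\n", sizeof(struct after));  /* 40 on x86_64 */
	return 0;
}
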
---
diff --git a/include/linux/udp.h b/include/linux/udp.h
index d1fd8cd..a21baaf 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -49,7 +49,11 @@ struct udp_sock {
        unsigned int     corkflag;      /* Cork is required */
        __u8             encap_type;    /* Is this an Encapsulation socket? */
        unsigned char    no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
-                        no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
+                        no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */
+                        pcflag:6;      /* UDP-Lite specific, moved here to */
+                                       /* fill a hole, marks socket as */
+                                       /* UDP-Lite if > 0    */
+
        /*
         * Following member retains the information to create a UDP header
         * when the socket is uncorked.
@@ -64,8 +68,7 @@ struct udp_sock {
 #define UDPLITE_BIT      0x1           /* set by udplite proto init function */
 #define UDPLITE_SEND_CC  0x2           /* set via udplite setsockopt         */
 #define UDPLITE_RECV_CC  0x4           /* set via udplite setsocktopt        */
-       __u8             pcflag;        /* marks socket as UDP-Lite if > 0    */
-       __u8             unused[3];
+
        /*
         * For encapsulation sockets.
         */
@@ -79,6 +82,9 @@ struct udp_sock {
        int                     (*gro_complete)(struct sock *sk,
                                                struct sk_buff *skb,
                                                int nhoff);
+
+       /* since we are prone to drops, avoid dirtying any sk cacheline */
+       atomic_t                drops ____cacheline_aligned_in_smp;
 };
 
 static inline struct udp_sock *udp_sk(const struct sock *sk)
@@ -106,6 +112,17 @@ static inline bool udp_get_no_check6_rx(struct sock *sk)
        return udp_sk(sk)->no_check6_rx;
 }
 
+static inline int udp_drops_read(const struct sock *sk)
+{
+       return atomic_read(&udp_sk(sk)->drops);
+}
+
+static inline void
+udp_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
+{
+       SOCK_SKB_CB(skb)->dropcount = udp_drops_read(sk);
+}
+
 #define udp_portaddr_for_each_entry(__sk, list) \
        hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
 
diff --git a/include/net/sock.h b/include/net/sock.h
index ed75dec..113e495 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2138,6 +2138,8 @@ struct sock_skb_cb {
        SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
 }
 
+int sk_drops_read(const struct sock *sk);
+
 static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
 {
        int segs = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 6b10573..dc41727 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -9,6 +9,7 @@
 #include <net/sock.h>
 #include <linux/kernel.h>
 #include <linux/tcp.h>
+#include <linux/udp.h>
 #include <linux/workqueue.h>
 
 #include <linux/inet_diag.h>
@@ -19,6 +20,14 @@
 static DEFINE_MUTEX(sock_diag_table_mutex);
 static struct workqueue_struct *broadcast_wq;
 
+int sk_drops_read(const struct sock *sk)
+{
+       if (sk->sk_protocol == IPPROTO_UDP)
+               return udp_drops_read(sk);
+       else
+               return atomic_read(&sk->sk_drops);
+}
+
 static u64 sock_gen_cookie(struct sock *sk)
 {
        while (1) {
@@ -67,7 +76,7 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
        mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
        mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
        mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
-       mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
+       mem[SK_MEMINFO_DROPS] = sk_drops_read(sk);
 
        return nla_put(skb, attrtype, sizeof(mem), &mem);
 }
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fbd6b69..d7c4980 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1174,6 +1174,11 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
        return ret;
 }
 
+static void udp_drops_inc(struct sock *sk)
+{
+       atomic_inc(&udp_sk(sk)->drops);
+}
+
 /* fully reclaim rmem/fwd memory allocated for skb */
 static void udp_rmem_release(struct sock *sk, int size, int partial)
 {
@@ -1244,7 +1249,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
        /* no need to setup a destructor, we will explicitly release the
         * forward allocated memory on dequeue
         */
-       sock_skb_set_dropcount(sk, skb);
+       udp_skb_set_dropcount(sk, skb);
 
        __skb_queue_tail(list, skb);
        spin_unlock(&list->lock);
@@ -1258,7 +1263,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
        atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
 
 drop:
-       atomic_inc(&sk->sk_drops);
+       udp_drops_inc(sk);
        return err;
 }
 EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
@@ -1319,7 +1324,7 @@ static int first_packet_length(struct sock *sk)
                                IS_UDPLITE(sk));
                __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
                                IS_UDPLITE(sk));
-               atomic_inc(&sk->sk_drops);
+               udp_drops_inc(sk);
                __skb_unlink(skb, rcvq);
                total += skb->truesize;
                kfree_skb(skb);
@@ -1417,7 +1422,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 
        if (unlikely(err)) {
                if (!peeked) {
-                       atomic_inc(&sk->sk_drops);
+                       udp_drops_inc(sk);
                        UDP_INC_STATS(sock_net(sk),
                                      UDP_MIB_INERRORS, is_udplite);
                }
@@ -1714,7 +1719,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
        __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 drop:
        __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-       atomic_inc(&sk->sk_drops);
+       udp_drops_inc(sk);
        kfree_skb(skb);
        return -1;
 }
@@ -1772,7 +1777,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
                nskb = skb_clone(skb, GFP_ATOMIC);
 
                if (unlikely(!nskb)) {
-                       atomic_inc(&sk->sk_drops);
+                       udp_drops_inc(sk);
                        __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
                                        IS_UDPLITE(sk));
                        __UDP_INC_STATS(net, UDP_MIB_INERRORS,
@@ -2491,7 +2496,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
                from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)),
                0, sock_i_ino(sp),
                atomic_read(&sp->sk_refcnt), sp,
-               atomic_read(&sp->sk_drops));
+               udp_drops_read(sp));
 }
 
 int udp4_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c5d76d2..9f46dff 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -1038,5 +1038,5 @@ void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
                   0,
                   sock_i_ino(sp),
                   atomic_read(&sp->sk_refcnt), sp,
-                  atomic_read(&sp->sk_drops));
+                  sk_drops_read(sp));
 }
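
As a side note on userspace visibility: the per-skb dropcount that
udp_skb_set_dropcount() records is what applications read back via the
SO_RXQ_OVFL ancillary message, so moving the counter does not change
the ABI. A minimal receiver using that (standard Linux API; sketch,
untested here, demo port made up):

#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>

#ifndef SO_RXQ_OVFL
#define SO_RXQ_OVFL 40			/* from asm-generic/socket.h */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0), one = 1;
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(9999),	/* arbitrary demo port */
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	char buf[2048];
	union {					/* aligned cmsg buffer */
		char space[CMSG_SPACE(sizeof(uint32_t))];
		struct cmsghdr align;
	} cbuf;
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf.space,
	};
	struct cmsghdr *cm;

	setsockopt(fd, SOL_SOCKET, SO_RXQ_OVFL, &one, sizeof(one));
	bind(fd, (struct sockaddr *)&addr, sizeof(addr));

	msg.msg_controllen = sizeof(cbuf.space);
	while (recvmsg(fd, &msg, 0) >= 0) {
		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
			if (cm->cmsg_level == SOL_SOCKET &&
			    cm->cmsg_type == SO_RXQ_OVFL)
				printf("drops so far: %u\n",
				       *(uint32_t *)CMSG_DATA(cm));
		msg.msg_controllen = sizeof(cbuf.space);
	}
	return 0;
}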

