Analysis of the structure virtnet_info using pahole gives the
following stats.
        /* size: 256, cachelines: 4, members: 25 */
        /* sum members: 245, holes: 3, sum holes: 11 */
        /* paddings: 1, sum paddings: 4 */

Reordering the members of virtnet_info packs the byte holes in the
middle of the structure, reduces the size of the structure by 8 bytes,
and also allows members to be stored without unnecessarily crossing a
cacheline boundary (for a cacheline size of 64 bytes).

Analysis using pahole post-reordering of members gives the following
stats.
        /* size: 248, cachelines: 4, members: 25 */
        /* padding: 3 */
        /* paddings: 1, sum paddings: 4 */
        /* last cacheline: 56 bytes */

Signed-off-by: Anant Thazhemadam <anant.thazhema...@gmail.com>
---
The complete analysis done by pahole can be found below.
Before the change:
        struct virtnet_info {
                struct virtio_device *     vdev;                 /*     0     8 */
                struct virtqueue *         cvq;                  /*     8     8 */
                struct net_device *        dev;                  /*    16     8 */
                struct send_queue *        sq;                   /*    24     8 */
                struct receive_queue *     rq;                   /*    32     8 */
                unsigned int               status;               /*    40     4 */
                u16                        max_queue_pairs;      /*    44     2 */
                u16                        curr_queue_pairs;     /*    46     2 */
                u16                        xdp_queue_pairs;      /*    48     2 */
                bool                       big_packets;          /*    50     1 */
                bool                       mergeable_rx_bufs;    /*    51     1 */
                bool                       has_cvq;              /*    52     1 */
                bool                       any_header_sg;        /*    53     1 */
                u8                         hdr_len;              /*    54     1 */

                /* XXX 1 byte hole, try to pack */

                struct delayed_work refill;                      /*    56    88 */

                /* XXX last struct has 4 bytes of padding */

                /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
                struct work_struct config_work;                  /*   144    32 */
                bool                       affinity_hint_set;    /*   176     1 */

                /* XXX 7 bytes hole, try to pack */

                struct hlist_node  node;                         /*   184    16 */
                /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
                struct hlist_node  node_dead;                    /*   200    16 */
                struct control_buf *       ctrl;                 /*   216     8 */
                u8                         duplex;               /*   224     1 */

                /* XXX 3 bytes hole, try to pack */

                u32                        speed;                /*   228     4 */
                long unsigned int          guest_offloads;       /*   232     8 */
                long unsigned int          guest_offloads_capable; /*   240     8 */
                struct failover *          failover;             /*   248     8 */

                /* size: 256, cachelines: 4, members: 25 */
                /* sum members: 245, holes: 3, sum holes: 11 */
                /* paddings: 1, sum paddings: 4 */
        };

After the change:
        struct virtnet_info {
                struct virtio_device *     vdev;                 /*     0     8 */
                struct virtqueue *         cvq;                  /*     8     8 */
                struct net_device *        dev;                  /*    16     8 */
                struct send_queue *        sq;                   /*    24     8 */
                struct receive_queue *     rq;                   /*    32     8 */
                unsigned int               status;               /*    40     4 */
                u16                        max_queue_pairs;      /*    44     2 */
                u16                        curr_queue_pairs;     /*    46     2 */
                u16                        xdp_queue_pairs;      /*    48     2 */
                bool                       big_packets;          /*    50     1 */
                bool                       mergeable_rx_bufs;    /*    51     1 */
                bool                       has_cvq;              /*    52     1 */
                bool                       any_header_sg;        /*    53     1 */
                bool                       affinity_hint_set;    /*    54     1 */
                u8                         hdr_len;              /*    55     1 */
                struct control_buf *       ctrl;                 /*    56     8 */
                /* --- cacheline 1 boundary (64 bytes) --- */
                struct work_struct config_work;                  /*    64    32 */
                struct hlist_node  node;                         /*    96    16 */
                struct hlist_node  node_dead;                    /*   112    16 */
                /* --- cacheline 2 boundary (128 bytes) --- */
                long unsigned int          guest_offloads;       /*   128     8 */
                long unsigned int          guest_offloads_capable; /*   136     8 */
                struct failover *          failover;             /*   144     8 */
                struct delayed_work refill;                      /*   152    88 */

                /* XXX last struct has 4 bytes of padding */

                /* --- cacheline 3 boundary (192 bytes) was 48 bytes ago --- */
                u32                        speed;                /*   240     4 */
                u8                         duplex;               /*   244     1 */

                /* size: 248, cachelines: 4, members: 25 */
                /* padding: 3 */
                /* paddings: 1, sum paddings: 4 */
                /* last cacheline: 56 bytes */
        };

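It can be seen that the size has been reduced by 8 bytes, and the holes
have been eliminated as well. Also, more members of virtnet_info are
accommodated within a single cacheline (without unnecessarily crossing
over the cacheline boundary).

Note that the diff below also reorders the members of struct
receive_queue, which is not covered by the pahole analysis above.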


 drivers/net/virtio_net.c | 42 ++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 263b005981bd..32747f1980ae 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -137,29 +137,29 @@ struct receive_queue {
 
        struct napi_struct napi;
 
+       /* Name of this receive queue: input.$index */
+       char name[40];
+
        struct bpf_prog __rcu *xdp_prog;
 
        struct virtnet_rq_stats stats;
 
+       /* RX: fragments + linear part + virtio header */
+       struct scatterlist sg[MAX_SKB_FRAGS + 2];
+
+       /* Page frag for packet buffer allocation. */
+       struct page_frag alloc_frag;
+
        /* Chain pages by the private ptr. */
        struct page *pages;
 
        /* Average packet length for mergeable receive buffers. */
        struct ewma_pkt_len mrg_avg_pkt_len;
 
-       /* Page frag for packet buffer allocation. */
-       struct page_frag alloc_frag;
-
-       /* RX: fragments + linear part + virtio header */
-       struct scatterlist sg[MAX_SKB_FRAGS + 2];
+       struct xdp_rxq_info xdp_rxq;
 
        /* Min single buffer size for mergeable buffers case. */
        unsigned int min_buf_len;
-
-       /* Name of this receive queue: input.$index */
-       char name[40];
-
-       struct xdp_rxq_info xdp_rxq;
 };
 
 /* Control VQ buffers: protected by the rtnl lock */
@@ -202,33 +202,33 @@ struct virtnet_info {
        /* Host can handle any s/g split between our header and packet data */
        bool any_header_sg;
 
+       /* Does the affinity hint is set for virtqueues? */
+       bool affinity_hint_set;
+
        /* Packet virtio header size */
        u8 hdr_len;
 
-       /* Work struct for refilling if we run low on memory. */
-       struct delayed_work refill;
+       struct control_buf *ctrl;
 
        /* Work struct for config space updates */
        struct work_struct config_work;
 
-       /* Does the affinity hint is set for virtqueues? */
-       bool affinity_hint_set;
-
        /* CPU hotplug instances for online & dead */
        struct hlist_node node;
        struct hlist_node node_dead;
 
-       struct control_buf *ctrl;
-
-       /* Ethtool settings */
-       u8 duplex;
-       u32 speed;
-
        unsigned long guest_offloads;
        unsigned long guest_offloads_capable;
 
        /* failover when STANDBY feature enabled */
        struct failover *failover;
+
+       /* Work struct for refilling if we run low on memory. */
+       struct delayed_work refill;
+
+       /* Ethtool settings */
+       u32 speed;
+       u8 duplex;
 };
 
 struct padded_vnet_hdr {
-- 
2.25.1

Reply via email to