From: Willem de Bruijn <will...@google.com>

Add two counters to tun ethtool stats to count zerocopy completions.
- tx_zerocopy counts completions that succeeded without copy.
- tx_zerocopy_err counts those that triggered a copy.

Tun intercepts completions by replacing the zerocopy completion
handler (ubuf_info) prepared by the packet source (vhost-net)
with its own.

To avoid adding another ubuf_info alloc/free in the datapath, only
enter this mode if the packet source passed an array of two ubuf_info
to the tun device.

Pass msg_controllen to tun_get_user to detect this.

Signed-off-by: Willem de Bruijn <will...@google.com>
---
 drivers/net/tun.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 10 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index df6ef9670d05..286787d8e875 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -149,6 +149,8 @@ struct tun_pcpu_stats {
        u32 rx_dropped;
        u32 tx_dropped;
        u32 rx_frame_errors;
+       u64 tx_zerocopy;
+       u64 tx_zerocopy_err;
 };
 
 /* A tun_file connects an open character device to a tuntap netdevice. It
@@ -201,6 +203,8 @@ static const struct {
        { "tx_packets" },
        { "rx_bytes" },
        { "tx_bytes" },
+       { "tx_zerocopy" },
+       { "tx_zerocopy_err" },
 };
 
 /* Since the socket were moved to tun_file, to preserve the behavior of persist
@@ -1082,8 +1086,9 @@ static void tun_set_headroom(struct net_device *dev, int 
new_hr)
        tun->align = new_hr;
 }
 
-static void
-tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+static void __tun_net_get_stats64(struct net_device *dev,
+                                 struct rtnl_link_stats64 *stats,
+                                 u64 *ethtool_stats)
 {
        u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0;
        struct tun_struct *tun = netdev_priv(dev);
@@ -1103,6 +1108,16 @@ tun_net_get_stats64(struct net_device *dev, struct 
rtnl_link_stats64 *stats)
                        txbytes         = p->tx_bytes;
                } while (u64_stats_fetch_retry(&p->syncp, start));
 
+               if (ethtool_stats) {
+                       ethtool_stats[0] += rxpackets;
+                       ethtool_stats[1] += txpackets;
+                       ethtool_stats[2] += rxbytes;
+                       ethtool_stats[3] += txbytes;
+                       ethtool_stats[4] += p->tx_zerocopy;
+                       ethtool_stats[5] += p->tx_zerocopy_err;
+                       continue;
+               }
+
                stats->rx_packets       += rxpackets;
                stats->rx_bytes         += rxbytes;
                stats->tx_packets       += txpackets;
@@ -1113,11 +1128,21 @@ tun_net_get_stats64(struct net_device *dev, struct 
rtnl_link_stats64 *stats)
                rx_frame_errors += p->rx_frame_errors;
                tx_dropped      += p->tx_dropped;
        }
+
+       if (ethtool_stats)
+               return;
+
        stats->rx_dropped  = rx_dropped;
        stats->rx_frame_errors = rx_frame_errors;
        stats->tx_dropped = tx_dropped;
 }
 
+static void
+tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+       __tun_net_get_stats64(dev, stats, NULL);  /* NULL: no ethtool array */
+}
+
 static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                       struct netlink_ext_ack *extack)
 {
@@ -1537,10 +1562,44 @@ static struct sk_buff *tun_build_skb(struct tun_struct 
*tun,
        return NULL;
 }
 
+/* Count the completion, then chain to the ubuf_info just before this one. */
+static void tun_zerocopy_callback(struct ubuf_info *ubuf, bool success)
+{
+       struct ubuf_info *orig = ubuf - 1;
+       struct tun_struct *tun = ubuf->ctx;
+
+       if (!success)
+               this_cpu_inc(tun->pcpu_stats->tx_zerocopy_err);
+       else
+               this_cpu_inc(tun->pcpu_stats->tx_zerocopy);
+       orig->callback(orig, success);
+}
+
+/* Attach zerocopy completion state (@ubuf, @len bytes of msg_control) to
+ * @skb. If the caller passed two ubuf_info, tun interposes its callback via
+ * the second one. Any other length only warns and keeps the caller's ubuf:
+ * tun_get_user() sets SKBTX_DEV_ZEROCOPY after this call regardless.
+ */
+static void tun_set_zerocopy(struct tun_struct *tun, struct sk_buff *skb,
+                            struct ubuf_info *ubuf, size_t len)
+{
+       const size_t ulen = sizeof(*ubuf);
+
+       if (len == ulen * 2) {
+               ubuf = ubuf + 1;
+               ubuf->callback = tun_zerocopy_callback;
+               ubuf->ctx = tun;
+               refcount_set(&ubuf->refcnt, 1);
+       } else {
+               WARN_ON_ONCE(len != ulen);
+       }
+       skb_shinfo(skb)->destructor_arg = ubuf;
+}
+
 /* Get packet from user space buffer */
 static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
-                           void *msg_control, struct iov_iter *from,
-                           int noblock, bool more)
+                           void *msg_control, size_t msg_controllen,
+                           struct iov_iter *from, int noblock, bool more)
 {
        struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
        struct sk_buff *skb;
@@ -1713,7 +1772,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, 
struct tun_file *tfile,
 
        /* copy skb_ubuf_info for callback when skb has no error */
        if (zerocopy) {
-               skb_shinfo(skb)->destructor_arg = msg_control;
+               tun_set_zerocopy(tun, skb, (void *)msg_control, msg_controllen);
                skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
                skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
        } else if (msg_control) {
@@ -1798,7 +1857,7 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
        if (!tun)
                return -EBADFD;
 
-       result = tun_get_user(tun, tfile, NULL, from,
+       result = tun_get_user(tun, tfile, NULL, 0, from,
                              file->f_flags & O_NONBLOCK, false);
 
        tun_put(tun);
@@ -2058,7 +2117,8 @@ static int tun_sendmsg(struct socket *sock, struct msghdr 
*m, size_t total_len)
        if (!tun)
                return -EBADFD;
 
-       ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
+       ret = tun_get_user(tun, tfile, m->msg_control, m->msg_controllen,
+                          &m->msg_iter,
                           m->msg_flags & MSG_DONTWAIT,
                           m->msg_flags & MSG_MORE);
        tun_put(tun);
@@ -2983,10 +3043,9 @@ static void tun_get_ethtool_stats(struct net_device *dev,
 {
        const int ethtool_stats_bytelen =
                ARRAY_SIZE(tun_ethtool_stats_keys) * sizeof(u64);
-       struct rtnl_link_stats64 link_stats64 = {0};
 
-       tun_net_get_stats64(dev, &link_stats64);
-       memcpy(data, &link_stats64, ethtool_stats_bytelen);
+       memset(data, 0, ethtool_stats_bytelen);
+       __tun_net_get_stats64(dev, NULL, data);
 }
 
 static const struct ethtool_ops tun_ethtool_ops = {
-- 
2.14.2.920.gcf0c67979c-goog

Reply via email to