Add support for zerocopy sendfile when using TLS device offload.
Before this patch, TLS device offload would copy sendfile data to a
bounce buffer. This can be avoided when the user knows that page cache
data is not modified. For example, when a serving static files.
Removing this copy improves performance significaintly, as TLS and TCP
sendfile perform the same operations, and the only overhead is TLS
header/trailer insertion.

This patch adds two configuration knobs to control TLS zerocopy sendfile:
1) socket option named TLS_TX_ZEROCOPY_SENDFILE that enables
applications to use zerocopy sendfile on a per-socket basis.
2) global sysctl named tls_zerocopy_sendfile that defines the default
for the entire system.

Non TLS device enabled sockets are not affected by this option,
and attempts to configure it will fail.

Signed-off-by: Boris Pismenny <bor...@mellanox.com>
---
 include/net/netns/ipv4.h   |  4 ++++
 include/net/tls.h          |  1 +
 include/uapi/linux/tls.h   |  1 +
 net/ipv4/sysctl_net_ipv4.c |  9 +++++++
 net/tls/tls_device.c       | 39 ++++++++++++++++++++++--------
 net/tls/tls_main.c         | 60 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 104 insertions(+), 10 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 9e36738c1fe1..bc828d272151 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -196,6 +196,10 @@ struct netns_ipv4 {
        int sysctl_igmp_llm_reports;
        int sysctl_igmp_qrv;
 
+#ifdef CONFIG_TLS_DEVICE
+       int sysctl_tls_zerocopy_sendfile;
+#endif
+
        struct ping_group_range ping_group_range;
 
        atomic_t dev_addr_genid;
diff --git a/include/net/tls.h b/include/net/tls.h
index e5dac7e74e79..f80985ac55de 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -172,6 +172,7 @@ struct tls_record_info {
 
 struct tls_offload_context_tx {
        struct crypto_aead *aead_send;
+       bool zerocopy_sendpage;
        spinlock_t lock;        /* protects records list */
        struct list_head records_list;
        struct tls_record_info *open_record;
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index bcd2869ed472..d6f65f5d206f 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -39,6 +39,7 @@
 /* TLS socket options */
 #define TLS_TX                 1       /* Set transmit parameters */
 #define TLS_RX                 2       /* Set receive parameters */
+#define TLS_TX_ZEROCOPY_SENDFILE       3       /* transmit zerocopy sendfile */
 
 /* Supported versions */
 #define TLS_VERSION_MINOR(ver) ((ver) & 0xFF)
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5653e3b011bf..0a0fc29225a2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1353,6 +1353,15 @@ static int proc_fib_multipath_hash_policy(struct 
ctl_table *table, int write,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ONE
        },
+#ifdef CONFIG_TLS_DEVICE
+       {
+               .procname       = "tls_zerocopy_sendfile",
+               .data           = &init_net.ipv4.sysctl_tls_zerocopy_sendfile,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
+#endif
        { }
 };
 
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 18fa6067bb7f..092b20428c15 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -413,7 +413,8 @@ static int tls_device_copy_data(void *addr, size_t bytes, 
struct iov_iter *i)
 static int tls_push_data(struct sock *sk,
                         struct iov_iter *msg_iter,
                         size_t size, int flags,
-                        unsigned char record_type)
+                        unsigned char record_type,
+                        struct page *zc_page)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
        struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -482,11 +483,21 @@ static int tls_push_data(struct sock *sk,
                copy = min_t(size_t, size, (pfrag->size - pfrag->offset));
                copy = min_t(size_t, copy, (max_open_record_len - record->len));
 
-               rc = tls_device_copy_data(page_address(pfrag->page) +
-                                         pfrag->offset, copy, msg_iter);
-               if (rc)
-                       goto handle_error;
-               tls_append_frag(record, pfrag, copy);
+               if (!zc_page) {
+                       rc = tls_device_copy_data(page_address(pfrag->page) +
+                                                 pfrag->offset, copy, 
msg_iter);
+                       if (rc)
+                               goto handle_error;
+                       tls_append_frag(record, pfrag, copy);
+               } else {
+                       struct page_frag _pfrag;
+
+                       copy = min_t(size_t, size, (max_open_record_len - 
record->len));
+                       _pfrag.page = zc_page;
+                       _pfrag.offset = 0;
+                       _pfrag.size = copy;
+                       tls_append_frag(record, &_pfrag, copy);
+               }
 
                size -= copy;
                if (!size) {
@@ -548,7 +559,7 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, 
size_t size)
        }
 
        rc = tls_push_data(sk, &msg->msg_iter, size,
-                          msg->msg_flags, record_type);
+                          msg->msg_flags, record_type, NULL);
 
 out:
        release_sock(sk);
@@ -560,9 +571,10 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
                        int offset, size_t size, int flags)
 {
        struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
        struct iov_iter msg_iter;
-       char *kaddr = kmap(page);
        struct kvec iov;
+       char *kaddr;
        int rc;
 
        if (flags & MSG_SENDPAGE_NOTLAST)
@@ -576,11 +588,18 @@ int tls_device_sendpage(struct sock *sk, struct page 
*page,
                goto out;
        }
 
+       if (ctx->zerocopy_sendpage) {
+               rc = tls_push_data(sk, &msg_iter, size,
+                                  flags, TLS_RECORD_TYPE_DATA, page);
+               goto out;
+       }
+
+       kaddr = kmap(page);
        iov.iov_base = kaddr + offset;
        iov.iov_len = size;
        iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
        rc = tls_push_data(sk, &msg_iter, size,
-                          flags, TLS_RECORD_TYPE_DATA);
+                          flags, TLS_RECORD_TYPE_DATA, NULL);
        kunmap(page);
 
 out:
@@ -654,7 +673,7 @@ static int tls_device_push_pending_record(struct sock *sk, 
int flags)
        struct iov_iter msg_iter;
 
        iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
-       return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
+       return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA, 
NULL);
 }
 
 void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index ec10041c6b7d..b95437c91339 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -422,6 +422,27 @@ static int do_tls_getsockopt_tx(struct sock *sk, char 
__user *optval,
        return rc;
 }
 
+static int do_tls_getsockopt_tx_zc(struct sock *sk, char __user *optval,
+                                  int __user *optlen)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_offload_context_tx *ctx;
+       int len;
+
+       if (get_user(len, optlen))
+               return -EFAULT;
+
+       len = min_t(unsigned int, len, sizeof(int));
+       if (len < 0)
+               return -EINVAL;
+
+       if (!tls_ctx || tls_ctx->tx_conf != TLS_HW)
+               return -EBUSY;
+
+       ctx = tls_offload_ctx_tx(tls_ctx);
+       return ctx->zerocopy_sendpage;
+}
+
 static int do_tls_getsockopt(struct sock *sk, int optname,
                             char __user *optval, int __user *optlen)
 {
@@ -431,6 +452,9 @@ static int do_tls_getsockopt(struct sock *sk, int optname,
        case TLS_TX:
                rc = do_tls_getsockopt_tx(sk, optval, optlen);
                break;
+       case TLS_TX_ZEROCOPY_SENDFILE:
+               rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
+               break;
        default:
                rc = -ENOPROTOOPT;
                break;
@@ -450,6 +474,15 @@ static int tls_getsockopt(struct sock *sk, int level, int 
optname,
        return do_tls_getsockopt(sk, optname, optval, optlen);
 }
 
+static void tls_set_tx_zerocopy_sendfile(struct tls_context *tls_ctx,
+                                        int val)
+{
+       struct tls_offload_context_tx *ctx;
+
+       ctx = tls_offload_ctx_tx(tls_ctx);
+       ctx->zerocopy_sendpage = val;
+}
+
 static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
                                  unsigned int optlen, int tx)
 {
@@ -533,8 +566,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, char 
__user *optval,
                rc = tls_set_device_offload(sk, ctx);
                conf = TLS_HW;
                if (!rc) {
+                       int zc = 
sock_net(sk)->ipv4.sysctl_tls_zerocopy_sendfile;
+
                        TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE);
                        TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE);
+                       tls_set_tx_zerocopy_sendfile(ctx, zc);
                } else {
                        rc = tls_set_sw_offload(sk, ctx, 1);
                        if (rc)
@@ -579,6 +615,25 @@ static int do_tls_setsockopt_conf(struct sock *sk, char 
__user *optval,
        return rc;
 }
 
+static int do_tls_setsockopt_tx_zc(struct sock *sk, char __user *optval,
+                                  unsigned int optlen)
+{
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       int val;
+
+       if (!tls_ctx || tls_ctx->tx_conf != TLS_HW)
+               return -EINVAL;
+
+       if (optlen < sizeof(int))
+               return -EINVAL;
+
+       if (get_user(val, (int __user *)optval))
+               return -EFAULT;
+
+       tls_set_tx_zerocopy_sendfile(tls_ctx, val);
+       return 0;
+}
+
 static int do_tls_setsockopt(struct sock *sk, int optname,
                             char __user *optval, unsigned int optlen)
 {
@@ -592,6 +647,11 @@ static int do_tls_setsockopt(struct sock *sk, int optname,
                                            optname == TLS_TX);
                release_sock(sk);
                break;
+       case TLS_TX_ZEROCOPY_SENDFILE:
+               lock_sock(sk);
+               rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
+               release_sock(sk);
+               break;
        default:
                rc = -ENOPROTOOPT;
                break;
-- 
1.8.3.1

Reply via email to