From: Paolo Abeni <pab...@redhat.com> Place receive window tuning in the recvmsg path. This makes sure the size is only increased when userspace consumes data.
Previously we would grow the sk receive buffer towards tcp_rmem[2]; now we do so only if userspace reads data. Simply adjust the msk rcvbuf size to the largest receive buffer of any of the existing subflows. Signed-off-by: Paolo Abeni <pab...@redhat.com> Signed-off-by: Florian Westphal <f...@strlen.de> --- This patch is new in v2. net/mptcp/protocol.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index dbb86cbb9e77..89a35c3fc499 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -190,13 +190,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, return false; } - if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { - int rcvbuf = max(ssk->sk_rcvbuf, sk->sk_rcvbuf); - - if (rcvbuf > sk->sk_rcvbuf) - sk->sk_rcvbuf = rcvbuf; - } - tp = tcp_sk(ssk); do { u32 map_remaining, offset; @@ -933,6 +926,25 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk) return moved > 0; } +static void mptcp_rcv_space_adjust(struct mptcp_sock *msk) +{ + const struct mptcp_subflow_context *subflow; + struct sock *sk = (struct sock *)msk; + const struct sock *ssk; + int rcvbuf = 0; + + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + return; + + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + rcvbuf = max(ssk->sk_rcvbuf, rcvbuf); + } + + if (rcvbuf) + sk->sk_rcvbuf = rcvbuf; +} + static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -962,6 +974,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); __mptcp_flush_join_list(msk); + mptcp_rcv_space_adjust(msk); + while (len > (size_t)copied) { int bytes_read; @@ -975,8 +989,10 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; if (skb_queue_empty(&sk->sk_receive_queue) && - __mptcp_move_skbs(msk)) + 
__mptcp_move_skbs(msk)) { + mptcp_rcv_space_adjust(msk); continue; + } /* only the master socket status is relevant here. The exit * conditions mirror closely tcp_recvmsg() -- 2.26.2