From: Li Xiasong <[email protected]>

When TCP option space is insufficient (e.g., when sending ADD_ADDR with an
IPv6 address and port while tcp_timestamps is enabled), the original code
jumped to out_unlock without clearing the addr_signal flag. This caused
mptcp_pm_add_timer to keep rescheduling indefinitely, not sending ADD_ADDR,
preventing subsequent addresses in the endpoint list from being announced.

Handle this case by clearing the ADD_ADDR signal and skipping the matching
ADD_ADDR retransmission entry. The skip path cancels the matching timer
(with id check) and advances PM state progression, preserving forward
progress to subsequent PM work.

This cancellation is inherently best-effort. A concurrent add_timer
callback may already be running and may acquire pm.lock before the
cancel path updates entry state. In that case, one final ADD_ADDR
transmit attempt can still be executed.

Once the cancel path sets entry->retrans_times to ADD_ADDR_RETRANS_MAX,
the callback-side retrans_times check suppresses further ADD_ADDR
retransmissions.

Note that when an ADD_ADDR is being prepared, a pure-ACK is queued. On
the output side, it means that it is fine to skip non-pure-ACK packets,
when drop_other_suboptions is set: a pure-ACK will be processed soon
after.

Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
Cc: [email protected]
Signed-off-by: Li Xiasong <[email protected]>
Reviewed-by: Matthieu Baerts (NGI0) <[email protected]>
Signed-off-by: Matthieu Baerts (NGI0) <[email protected]>
---
v2: add a note for sashiko-nipa's: it's a false positive.
---
 net/mptcp/pm.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 3c152bf66cd5..3e770c7407e1 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -364,7 +364,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 
        spin_lock_bh(&msk->pm.lock);
 
-       if (!mptcp_pm_should_add_signal_addr(msk)) {
+       /* The cancel path (mptcp_pm_del_add_timer()) can race with this
+        * callback. Once cancel updates retrans_times to MAX, suppress further
+        * retransmissions here. If this callback acquires pm.lock first, one
+        * final transmit attempt is still possible.
+        */
+       if (entry->retrans_times < ADD_ADDR_RETRANS_MAX &&
+           !mptcp_pm_should_add_signal_addr(msk)) {
                pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id);
                mptcp_pm_announce_addr(msk, &entry->addr, false);
                mptcp_pm_add_addr_send_ack(msk);
@@ -414,8 +420,12 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
        /* Note: entry might have been removed by another thread.
         * We hold rcu_read_lock() to ensure it is not freed under us.
         */
-       if (stop_timer)
-               sk_stop_timer_sync(sk, &entry->add_timer);
+       if (stop_timer) {
+               if (check_id)
+                       sk_stop_timer(sk, &entry->add_timer);
+               else
+                       sk_stop_timer_sync(sk, &entry->add_timer);
+       }
 
        rcu_read_unlock();
        return entry;
@@ -882,6 +892,7 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const 
struct sk_buff *skb,
                              struct mptcp_addr_info *addr, bool *echo,
                              bool *drop_other_suboptions)
 {
+       bool skip_add_addr = false;
        int ret = false;
        u8 add_addr;
        u8 family;
@@ -903,24 +914,49 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, 
const struct sk_buff *skb,
        }
 
        *echo = mptcp_pm_should_add_signal_echo(msk);
-       port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);
-
-       family = *echo ? msk->pm.remote.family : msk->pm.local.family;
-       if (remaining < mptcp_add_addr_len(family, *echo, port))
-               goto out_unlock;
-
        if (*echo) {
                *addr = msk->pm.remote;
                add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
+               port = !!msk->pm.remote.port;
+               family = msk->pm.remote.family;
        } else {
                *addr = msk->pm.local;
                add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
+               port = !!msk->pm.local.port;
+               family = msk->pm.local.family;
        }
-       WRITE_ONCE(msk->pm.addr_signal, add_addr);
+
+       if (remaining < mptcp_add_addr_len(family, *echo, port)) {
+               struct net *net = sock_net((struct sock *)msk);
+
+               if (!*drop_other_suboptions)
+                       goto out_unlock;
+
+               if (*echo) {
+                       MPTCP_INC_STATS(net, MPTCP_MIB_ECHOADDTXDROP);
+               } else {
+                       skip_add_addr = true;
+                       MPTCP_INC_STATS(net, MPTCP_MIB_ADDADDRTXDROP);
+               }
+               goto drop_signal_mark;
+       }
+
        ret = true;
 
+drop_signal_mark:
+       WRITE_ONCE(msk->pm.addr_signal, add_addr);
+
 out_unlock:
        spin_unlock_bh(&msk->pm.lock);
+
+       /* On pure-ACK option-space exhaustion, stop retrying this ADD_ADDR:
+        * clear the signal bit, cancel the matching retransmission timer, and
+        * let the PM state machine progress.
+        */
+       if (skip_add_addr) {
+               mptcp_pm_del_add_timer(msk, addr, true);
+               mptcp_pm_subflow_established(msk);
+       }
        return ret;
 }
 

-- 
2.53.0


Reply via email to