In a later patch, the bpf prog only wants to be called to handle
a header option if that particular header option cannot be handled by
the kernel.  This unknown option could be written by the peer's bpf-prog.
It could also be a new standard option that the running kernel does not
support but that a bpf-prog can handle.

In a later patch, the bpf prog will be called from tcp_validate_incoming()
if there is an unknown option and a flag is set in tp->bpf_sock_ops_cb_flags.

Instead of using skb->cb[] as was done in an earlier attempt, this patch
adds an optional arg "bool *unknown_opt" to tcp_parse_options().
The bool will be set to true if tcp_parse_options() has encountered
an option that the kernel does not recognize.

Signed-off-by: Martin KaFai Lau <ka...@fb.com>
---
 drivers/infiniband/hw/cxgb4/cm.c |  2 +-
 include/net/tcp.h                |  3 ++-
 net/ipv4/syncookies.c            |  2 +-
 net/ipv4/tcp_input.c             | 40 +++++++++++++++++++++-----------
 net/ipv4/tcp_minisocks.c         |  4 ++--
 net/ipv6/syncookies.c            |  2 +-
 6 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 30e08bcc9afb..dedca6576bb9 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3949,7 +3949,7 @@ static void build_cpl_pass_accept_req(struct sk_buff 
*skb, int stid , u8 tos)
         */
        memset(&tmp_opt, 0, sizeof(tmp_opt));
        tcp_clear_options(&tmp_opt);
-       tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
+       tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL);
 
        req = __skb_push(skb, sizeof(*req));
        memset(req, 0, sizeof(*req));
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 895e7aabf136..d49d8f1c961a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -413,7 +413,8 @@ int tcp_mmap(struct file *file, struct socket *sock,
 #endif
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
                       struct tcp_options_received *opt_rx,
-                      int estab, struct tcp_fastopen_cookie *foc);
+                      int estab, struct tcp_fastopen_cookie *foc,
+                      bool *unknown_opt);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 /*
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 9a4f6b16c9bc..fd39aed4fcd3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -313,7 +313,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct 
sk_buff *skb)
 
        /* check for timestamp cookie support */
        memset(&tcp_opt, 0, sizeof(tcp_opt));
-       tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+       tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL);
 
        if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
                tsoff = secure_tcp_ts_off(sock_net(sk),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6c38ca9de17e..d9c878001be2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3799,7 +3799,7 @@ static void tcp_parse_fastopen_option(int len, const 
unsigned char *cookie,
        foc->exp = exp_opt;
 }
 
-static void smc_parse_options(const struct tcphdr *th,
+static bool smc_parse_options(const struct tcphdr *th,
                              struct tcp_options_received *opt_rx,
                              const unsigned char *ptr,
                              int opsize)
@@ -3808,10 +3808,13 @@ static void smc_parse_options(const struct tcphdr *th,
        if (static_branch_unlikely(&tcp_have_smc)) {
                if (th->syn && !(opsize & 1) &&
                    opsize >= TCPOLEN_EXP_SMC_BASE &&
-                   get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
+                   get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) {
                        opt_rx->smc_ok = 1;
+                       return true;
+               }
        }
 #endif
+       return false;
 }
 
 /* Try to parse the MSS option from the TCP header. Return 0 on failure, 
clamped
@@ -3864,7 +3867,8 @@ static u16 tcp_parse_mss_option(const struct tcphdr *th, 
u16 user_mss)
 void tcp_parse_options(const struct net *net,
                       const struct sk_buff *skb,
                       struct tcp_options_received *opt_rx, int estab,
-                      struct tcp_fastopen_cookie *foc)
+                      struct tcp_fastopen_cookie *foc,
+                      bool *unknown_opt)
 {
        const unsigned char *ptr;
        const struct tcphdr *th = tcp_hdr(skb);
@@ -3962,15 +3966,23 @@ void tcp_parse_options(const struct net *net,
                                 */
                                if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE &&
                                    get_unaligned_be16(ptr) ==
-                                   TCPOPT_FASTOPEN_MAGIC)
+                                   TCPOPT_FASTOPEN_MAGIC) {
                                        tcp_parse_fastopen_option(opsize -
                                                TCPOLEN_EXP_FASTOPEN_BASE,
                                                ptr + 2, th->syn, foc, true);
-                               else
-                                       smc_parse_options(th, opt_rx, ptr,
-                                                         opsize);
+                                       break;
+                               }
+
+                               if (smc_parse_options(th, opt_rx, ptr, opsize))
+                                       break;
+
+                               if (unknown_opt)
+                                       *unknown_opt = true;
                                break;
 
+                       default:
+                               if (unknown_opt)
+                                       *unknown_opt = true;
                        }
                        ptr += opsize-2;
                        length -= opsize;
@@ -4003,7 +4015,8 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock 
*tp, const struct tcphdr
  */
 static bool tcp_fast_parse_options(const struct net *net,
                                   const struct sk_buff *skb,
-                                  const struct tcphdr *th, struct tcp_sock *tp)
+                                  const struct tcphdr *th, struct tcp_sock *tp,
+                                  bool *unknown_opt)
 {
        /* In the spirit of fast parsing, compare doff directly to constant
         * values.  Because equality is used, short doff can be ignored here.
@@ -4017,7 +4030,7 @@ static bool tcp_fast_parse_options(const struct net *net,
                        return true;
        }
 
-       tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
+       tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL, unknown_opt);
        if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
                tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
@@ -5492,9 +5505,10 @@ static bool tcp_validate_incoming(struct sock *sk, 
struct sk_buff *skb,
 {
        struct tcp_sock *tp = tcp_sk(sk);
        bool rst_seq_match = false;
+       bool unknown_opt = false;
 
        /* RFC1323: H1. Apply PAWS check first. */
-       if (tcp_fast_parse_options(sock_net(sk), skb, th, tp) &&
+       if (tcp_fast_parse_options(sock_net(sk), skb, th, tp, &unknown_opt) &&
            tp->rx_opt.saw_tstamp &&
            tcp_paws_discard(sk, skb)) {
                if (!th->rst) {
@@ -5866,7 +5880,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, 
struct sk_buff *synack,
                /* Get original SYNACK MSS value if user MSS sets mss_clamp */
                tcp_clear_options(&opt);
                opt.user_mss = opt.mss_clamp = 0;
-               tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
+               tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL, NULL);
                mss = opt.mss_clamp;
        }
 
@@ -5951,7 +5965,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, 
struct sk_buff *skb,
        int saved_clamp = tp->rx_opt.mss_clamp;
        bool fastopen_fail;
 
-       tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
+       tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc, NULL);
        if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
                tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
@@ -6685,7 +6699,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        tmp_opt.mss_clamp = af_ops->mss_clamp;
        tmp_opt.user_mss  = tp->rx_opt.user_mss;
        tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
-                         want_cookie ? NULL : &foc);
+                         want_cookie ? NULL : &foc, NULL);
 
        if (want_cookie && !tmp_opt.saw_tstamp)
                tcp_clear_options(&tmp_opt);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 495dda2449fe..61f9194802c4 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -98,7 +98,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, 
struct sk_buff *skb,
 
        tmp_opt.saw_tstamp = 0;
        if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
-               tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
+               tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL, NULL);
 
                if (tmp_opt.saw_tstamp) {
                        if (tmp_opt.rcv_tsecr)
@@ -580,7 +580,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff 
*skb,
 
        tmp_opt.saw_tstamp = 0;
        if (th->doff > (sizeof(struct tcphdr)>>2)) {
-               tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+               tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL, NULL);
 
                if (tmp_opt.saw_tstamp) {
                        tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 13235a012388..f22961a73c2b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -157,7 +157,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct 
sk_buff *skb)
 
        /* check for timestamp cookie support */
        memset(&tcp_opt, 0, sizeof(tcp_opt));
-       tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+       tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, NULL);
 
        if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
                tsoff = secure_tcpv6_ts_off(sock_net(sk),
-- 
2.24.1

Reply via email to