On 11/13/20 8:09 PM, Ricardo Dias wrote:
> When the TCP stack is in SYN flood mode, the server child socket is
> created from the SYN cookie received in a TCP packet with the ACK flag
> set.
> 
> The child socket is created when the server receives the first TCP
> packet with a valid SYN cookie from the client. Usually, this packet
> corresponds to the final step of the TCP 3-way handshake, the ACK
> packet. But is also possible to receive a valid SYN cookie from the
> first TCP data packet sent by the client, and thus create a child socket
> from that SYN cookie.
> 
>


...

> Signed-off-by: Ricardo Dias <rd...@singlestore.com>
> ---



> +#endif
>  #include <net/secure_seq.h>
>  #include <net/ip.h>
>  #include <net/tcp.h>
> @@ -510,17 +513,27 @@ static u32 inet_sk_port_offset(const struct sock *sk)
>                                         inet->inet_dport);
>  }
>  
> -/* insert a socket into ehash, and eventually remove another one
> - * (The another one can be a SYN_RECV or TIMEWAIT
> +/* Insert a socket into ehash, and eventually remove another one
> + * (The another one can be a SYN_RECV or TIMEWAIT)
> + * If an existing socket already exists, it returns that socket
> + * through the esk parameter.
>   */
> -bool inet_ehash_insert(struct sock *sk, struct sock *osk)
> +bool inet_ehash_insert(struct sock *sk, struct sock *osk, struct sock **esk)
>  {
>       struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
>       struct hlist_nulls_head *list;
>       struct inet_ehash_bucket *head;
> -     spinlock_t *lock;
> +     const struct hlist_nulls_node *node;
> +     struct sock *_esk;

> +     spinlock_t *lock; /* protects hashinfo socket entry */
         Please do not add this comment, I find it confusing, and breaking 
reverse tree.

> +     struct net *net = sock_net(sk);
> +     const int dif = sk->sk_bound_dev_if;
> +     const int sdif = sk->sk_bound_dev_if;
>       bool ret = true;
>  
> +     INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
> +     const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
> +
>       WARN_ON_ONCE(!sk_unhashed(sk));
>  
>       sk->sk_hash = sk_ehashfn(sk);
> @@ -532,16 +545,49 @@ bool inet_ehash_insert(struct sock *sk, struct sock 
> *osk)
>       if (osk) {
>               WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
>               ret = sk_nulls_del_node_init_rcu(osk);
> +     } else if (esk) {

You could move here all the needed new variables, and also
     the INET_ADDR_COOKIE(...), const __portpair ports = ...;

This would allow to keep a nice reverse tree order, and keep scope small.

                const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, 
sk->sk_num);
                INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
                const int sdif = sk->sk_bound_dev_if;
                const int dif = sk->sk_bound_dev_if;
                const struct hlist_nulls_node *node;
                struct net *net = sock_net(sk);

Alternatively you could also move this code into a helper function.


               } else if (esk) {
                       *esk = inet_ehash_lookup_by_sk(sk, node);
               }

> +             sk_nulls_for_each_rcu(_esk, node, list) {
> +                     if (_esk->sk_hash != sk->sk_hash)
> +                             continue;
> +                     if (sk->sk_family == AF_INET) {
> +                             if (unlikely(INET_MATCH(_esk, net, acookie,
> +                                                     sk->sk_daddr,
> +                                                     sk->sk_rcv_saddr,
> +                                                     ports, dif, sdif))) {
> +                                     refcount_inc(&_esk->sk_refcnt);
> +                                     goto found;
> +                             }
> +                     }
> +#if IS_ENABLED(CONFIG_IPV6)
> +                     else if (sk->sk_family == AF_INET6) {
> +                             if (unlikely(INET6_MATCH(_esk, net,
> +                                                      &sk->sk_v6_daddr,
> +                                                      &sk->sk_v6_rcv_saddr,
> +                                                      ports, dif, sdif))) {
> +                                     refcount_inc(&_esk->sk_refcnt);
> +                                     goto found;
> +                             }
> +                     }
> +#endif
> +             }
> +
>

Reply via email to