On Wed, Aug 21, 2019 at 11:46:25PM +0200, Sabrina Dubroca wrote:
> TCP encapsulation of IKE and IPsec messages (RFC 8229) is implemented
> as a TCP ULP, overriding in particular the sendmsg and recvmsg
> operations. A Stream Parser is used to extract messages out of the TCP
> stream using the first 2 bytes as length marker. Received IKE messages
> are put on "ike_queue", waiting to be dequeued by the custom recvmsg
> implementation. Received ESP messages are sent to XFRM, like with UDP
> encapsulation.
> 
> Some of this code is taken from the original submission by Herbert
> Xu. Currently, only IPv4 is supported, like for UDP encapsulation.
> 
> Co-developed-by: Herbert Xu <herb...@gondor.apana.org.au>
> Signed-off-by: Herbert Xu <herb...@gondor.apana.org.au>
> Signed-off-by: Sabrina Dubroca <s...@queasysnail.net>
> ---
>  include/net/espintcp.h   |  38 +++
>  include/net/xfrm.h       |   1 +
>  include/uapi/linux/udp.h |   1 +
>  net/ipv4/esp4.c          | 189 ++++++++++++++-
>  net/xfrm/Kconfig         |   9 +
>  net/xfrm/Makefile        |   1 +
>  net/xfrm/espintcp.c      | 505 +++++++++++++++++++++++++++++++++++++++
>  net/xfrm/xfrm_policy.c   |   7 +
>  net/xfrm/xfrm_state.c    |   3 +
>  9 files changed, 751 insertions(+), 3 deletions(-)
>  create mode 100644 include/net/espintcp.h
>  create mode 100644 net/xfrm/espintcp.c
> 

...

> +static struct sock *esp_find_tcp_sk(struct xfrm_state *x)
> +{
> +     struct xfrm_encap_tmpl *encap = x->encap;
> +     struct esp_tcp_sk *esk;
> +     __be16 sport, dport;
> +     struct sock *nsk;
> +     struct sock *sk;
> +
> +     sk = rcu_dereference(x->encap_sk);
> +     if (sk && sk->sk_state == TCP_ESTABLISHED)
> +             return sk;
> +
> +     spin_lock_bh(&x->lock);
> +     sport = encap->encap_sport;
> +     dport = encap->encap_dport;
> +     nsk = rcu_dereference_protected(x->encap_sk,
> +                                     lockdep_is_held(&x->lock));
> +     if (sk && sk == nsk) {
> +             esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
> +             if (!esk) {
> +                     spin_unlock_bh(&x->lock);
> +                     return ERR_PTR(-ENOMEM);
> +             }
> +             RCU_INIT_POINTER(x->encap_sk, NULL);
> +             esk->sk = sk;
> +             call_rcu(&esk->rcu, esp_free_tcp_sk);

I don't understand this, can you please explain what you are doing here?


> +     }
> +     spin_unlock_bh(&x->lock);
> +
> +     sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4,
> +                                  dport, x->props.saddr.a4, sport, 0);
> +     if (!sk)
> +             return ERR_PTR(-ENOENT);
> +
> +     if (!tcp_is_ulp_esp(sk)) {
> +             sock_put(sk);
> +             return ERR_PTR(-EINVAL);
> +     }
> +
> +     spin_lock_bh(&x->lock);
> +     nsk = rcu_dereference_protected(x->encap_sk,
> +                                     lockdep_is_held(&x->lock));
> +     if (encap->encap_sport != sport ||
> +         encap->encap_dport != dport) {
> +             sock_put(sk);
> +             sk = nsk ?: ERR_PTR(-EREMCHG);
> +     } else if (sk == nsk) {
> +             sock_put(sk);
> +     } else {
> +             rcu_assign_pointer(x->encap_sk, sk);
> +     }
> +     spin_unlock_bh(&x->lock);
> +
> +     return sk;
> +}
> +
> +static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
> +{
> +     struct sock *sk;
> +     int err;
> +
> +     rcu_read_lock();
> +
> +     sk = esp_find_tcp_sk(x);
> +     err = PTR_ERR(sk);
> +     if (IS_ERR(sk))

Maybe better 'if (err)'?

> +             goto out;
> +
> +     bh_lock_sock(sk);
> +     if (sock_owned_by_user(sk)) {
> +             err = espintcp_queue_out(sk, skb);
> +             if (err < 0)
> +                     goto unlock_sock;

This goto is not needed.

> +     } else {
> +             err = espintcp_push_skb(sk, skb);
> +     }
> +
> +unlock_sock:
> +     bh_unlock_sock(sk);
> +out:
> +     rcu_read_unlock();
> +     return err;
> +}
> +
> +static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
> +                                struct sk_buff *skb)
> +{
> +     struct dst_entry *dst = skb_dst(skb);
> +     struct xfrm_state *x = dst->xfrm;
> +
> +     return esp_output_tcp_finish(x, skb);
> +}
> +
> +static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
> +{
> +     int err;
> +
> +     local_bh_disable();
> +     err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
> +     local_bh_enable();
> +
> +     /* EINPROGRESS just happens to do the right thing.  It
> +      * actually means that the skb has been consumed and
> +      * isn't coming back.
> +      */
> +     return err ?: -EINPROGRESS;
> +}
> +#endif
> +
>  static void esp_output_done(struct crypto_async_request *base, int err)
>  {
>       struct sk_buff *skb = base->data;
> @@ -147,7 +272,13 @@ static void esp_output_done(struct crypto_async_request 
> *base, int err)
>               secpath_reset(skb);
>               xfrm_dev_resume(skb);
>       } else {
> -             xfrm_output_resume(skb, err);
> +#ifdef CONFIG_XFRM_ESPINTCP

Do we really need all these ifdef? I guess most of them could
be avoided with some code refactorization.

> +             if (!err &&
> +                 x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
> +                     esp_output_tail_tcp(x, skb);
> +             else
> +#endif
> +                     xfrm_output_resume(skb, err);
>       }
>  }

...

> @@ -296,7 +460,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff 
> *skb, struct esp_info *
>       struct sk_buff *trailer;
>       int tailen = esp->tailen;
>  
> -     /* this is non-NULL only with UDP Encapsulation */
> +     /* this is non-NULL only with TCP/UDP Encapsulation */
>       if (x->encap) {
>               int err = esp_output_encap(x, skb, esp);
>  
> @@ -491,6 +655,11 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff 
> *skb, struct esp_info *
>       if (sg != dsg)
>               esp_ssg_unref(x, tmp);
>  
> +#ifdef CONFIG_XFRM_ESPINTCP
> +     if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
> +             err = esp_output_tail_tcp(x, skb);
> +#endif
> +
>  error_free:
>       kfree(tmp);
>  error:
> @@ -617,10 +786,16 @@ int esp_input_done2(struct sk_buff *skb, int err)
>  
>       if (x->encap) {
>               struct xfrm_encap_tmpl *encap = x->encap;
> +             struct tcphdr *th = (void *)(skb_network_header(skb) + ihl);

This gives a unused variable warning if CONFIG_XFRM_ESPINTCP
is not set.

>               struct udphdr *uh = (void *)(skb_network_header(skb) + ihl);
>               __be16 source;
>  
>               switch (x->encap->encap_type) {
> +#ifdef CONFIG_XFRM_ESPINTCP
> +             case TCP_ENCAP_ESPINTCP:
> +                     source = th->source;
> +                     break;
> +#endif
>               case UDP_ENCAP_ESPINUDP:
>               case UDP_ENCAP_ESPINUDP_NON_IKE:
>                       source = uh->source;
> @@ -1017,6 +1192,14 @@ static int esp_init_state(struct xfrm_state *x)
>               case UDP_ENCAP_ESPINUDP_NON_IKE:
>                       x->props.header_len += sizeof(struct udphdr) + 2 * 
> sizeof(u32);
>                       break;
> +#ifdef CONFIG_XFRM_ESPINTCP
> +             case TCP_ENCAP_ESPINTCP:
> +                     /* only the length field, TCP encap is done by
> +                      * the socket
> +                      */
> +                     x->props.header_len += 2;
> +                     break;
> +#endif
>               }
>       }
>  
> diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
> index c967fc3c38c8..ccc012b3ea10 100644
> --- a/net/xfrm/Kconfig
> +++ b/net/xfrm/Kconfig
> @@ -71,6 +71,15 @@ config XFRM_IPCOMP
>       select CRYPTO
>       select CRYPTO_DEFLATE
>  
> +config XFRM_ESPINTCP
> +     bool "ESP in TCP encapsulation (RFC 8229)"
> +     depends on XFRM && INET_ESP
> +     select STREAM_PARSER

I'm getting these compile errors:

espintcp.o: In function `espintcp_close':
/home/klassert/git/linux-stk/net/xfrm/espintcp.c:469: undefined reference to 
`sk_msg_free'
net/xfrm/espintcp.o: In function `espintcp_sendmsg':
/home/klassert/git/linux-stk/net/xfrm/espintcp.c:302: undefined reference to 
`sk_msg_alloc'
/home/klassert/git/linux-stk/net/xfrm/espintcp.c:316: undefined reference to 
`sk_msg_memcopy_from_iter'
/home/klassert/git/linux-stk/net/xfrm/espintcp.c:341: undefined reference to 
`sk_msg_free'
/home/klassert/git/linux-stk/net/xfrm/espintcp.c:321: undefined reference to 
`sk_msg_memcopy_from_iter'
/home/klassert/git/linux-stk/Makefile:1067: recipe for target 'vmlinux' failed
make[1]: *** [vmlinux] Error 1

I guess you need to select NET_SOCK_MSG.

Btw. I've updated the ipsec-next tree, so patch 1 is not needed anymore.

Everything else looks good!

Reply via email to