This patch extends struct xfrm_type with an encap function pointer and implements esp4_gso_encap and esp6_gso_encap. These functions do the basic ESP encapsulation for a GSO packet. In case the GSO packet needs to be segmented in software, we add gso_segment functions. The secpath is extended to pass the needed information down the layers. This codepath is going to be used for ESP hardware offload.
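
For illustration, a rough sketch of how these hooks are meant to be used on the transmit side. The function esp_xmit_prepare() below is hypothetical and not part of this patch; it only shows the intended interplay of the new encap callback and the new secpath fields:

static void esp_xmit_prepare(struct xfrm_state *x, struct sk_buff *skb)
{
	/* Stash the output sequence number in the secpath so that the
	 * gso_segment fallback can number each software segment itself.
	 */
	skb->sp->seq.low = XFRM_SKB_CB(skb)->seq.output.low;
	skb->sp->seq.hi = XFRM_SKB_CB(skb)->seq.output.hi;

	/* Build the outer ESP header (SPI, sequence number) without
	 * encrypting anything; this calls esp4_gso_encap() or
	 * esp6_gso_encap() depending on the state's family.
	 */
	if (x->type->encap)
		x->type->encap(x, skb);
}

If the device turns out not to handle the packet, the stack falls back to esp{4,6}_gso_segment(), which segments in software and passes each segment through x->type->output() for encryption.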
Signed-off-by: Steffen Klassert <steffen.klass...@secunet.com>
---
 include/net/xfrm.h      |  14 ++++++-
 net/ipv4/esp4.c         |  29 +++++++++++--
 net/ipv4/esp4_offload.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/esp6.c         |  27 +++++++++++-
 net/ipv6/esp6_offload.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++
 net/xfrm/xfrm_replay.c  |   3 +-
 6 files changed, 280 insertions(+), 7 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 5016fa2..1af8489 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -372,6 +372,7 @@ struct xfrm_type {
 	int			(*init_state)(struct xfrm_state *x);
 	void			(*destructor)(struct xfrm_state *);
 	int			(*input)(struct xfrm_state *, struct sk_buff *skb);
+	void			(*encap)(struct xfrm_state *, struct sk_buff *pskb);
 	int			(*output)(struct xfrm_state *, struct sk_buff *pskb);
 	int			(*reject)(struct xfrm_state *, struct sk_buff *,
 					  const struct flowi *);
@@ -979,7 +980,18 @@ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 struct sec_path {
 	atomic_t		refcnt;
 	int			len;
-	struct xfrm_state	*xvec[XFRM_MAX_DEPTH];
+
+	/* Output sequence number for replay protection on offloading. */
+	struct {
+		__u32 low;
+		__u32 hi;
+	} seq;
+
+	struct xfrm_state	*xvec[XFRM_MAX_DEPTH];
+
+	__u8			proto;
+	__u8			flags;
+#define SKB_GSO_SEGMENT	1
 };
 
 static inline int secpath_exists(struct sk_buff *skb)
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index cb1c10b..f61ba3c2 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -182,6 +182,22 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
 	esp_output_done(base, err);
 }
 
+static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ip_esp_hdr *esph;
+	struct iphdr *iph = ip_hdr(skb);
+	int proto = iph->protocol;
+
+	skb_push(skb, -skb_network_offset(skb));
+	esph = ip_esp_hdr(skb);
+	*skb_mac_header(skb) = IPPROTO_ESP;
+
+	esph->spi = x->id.spi;
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+	skb->sp->proto = proto;
+}
+
 static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct esp_output_extra *extra;
@@ -207,7 +223,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	int extralen;
 	int tailen;
 	__be64 seqno;
-	__u8 proto = *skb_mac_header(skb);
+	__u8 proto;
 
 	/* skb is pure payload to encrypt */
 
@@ -231,12 +247,18 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
 	assoclen = sizeof(*esph);
 	extralen = 0;
 
+	if (skb->sp && (skb->sp->flags & SKB_GSO_SEGMENT)) {
+		proto = skb->sp->proto;
+	} else {
+		proto = *skb_mac_header(skb);
+		*skb_mac_header(skb) = IPPROTO_ESP;
+	}
+
 	if (x->props.flags & XFRM_STATE_ESN) {
 		extralen += sizeof(*extra);
 		assoclen += sizeof(__be32);
 	}
 
-	*skb_mac_header(skb) = IPPROTO_ESP;
 	esph = ip_esp_hdr(skb);
 
 	/* this is non-NULL only with UDP Encapsulation */
@@ -942,7 +964,8 @@ static const struct xfrm_type esp_type =
 	.destructor	= esp_destroy,
 	.get_mtu	= esp4_get_mtu,
 	.input		= esp_input,
-	.output		= esp_output
+	.output		= esp_output,
+	.encap		= esp4_gso_encap,
 };
 
 static struct xfrm4_protocol esp4_protocol = {
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 7277d15..d0831a8 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -71,10 +71,118 @@ static int esp4_gro_complete(struct sk_buff *skb, int nhoff)
 	return err;
 }
 
+static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
+					netdev_features_t features)
+{
+	struct ip_esp_hdr *esph;
+	struct sk_buff *skb2;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct xfrm_state *x;
+	struct sec_path *sp;
+	struct crypto_aead *aead;
+	int err = 0;
+	const struct net_offload *ops;
+	int omaclen;
+	__u32 seq;
+	__u32 seqhi;
+
+	sp = skb->sp;
+	if (!sp || !sp->len)
+		goto out;
+
+	seq = sp->seq.low;
+	seqhi = sp->seq.hi;
+
+	x = sp->xvec[sp->len - 1];
+	aead = x->data;
+	esph = ip_esp_hdr(skb);
+
+	omaclen = skb->mac_len;
+	if (esph->spi != x->id.spi)
+		goto out;
+
+	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
+		goto out;
+
+	__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
+
+	skb->encap_hdr_csum = 1;
+
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
+		__skb_push(skb, skb->mac_len);
+		segs = skb_mac_gso_segment(skb, features);
+	} else {
+		skb->transport_header += x->props.header_len;
+		ops = rcu_dereference(inet_offloads[sp->proto]);
+		if (likely(ops && ops->callbacks.gso_segment))
+			segs = ops->callbacks.gso_segment(skb, features);
+	}
+
+	if (IS_ERR(segs))
+		goto out;
+	if (segs == NULL)
+		return ERR_PTR(-EINVAL);
+	__skb_pull(skb, skb->data - skb_mac_header(skb));
+
+	skb2 = segs;
+	do {
+		struct sk_buff *nskb = skb2->next;
+
+		if (x->props.mode == XFRM_MODE_TUNNEL) {
+			skb2->network_header = skb2->network_header - x->props.header_len;
+			skb2->transport_header = skb2->network_header + sizeof(struct iphdr);
+			skb_reset_mac_len(skb2);
+			skb_pull(skb2, skb2->mac_len + x->props.header_len);
+		} else {
+			/* skb2 mac and data are pointing at the start of
+			 * the mac header. Pull data forward to point to the
+			 * IP payload past the ESP header (i.e., the transport
+			 * data that needs to be encrypted).
+			 * When IPsec transport mode is stacked with a tunnel,
+			 * skb2->data needs to point at the inner IP header
+			 * for tunnelled packets. After ->gso_segment, skb2
+			 * will have the network/ip header pointing at the
+			 * inner IP header, and the transport_header will be
+			 * pointing at the inner IP payload. Thus we need to
+			 * use omaclen and the outer iphdr length to make
+			 * sure that the pointers are set up correctly in
+			 * every case.
+			 */
+			struct iphdr *oiph =
+				(struct iphdr *)(skb2->data + omaclen);
+			int ihl = oiph->ihl * 4;
+
+			__skb_pull(skb2, omaclen + ihl + x->props.header_len);
+
+			/* move ->transport_header to point to esp header */
+			skb_reset_transport_header(skb2);
+			skb2->transport_header -= x->props.header_len;
+		}
+
+		skb2->sp->flags |= SKB_GSO_SEGMENT;
+		skb2->sp->seq.low = seq;
+		skb2->sp->seq.hi = xfrm_replay_seqhi(x, ntohl(seq));
+
+		err = x->type->output(x, skb2);
+		if (err) {
+			kfree_skb_list(segs);
+			return ERR_PTR(err);
+		}
+
+		seq++;
+
+		skb_push(skb2, skb2->mac_len);
+		skb2 = nskb;
+	} while (skb2);
+
+out:
+	return segs;
+}
+
 static const struct net_offload esp4_offload = {
 	.callbacks = {
 		.gro_receive = esp4_gro_receive,
 		.gro_complete = esp4_gro_complete,
+		.gso_segment = esp4_gso_segment,
 	},
 };
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2280bc6..9bcb32b 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -198,6 +198,22 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
 	esp_output_done(base, err);
 }
 
+static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct ip_esp_hdr *esph;
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+	int proto = iph->nexthdr;
+
+	skb_push(skb, -skb_network_offset(skb));
+	esph = ip_esp_hdr(skb);
+	*skb_mac_header(skb) = IPPROTO_ESP;
+
+	esph->spi = x->id.spi;
+	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+
+	skb->sp->proto = proto;
+}
+
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
@@ -223,7 +239,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	u8 *vaddr;
 	__be32 *seqhi;
 	__be64 seqno;
-	__u8 proto = *skb_mac_header(skb);
+	__u8 proto;
 
 	/* skb is pure payload to encrypt */
 	aead = x->data;
@@ -247,12 +263,18 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	assoclen = sizeof(*esph);
 	seqhilen = 0;
 
+	if (skb->sp && (skb->sp->flags & SKB_GSO_SEGMENT)) {
+		proto = skb->sp->proto;
+	} else {
+		proto = *skb_mac_header(skb);
+		*skb_mac_header(skb) = IPPROTO_ESP;
+	}
+
 	if (x->props.flags & XFRM_STATE_ESN) {
 		seqhilen += sizeof(__be32);
 		assoclen += seqhilen;
 	}
 
-	*skb_mac_header(skb) = IPPROTO_ESP;
 	esph = ip_esp_hdr(skb);
 
 	if (!skb_cloned(skb)) {
@@ -871,6 +893,7 @@ static const struct xfrm_type esp6_type = {
 	.get_mtu	= esp6_get_mtu,
 	.input		= esp6_input,
 	.output		= esp6_output,
+	.encap		= esp6_gso_encap,
 	.hdr_offset	= xfrm6_find_1stfragopt,
 };
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index e0006cf..51efab0 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -75,10 +75,116 @@ static int esp6_gro_complete(struct sk_buff *skb, int nhoff)
 	return err;
 }
 
+static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
+					netdev_features_t features)
+{
+	struct ip_esp_hdr *esph;
+	struct sk_buff *skb2;
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	struct xfrm_state *x;
+	struct sec_path *sp;
+	struct crypto_aead *aead;
+	int err = 0;
+	const struct net_offload *ops;
+	int omaclen;
+	__u32 seq;
+	__u32 seqhi;
+
+	sp = skb->sp;
+	if (!sp || !sp->len)
+		goto out;
+
+	seq = sp->seq.low;
+	seqhi = sp->seq.hi;
+
+	x = sp->xvec[sp->len - 1];
+	aead = x->data;
+	esph = ip_esp_hdr(skb);
+
+	omaclen = skb->mac_len;
+	if (esph->spi != x->id.spi)
+		goto out;
+
+	if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
+		goto out;
+
+	__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
+
+	skb->encap_hdr_csum = 1;
+
+	if (x->props.mode == XFRM_MODE_TUNNEL) {
+		__skb_push(skb, skb->mac_len);
+		segs = skb_mac_gso_segment(skb, features);
+	} else {
+		skb->transport_header += x->props.header_len;
+		ops = rcu_dereference(inet6_offloads[sp->proto]);
+		if (likely(ops && ops->callbacks.gso_segment))
+			segs = ops->callbacks.gso_segment(skb, features);
+	}
+
+	if (IS_ERR(segs))
+		goto out;
+	if (segs == NULL)
+		return ERR_PTR(-EINVAL);
+	__skb_pull(skb, skb->data - skb_mac_header(skb));
+
+	skb2 = segs;
+	do {
+		struct sk_buff *nskb = skb2->next;
+
+		if (x->props.mode == XFRM_MODE_TUNNEL) {
+			skb2->network_header = skb2->network_header - x->props.header_len;
+			skb2->transport_header = skb2->network_header + sizeof(struct ipv6hdr);
+			skb_reset_mac_len(skb2);
+			skb_pull(skb2, skb2->mac_len + x->props.header_len);
+		} else {
+			/* skb2 mac and data are pointing at the start of
+			 * the mac header. Pull data forward to point to the
+			 * IP payload past the ESP header (i.e., the transport
+			 * data that needs to be encrypted).
+			 * When IPsec transport mode is stacked with a tunnel,
+			 * skb2->data needs to point at the inner IP header
+			 * for tunnelled packets. After ->gso_segment, skb2
+			 * will have the network/ip header pointing at the
+			 * inner IP header, and the transport_header will be
+			 * pointing at the inner IP payload. Thus we need to
+			 * use omaclen and the outer ipv6hdr length to make
+			 * sure that the pointers are set up correctly in
+			 * every case.
+			 */
+			__skb_pull(skb2, omaclen + sizeof(struct ipv6hdr) + x->props.header_len);
+
+			/* move ->transport_header to point to esp header */
+			skb_reset_transport_header(skb2);
+			skb2->transport_header -= x->props.header_len;
+		}
+
+		skb2->sp->flags |= SKB_GSO_SEGMENT;
+		skb2->sp->seq.low = seq;
+		skb2->sp->seq.hi = xfrm_replay_seqhi(x, ntohl(seq));
+
+		err = x->type->output(x, skb2);
+		if (err) {
+			kfree_skb_list(segs);
+			return ERR_PTR(err);
+		}
+
+		seq++;
+
+		skb_push(skb2, skb2->mac_len);
+		skb2 = nskb;
+	} while (skb2);
+
+out:
+	return segs;
+}
+
 static const struct net_offload esp6_offload = {
 	.callbacks = {
 		.gro_receive = esp6_gro_receive,
 		.gro_complete = esp6_gro_complete,
+		.gso_segment = esp6_gso_segment,
 	},
 };
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index cdc2e2e..20e68a3 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -45,7 +45,8 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq)
 
 	return seq_hi;
 }
-
+EXPORT_SYMBOL(xfrm_replay_seqhi);
+
 static void xfrm_replay_notify(struct xfrm_state *x, int event)
 {
 	struct km_event c;
-- 
1.9.1