Currently, the VTI input path works by first looking up the VTI
by its IP addresses, then setting the tunnel pointer in the
XFRM_TUNNEL_SKB_CB, and then having xfrm_input override the mark
with the mark in the tunnel.

This patch changes the order so that the tunnel is found by a
callback from xfrm_input. Each tunnel type (currently only ip_vti
and ip6_vti) implements a lookup function that finds the tunnel,
sets it in the CB, and also performs the XFRM state lookup.

This has the advantage that much more information is available to
the tunnel lookup function, including the looked-up XFRM state.
This will be used in a future change to allow finding the tunnel
not just from the IP addresses, but also from the xfrm lookup.

The lookup function pointer occupies the same space in the
XFRM_TUNNEL_SKB_CB as the IPv4/IPv6 tunnel pointer. The semantics
of the field are:
- When not running a handler that uses tunnels: always null.
- At the beginning of xfrm_input: lookup function pointer.
- After xfrm_input calls the lookup function: tunnel if found,
  else null.

Signed-off-by: Lorenzo Colitti <[email protected]>
---
 include/net/xfrm.h     |  2 ++
 net/ipv4/ip_vti.c      | 43 ++++++++++++++++++++++++++++++++++++----
 net/ipv6/ip6_vti.c     | 53 +++++++++++++++++++++++++++++++++++++++++++++-----
 net/ipv6/xfrm6_input.c |  1 -
 net/xfrm/xfrm_input.c  | 34 +++++++++++++++++++-------------
 5 files changed, 109 insertions(+), 24 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 9d3b7c0ac6e2..3d245f2f6f6c 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -653,6 +653,8 @@ struct xfrm_tunnel_skb_cb {
        } header;
 
        union {
+               int (*lookup)(struct sk_buff *skb, int nexthdr, __be32 spi,
+                             __be32 seq, struct xfrm_state **x);
                struct ip_tunnel *ip4;
                struct ip6_tnl *ip6;
        } tunnel;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 949f432a5f04..850625598187 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -49,8 +49,8 @@ static struct rtnl_link_ops vti_link_ops __read_mostly;
 static unsigned int vti_net_id __read_mostly;
 static int vti_tunnel_init(struct net_device *dev);
 
-static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
-                    int encap_type)
+static struct ip_tunnel *
+vti4_find_tunnel(struct sk_buff *skb, __be32 spi, struct xfrm_state **x)
 {
        struct ip_tunnel *tunnel;
        const struct iphdr *iph = ip_hdr(skb);
@@ -59,19 +59,52 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
 
        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
                                  iph->saddr, iph->daddr, 0);
+       if (tunnel) {
+               *x = xfrm_state_lookup(net, be32_to_cpu(tunnel->parms.i_key),
+                                      (xfrm_address_t *)&iph->daddr,
+                                      spi, iph->protocol, AF_INET);
+       }
+
+       return tunnel;
+}
+
+static int vti_lookup(struct sk_buff *skb, int nexthdr, __be32 spi, __be32 seq,
+                     struct xfrm_state **x)
+{
+       struct net *net = dev_net(skb->dev);
+       struct ip_tunnel *tunnel;
+
+       tunnel = vti4_find_tunnel(skb, spi, x);
        if (tunnel) {
                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                        goto drop;
 
+               if (!*x) {
+                       XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+                       xfrm_audit_state_notfound(skb, AF_INET, spi, seq);
+                       tunnel->dev->stats.rx_errors++;
+                       tunnel->dev->stats.rx_dropped++;
+                       goto drop;
+               }
+
                XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
 
-               return xfrm_input(skb, nexthdr, spi, encap_type);
+               return 0;
        }
 
        return -EINVAL;
 drop:
+       if (*x)
+               xfrm_state_put(*x);
        kfree_skb(skb);
-       return 0;
+       return -ESRCH;
+}
+
+static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+                    int encap_type)
+{
+       XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup = vti_lookup;
+       return xfrm_input(skb, nexthdr, spi, encap_type);
 }
 
 static int vti_rcv(struct sk_buff *skb)
@@ -93,6 +126,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
        u32 orig_mark = skb->mark;
        int ret;
 
+       XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+
        if (!tunnel)
                return 1;
 
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index dbb74f3c57a7..d0676f2f99eb 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -297,13 +297,33 @@ static void vti6_dev_uninit(struct net_device *dev)
        dev_put(dev);
 }
 
-static int vti6_rcv(struct sk_buff *skb)
+static struct ip6_tnl *
+vti6_find_tunnel(struct sk_buff *skb, __be32 spi, struct xfrm_state **x)
 {
+       const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+       struct net *net = dev_net(skb->dev);
        struct ip6_tnl *t;
+
+       t = vti6_tnl_lookup(net, &ipv6h->saddr, &ipv6h->daddr);
+       if (t) {
+               *x = xfrm_state_lookup(net, be32_to_cpu(t->parms.i_key),
+                                      (xfrm_address_t *)&ipv6h->daddr,
+                                      spi, ipv6h->nexthdr, AF_INET6);
+       }
+
+       return t;
+}
+
+int
+vti6_lookup(struct sk_buff *skb, int nexthdr, __be32 spi, __be32 seq,
+           struct xfrm_state **x)
+{
        const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+       struct net *net = dev_net(skb->dev);
+       struct ip6_tnl *t;
 
        rcu_read_lock();
-       t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr);
+       t = vti6_find_tunnel(skb, spi, x);
        if (t) {
                if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) {
                        rcu_read_unlock();
@@ -312,7 +332,7 @@ static int vti6_rcv(struct sk_buff *skb)
 
                if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
                        rcu_read_unlock();
-                       return 0;
+                       goto discard;
                }
 
                if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
@@ -321,15 +341,36 @@ static int vti6_rcv(struct sk_buff *skb)
                        goto discard;
                }
 
+               if (!*x) {
+                       XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
+                       xfrm_audit_state_notfound(skb, AF_INET6, spi, seq);
+                       t->dev->stats.rx_errors++;
+                       t->dev->stats.rx_dropped++;
+                       rcu_read_unlock();
+                       goto discard;
+               }
+
                rcu_read_unlock();
 
-               return xfrm6_rcv_tnl(skb, t);
+               XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+
+               return 0;
        }
        rcu_read_unlock();
        return -EINVAL;
 discard:
+       if (*x)
+               xfrm_state_put(*x);
        kfree_skb(skb);
-       return 0;
+       return -ESRCH;
+}
+
+static int vti6_rcv(struct sk_buff *skb)
+{
+       int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff];
+
+       XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup = vti6_lookup;
+       return xfrm6_rcv_spi(skb, nexthdr, 0, NULL);
 }
 
 static int vti6_rcv_cb(struct sk_buff *skb, int err)
@@ -343,6 +384,8 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
        u32 orig_mark = skb->mark;
        int ret;
 
+       XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+
        if (!t)
                return 1;
 
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index fe04e23af986..6d1b734fef8d 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -25,7 +25,6 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
                  struct ip6_tnl *t)
 {
-       XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
        XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
        XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
        return xfrm_input(skb, nexthdr, spi, 0);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index ac277b97e0d7..7b54f58454ee 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -267,18 +267,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
        family = XFRM_SPI_SKB_CB(skb)->family;
 
-       /* if tunnel is present override skb->mark value with tunnel i_key */
-       switch (family) {
-       case AF_INET:
-               if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
-                       mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
-               break;
-       case AF_INET6:
-               if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
-                       mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
-               break;
-       }
-
        err = secpath_set(skb);
        if (err) {
                XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR);
@@ -293,14 +281,29 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
 
        daddr = (xfrm_address_t *)(skb_network_header(skb) +
                                   XFRM_SPI_SKB_CB(skb)->daddroff);
+
+       if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup) {
+               err = XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup(skb, nexthdr,
+                                                            spi, seq, &x);
+               if (err) {
+                       XFRM_TUNNEL_SKB_CB(skb)->tunnel.lookup = NULL;
+                       return err;
+               }
+       }
+
        do {
                if (skb->sp->len == XFRM_MAX_DEPTH) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+                       if (x)
+                               xfrm_state_put(x);
                        goto drop;
                }
 
-               x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
-               if (x == NULL) {
+               if (!x)
+                       x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr,
+                                             family);
+
+               if (!x) {
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
                        xfrm_audit_state_notfound(skb, family, spi, seq);
                        goto drop;
@@ -420,6 +423,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
                        goto drop;
                }
+
+               if (!err)
+                       x = NULL;
        } while (!err);
 
        err = xfrm_rcv_cb(skb, family, x->type->proto, 0);
-- 
2.15.1.504.g5279b80103-goog

Reply via email to