On Fri, 01 Dec 2006 13:41:39 +0900 Kazunori MIYAZAWA <[EMAIL PROTECTED]> wrote:
> David Miller wrote: > > From: Kazunori MIYAZAWA <[EMAIL PROTECTED]> > > Date: Fri, 24 Nov 2006 14:38:39 +0900 > > > > What is going on here? > > > >> + /* Without this, the atomic inc below segfaults */ > >> + if (encap_family == AF_INET6) { > >> + rt->peer = NULL; > >> + rt_bind_peer(rt,1); > >> + } > > ... > >> - dst_prev->output = xfrm4_output; > >> + if (dst_prev->xfrm->props.family == AF_INET) > >> + dst_prev->output = xfrm4_output; > >> +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) > >> + else > >> + dst_prev->output = xfrm6_output; > >> +#endif > >> if (rt->peer) > >> atomic_inc(&rt->peer->refcnt); > > > > If it's non-NULL and you get a segfault for atomic_inc() that > > means there is garbage here, and it seems that if you're > > setting it to NULL explicitly then it's just a workaround > > for whatever problem is causing it to be non-NULL to begin > > with. > > > > What is putting a non-valid pointer value there? Is this an IPV6 or > > IPSEC dst route by chance? If so, that makes this change really > > wrong, and we are corrupting the route by running rt_bind_peer() on > > it. rt_bind_peer() is only valid on ipv4 route entries. > > Thank you for your good catch. > I think atomic_inc must be done in case of props.family == AF_INET. > And we probably should manage reference count of the device in case of > AF_INET6. > > Anyway I'll check and fix it. > > Thank you. > Hello David, Sorry, I mixed up changes in the patches. I described that [4/7] will add "IPv6 over IPv4 IPsec tunnel". However I send a patch for "IPv4 over IPv6" as a reply because it includes the discussing item. So this patch adds IPv4 over IPv6 IPsec tunnel. It's complicated. I deleted subustituting NULL for rt->peer and moved atomic_inc stuff under the "if" section. BTW, I have a question about descrementing the reference count of rt->peer. The reference cound in normal "dst" structure is decremented by calling inet_putpeer from ipv4_dst_destroy. But xfrm4_dst_destroy does not call inet_putpeer. Where do we decrement the count? Should xfrm4_dst_destroy do that? Signed-off-by: Miika Komu <[EMAIL PROTECTED]> Signed-off-by: Diego Beltrami <[EMAIL PROTECTED]> Signed-off-by: Kazunori Miyazawa <[EMAIL PROTECTED]> --- net/ipv4/xfrm4_policy.c | 68 ++++++++++++++++++++++++++++++++---------- net/ipv6/xfrm6_mode_tunnel.c | 42 ++++++++++++++++++++------ 2 files changed, 83 insertions(+), 27 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index d4107bb..37b14e8 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -72,17 +72,20 @@ __xfrm4_bundle_create(struct xfrm_policy struct dst_entry *dst, *dst_prev; struct rtable *rt0 = (struct rtable*)(*dst_p); struct rtable *rt = rt0; - __be32 remote = fl->fl4_dst; - __be32 local = fl->fl4_src; struct flowi fl_tunnel = { .nl_u = { .ip4_u = { - .saddr = local, - .daddr = remote, + .saddr = fl->fl4_src, + .daddr = fl->fl4_dst, .tos = fl->fl4_tos } } }; + union { + struct in6_addr *in6; + struct in_addr *in; + } remote, local; + unsigned short encap_family = 0; int i; int err; int header_len = 0; @@ -94,7 +97,6 @@ __xfrm4_bundle_create(struct xfrm_policy for (i = 0; i < nx; i++) { struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops); struct xfrm_dst *xdst; - int tunnel = 0; if (unlikely(dst1 == NULL)) { err = -ENOBUFS; @@ -116,19 +118,45 @@ __xfrm4_bundle_create(struct xfrm_policy dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = xfrm[i]->id.daddr.a4; - local = xfrm[i]->props.saddr.a4; - tunnel = 1; + + if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) { + encap_family = xfrm[i]->props.family; + + switch(encap_family) { + case AF_INET: + remote.in = (struct in_addr*)&xfrm[i]->id.daddr; + local.in = (struct in_addr*)&xfrm[i]->props.saddr; + break; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case AF_INET6: + remote.in6 = (struct in6_addr*)&xfrm[i]->id.daddr; + local.in6 = (struct in6_addr*)&xfrm[i]->props.saddr; + break; +#endif + default: + BUG_ON(1); + } } header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; - if (tunnel) { - fl_tunnel.fl4_src = local; - fl_tunnel.fl4_dst = remote; + if (encap_family) { + switch(encap_family) { + case AF_INET: + fl_tunnel.fl4_dst = remote.in->s_addr; + fl_tunnel.fl4_src = local.in->s_addr; + break; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + case AF_INET6: + ipv6_addr_copy(&fl_tunnel.fl6_dst, remote.in6); + ipv6_addr_copy(&fl_tunnel.fl6_src, local.in6); + break; +#endif + default: + BUG_ON(1); + } err = xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET); + &fl_tunnel, encap_family); if (err) goto error; } else @@ -162,10 +190,16 @@ __xfrm4_bundle_create(struct xfrm_policy /* Copy neighbout for reachability confirmation */ dst_prev->neighbour = neigh_clone(rt->u.dst.neighbour); dst_prev->input = rt->u.dst.input; - dst_prev->output = xfrm4_output; - if (rt->peer) - atomic_inc(&rt->peer->refcnt); - x->u.rt.peer = rt->peer; + if (dst_prev->xfrm->props.family == AF_INET) { + dst_prev->output = xfrm4_output; + if (rt->peer) + atomic_inc(&rt->peer->refcnt); + x->u.rt.peer = rt->peer; + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else + dst_prev->output = xfrm6_output; +#endif /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 5e7d8a7..51eb59d 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -25,6 +25,12 @@ static inline void ipip6_ecn_decapsulate IP6_ECN_set_ce(inner_iph); } +static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb) +{ + if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h))) + IP_ECN_set_ce(skb->h.ipiph); +} + /* Add encapsulation header. * * The top IP header will be constructed per RFC 2401. The following fields @@ -40,6 +46,7 @@ static inline void ipip6_ecn_decapsulate static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_dst *xdst = (struct xfrm_dst*)dst; struct ipv6hdr *iph, *top_iph; int dsfield; @@ -52,16 +59,24 @@ static int xfrm6_tunnel_output(struct xf skb->h.ipv6h = top_iph + 1; top_iph->version = 6; - top_iph->priority = iph->priority; - top_iph->flow_lbl[0] = iph->flow_lbl[0]; - top_iph->flow_lbl[1] = iph->flow_lbl[1]; - top_iph->flow_lbl[2] = iph->flow_lbl[2]; + if (xdst->route->ops->family == AF_INET6) { + top_iph->priority = iph->priority; + top_iph->flow_lbl[0] = iph->flow_lbl[0]; + top_iph->flow_lbl[1] = iph->flow_lbl[1]; + top_iph->flow_lbl[2] = iph->flow_lbl[2]; + top_iph->nexthdr = IPPROTO_IPV6; + } else { + top_iph->priority = 0; + top_iph->flow_lbl[0] = 0; + top_iph->flow_lbl[1] = 0; + top_iph->flow_lbl[2] = 0; + top_iph->nexthdr = IPPROTO_IPIP; + } dsfield = ipv6_get_dsfield(top_iph); dsfield = INET_ECN_encapsulate(dsfield, dsfield); if (x->props.flags & XFRM_STATE_NOECN) dsfield &= ~INET_ECN_MASK; ipv6_change_dsfield(top_iph, 0, dsfield); - top_iph->nexthdr = IPPROTO_IPV6; top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT); ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr); ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr); @@ -72,7 +87,8 @@ static int xfrm6_tunnel_input(struct xfr { int err = -EINVAL; - if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6) + if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6 + && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP) goto out; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; @@ -81,10 +97,16 @@ static int xfrm6_tunnel_input(struct xfr (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; - if (x->props.flags & XFRM_STATE_DECAP_DSCP) - ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); - if (!(x->props.flags & XFRM_STATE_NOECN)) - ipip6_ecn_decapsulate(skb); + if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) { + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h); + if (!(x->props.flags & XFRM_STATE_NOECN)) + ipip6_ecn_decapsulate(skb); + } else { + if (!(x->props.flags & XFRM_STATE_NOECN)) + ip6ip_ecn_decapsulate(skb); + skb->protocol = htons(ETH_P_IP); + } skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); skb->nh.raw = skb->data; -- 1.4.1 - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html