[PATCH RFC 0/1] xfrm: dst lookup doesn't account for fwmark
I ran into an issue trying to route outgoing ipsec traffic from an ipsec responder hub that uses fwmark to route out a specific interface. The fwmark points to a route table that contains a default route out a specific interface. The fwmark is applied based on incoming interface of incoming traffic. I noticed that the mark was not being used when doing a route lookup. Is there any reason why this is? If not does this patch look reasonable? I just followed similar logic to how TOS was being used before route-lookup. I've only tested on an earlier kernel (3.14) and it works. Thanks! Here's an example: The ipsec policy is simple, just a tunnel from 192.168.0.0/24 <--> 192.168.1.0/24 the ipsec hub is at 35.0.0.1 but when traffic is being responded too, instead of the expected interface (eth2) we end it out a different interface (eth0) route table: # ip rule list 0: from all lookup local 32766: from all lookup main 32767: from all lookup default 4: from all fwmark 0x2 lookup 3 4: from all fwmark 0x4 lookup 4 5: from all lookup 3 # ip route 25.0.0.0/30 dev eth0 src 25.0.0.1 35.0.0.0/30 dev eth2 src 35.0.0.1 192.168.1.0/24 dev lan1 src 192.168.1.1 # ip route list table 4 default via 35.0.0.2 dev eth2 onlink # ip route list table 3 default via 25.0.0.2 dev eth0 onlink I noticed this in netfilter trace, traffic matches the ipsec policy: [ 85.910365] TRACE: mangle:PREROUTING:rule:4 IN=lan1 OUT= MAC=00:30:44:ff:b6:43:42:17:87:41:3c:eb:08:00 SRC=192.168.1.5 DST=192.168.0.5 LEN=84 TOS=0x00 PREC=0x00 TTL=64 ID=35127 PROTO=ICMP TYPE=0 CODE=0 ID=15069 SEQ=1 MARK=0x4 [ 85.911895] TRACE: mangle:FORWARD:rule:2 IN=lan1 ***OUT=eth0*** MAC=00:30:44:ff:b6:43:42:17:87:41:3c:eb:08:00 SRC=192.168.1.5 DST=192.168.0.5 LEN=84 TOS=0x00 PREC=0x00 TTL=63 ID=35127 PROTO=ICMP TYPE=0 CODE=0 ID=15069 SEQ=1 MARK=0x4
[PATCH RFC 1/1] xfrm: dst lookup doesn't account for fwmark
If route table includes routing based on fwmark, xfrm will not take it into account when routing ipsec traffic. We address this issue by adding fwmark information before calling route lookup. Signed-off-by: Doug Applegate --- include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 15 +++ net/ipv6/xfrm6_policy.c | 11 +-- net/xfrm/xfrm_policy.c | 28 +++- 4 files changed, 45 insertions(+), 12 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6..13e80d1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -282,7 +282,7 @@ struct xfrm_policy_afinfo { struct dst_ops *dst_ops; void (*garbage_collect)(struct net *net); struct dst_entry *(*dst_lookup)(struct net *net, - int tos, int oif, + int tos, int oif, int mark, const xfrm_address_t *saddr, const xfrm_address_t *daddr); int (*get_saddr)(struct net *net, int oif, @@ -292,6 +292,7 @@ struct xfrm_policy_afinfo { struct flowi *fl, int reverse); int (*get_tos)(const struct flowi *fl); + int (*get_mark)(const struct flowi *fl); int (*init_path)(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 7b0edb3..5dcd411 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -20,7 +20,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, -int tos, int oif, +int tos, int oif, int mark, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { @@ -30,6 +30,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; fl4->flowi4_oif = oif; + fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; @@ -42,13 +43,13 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, return ERR_CAST(rt); } -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, int mark, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr); + return __xfrm4_dst_lookup(net, &fl4, tos, oif, mark, saddr, daddr); } static int xfrm4_get_saddr(struct net *net, int oif, @@ -57,7 +58,7 @@ static int xfrm4_get_saddr(struct net *net, int oif, struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr); + dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -71,6 +72,11 @@ static int xfrm4_get_tos(const struct flowi *fl) return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */ } +static int xfrm4_get_mark(const struct flowi *fl) +{ + return fl->u.ip4.flowi4_mark; +} + static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -278,6 +284,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .get_saddr = xfrm4_get_saddr, .decode_session = _decode_session4, .get_tos = xfrm4_get_tos, + .get_mark = xfrm4_get_mark, .init_path = xfrm4_init_path, .fill_dst = xfrm4_fill_dst, .blackhole_route = ipv4_blackhole_route, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index c074771..16a0af5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,7 +27,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, int mark, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { @@ -38,6 +38,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; + fl6.flowi6_mark = mark memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); @@ -59,7 +60,7 @@ static int xfrm6_get_saddr(struct net *net, int oif, struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, oif, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -74,6 +75,11 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } +static int xfrm6_get_mark(const struct flowi *fl) +{ + return fl->u.ip6.flowi6_mark; +} + static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -298,6 +304,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, + .get_mark = xfrm6_get_mark
Re: [PATCH RFC 0/1] xfrm: dst lookup doesn't account for fwmark
Shortly I will post an updated patch. Please ignore the 'onlink' on those routes. It is irrelevant. The particular use case is load balancing by using fwmark. Consider if we don't define any routes except load balancing based on fwmarks. The only requirement is that the traffic must exit the same interface it comes in on. Therefore you have these firewall rules: -A PREROUTING -m state --state NEW -i eth0 -j MARK --set-xmark 0x1/0x7e -A PREROUTING -m state --state NEW -i eth1 -j MARK --set-xmark 0x2/0x7e # ip rule list 4: from all fwmark 0x1 lookup 3 4: from all fwmark 0x2 lookup 4 # ip route list table 3 default via 25.0.0.2 dev eth0 # ip route list table 4 default via 35.0.0.2 dev eth1 outgoing traffic gets the mark restored: -A OUTPUT -m mark --mark 0x0/0x7e -j CONNMARK --restore-mark --nfmask 0x7e --ctmask 0x7e Imagine vpn traffic coming in on eth1, therefore the reroute check before postrouting should specify the outgoing interface as eth1, however, for vpn traffic the mark isn't taking in account that the traffic could egress the incorrect interface as shown in the trace I sent earlier. Furthermore, I seem to recall, that the transformed packet (the one encapsulated with the outer ip address) will inherit the mark from the inner ipsec packet and a second reroute check is not performed. There are a number of ways to work around this bug. However, this patch addresses this issue and makes it easy to add other flow information in case we need it down the line. Thanks! Doug On Wed, Jul 27, 2016 at 11:20 PM, Steffen Klassert wrote: > On Fri, Jul 22, 2016 at 03:50:30PM -0600, Doug Applegate wrote: >> I ran into an issue trying to route outgoing ipsec traffic from an >> ipsec responder hub that uses fwmark to route out a specific >> interface. The fwmark points to a route table that contains a default >> route out a specific interface. The fwmark is applied based on >> incoming interface of incoming traffic. I noticed that the mark was >> not being used when doing a route lookup. Is there any reason why this >> is? If not does this patch look reasonable? I just followed similar >> logic to how TOS was being used before route-lookup. I've only tested >> on an earlier kernel (3.14) and it works. Thanks! >> >> Here's an example: >> >> The ipsec policy is simple, just a tunnel from 192.168.0.0/24 <--> >> 192.168.1.0/24 the ipsec hub is at 35.0.0.1 but when traffic is being >> responded too, instead of the expected interface (eth2) we end it out >> a different interface (eth0) >> >> route table: >> # ip rule list >> 0: from all lookup local >> 32766: from all lookup main >> 32767: from all lookup default >> 4: from all fwmark 0x2 lookup 3 >> 4: from all fwmark 0x4 lookup 4 >> 5: from all lookup 3 >> >> # ip route >> 25.0.0.0/30 dev eth0 src 25.0.0.1 >> 35.0.0.0/30 dev eth2 src 35.0.0.1 >> 192.168.1.0/24 dev lan1 src 192.168.1.1 >> >> # ip route list table 4 >> default via 35.0.0.2 dev eth2 onlink >> # ip route list table 3 >> default via 25.0.0.2 dev eth0 onlink > > Can you provide some more information about your usecase? > Mapping between interfaces and IP addresses, policies > and states would be good. > > Btw. why do you need to insert the routes as 'onlink'? > This disables some checks if this route is valid.
Re: [PATCH RFC 1/1] xfrm: dst lookup doesn't account for fwmark
,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, } static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, - int tos, int oif, + const struct flowi *fl, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, int family) @@ -155,7 +155,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, fl, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -1395,7 +1395,7 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) } static int -xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, +xfrm_get_saddr(struct net *net, const struct flowi *fl, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1403,7 +1403,7 @@ xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, oif, local, remote); + err = afinfo->get_saddr(net, fl, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1432,7 +1432,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, fl->flowi_oif, + error = xfrm_get_saddr(net, fl, &tmp, remote, tmpl->encap_family); if (error) @@ -1712,7 +1712,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; - dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, + dst = xfrm_dst_lookup(xfrm[i], fl, &saddr, &daddr, family); err = PTR_ERR(dst); if (IS_ERR(dst)) -- 2.5.0 On Fri, Jul 22, 2016 at 3:50 PM, Doug Applegate wrote: > If route table includes routing based on fwmark, xfrm will not take it > into account when routing ipsec traffic. We address this issue by adding > fwmark information before calling route lookup. > > Signed-off-by: Doug Applegate > --- > include/net/xfrm.h | 3 ++- > net/ipv4/xfrm4_policy.c | 15 +++ > net/ipv6/xfrm6_policy.c | 11 +-- > net/xfrm/xfrm_policy.c | 28 +++- > 4 files changed, 45 insertions(+), 12 deletions(-) > > diff --git a/include/net/xfrm.h b/include/net/xfrm.h > index adfebd6..13e80d1 100644 > --- a/include/net/xfrm.h > +++ b/include/net/xfrm.h > @@ -282,7 +282,7 @@ struct xfrm_policy_afinfo { > struct dst_ops *dst_ops; > void (*garbage_collect)(struct net *net); > struct dst_entry *(*dst_lookup)(struct net *net, > - int tos, int oif, > + int tos, int oif, int mark, > const xfrm_address_t *saddr, > const xfrm_address_t *daddr); > int (*get_saddr)(struct net *net, int oif, > @@ -292,6 +292,7 @@ struct xfrm_policy_afinfo { >struct flowi *fl, >int reverse); > int (*get_tos)(const struct flowi *fl); > + int (*get_mark)(const struct flowi *fl); > int (*init_path)(struct xfrm_dst *path, > struct dst_entry *dst, > int nfheader_len); > diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c > index 7b0edb3..5dcd411 100644 > --- a/net/ipv4/xfrm4_policy.c > +++ b/net/ipv4/xfrm4_policy.c > @@ -20,7 +20,7 @@ > static struct xfrm_policy_afinfo xfrm4_policy_afinfo; > > static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct > flowi4 *fl4, > -int tos, int oif, > +int tos, int oif, int mark, > const xfrm_address_t *saddr, > const xfrm_address_t *daddr) > { > @@ -30,6 +30,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct > net *net, struct flowi4 *fl4, > fl4->daddr = daddr->a4; > fl4->flowi4_tos = tos; > fl4->flowi4_oif = oif; > + fl4->flowi4_mark = mark; > if (saddr) > fl4->saddr = saddr->a4; > > @@ -42,13 +43,13 @@ static struct dst_entry *__xfrm4_dst_lookup(struct > net *net, struct flowi4 *fl4, > return ERR_CAST(rt); > } > > -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, > +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, > int oif, int mark, >const xfrm_address_t *saddr, >const xfrm_address_t *daddr) > { > struct flowi4 fl4; > > - return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr); > + return __xfrm4_dst_lookup(net, &fl4, tos, oif, mark, saddr, daddr); > } > > static int xfrm4_get_saddr(struct net *net, int oif, > @@ -57,7 +58,7 @@ static int xfrm4_get_saddr(struct net *net, int oif, > struct dst_entry *dst; > struct flowi4 fl4; > > - dst = __xfrm4_dst_lookup