On Tue, 2018-02-27 at 19:52 -0800, Roopa Prabhu wrote: > From: Roopa Prabhu <ro...@cumulusnetworks.com> > > Dissect flow in fwd path if fib rules require it. Controlled by > a flag to avoid penatly for the common case. Flag is set when fib > rules with sport, dport and proto match that require flow dissect > are installed. Also passes the dissected hash keys to the multipath > hash function when applicable to avoid dissecting the flow again. > icmp packets will continue to use inner header for hash > calculations (Thanks to Nikolay Aleksandrov for some review here). > > Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> > --- > include/net/ip_fib.h | 27 ++++++++++++++++++++++++++- > include/net/netns/ipv4.h | 1 + > net/ipv4/fib_rules.c | 8 ++++++++ > net/ipv4/fib_semantics.c | 2 +- > net/ipv4/route.c | 43 +++++++++++++++++++++++++++++-------------- > 5 files changed, 65 insertions(+), 16 deletions(-) > > diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h > index f805243..9c4f52c 100644 > --- a/include/net/ip_fib.h > +++ b/include/net/ip_fib.h > @@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct > net *net) > return 0; > } > > +static inline bool fib4_rules_early_flow_dissect(struct net *net, > + struct sk_buff *skb, > + struct flowi4 *fl4, > + struct flow_keys *flkeys) > +{ > + return false; > +} > #else /* CONFIG_IP_MULTIPLE_TABLES */ > int __net_init fib4_rules_init(struct net *net); > void __net_exit fib4_rules_exit(struct net *net); > @@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule); > int fib4_rules_dump(struct net *net, struct notifier_block *nb); > unsigned int fib4_rules_seq_read(struct net *net); > > +static inline bool fib4_rules_early_flow_dissect(struct net *net, > + struct sk_buff *skb, > + struct flowi4 *fl4, > + struct flow_keys *flkeys) > +{ > + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; > + > + if (!net->ipv4.fib_rules_require_fldissect) > + return false; > + > + skb_flow_dissect_flow_keys(skb, flkeys, flag); > + fl4->fl4_sport = flkeys->ports.src; > + fl4->fl4_dport = flkeys->ports.dst; > + fl4->flowi4_proto = flkeys->basic.ip_proto; > + > + return true; > +} > + > #endif /* CONFIG_IP_MULTIPLE_TABLES */ > > /* Exported by fib_frontend.c */ > @@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int > nh_flags); > > #ifdef CONFIG_IP_ROUTE_MULTIPATH > int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, > - const struct sk_buff *skb); > + const struct sk_buff *skb, struct flow_keys *flkeys); > #endif > void fib_select_multipath(struct fib_result *res, int hash); > void fib_select_path(struct net *net, struct fib_result *res, > diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h > index 44668c2..3a970e4 100644 > --- a/include/net/netns/ipv4.h > +++ b/include/net/netns/ipv4.h > @@ -52,6 +52,7 @@ struct netns_ipv4 { > #ifdef CONFIG_IP_MULTIPLE_TABLES > struct fib_rules_ops *rules_ops; > bool fib_has_custom_rules; > + unsigned int fib_rules_require_fldissect; > struct fib_table __rcu *fib_main; > struct fib_table __rcu *fib_default; > #endif > diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c > index 16083b8..737d11b 100644 > --- a/net/ipv4/fib_rules.c > +++ b/net/ipv4/fib_rules.c > @@ -255,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, > struct sk_buff *skb, > } > #endif > > + if (fib_rule_requires_fldissect(rule)) > + net->ipv4.fib_rules_require_fldissect++; > + > rule4->src_len = frh->src_len; > rule4->srcmask = inet_make_mask(rule4->src_len); > rule4->dst_len = frh->dst_len; > @@ -283,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule) > net->ipv4.fib_num_tclassid_users--; > #endif > net->ipv4.fib_has_custom_rules = true; > + > + if (net->ipv4.fib_rules_require_fldissect && > + fib_rule_requires_fldissect(rule)) > + net->ipv4.fib_rules_require_fldissect--; > errout: > return err; > } > @@ -400,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net) > goto fail; > net->ipv4.rules_ops = ops; > net->ipv4.fib_has_custom_rules = false; > + net->ipv4.fib_rules_require_fldissect = 0; > return 0; > > fail: > diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c > index cd46d76..b0c398a 100644 > --- a/net/ipv4/fib_semantics.c > +++ b/net/ipv4/fib_semantics.c > @@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result > *res, > > #ifdef CONFIG_IP_ROUTE_MULTIPATH > if (res->fi->fib_nhs > 1) { > - int h = fib_multipath_hash(res->fi, fl4, skb); > + int h = fib_multipath_hash(res->fi, fl4, skb, NULL); > > fib_select_multipath(res, h); > } > diff --git a/net/ipv4/route.c b/net/ipv4/route.c > index 26eefa2..3bb686d 100644 > --- a/net/ipv4/route.c > +++ b/net/ipv4/route.c > @@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff > *skb, > > /* if skb is set it will be used and fl4 can be NULL */ > int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, > - const struct sk_buff *skb) > + const struct sk_buff *skb, struct flow_keys *flkeys) > { > struct net *net = fi->fib_net; > struct flow_keys hash_keys; > @@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, > const struct flowi4 *fl4, > if (skb->l4_hash) > return skb_get_hash_raw(skb) >> 1; > memset(&hash_keys, 0, sizeof(hash_keys)); > - skb_flow_dissect_flow_keys(skb, &keys, flag); > > - hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > - hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > - hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > - hash_keys.ports.src = keys.ports.src; > - hash_keys.ports.dst = keys.ports.dst; > - hash_keys.basic.ip_proto = keys.basic.ip_proto; > + if (flkeys) { > + hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + hash_keys.addrs.v4addrs.src = > flkeys->addrs.v4addrs.src; > + hash_keys.addrs.v4addrs.dst = > flkeys->addrs.v4addrs.dst; > + hash_keys.ports.src = flkeys->ports.src; > + hash_keys.ports.dst = flkeys->ports.dst; > + hash_keys.basic.ip_proto = > flkeys->basic.ip_proto; > + } else { > + skb_flow_dissect_flow_keys(skb, &keys, flag); > + hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + hash_keys.addrs.v4addrs.src = > keys.addrs.v4addrs.src; > + hash_keys.addrs.v4addrs.dst = > keys.addrs.v4addrs.dst; > + hash_keys.ports.src = keys.ports.src; > + hash_keys.ports.dst = keys.ports.dst; > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + } > } else { > memset(&hash_keys, 0, sizeof(hash_keys)); > hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > @@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, > const struct flowi4 *fl4, > static int ip_mkroute_input(struct sk_buff *skb, > struct fib_result *res, > struct in_device *in_dev, > - __be32 daddr, __be32 saddr, u32 tos) > + __be32 daddr, __be32 saddr, u32 tos, > + struct flow_keys *hkeys) > { > #ifdef CONFIG_IP_ROUTE_MULTIPATH > if (res->fi && res->fi->fib_nhs > 1) { > - int h = fib_multipath_hash(res->fi, NULL, skb); > + int h = fib_multipath_hash(res->fi, NULL, skb, hkeys); > > fib_select_multipath(res, h); > } > @@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, > __be32 daddr, __be32 saddr, > struct fib_result *res) > { > struct in_device *in_dev = __in_dev_get_rcu(dev); > + struct flow_keys *flkeys = NULL, _flkeys; > + struct net *net = dev_net(dev); > struct ip_tunnel_info *tun_info; > - struct flowi4 fl4; > + int err = -EINVAL; > unsigned int flags = 0; > u32 itag = 0; > struct rtable *rth; > - int err = -EINVAL; > - struct net *net = dev_net(dev); > + struct flowi4 fl4; > bool do_cache; > > /* IP on this device is disabled. */ > @@ -1933,6 +1944,10 @@ static int ip_route_input_slow(struct sk_buff *skb, > __be32 daddr, __be32 saddr, > fl4.daddr = daddr; > fl4.saddr = saddr; > fl4.flowi4_uid = sock_net_uid(net, NULL); > + > + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) > + flkeys = &_flkeys; > + > err = fib_lookup(net, &fl4, res, 0); > if (err != 0) { > if (!IN_DEV_FORWARD(in_dev)) > @@ -1958,7 +1973,7 @@ static int ip_route_input_slow(struct sk_buff *skb, > __be32 daddr, __be32 saddr, > if (res->type != RTN_UNICAST) > goto martian_destination; > > - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos); > + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); > out: return err; > > brd_input:
LGTM Acked-by: Paolo Abeni <pab...@redhat.com>