Make FIBs per-namespace and add an additional key (the net namespace) to lookups in the routing cache.
Signed-off-by: Dmitry Mishin <[EMAIL PROTECTED]> --- include/linux/net_namespace.h | 12 +++ include/net/flow.h | 3 include/net/ip_fib.h | 46 +++++++++++--- net/core/fib_rules.c | 42 ++++++++++--- net/core/net_namespace.c | 11 +++ net/ipv4/fib_frontend.c | 135 ++++++++++++++++++++++++++++++++++-------- net/ipv4/fib_hash.c | 10 +-- net/ipv4/fib_rules.c | 86 ++++++++++++++++++++++---- net/ipv4/fib_semantics.c | 100 ++++++++++++++++++++----------- net/ipv4/fib_trie.c | 18 ++++- net/ipv4/route.c | 32 +++++++++ 11 files changed, 396 insertions(+), 99 deletions(-) --- linux-2.6.20-rc4-mm1.net_ns.orig/include/linux/net_namespace.h +++ linux-2.6.20-rc4-mm1.net_ns/include/linux/net_namespace.h @@ -11,6 +11,18 @@ struct net_namespace { struct net_device *dev_base_p, **dev_tail_p; struct net_device *loopback_dev_p; struct pcpu_lstats *pcpu_lstats_p; +#ifndef CONFIG_IP_MULTIPLE_TABLES + struct fib_table *fib4_local_table, *fib4_main_table; +#else + struct list_head fib_rules_ops_list; + struct fib_rules_ops *fib4_rules_ops; +#endif + struct hlist_head *fib4_tables; + struct hlist_head *fib4_hash, *fib4_laddrhash; + unsigned fib4_hash_size, fib4_info_cnt; +#ifdef CONFIG_IP_FIB_TRIE + int fib4_trie_last_dflt; +#endif unsigned int hash; }; --- linux-2.6.20-rc4-mm1.net_ns.orig/include/net/flow.h +++ linux-2.6.20-rc4-mm1.net_ns/include/net/flow.h @@ -82,6 +82,9 @@ struct flowi { #define fl_mh_type uli_u.mht.type #endif __u32 secid; /* used by xfrm; see secid.txt */ +#ifdef CONFIG_NET_NS + struct net_namespace *net_ns; +#endif } __attribute__((__aligned__(BITS_PER_LONG/8))); #define FLOW_DIR_IN 0 --- linux-2.6.20-rc4-mm1.net_ns.orig/include/net/ip_fib.h +++ linux-2.6.20-rc4-mm1.net_ns/include/net/ip_fib.h @@ -18,6 +18,7 @@ #include <net/flow.h> #include <linux/seq_file.h> +#include <linux/net_namespace.h> #include <net/fib_rules.h> struct fib_config { @@ -171,14 +172,21 @@ struct fib_table { #ifndef CONFIG_IP_MULTIPLE_TABLES -extern struct fib_table *ip_fib_local_table; -extern 
struct fib_table *ip_fib_main_table; +#ifndef CONFIG_NET_NS +extern struct fib_table *ip_fib_local_table_static; +extern struct fib_table *ip_fib_main_table_static; +#define ip_fib_local_table_ns() ip_fib_local_table_static +#define ip_fib_main_table_ns() ip_fib_main_table_static +#else +#define ip_fib_local_table_ns() (current_net_ns->fib4_local_table) +#define ip_fib_main_table_ns() (current_net_ns->fib4_main_table) +#endif static inline struct fib_table *fib_get_table(u32 id) { if (id != RT_TABLE_LOCAL) - return ip_fib_main_table; - return ip_fib_local_table; + return ip_fib_main_table_ns(); + return ip_fib_local_table_ns(); } static inline struct fib_table *fib_new_table(u32 id) @@ -188,21 +196,29 @@ static inline struct fib_table *fib_new_ static inline int fib_lookup(const struct flowi *flp, struct fib_result *res) { - if (ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) && - ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res)) + struct fib_table *tb; + + tb = ip_fib_local_table_ns(); + if (!tb->tb_lookup(tb, flp, res)) + return 0; + tb = ip_fib_main_table_ns(); + if (tb->tb_lookup(tb, flp, res)) return -ENETUNREACH; return 0; } static inline void fib_select_default(const struct flowi *flp, struct fib_result *res) { + struct fib_table *tb; + + tb = ip_fib_main_table_ns(); if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) - ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res); + tb->tb_select_default(ip_fib_main_table_ns(), flp, res); } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) -#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) +#define ip_fib_local_table_ns() fib_get_table(RT_TABLE_LOCAL) +#define ip_fib_main_table_ns() fib_get_table(RT_TABLE_MAIN) extern int fib_lookup(struct flowi *flp, struct fib_result *res); @@ -215,6 +231,10 @@ extern void fib_select_default(const str /* Exported by fib_frontend.c */ extern struct nla_policy rtm_ipv4_policy[]; 
extern void ip_fib_init(void); +#ifdef CONFIG_NET_NS +extern int ip_fib_struct_init(void); +extern void ip_fib_struct_cleanup(struct net_namespace *); +#endif extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); @@ -230,6 +250,9 @@ extern int ip_fib_check_default(__be32 g extern int fib_sync_down(__be32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); +#ifdef CONFIG_NET_NS +extern void fib_hashtable_destroy(struct net_namespace *); +#endif /* Exported by fib_hash.c */ extern struct fib_table *fib_hash_init(u32 id); @@ -237,7 +260,10 @@ extern struct fib_table *fib_hash_init(u #ifdef CONFIG_IP_MULTIPLE_TABLES extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); -extern void __init fib4_rules_init(void); +#ifdef CONFIG_NET_NS +extern void fib4_rules_cleanup(struct net_namespace *); +#endif +extern int fib4_rules_init(void); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); --- linux-2.6.20-rc4-mm1.net_ns.orig/net/core/fib_rules.c +++ linux-2.6.20-rc4-mm1.net_ns/net/core/fib_rules.c @@ -13,7 +13,12 @@ #include <linux/list.h> #include <net/fib_rules.h> -static LIST_HEAD(rules_ops); +#ifndef CONFIG_NET_NS +static struct list_head rules_ops_static; +#define rules_ops_ns() rules_ops_static +#else +#define rules_ops_ns() (current_net_ns->fib_rules_ops_list) +#endif static DEFINE_SPINLOCK(rules_mod_lock); static void notify_rule_change(int event, struct fib_rule *rule, @@ -22,10 +27,12 @@ static void notify_rule_change(int event static struct fib_rules_ops *lookup_rules_ops(int family) { + struct list_head *ops_list; struct fib_rules_ops *ops; + ops_list = &rules_ops_ns(); rcu_read_lock(); - list_for_each_entry_rcu(ops, 
&rules_ops, list) { + list_for_each_entry_rcu(ops, ops_list, list) { if (ops->family == family) { if (!try_module_get(ops->owner)) ops = NULL; @@ -47,6 +54,7 @@ static void rules_ops_put(struct fib_rul int fib_rules_register(struct fib_rules_ops *ops) { int err = -EEXIST; + struct list_head *ops_list; struct fib_rules_ops *o; if (ops->rule_size < sizeof(struct fib_rule)) @@ -57,12 +65,13 @@ int fib_rules_register(struct fib_rules_ ops->action == NULL) return -EINVAL; + ops_list = &rules_ops_ns(); spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) + list_for_each_entry(o, ops_list, list) if (ops->family == o->family) goto errout; - list_add_tail_rcu(&ops->list, &rules_ops); + list_add_tail_rcu(&ops->list, ops_list); err = 0; errout: spin_unlock(&rules_mod_lock); @@ -85,10 +94,12 @@ static void cleanup_ops(struct fib_rules int fib_rules_unregister(struct fib_rules_ops *ops) { int err = 0; + struct list_head *ops_list; struct fib_rules_ops *o; + ops_list = &rules_ops_ns(); spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) { + list_for_each_entry(o, ops_list, list) { if (o == ops) { list_del_rcu(&o->list); cleanup_ops(ops); @@ -131,6 +142,14 @@ int fib_rules_lookup(struct fib_rules_op rcu_read_lock(); + err = -EINVAL; + if (ops->rules_list->next == NULL) { + if (net_ratelimit()) + printk(" *** NULL head, ops %p, list %p\n", + ops, ops->rules_list); + goto out; + } + list_for_each_entry_rcu(rule, ops->rules_list, list) { if (!fib_rule_match(rule, ops, fl, flags)) continue; @@ -141,6 +160,12 @@ int fib_rules_lookup(struct fib_rules_op arg->rule = rule; goto out; } + if (rule->list.next == NULL) { + if (net_ratelimit()) + printk(" *** NULL, ops %p, list %p, item %p\n", + ops, ops->rules_list, rule); + goto out; + } } err = -ENETUNREACH; @@ -439,19 +464,21 @@ static int fib_rules_event(struct notifi void *ptr) { struct net_device *dev = ptr; + struct list_head *ops_list; struct fib_rules_ops *ops; ASSERT_RTNL(); rcu_read_lock(); + 
ops_list = &rules_ops_ns(); switch (event) { case NETDEV_REGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, ops_list, list) attach_rules(ops->rules_list, dev); break; case NETDEV_UNREGISTER: - list_for_each_entry(ops, &rules_ops, list) + list_for_each_entry(ops, ops_list, list) detach_rules(ops->rules_list, dev); break; } @@ -467,6 +494,7 @@ static struct notifier_block fib_rules_n static int __init fib_rules_init(void) { + INIT_LIST_HEAD(&rules_ops_ns()); return register_netdevice_notifier(&fib_rules_notifier); } --- linux-2.6.20-rc4-mm1.net_ns.orig/net/core/net_namespace.c +++ linux-2.6.20-rc4-mm1.net_ns/net/core/net_namespace.c @@ -11,6 +11,7 @@ #include <linux/net_namespace.h> #include <linux/net.h> #include <linux/netdevice.h> +#include <net/ip_fib.h> struct net_namespace init_net_ns = { .kref = { @@ -36,7 +37,7 @@ static struct net_namespace *clone_net_n { struct net_namespace *ns; - ns = kmalloc(sizeof(struct net_namespace), GFP_KERNEL); + ns = kzalloc(sizeof(struct net_namespace), GFP_KERNEL); if (!ns) return NULL; @@ -47,12 +48,19 @@ static struct net_namespace *clone_net_n if ((push_net_ns(ns)) != old_ns) BUG(); +#ifdef CONFIG_IP_MULTIPLE_TABLES + INIT_LIST_HEAD(&ns->fib_rules_ops_list); +#endif + if (ip_fib_struct_init()) + goto out_fib4; if (loopback_init()) goto out_loopback; pop_net_ns(old_ns); return ns; out_loopback: + ip_fib_struct_cleanup(ns); +out_fib4: pop_net_ns(old_ns); BUG_ON(atomic_read(&ns->kref.refcount) != 1); kfree(ns); @@ -100,6 +108,7 @@ void free_net_ns(struct kref *kref) ns, atomic_read(&ns->kref.refcount)); return; } + ip_fib_struct_cleanup(ns); kfree(ns); } --- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv4/fib_frontend.c +++ linux-2.6.20-rc4-mm1.net_ns/net/ipv4/fib_frontend.c @@ -50,18 +50,24 @@ #define FFprint(a...) 
printk(KERN_DEBUG a) #ifndef CONFIG_IP_MULTIPLE_TABLES - -struct fib_table *ip_fib_local_table; -struct fib_table *ip_fib_main_table; - +#ifndef CONFIG_NET_NS +struct fib_table *ip_fib_local_table_static; +struct fib_table *ip_fib_main_table_static; +#endif #define FIB_TABLE_HASHSZ 1 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; - #else - #define FIB_TABLE_HASHSZ 256 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +#endif +#ifndef CONFIG_NET_NS +static struct hlist_head fib_table_hash_static[FIB_TABLE_HASHSZ]; +#define fib_table_hash_ns(ns) fib_table_hash_static +#else +#define fib_table_hash_ns(ns) (ns->fib4_tables) +#endif +#define fib_table_hash_current() fib_table_hash_ns(current_net_ns) + +#ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; @@ -76,21 +82,23 @@ struct fib_table *fib_new_table(u32 id) if (!tb) return NULL; h = id & (FIB_TABLE_HASHSZ - 1); - hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); + hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash_current()[h]); return tb; } struct fib_table *fib_get_table(u32 id) { struct fib_table *tb; + struct hlist_head *list; struct hlist_node *node; unsigned int h; if (id == 0) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); + list = &fib_table_hash_current()[h]; rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + hlist_for_each_entry_rcu(tb, node, list, tb_hlist) { if (tb->tb_id == id) { rcu_read_unlock(); return tb; @@ -101,15 +109,17 @@ struct fib_table *fib_get_table(u32 id) } #endif /* CONFIG_IP_MULTIPLE_TABLES */ -static void fib_flush(void) +static void fib_flush_ns(struct net_namespace *ns) { int flushed = 0; struct fib_table *tb; + struct hlist_head *list; struct hlist_node *node; unsigned int h; for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + list = &fib_table_hash_ns(ns)[h]; + hlist_for_each_entry(tb, node, list, tb_hlist) flushed 
+= tb->tb_flush(tb); } @@ -117,6 +127,11 @@ static void fib_flush(void) rt_cache_flush(-1); } +static inline void fib_flush(void) +{ + fib_flush_ns(current_net_ns); +} + /* * Find the first device with a given source address. */ @@ -125,14 +140,15 @@ struct net_device * ip_dev_find(__be32 a { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; + struct fib_table *tb; struct net_device *dev = NULL; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - if (!ip_fib_local_table || - ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res)) + tb = ip_fib_local_table_ns(); + if (!tb || tb->tb_lookup(tb, &fl, &res)) return NULL; if (res.type != RTN_LOCAL) goto out; @@ -149,6 +165,7 @@ unsigned inet_addr_type(__be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; + struct fib_table *tb; unsigned ret = RTN_BROADCAST; if (ZERONET(addr) || BADCLASS(addr)) @@ -160,10 +177,10 @@ unsigned inet_addr_type(__be32 addr) res.r = NULL; #endif - if (ip_fib_local_table) { + tb = ip_fib_local_table_ns(); + if (tb) { ret = RTN_UNICAST; - if (!ip_fib_local_table->tb_lookup(ip_fib_local_table, - &fl, &res)) { + if (!tb->tb_lookup(tb, &fl, &res)) { ret = res.type; fib_res_put(&res); } @@ -582,6 +599,7 @@ int inet_dump_fib(struct sk_buff *skb, s unsigned int h, s_h; unsigned int e = 0, s_e; struct fib_table *tb; + struct hlist_head *list; struct hlist_node *node; int dumped = 0; @@ -594,7 +612,8 @@ int inet_dump_fib(struct sk_buff *skb, s for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + list = &fib_table_hash_current()[h]; + hlist_for_each_entry(tb, node, list, tb_hlist) { if (e < s_e) goto next; if (dumped) @@ -896,25 +915,91 @@ static struct notifier_block fib_netdev_ .notifier_call =fib_netdev_event, }; -void __init ip_fib_init(void) +#ifdef CONFIG_NET_NS +int inline ip_fib_struct_init(void) +{ + struct hlist_head *tables; + 
unsigned int i; + + tables = kmalloc(FIB_TABLE_HASHSZ * sizeof(*tables), GFP_KERNEL); + if (tables == NULL) + return -ENOMEM; + for (i = 0; i < FIB_TABLE_HASHSZ; i++) + INIT_HLIST_HEAD(&tables[i]); + fib_table_hash_current() = tables; + +#ifdef CONFIG_IP_FIB_TRIE + current_net_ns->fib4_trie_last_dflt = -1; +#endif +#ifndef CONFIG_IP_MULTIPLE_TABLES + ip_fib_local_table_ns() = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table_ns()->tb_hlist, + &fib_table_hash_current()[0]); + ip_fib_main_table_ns() = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table_ns()->tb_hlist, + &fib_table_hash_current()[0]); +#else + if (fib4_rules_init()) { + kfree(tables); + fib_table_hash_current() = NULL; + return -ENOMEM; + } +#endif + return 0; +} + +#else /* !defined(CONFIG_NET_NS) */ + +int inline ip_fib_struct_init(void) { unsigned int i; for (i = 0; i < FIB_TABLE_HASHSZ; i++) - INIT_HLIST_HEAD(&fib_table_hash[i]); + INIT_HLIST_HEAD(&fib_table_hash_static[i]); #ifndef CONFIG_IP_MULTIPLE_TABLES - ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); - ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); - hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); + ip_fib_local_table_ns() = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table_ns()->tb_hlist, + &fib_table_hash_current()[0]); + ip_fib_main_table_ns() = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table_ns()->tb_hlist, + &fib_table_hash_current()[0]); + return 0; #else - fib4_rules_init(); + return fib4_rules_init(); #endif +} +#endif /* CONFIG_NET_NS */ + +void __init ip_fib_init(void) +{ + ip_fib_struct_init(); register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); nl_fib_lookup_init(); } +#ifdef CONFIG_NET_NS +void ip_fib_struct_cleanup(struct net_namespace *ns) +{ + rtnl_lock(); +#ifdef CONFIG_IP_MULTIPLE_TABLES + 
fib4_rules_cleanup(ns); +#endif + /* + * FIB should already be empty since there is no netdevice, + * but clear it anyway + */ + fib_flush_ns(ns); + rt_cache_flush(0); +#ifdef CONFIG_IP_MULTIPLE_TABLES + kfree(ns->fib4_tables); + ns->fib4_tables = NULL; +#endif + fib_hashtable_destroy(ns); + rtnl_unlock(); +} +#endif /* CONFIG_NET_NS */ + EXPORT_SYMBOL(inet_addr_type); EXPORT_SYMBOL(ip_dev_find); --- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv4/fib_hash.c +++ linux-2.6.20-rc4-mm1.net_ns/net/ipv4/fib_hash.c @@ -629,7 +629,9 @@ static int fn_flush_list(struct fn_zone list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) { struct fib_info *fi = fa->fa_info; - if (fi && (fi->fib_flags&RTNH_F_DEAD)) { + if (fi == NULL) + continue; + if (fi->fib_flags&RTNH_F_DEAD) { write_lock_bh(&fib_hash_lock); list_del(&fa->fa_list); if (list_empty(&f->fn_alias)) { @@ -757,7 +759,7 @@ static int fn_hash_dump(struct fib_table return skb->len; } -#ifdef CONFIG_IP_MULTIPLE_TABLES +#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_NET_NS) struct fib_table * fib_hash_init(u32 id) #else struct fib_table * __init fib_hash_init(u32 id) @@ -810,7 +812,7 @@ struct fib_iter_state { static struct fib_alias *fib_get_first(struct seq_file *seq) { struct fib_iter_state *iter = seq->private; - struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data; + struct fn_hash *table = (struct fn_hash *) ip_fib_main_table_ns()->tb_data; iter->bucket = 0; iter->hash_head = NULL; @@ -949,7 +951,7 @@ static void *fib_seq_start(struct seq_fi void *v = NULL; read_lock(&fib_hash_lock); - if (ip_fib_main_table) + if (ip_fib_main_table_ns()) v = *pos ? 
fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; return v; } --- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv4/fib_rules.c +++ linux-2.6.20-rc4-mm1.net_ns/net/ipv4/fib_rules.c @@ -32,7 +32,7 @@ #include <net/ip_fib.h> #include <net/fib_rules.h> -static struct fib_rules_ops fib4_rules_ops; +static struct fib_rules_ops fib4_rules_ops_static; struct fib4_rule { @@ -76,7 +76,12 @@ static struct fib4_rule local_rule = { }, }; -static LIST_HEAD(fib4_rules); +#ifndef CONFIG_NET_NS +static LIST_HEAD(fib4_rules_static); +#define fib4_rules_ops_ns() fib4_rules_ops_static +#else +#define fib4_rules_ops_ns() (*current_net_ns->fib4_rules_ops) +#endif #ifdef CONFIG_NET_CLS_ROUTE u32 fib_rules_tclass(struct fib_result *res) @@ -92,7 +97,7 @@ int fib_lookup(struct flowi *flp, struct }; int err; - err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + err = fib_rules_lookup(&fib4_rules_ops_ns(), flp, 0, &arg); res->r = arg.rule; return err; @@ -285,11 +290,13 @@ int fib4_rules_dump(struct sk_buff *skb, static u32 fib4_rule_default_pref(void) { struct list_head *pos; + struct list_head *fib4_rules; struct fib_rule *rule; - if (!list_empty(&fib4_rules)) { - pos = fib4_rules.next; - if (pos->next != &fib4_rules) { + fib4_rules = fib4_rules_ops_ns().rules_list; + if (!list_empty(fib4_rules)) { + pos = fib4_rules->next; + if (pos->next != fib4_rules) { rule = list_entry(pos->next, struct fib_rule, list); if (rule->pref) return rule->pref - 1; @@ -306,7 +313,7 @@ static size_t fib4_rule_nlmsg_payload(st + nla_total_size(4); /* flow */ } -static struct fib_rules_ops fib4_rules_ops = { +static struct fib_rules_ops fib4_rules_ops_static = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .action = fib4_rule_action, @@ -318,15 +325,68 @@ static struct fib_rules_ops fib4_rules_o .nlmsg_payload = fib4_rule_nlmsg_payload, .nlgroup = RTNLGRP_IPV4_RULE, .policy = fib4_rule_policy, - .rules_list = &fib4_rules, .owner = THIS_MODULE, }; -void __init fib4_rules_init(void) +#ifndef CONFIG_NET_NS 
+ +int fib4_rules_init(void) +{ + fib4_rules_ops_static.rules_list = &fib4_rules_static, + list_add_tail(&local_rule.common.list, &fib4_rules_static); + list_add_tail(&main_rule.common.list, &fib4_rules_static); + list_add_tail(&default_rule.common.list, &fib4_rules_static); + fib_rules_register(&fib4_rules_ops_static); + return 0; +} + +#else + +static int fib4_rule_create(struct fib4_rule *orig, struct list_head *head) +{ + struct fib4_rule *p; + + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) + return -1; + memcpy(p, orig, sizeof(*p)); + list_add_tail_rcu(&p->common.list, head); + return 0; +} + +int fib4_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib4_rules); - list_add_tail(&main_rule.common.list, &fib4_rules); - list_add_tail(&default_rule.common.list, &fib4_rules); + struct fib_rules_ops *ops; + struct list_head *rules; + + ops = kmalloc(sizeof(*ops) + sizeof(*rules), GFP_KERNEL); + if (ops == NULL) + goto out; + memcpy(ops, &fib4_rules_ops_static, sizeof(*ops)); + rules = (struct list_head *)(ops + 1); + INIT_LIST_HEAD(rules); + ops->rules_list = rules; + current_net_ns->fib4_rules_ops = ops; + + fib_rules_register(ops); + + if (fib4_rule_create(&local_rule, rules) || + fib4_rule_create(&main_rule, rules) || + fib4_rule_create(&default_rule, rules)) + goto out_rule; + + return 0; - fib_rules_register(&fib4_rules_ops); +out_rule: + fib_rules_unregister(ops); /* list cleanup is inside */ + kfree(ops); +out: + return -ENOMEM; } + +void fib4_rules_cleanup(struct net_namespace *ns) +{ + fib_rules_unregister(ns->fib4_rules_ops); +} + +#endif --- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv4/fib_semantics.c +++ linux-2.6.20-rc4-mm1.net_ns/net/ipv4/fib_semantics.c @@ -51,10 +51,21 @@ #define FSprintk(a...) 
static DEFINE_SPINLOCK(fib_info_lock); -static struct hlist_head *fib_info_hash; -static struct hlist_head *fib_info_laddrhash; -static unsigned int fib_hash_size; -static unsigned int fib_info_cnt; +#ifndef CONFIG_NET_NS +static struct hlist_head *fib_info_hash_static; +static struct hlist_head *fib_info_laddrhash_static; +static unsigned int fib_hash_size_static; +static unsigned int fib_info_cnt_static; +#define fib_info_hash(ns) fib_info_hash_static +#define fib_info_laddrhash(ns) fib_info_laddrhash_static +#define fib_hash_size(ns) fib_hash_size_static +#define fib_info_cnt(ns) fib_info_cnt_static +#else +#define fib_info_hash(ns) ((ns)->fib4_hash) +#define fib_info_laddrhash(ns) ((ns)->fib4_laddrhash) +#define fib_hash_size(ns) ((ns)->fib4_hash_size) +#define fib_info_cnt(ns) ((ns)->fib4_info_cnt) +#endif #define DEVINDEX_HASHBITS 8 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) @@ -154,7 +165,7 @@ void free_fib_info(struct fib_info *fi) dev_put(nh->nh_dev); nh->nh_dev = NULL; } endfor_nexthops(fi); - fib_info_cnt--; + fib_info_cnt(current_net_ns)--; kfree(fi); } @@ -197,9 +208,10 @@ static __inline__ int nh_comp(const stru return 0; } -static inline unsigned int fib_info_hashfn(const struct fib_info *fi) +static inline unsigned int fib_info_hashfn(const struct fib_info *fi, + struct net_namespace *ns) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_hash_size(ns) - 1); unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; @@ -209,15 +221,16 @@ static inline unsigned int fib_info_hash return (val ^ (val >> 7) ^ (val >> 12)) & mask; } -static struct fib_info *fib_find_info(const struct fib_info *nfi) +static noinline struct fib_info *fib_find_info(const struct fib_info *nfi) { + struct net_namespace *ns = current_net_ns; struct hlist_head *head; struct hlist_node *node; struct fib_info *fi; unsigned int hash; - hash = fib_info_hashfn(nfi); - head = &fib_info_hash[hash]; + hash = fib_info_hashfn(nfi, ns); + head = 
&fib_info_hash(ns)[hash]; hlist_for_each_entry(fi, node, head, fib_hash) { if (fi->fib_nhs != nfi->fib_nhs) @@ -237,11 +250,13 @@ static struct fib_info *fib_find_info(co static inline unsigned int fib_devindex_hashfn(unsigned int val) { - unsigned int mask = DEVINDEX_HASHSIZE - 1; + unsigned int r, mask = DEVINDEX_HASHSIZE - 1; - return (val ^ + r = val ^ (val >> DEVINDEX_HASHBITS) ^ - (val >> (DEVINDEX_HASHBITS * 2))) & mask; + (val >> (DEVINDEX_HASHBITS * 2)); + r ^= net_ns_hash(current_net_ns); + return r & mask; } /* Check, that the gateway is already configured. @@ -592,9 +607,10 @@ out: return 0; } -static inline unsigned int fib_laddr_hashfn(__be32 val) +static inline unsigned int fib_laddr_hashfn(__be32 val, + struct net_namespace *ns) { - unsigned int mask = (fib_hash_size - 1); + unsigned int mask = (fib_hash_size(ns) - 1); return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } @@ -623,17 +639,18 @@ static void fib_hash_move(struct hlist_h struct hlist_head *new_laddrhash, unsigned int new_size) { + struct net_namespace *ns = current_net_ns; struct hlist_head *old_info_hash, *old_laddrhash; - unsigned int old_size = fib_hash_size; + unsigned int old_size = fib_hash_size(ns); unsigned int i, bytes; spin_lock_bh(&fib_info_lock); - old_info_hash = fib_info_hash; - old_laddrhash = fib_info_laddrhash; - fib_hash_size = new_size; + old_info_hash = fib_info_hash(ns); + old_laddrhash = fib_info_laddrhash(ns); + fib_hash_size(ns) = new_size; for (i = 0; i < old_size; i++) { - struct hlist_head *head = &fib_info_hash[i]; + struct hlist_head *head = &old_info_hash[i]; struct hlist_node *node, *n; struct fib_info *fi; @@ -643,15 +660,15 @@ static void fib_hash_move(struct hlist_h hlist_del(&fi->fib_hash); - new_hash = fib_info_hashfn(fi); + new_hash = fib_info_hashfn(fi, ns); dest = &new_info_hash[new_hash]; hlist_add_head(&fi->fib_hash, dest); } } - fib_info_hash = new_info_hash; + fib_info_hash(ns) = new_info_hash; for (i = 0; i < 
old_size; i++) { - struct hlist_head *lhead = &fib_info_laddrhash[i]; + struct hlist_head *lhead = &old_laddrhash[i]; struct hlist_node *node, *n; struct fib_info *fi; @@ -661,12 +678,12 @@ static void fib_hash_move(struct hlist_h hlist_del(&fi->fib_lhash); - new_hash = fib_laddr_hashfn(fi->fib_prefsrc); + new_hash = fib_laddr_hashfn(fi->fib_prefsrc, ns); ldest = &new_laddrhash[new_hash]; hlist_add_head(&fi->fib_lhash, ldest); } } - fib_info_laddrhash = new_laddrhash; + fib_info_laddrhash(ns) = new_laddrhash; spin_unlock_bh(&fib_info_lock); @@ -675,9 +692,23 @@ static void fib_hash_move(struct hlist_h fib_hash_free(old_laddrhash, bytes); } +#ifdef CONFIG_NET_NS +void fib_hashtable_destroy(struct net_namespace *ns) +{ + unsigned int bytes; + + bytes = ns->fib4_hash_size * sizeof(struct hlist_head *); + fib_hash_free(ns->fib4_hash, bytes); + ns->fib4_hash = NULL; + fib_hash_free(ns->fib4_laddrhash, bytes); + ns->fib4_laddrhash = NULL; +} +#endif + struct fib_info *fib_create_info(struct fib_config *cfg) { int err; + struct net_namespace *ns = current_net_ns; struct fib_info *fi = NULL; struct fib_info *ofi; int nhs = 1; @@ -702,8 +733,8 @@ struct fib_info *fib_create_info(struct #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_hash_size) { - unsigned int new_size = fib_hash_size << 1; + if (fib_info_cnt(ns) >= fib_hash_size(ns)) { + unsigned int new_size = fib_hash_size(ns) << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; unsigned int bytes; @@ -723,14 +754,14 @@ struct fib_info *fib_create_info(struct fib_hash_move(new_info_hash, new_laddrhash, new_size); } - if (!fib_hash_size) + if (!fib_hash_size(ns)) goto failure; } fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (fi == NULL) goto failure; - fib_info_cnt++; + fib_info_cnt(ns)++; fi->fib_protocol = cfg->fc_protocol; fi->fib_flags = cfg->fc_flags; @@ -837,11 +868,11 @@ link_it: atomic_inc(&fi->fib_clntref); spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, - 
&fib_info_hash[fib_info_hashfn(fi)]); + &fib_info_hash(ns)[fib_info_hashfn(fi, ns)]); if (fi->fib_prefsrc) { struct hlist_head *head; - head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; + head = &fib_info_laddrhash(ns)[fib_laddr_hashfn(fi->fib_prefsrc, ns)]; hlist_add_head(&fi->fib_lhash, head); } change_nexthops(fi) { @@ -1043,15 +1074,16 @@ nla_put_failure: int fib_sync_down(__be32 local, struct net_device *dev, int force) { + struct net_namespace *ns = current_net_ns; int ret = 0; int scope = RT_SCOPE_NOWHERE; if (force) scope = -1; - if (local && fib_info_laddrhash) { - unsigned int hash = fib_laddr_hashfn(local); - struct hlist_head *head = &fib_info_laddrhash[hash]; + if (local && fib_info_laddrhash(ns)) { + unsigned int hash = fib_laddr_hashfn(local, ns); + struct hlist_head *head = &fib_info_laddrhash(ns)[hash]; struct hlist_node *node; struct fib_info *fi; --- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv4/fib_trie.c +++ linux-2.6.20-rc4-mm1.net_ns/net/ipv4/fib_trie.c @@ -172,7 +172,17 @@ static struct tnode *halve(struct trie * static void tnode_free(struct tnode *tn); static struct kmem_cache *fn_alias_kmem __read_mostly; -static struct trie *trie_local = NULL, *trie_main = NULL; +#ifndef CONFIG_NET_NS +static struct trie *trie_local_static = NULL, *trie_main_static = NULL; +static int trie_last_dflt_static = -1; +#define trie_local trie_local_static +#define trie_main trie_main_static +#define trie_last_dflt trie_last_dflt_static +#else +#define trie_local ((struct trie *)ip_fib_local_table_ns()->tb_data) +#define trie_main ((struct trie *)ip_fib_main_table_ns()->tb_data) +#define trie_last_dflt (current_net_ns->fib4_trie_last_dflt) +#endif /* rcu_read_lock needs to be hold by caller from readside */ @@ -1743,8 +1753,6 @@ static int fn_trie_flush(struct fib_tabl return found; } -static int trie_last_dflt = -1; - static void fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) { @@ -1929,7 +1937,7 @@ out: 
/* Fix more generic FIB names for init later */ -#ifdef CONFIG_IP_MULTIPLE_TABLES +#if defined(CONFIG_IP_MULTIPLE_TABLES) || defined(CONFIG_NET_NS) struct fib_table * fib_hash_init(u32 id) #else struct fib_table * __init fib_hash_init(u32 id) @@ -1962,10 +1970,12 @@ struct fib_table * __init fib_hash_init( trie_init(t); +#ifndef CONFIG_NET_NS if (id == RT_TABLE_LOCAL) trie_local = t; else if (id == RT_TABLE_MAIN) trie_main = t; +#endif if (id == RT_TABLE_LOCAL) printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); --- linux-2.6.20-rc4-mm1.net_ns.orig/net/ipv4/route.c +++ linux-2.6.20-rc4-mm1.net_ns/net/ipv4/route.c @@ -269,6 +269,7 @@ struct rt_cache_iter_state { int bucket; }; +static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r); static struct rtable *rt_cache_get_first(struct seq_file *seq) { struct rtable *r = NULL; @@ -281,21 +282,28 @@ static struct rtable *rt_cache_get_first break; rcu_read_unlock_bh(); } + if (r && !net_ns_match(r->fl.net_ns, current_net_ns)) + r = rt_cache_get_next(seq, r); return r; } static struct rtable *rt_cache_get_next(struct seq_file *seq, struct rtable *r) { struct rt_cache_iter_state *st = rcu_dereference(seq->private); + struct net_namespace *ns = current_net_ns; +next: r = r->u.rt_next; while (!r) { rcu_read_unlock_bh(); if (--st->bucket < 0) - break; + goto out; rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } + if (!net_ns_match(r->fl.net_ns, ns)) + goto next; +out: return r; } @@ -571,7 +579,11 @@ static inline int compare_keys(struct fl (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | (fl1->oif ^ fl2->oif) | +#ifdef CONFIG_NET_NS + (fl1->iif ^ fl2->iif) | (fl1->net_ns != fl2->net_ns)) == 0; +#else (fl1->iif ^ fl2->iif)) == 0; +#endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED @@ -1133,6 +1145,7 @@ void ip_rt_redirect(__be32 old_gw, __be3 struct rtable *rth, **rthp; __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; + struct net_namespace *ns = 
current_net_ns; struct netevent_redirect netevent; if (!in_dev) @@ -1164,6 +1177,7 @@ void ip_rt_redirect(__be32 old_gw, __be3 if (rth->fl.fl4_dst != daddr || rth->fl.fl4_src != skeys[i] || + !net_ns_match(rth->fl.net_ns, ns) || rth->fl.oif != ikeys[k] || rth->fl.iif != 0) { rthp = &rth->u.rt_next; @@ -1653,6 +1667,9 @@ static int ip_route_input_mc(struct sk_b dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; +#ifdef CONFIG_NET_NS + rth->fl.net_ns = current_net_ns; +#endif rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_type = RTN_MULTICAST; @@ -1795,6 +1812,9 @@ static inline int __mkroute_input(struct dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; +#ifdef CONFIG_NET_NS + rth->fl.net_ns = current_net_ns; +#endif rth->rt_spec_dst= spec_dst; rth->u.dst.input = ip_forward; @@ -2037,6 +2057,9 @@ local_input: rth->u.dst.dev = &loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); +#ifdef CONFIG_NET_NS + rth->fl.net_ns = current_net_ns; +#endif rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->u.dst.input= ip_local_deliver; @@ -2092,6 +2115,7 @@ int ip_route_input(struct sk_buff *skb, struct rtable * rth; unsigned hash; int iif = dev->ifindex; + struct net_namespace *ns = current_net_ns; tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif); @@ -2101,6 +2125,7 @@ int ip_route_input(struct sk_buff *skb, rth = rcu_dereference(rth->u.rt_next)) { if (rth->fl.fl4_dst == daddr && rth->fl.fl4_src == saddr && + net_ns_match(rth->fl.net_ns, ns) && rth->fl.iif == iif && rth->fl.oif == 0 && rth->fl.mark == skb->mark && @@ -2236,6 +2261,9 @@ static inline int __mkroute_output(struc rth->u.dst.dev = dev_out; dev_hold(dev_out); rth->idev = in_dev_get(dev_out); +#ifdef CONFIG_NET_NS + rth->fl.net_ns = current_net_ns; +#endif rth->rt_gateway = fl->fl4_dst; rth->rt_spec_dst= fl->fl4_src; @@ -2557,6 +2585,7 @@ int __ip_route_output_key(struct rtable { unsigned hash; 
struct rtable *rth; + struct net_namespace *ns = current_net_ns; hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif); @@ -2565,6 +2594,7 @@ int __ip_route_output_key(struct rtable rth = rcu_dereference(rth->u.rt_next)) { if (rth->fl.fl4_dst == flp->fl4_dst && rth->fl.fl4_src == flp->fl4_src && + net_ns_match(rth->fl.net_ns, ns) && rth->fl.iif == 0 && rth->fl.oif == flp->oif && rth->fl.mark == flp->mark && - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html