Noticed during some testing: the command

  # arp -s 62.2.0.1 a:b:c:d:e:f dev eth2

adds an entry like the following (as listed by "arp -an"):

  ? (62.2.0.1) at 0a:0b:0c:0d:0e:0f [ether] PERM on eth2

but the symmetric deletion command

  # arp -i eth2 -d 62.2.0.1

does not remove the PERM entry from the table, and instead leaves behind

  ? (62.2.0.1) at <incomplete> on eth2
The reason is that the arp_tbl itself holds a reference on the entry (neigh_alloc starts off the entry with a refcnt of 1), so the neigh_release() call from arp_invalidate() will (at best) just decrement the refcnt to 1, but will never actually free the entry from the table. To fix this, we need to do something like neigh_forced_gc: if the refcnt is 1 (i.e., only the table's ref remains), remove the entry from the table and free it. We may need something symmetric for IPv6; I was going to look into that after getting some feedback on this RFC patch. Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com> --- include/net/neighbour.h | 1 + net/core/neighbour.c | 42 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/arp.c | 1 + 3 files changed, 44 insertions(+), 0 deletions(-) diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e4dd3a2..639b675 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -317,6 +317,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl); void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev); int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev); int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d274f81..0a09f6f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -117,6 +117,48 @@ unsigned long neigh_rand_reach_time(unsigned long base) } EXPORT_SYMBOL(neigh_rand_reach_time); +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl) +{ + struct neigh_hash_table *nht; + void *pkey = ndel->primary_key; + u32 hash_val; + struct neighbour *n; + struct neighbour __rcu **np; + + write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + 
lockdep_is_held(&tbl->lock)); + hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd); + hash_val = hash_val >> (32 - nht->hash_shift); + + np = &nht->hash_buckets[hash_val]; + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { + write_lock(&n->lock); + if (n == ndel) { + bool retval = false; + + if (atomic_read(&n->refcnt) == 1) { + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); + n->dead = 1; + retval = true; + } + write_unlock(&n->lock); + if (retval) + neigh_cleanup_and_release(n); + write_unlock_bh(&tbl->lock); + return retval; + } + write_unlock(&n->lock); + np = &n->next; + } + + write_unlock_bh(&tbl->lock); + return false; +} +EXPORT_SYMBOL(neigh_remove_one); static int neigh_forced_gc(struct neigh_table *tbl) { diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index e9f3386..5264004 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1120,6 +1120,7 @@ static int arp_invalidate(struct net_device *dev, __be32 ip) NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_ADMIN, 0); neigh_release(neigh); + neigh_remove_one(neigh, &arp_tbl); } return err; -- 1.7.1