On Mon, 2018-02-26 at 22:42 +0100, Paolo Abeni wrote:
> This changeset moves ipvlan address under RCU protection, using
> a per ipvlan device spinlock to protect list mutation and RCU
> read access to protect list traversal.
> 
> Also explicitly use RCU read lock to traverse the per port
> ipvlans list, so that we can now perform a full address lookup
> without asserting the RTNL lock.
> 
> Overall this allows the ipvlan driver to check fully for duplicate
> addresses - before this commit ipv6 addresses assigned by autoconf
> via prefix delegation where accepted without any check - and avoid
> the following rntl assertion failure still in the same code path:
> 
>  RTNL: assertion failed at drivers/net/ipvlan/ipvlan_core.c (124)
>  WARNING: CPU: 15 PID: 0 at drivers/net/ipvlan/ipvlan_core.c:124 
> ipvlan_addr_busy+0x97/0xa0 [ipvlan]
>  Modules linked in: ipvlan(E) ixgbe
>  CPU: 15 PID: 0 Comm: swapper/15 Tainted: G            E    
> 4.16.0-rc2.ipvlan+ #1782
>  Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.1.7 06/16/2016
>  RIP: 0010:ipvlan_addr_busy+0x97/0xa0 [ipvlan]
>  RSP: 0018:ffff881ff9e03768 EFLAGS: 00010286
>  RAX: 0000000000000000 RBX: ffff881fdf2a9000 RCX: 0000000000000000
>  RDX: 0000000000000001 RSI: 00000000000000f6 RDI: 0000000000000300
>  RBP: ffff881fdf2a8000 R08: 0000000000000000 R09: 0000000000000000
>  R10: 0000000000000001 R11: ffff881ff9e034c0 R12: ffff881fe07bcc00
>  R13: 0000000000000001 R14: ffffffffa02002b0 R15: 0000000000000001
>  FS:  0000000000000000(0000) GS:ffff881ff9e00000(0000) knlGS:0000000000000000
>  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>  CR2: 00007fc5c1a4f248 CR3: 000000207e012005 CR4: 00000000001606e0
>  Call Trace:
>   <IRQ>
>   ipvlan_addr6_event+0x6c/0xd0 [ipvlan]
>   notifier_call_chain+0x49/0x90
>   atomic_notifier_call_chain+0x6a/0x100
>   ipv6_add_addr+0x5f9/0x720
>   addrconf_prefix_rcv_add_addr+0x244/0x3c0
>   addrconf_prefix_rcv+0x2f3/0x790
>   ndisc_router_discovery+0x633/0xb70
>   ndisc_rcv+0x155/0x180
>   icmpv6_rcv+0x4ac/0x5f0
>   ip6_input_finish+0x138/0x6a0
>   ip6_input+0x41/0x1f0
>   ipv6_rcv+0x4db/0x8d0
>   __netif_receive_skb_core+0x3d5/0xe40
>   netif_receive_skb_internal+0x89/0x370
>   napi_gro_receive+0x14f/0x1e0
>   ixgbe_clean_rx_irq+0x4ce/0x1020 [ixgbe]
>   ixgbe_poll+0x31a/0x7a0 [ixgbe]
>   net_rx_action+0x296/0x4f0
>   __do_softirq+0xcf/0x4f5
>   irq_exit+0xf5/0x110
>   do_IRQ+0x62/0x110
>   common_interrupt+0x91/0x91
>   </IRQ>
> 
> Fixes: e9997c2938b2 ("ipvlan: fix check for IP addresses in control path")
> Signed-off-by: Paolo Abeni <pab...@redhat.com>
> ---
>  drivers/net/ipvlan/ipvlan.h      |  1 +
>  drivers/net/ipvlan/ipvlan_core.c | 30 +++++++++++++--------
>  drivers/net/ipvlan/ipvlan_main.c | 56 
> ++++++++++++++++++++++++++--------------
>  3 files changed, 56 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
> index 5166575a164d..a115f12bf130 100644
> --- a/drivers/net/ipvlan/ipvlan.h
> +++ b/drivers/net/ipvlan/ipvlan.h
> @@ -74,6 +74,7 @@ struct ipvl_dev {
>       DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
>       netdev_features_t       sfeatures;
>       u32                     msg_enable;
> +     spinlock_t              addrs_lock;
>  };
>  
>  struct ipvl_addr {
> diff --git a/drivers/net/ipvlan/ipvlan_core.c 
> b/drivers/net/ipvlan/ipvlan_core.c
> index 1b5dc200b573..c1781e698b0b 100644
> --- a/drivers/net/ipvlan/ipvlan_core.c
> +++ b/drivers/net/ipvlan/ipvlan_core.c
> @@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
>  struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
>                                  const void *iaddr, bool is_v6)
>  {
> -     struct ipvl_addr *addr;
> +     struct ipvl_addr *addr, *ret = NULL;
>  
> -     list_for_each_entry(addr, &ipvlan->addrs, anode)
> -             if (addr_equal(is_v6, addr, iaddr))
> -                     return addr;
> -     return NULL;
> +     rcu_read_lock();
> +     list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
> +             if (addr_equal(is_v6, addr, iaddr)) {
> +                     ret = addr;
> +                     break;
> +             }
> +     }
> +     rcu_read_unlock();
> +     return ret;
>  }
>  
>  bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
>  {
>       struct ipvl_dev *ipvlan;
> +     bool ret = false;
>  
> -     ASSERT_RTNL();
> -
> -     list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
> -             if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
> -                     return true;
> +     rcu_read_lock();
> +     list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
> +             if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
> +                     ret = true;
> +                     break;
> +             }
>       }
> -     return false;
> +     rcu_read_unlock();
> +     return ret;
>  }
>  
>  static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, 
> int *type)
> diff --git a/drivers/net/ipvlan/ipvlan_main.c 
> b/drivers/net/ipvlan/ipvlan_main.c
> index 67c91ceda979..45b50c778204 100644
> --- a/drivers/net/ipvlan/ipvlan_main.c
> +++ b/drivers/net/ipvlan/ipvlan_main.c
> @@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
>       else
>               dev->flags &= ~IFF_NOARP;
>  
> -     list_for_each_entry(addr, &ipvlan->addrs, anode)
> +     rcu_read_lock();
> +     list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
>               ipvlan_ht_addr_add(ipvlan, addr);
> +     rcu_read_unlock();
>  
>       return dev_uc_add(phy_dev, phy_dev->dev_addr);
>  }
> @@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
>  
>       dev_uc_del(phy_dev, phy_dev->dev_addr);
>  
> -     list_for_each_entry(addr, &ipvlan->addrs, anode)
> +     rcu_read_lock();
> +     list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
>               ipvlan_ht_addr_del(addr);
> +     rcu_read_unlock();
>  
>       return 0;
>  }
> @@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct 
> net_device *dev,
>       ipvlan->sfeatures = IPVLAN_FEATURES;
>       ipvlan_adjust_mtu(ipvlan, phy_dev);
>       INIT_LIST_HEAD(&ipvlan->addrs);
> +     spin_lock_init(&ipvlan->addrs_lock);
>  
>       /* TODO Probably put random address here to be presented to the
>        * world but keep using the physical-dev address for the outgoing
> @@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct 
> list_head *head)
>       struct ipvl_dev *ipvlan = netdev_priv(dev);
>       struct ipvl_addr *addr, *next;
>  
> +     spin_lock_bh(&ipvlan->addrs_lock);
>       list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
>               ipvlan_ht_addr_del(addr);
> -             list_del(&addr->anode);
> +             list_del_rcu(&addr->anode);
>               kfree_rcu(addr, rcu);
>       }
> +     spin_unlock_bh(&ipvlan->addrs_lock);
>  
>       ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
>       list_del_rcu(&ipvlan->pnode);
> @@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block 
> *unused,
>               if (dev->reg_state != NETREG_UNREGISTERING)
>                       break;
>  
> -             list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
> -                                      pnode)
> +             list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
>                       ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
>                                                           &lst_kill);
>               unregister_netdevice_many(&lst_kill);
> @@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block 
> *unused,
>       return NOTIFY_DONE;
>  }
>  
> +/* the caller must held the addrs lock */
>  static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
>  {
>       struct ipvl_addr *addr;
> @@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void 
> *iaddr, bool is_v6)
>               addr->atype = IPVL_IPV6;
>  #endif
>       }
> -     list_add_tail(&addr->anode, &ipvlan->addrs);
> +
> +     list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
>  
>       /* If the interface is not up, the address will be added to the hash
>        * list by ipvlan_open.
> @@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, 
> void *iaddr, bool is_v6)
>  {
>       struct ipvl_addr *addr;
>  
> +     spin_lock_bh(&ipvlan->addrs_lock);
>       addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
> -     if (!addr)
> +     if (!addr) {
> +             spin_unlock_bh(&ipvlan->addrs_lock);
>               return;
> +     }
>  
>       ipvlan_ht_addr_del(addr);
> -     list_del(&addr->anode);
> +     list_del_rcu(&addr->anode);
> +     spin_unlock_bh(&ipvlan->addrs_lock);
>       kfree_rcu(addr, rcu);
> -
> -     return;
>  }
>  
>  static bool ipvlan_is_valid_dev(const struct net_device *dev)
> @@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device 
> *dev)
>  #if IS_ENABLED(CONFIG_IPV6)
>  static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
> *ip6_addr)
>  {
> -     if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
> +     int ret = -EINVAL;
> +
> +     spin_lock_bh(&ipvlan->addrs_lock);
> +     if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
>               netif_err(ipvlan, ifup, ipvlan->dev,
>                         "Failed to add IPv6=%pI6c addr for %s intf\n",
>                         ip6_addr, ipvlan->dev->name);
> -             return -EINVAL;
> -     }
> -
> -     return ipvlan_add_addr(ipvlan, ip6_addr, true);
> +     else
> +             ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
> +     spin_unlock_bh(&ipvlan->addrs_lock);
> +     return ret;
>  }
>  
>  static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr 
> *ip6_addr)
> @@ -922,14 +935,17 @@ static int ipvlan_addr6_validator_event(struct 
> notifier_block *unused,
>  
>  static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr 
> *ip4_addr)
>  {
> -     if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
> +     int ret = -EINVAL;
> +
> +     spin_lock_bh(&ipvlan->addrs_lock);
> +     if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
>               netif_err(ipvlan, ifup, ipvlan->dev,
>                         "Failed to add IPv4=%pI4 on %s intf.\n",
>                         ip4_addr, ipvlan->dev->name);
> -             return -EINVAL;
> -     }
> -
> -     return ipvlan_add_addr(ipvlan, ip4_addr, false);
> +     else
> +             ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
> +     spin_unlock_bh(&ipvlan->addrs_lock);
> +     return ret;
>  }
>  
>  static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr 
> *ip4_addr)

Oops... I forgot to drop the now unneeded 'if (in_softirq())' ... in
ipvlan_addr6_validator_event(). I'll send a v2.

Also I explicitly targeted net-next, despite the 'Fixes' tag, as this
feels a little too invasive for a -net patch, it mostly
extends/completes an existing feature - ipv6 duplicate addresses
checking - and a net-next patch will avoid several conflict on the next
 merge from -net.

Please advise if you prefer otherwise, thanks!

Paolo

Reply via email to