commit:     206a5e2746ef7fe6e5960e2af948e1eedef7e208
Author:     Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Wed Aug  3 14:12:37 2022 +0000
Commit:     Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Wed Aug  3 14:12:44 2022 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=206a5e27

Linux patch 5.10.135

Signed-off-by: Alice Ferrazzi <alicef <AT> gentoo.org>

 0000_README               |    4 +
 1134_linux-5.10.135.patch | 2841 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 2845 insertions(+)

diff --git a/0000_README b/0000_README
index 7292c57d..19bd6321 100644
--- a/0000_README
+++ b/0000_README
@@ -579,6 +579,10 @@ Patch:  1133_linux-5.10.134.patch
 From:   http://www.kernel.org
 Desc:   Linux 5.10.134
 
+Patch:  1134_linux-5.10.135.patch
+From:   http://www.kernel.org
+Desc:   Linux 5.10.135
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1134_linux-5.10.135.patch b/1134_linux-5.10.135.patch
new file mode 100644
index 00000000..435afe17
--- /dev/null
+++ b/1134_linux-5.10.135.patch
@@ -0,0 +1,2841 @@
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 1a58c580b2366..8b7c26d090459 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -2873,6 +2873,7 @@
+                                              no_entry_flush [PPC]
+                                              no_uaccess_flush [PPC]
+                                              mmio_stale_data=off [X86]
++                                             retbleed=off [X86]
+ 
+                               Exceptions:
+                                              This does not have any effect on
+@@ -2895,6 +2896,7 @@
+                                              mds=full,nosmt [X86]
+                                              tsx_async_abort=full,nosmt [X86]
+                                              mmio_stale_data=full,nosmt [X86]
++                                             retbleed=auto,nosmt [X86]
+ 
+       mminit_loglevel=
+                       [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
+index 0b1f3235aa773..0158dff638873 100644
+--- a/Documentation/networking/ip-sysctl.rst
++++ b/Documentation/networking/ip-sysctl.rst
+@@ -2629,7 +2629,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max
+       Default: 4K
+ 
+ sctp_wmem  - vector of 3 INTEGERs: min, default, max
+-      Currently this tunable has no effect.
++      Only the first value ("min") is used, "default" and "max" are
++      ignored.
++
++      min: Minimum size of send buffer that can be used by SCTP sockets.
++      It is guaranteed to each SCTP socket (but not association) even
++      under moderate memory pressure.
++
++      Default: 4K
+ 
+ addr_scope_policy - INTEGER
+       Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+diff --git a/Makefile b/Makefile
+index 00dddc2ac804a..5f4dbcb433075 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 10
+-SUBLEVEL = 134
++SUBLEVEL = 135
+ EXTRAVERSION =
+ NAME = Dare mighty things
+ 
+diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h
+index a81dda65c5762..45180a2cc47cb 100644
+--- a/arch/arm/include/asm/dma.h
++++ b/arch/arm/include/asm/dma.h
+@@ -10,7 +10,7 @@
+ #else
+ #define MAX_DMA_ADDRESS       ({ \
+       extern phys_addr_t arm_dma_zone_size; \
+-      arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
++      arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
+               (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
+ #endif
+ 
+diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
+index b99dd8e1c93f1..7ba6cf8261626 100644
+--- a/arch/arm/lib/xor-neon.c
++++ b/arch/arm/lib/xor-neon.c
+@@ -26,8 +26,9 @@ MODULE_LICENSE("GPL");
+  * While older versions of GCC do not generate incorrect code, they fail to
+  * recognize the parallel nature of these functions, and emit plain ARM code,
+  * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
++ *
++ * #warning This code requires at least version 4.6 of GCC
+  */
+-#warning This code requires at least version 4.6 of GCC
+ #endif
+ 
+ #pragma GCC diagnostic ignored "-Wunused-variable"
+diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
+index 2c6e1c6ecbe78..4120c428dc378 100644
+--- a/arch/s390/include/asm/archrandom.h
++++ b/arch/s390/include/asm/archrandom.h
+@@ -2,7 +2,7 @@
+ /*
+  * Kernel interface for the s390 arch_random_* functions
+  *
+- * Copyright IBM Corp. 2017, 2020
++ * Copyright IBM Corp. 2017, 2022
+  *
+  * Author: Harald Freudenberger <[email protected]>
+  *
+@@ -14,6 +14,7 @@
+ #ifdef CONFIG_ARCH_RANDOM
+ 
+ #include <linux/static_key.h>
++#include <linux/preempt.h>
+ #include <linux/atomic.h>
+ #include <asm/cpacf.h>
+ 
+@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v)
+ 
+ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ {
+-      if (static_branch_likely(&s390_arch_random_available)) {
++      if (static_branch_likely(&s390_arch_random_available) &&
++          in_task()) {
+               cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+               atomic64_add(sizeof(*v), &s390_arch_random_counter);
+               return true;
+@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ 
+ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ {
+-      if (static_branch_likely(&s390_arch_random_available)) {
++      if (static_branch_likely(&s390_arch_random_available) &&
++          in_task()) {
+               cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+               atomic64_add(sizeof(*v), &s390_arch_random_counter);
+               return true;
+diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
+index 7896b67dda420..2e5762faf7740 100644
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -1476,6 +1476,7 @@ static void __init spectre_v2_select_mitigation(void)
+        * enable IBRS around firmware calls.
+        */
+       if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
++          boot_cpu_has(X86_FEATURE_IBPB) &&
+           (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+            boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
+ 
+diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
+index a918ca93e4f7d..df5897c90becc 100644
+--- a/drivers/edac/ghes_edac.c
++++ b/drivers/edac/ghes_edac.c
+@@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle)
+ 
+       dmi_memdev_name(handle, &bank, &device);
+ 
+-      /* both strings must be non-zero */
+-      if (bank && *bank && device && *device)
+-              snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
++      /*
++       * Set to a NULL string when both bank and device are zero. In this case,
++       * the label assigned by default will be preserved.
++       */
++      snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
++               (bank && *bank) ? bank : "",
++               (bank && *bank && device && *device) ? " " : "",
++               (device && *device) ? device : "");
+ }
+ 
+ static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
+diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+index 92987daa5e17d..5e72e6cb2f840 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+@@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
+               goto out_free_dma;
+ 
+       for (i = 0; i < npages; i += max) {
+-              args.end = start + (max << PAGE_SHIFT);
++              if (args.start + (max << PAGE_SHIFT) > end)
++                      args.end = end;
++              else
++                      args.end = args.start + (max << PAGE_SHIFT);
++
+               ret = migrate_vma_setup(&args);
+               if (ret)
+                       goto out_free_pfns;
+diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
+index 11d4e3ba9af4c..1dad62ecb8a3a 100644
+--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
+@@ -1907,11 +1907,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+                * non-zero req_queue_pairs says that user requested a new
+                * queue count via ethtool's set_channels, so use this
+                * value for queues distribution across traffic classes
++               * We need at least one queue pair for the interface
++               * to be usable as we see in else statement.
+                */
+               if (vsi->req_queue_pairs > 0)
+                       vsi->num_queue_pairs = vsi->req_queue_pairs;
+               else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+                       vsi->num_queue_pairs = pf->num_lan_msix;
++              else
++                      vsi->num_queue_pairs = 1;
+       }
+ 
+       /* Number of queues per enabled TC */
+diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+index 060897eb9cabe..7f1bf71844bce 100644
+--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
+@@ -652,7 +652,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+               rx_desc = ICE_RX_DESC(rx_ring, i);
+ 
+               if (!(rx_desc->wb.status_error0 &
+-                  cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
++                  (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
++                   cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
+                       continue;
+ 
+               rx_buf = &rx_ring->rx_buf[i];
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index aae79fdd51727..810f2bdb91645 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -5203,10 +5203,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi)
+       if (vsi->netdev) {
+               ice_set_rx_mode(vsi->netdev);
+ 
+-              err = ice_vsi_vlan_setup(vsi);
++              if (vsi->type != ICE_VSI_LB) {
++                      err = ice_vsi_vlan_setup(vsi);
+ 
+-              if (err)
+-                      return err;
++                      if (err)
++                              return err;
++              }
+       }
+       ice_vsi_cfg_dcb_rings(vsi);
+ 
+diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
+index 725b0f38813a9..a2b4e3befa591 100644
+--- a/drivers/net/ethernet/sfc/ptp.c
++++ b/drivers/net/ethernet/sfc/ptp.c
+@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+ 
+       tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
+       if (tx_queue && tx_queue->timestamping) {
++              /* This code invokes normal driver TX code which is always
++               * protected from softirqs when called from generic TX code,
++               * which in turn disables preemption. Look at __dev_queue_xmit
++               * which uses rcu_read_lock_bh disabling preemption for RCU
++               * plus disabling softirqs. We do not need RCU reader
++               * protection here.
++               *
++               * Although it is theoretically safe for current PTP TX/RX code
++               * running without disabling softirqs, there are three good
++               * reasond for doing so:
++               *
++               *      1) The code invoked is mainly implemented for non-PTP
++               *         packets and it is always executed with softirqs
++               *         disabled.
++               *      2) This being a single PTP packet, better to not
++               *         interrupt its processing by softirqs which can lead
++               *         to high latencies.
++               *      3) netdev_xmit_more checks preemption is disabled and
++               *         triggers a BUG_ON if not.
++               */
++              local_bh_disable();
+               efx_enqueue_skb(tx_queue, skb);
++              local_bh_enable();
+       } else {
+               WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+               dev_kfree_skb_any(skb);
+diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
+index 789a124809e3c..70c5905a916b9 100644
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -240,6 +240,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
+ #define DEFAULT_SEND_SCI true
+ #define DEFAULT_ENCRYPT false
+ #define DEFAULT_ENCODING_SA 0
++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
+ 
+ static bool send_sci(const struct macsec_secy *secy)
+ {
+@@ -1694,7 +1695,7 @@ static bool validate_add_rxsa(struct nlattr **attrs)
+               return false;
+ 
+       if (attrs[MACSEC_SA_ATTR_PN] &&
+-          *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
++          nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+               return false;
+ 
+       if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -1750,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+       }
+ 
+       pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
+-      if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
++      if (tb_sa[MACSEC_SA_ATTR_PN] &&
++          nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+               pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
+                         nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
+               rtnl_unlock();
+@@ -1766,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+               if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+                       pr_notice("macsec: nl: add_rxsa: bad salt length: %d != 
%d\n",
+                                 nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+-                                MACSEC_SA_ATTR_SALT);
++                                MACSEC_SALT_LEN);
+                       rtnl_unlock();
+                       return -EINVAL;
+               }
+@@ -1839,7 +1841,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
+       return 0;
+ 
+ cleanup:
+-      kfree(rx_sa);
++      macsec_rxsa_put(rx_sa);
+       rtnl_unlock();
+       return err;
+ }
+@@ -1936,7 +1938,7 @@ static bool validate_add_txsa(struct nlattr **attrs)
+       if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+               return false;
+ 
+-      if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++      if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+               return false;
+ 
+       if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -2008,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+               if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
+                       pr_notice("macsec: nl: add_txsa: bad salt length: %d != 
%d\n",
+                                 nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
+-                                MACSEC_SA_ATTR_SALT);
++                                MACSEC_SALT_LEN);
+                       rtnl_unlock();
+                       return -EINVAL;
+               }
+@@ -2082,7 +2084,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
+ 
+ cleanup:
+       secy->operational = was_operational;
+-      kfree(tx_sa);
++      macsec_txsa_put(tx_sa);
+       rtnl_unlock();
+       return err;
+ }
+@@ -2290,7 +2292,7 @@ static bool validate_upd_sa(struct nlattr **attrs)
+       if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
+               return false;
+ 
+-      if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
++      if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
+               return false;
+ 
+       if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
+@@ -3737,9 +3739,6 @@ static int macsec_changelink_common(struct net_device *dev,
+               secy->operational = tx_sa && tx_sa->active;
+       }
+ 
+-      if (data[IFLA_MACSEC_WINDOW])
+-              secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+-
+       if (data[IFLA_MACSEC_ENCRYPT])
+               tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
+ 
+@@ -3785,6 +3784,16 @@ static int macsec_changelink_common(struct net_device *dev,
+               }
+       }
+ 
++      if (data[IFLA_MACSEC_WINDOW]) {
++              secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
++
++              /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
++               * for XPN cipher suites */
++              if (secy->xpn &&
++                  secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
++                      return -EINVAL;
++      }
++
+       return 0;
+ }
+ 
+@@ -3814,7 +3823,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[],
+ 
+       ret = macsec_changelink_common(dev, data);
+       if (ret)
+-              return ret;
++              goto cleanup;
+ 
+       /* If h/w offloading is available, propagate to the device */
+       if (macsec_is_offloaded(macsec)) {
+diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c
+index 291fa449993fb..45f295403cb55 100644
+--- a/drivers/net/sungem_phy.c
++++ b/drivers/net/sungem_phy.c
+@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy)
+               int can_low_power = 1;
+               if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
+                       can_low_power = 0;
++              of_node_put(np);
+               if (can_low_power) {
+                       /* Enable automatic low-power */
+                       sungem_phy_write(phy, 0x1c, 0x9002);
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 37178b078ee37..0a07c05a610d1 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -213,9 +213,15 @@ struct virtnet_info {
+       /* Packet virtio header size */
+       u8 hdr_len;
+ 
+-      /* Work struct for refilling if we run low on memory. */
++      /* Work struct for delayed refilling if we run low on memory. */
+       struct delayed_work refill;
+ 
++      /* Is delayed refill enabled? */
++      bool refill_enabled;
++
++      /* The lock to synchronize the access to refill_enabled */
++      spinlock_t refill_lock;
++
+       /* Work struct for config space updates */
+       struct work_struct config_work;
+ 
+@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
+       return p;
+ }
+ 
++static void enable_delayed_refill(struct virtnet_info *vi)
++{
++      spin_lock_bh(&vi->refill_lock);
++      vi->refill_enabled = true;
++      spin_unlock_bh(&vi->refill_lock);
++}
++
++static void disable_delayed_refill(struct virtnet_info *vi)
++{
++      spin_lock_bh(&vi->refill_lock);
++      vi->refill_enabled = false;
++      spin_unlock_bh(&vi->refill_lock);
++}
++
+ static void virtqueue_napi_schedule(struct napi_struct *napi,
+                                   struct virtqueue *vq)
+ {
+@@ -1403,8 +1423,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
+       }
+ 
+       if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
+-              if (!try_fill_recv(vi, rq, GFP_ATOMIC))
+-                      schedule_delayed_work(&vi->refill, 0);
++              if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
++                      spin_lock(&vi->refill_lock);
++                      if (vi->refill_enabled)
++                              schedule_delayed_work(&vi->refill, 0);
++                      spin_unlock(&vi->refill_lock);
++              }
+       }
+ 
+       u64_stats_update_begin(&rq->stats.syncp);
+@@ -1523,6 +1547,8 @@ static int virtnet_open(struct net_device *dev)
+       struct virtnet_info *vi = netdev_priv(dev);
+       int i, err;
+ 
++      enable_delayed_refill(vi);
++
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               if (i < vi->curr_queue_pairs)
+                       /* Make sure we have some buffers: if oom use wq. */
+@@ -1893,6 +1919,8 @@ static int virtnet_close(struct net_device *dev)
+       struct virtnet_info *vi = netdev_priv(dev);
+       int i;
+ 
++      /* Make sure NAPI doesn't schedule refill work */
++      disable_delayed_refill(vi);
+       /* Make sure refill_work doesn't re-enable napi! */
+       cancel_delayed_work_sync(&vi->refill);
+ 
+@@ -2390,6 +2418,8 @@ static int virtnet_restore_up(struct virtio_device *vdev)
+ 
+       virtio_device_ready(vdev);
+ 
++      enable_delayed_refill(vi);
++
+       if (netif_running(vi->dev)) {
+               err = virtnet_open(vi->dev);
+               if (err)
+@@ -3092,6 +3122,7 @@ static int virtnet_probe(struct virtio_device *vdev)
+       vdev->priv = vi;
+ 
+       INIT_WORK(&vi->config_work, virtnet_config_changed_work);
++      spin_lock_init(&vi->refill_lock);
+ 
+       /* If we can receive ANY GSO packets, we must allocate large ones. */
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
+index 6bcc4a13ae6c7..cc772045d526f 100644
+--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
++++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
+@@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = {
+       { USB_DEVICE(0x2717, 0x4106) },
+       { USB_DEVICE(0x2955, 0x0001) },
+       { USB_DEVICE(0x2955, 0x1001) },
++      { USB_DEVICE(0x2955, 0x1003) },
+       { USB_DEVICE(0x2a5f, 0x1000) },
+       { USB_DEVICE(0x7392, 0x7710) },
+       { 0, }
+diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
+index 0f2430fb398db..576cc39077f32 100644
+--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c
+@@ -107,9 +107,20 @@ out:
+       return ret;
+ }
+ 
++static bool phandle_exists(const struct device_node *np,
++                         const char *phandle_name, int index)
++{
++      struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
++
++      if (parse_np)
++              of_node_put(parse_np);
++
++      return parse_np != NULL;
++}
++
+ #define MAX_PROP_SIZE 32
+ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+-              struct ufs_vreg **out_vreg)
++                              struct ufs_vreg **out_vreg)
+ {
+       int ret = 0;
+       char prop_name[MAX_PROP_SIZE];
+@@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name,
+       }
+ 
+       snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
+-      if (!of_parse_phandle(np, prop_name, 0)) {
++      if (!phandle_exists(np, prop_name, 0)) {
+               dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
+                               __func__, prop_name);
+               goto out;
+diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
+index d563abc3e1364..914e991731300 100644
+--- a/fs/ntfs/attrib.c
++++ b/fs/ntfs/attrib.c
+@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name,
+               a = (ATTR_RECORD*)((u8*)ctx->attr +
+                               le32_to_cpu(ctx->attr->length));
+       for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
+-              if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
+-                              le32_to_cpu(ctx->mrec->bytes_allocated))
++              u8 *mrec_end = (u8 *)ctx->mrec +
++                             le32_to_cpu(ctx->mrec->bytes_allocated);
++              u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
++                             a->name_length * sizeof(ntfschar);
++              if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
++                  name_end > mrec_end)
+                       break;
+               ctx->attr = a;
+               if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
+diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
+index 7993d527edae9..0a8cd8e59a92c 100644
+--- a/fs/ocfs2/ocfs2.h
++++ b/fs/ocfs2/ocfs2.h
+@@ -279,7 +279,6 @@ enum ocfs2_mount_options
+       OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15,  /* Journal Async Commit */
+       OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
+       OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
+-      OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
+ };
+ 
+ #define OCFS2_OSB_SOFT_RO     0x0001
+@@ -675,8 +674,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb)
+ 
+ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
+ {
+-      return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
+-              || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
++      return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
+ }
+ 
+ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
+diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
+index 4da0e4b1e79bf..8caeceeaeda7c 100644
+--- a/fs/ocfs2/slot_map.c
++++ b/fs/ocfs2/slot_map.c
+@@ -254,16 +254,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si,
+       int i, ret = -ENOSPC;
+ 
+       if ((preferred >= 0) && (preferred < si->si_num_slots)) {
+-              if (!si->si_slots[preferred].sl_valid ||
+-                  !si->si_slots[preferred].sl_node_num) {
++              if (!si->si_slots[preferred].sl_valid) {
+                       ret = preferred;
+                       goto out;
+               }
+       }
+ 
+       for(i = 0; i < si->si_num_slots; i++) {
+-              if (!si->si_slots[i].sl_valid ||
+-                  !si->si_slots[i].sl_node_num) {
++              if (!si->si_slots[i].sl_valid) {
+                       ret = i;
+                       break;
+               }
+@@ -458,30 +456,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
+       spin_lock(&osb->osb_lock);
+       ocfs2_update_slot_info(si);
+ 
+-      if (ocfs2_mount_local(osb))
+-              /* use slot 0 directly in local mode */
+-              slot = 0;
+-      else {
+-              /* search for ourselves first and take the slot if it already
+-               * exists. Perhaps we need to mark this in a variable for our
+-               * own journal recovery? Possibly not, though we certainly
+-               * need to warn to the user */
+-              slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++      /* search for ourselves first and take the slot if it already
++       * exists. Perhaps we need to mark this in a variable for our
++       * own journal recovery? Possibly not, though we certainly
++       * need to warn to the user */
++      slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++      if (slot < 0) {
++              /* if no slot yet, then just take 1st available
++               * one. */
++              slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+               if (slot < 0) {
+-                      /* if no slot yet, then just take 1st available
+-                       * one. */
+-                      slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+-                      if (slot < 0) {
+-                              spin_unlock(&osb->osb_lock);
+-                              mlog(ML_ERROR, "no free slots available!\n");
+-                              status = -EINVAL;
+-                              goto bail;
+-                      }
+-              } else
+-                      printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
+-                             "already allocated to this node!\n",
+-                             slot, osb->dev_str);
+-      }
++                      spin_unlock(&osb->osb_lock);
++                      mlog(ML_ERROR, "no free slots available!\n");
++                      status = -EINVAL;
++                      goto bail;
++              }
++      } else
++              printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
++                     "allocated to this node!\n", slot, osb->dev_str);
+ 
+       ocfs2_set_slot(si, slot, osb->node_num);
+       osb->slot_num = slot;
+diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
+index 477ad05a34ea2..c0e5f1bad499f 100644
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -175,7 +175,6 @@ enum {
+       Opt_dir_resv_level,
+       Opt_journal_async_commit,
+       Opt_err_cont,
+-      Opt_nocluster,
+       Opt_err,
+ };
+ 
+@@ -209,7 +208,6 @@ static const match_table_t tokens = {
+       {Opt_dir_resv_level, "dir_resv_level=%u"},
+       {Opt_journal_async_commit, "journal_async_commit"},
+       {Opt_err_cont, "errors=continue"},
+-      {Opt_nocluster, "nocluster"},
+       {Opt_err, NULL}
+ };
+ 
+@@ -621,13 +619,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
+               goto out;
+       }
+ 
+-      tmp = OCFS2_MOUNT_NOCLUSTER;
+-      if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+-              ret = -EINVAL;
+-              mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
+-              goto out;
+-      }
+-
+       tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+               OCFS2_MOUNT_HB_NONE;
+       if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+@@ -868,7 +859,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb,
+       }
+ 
+       if (ocfs2_userspace_stack(osb) &&
+-          !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+           strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
+                   OCFS2_STACK_LABEL_LEN)) {
+               mlog(ML_ERROR,
+@@ -1149,11 +1139,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+              osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
+              "ordered");
+ 
+-      if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+-         !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
+-              printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
+-                     "without cluster aware mode.\n", osb->dev_str);
+-
+       atomic_set(&osb->vol_state, VOLUME_MOUNTED);
+       wake_up(&osb->osb_mount_event);
+ 
+@@ -1460,9 +1445,6 @@ static int ocfs2_parse_options(struct super_block *sb,
+               case Opt_journal_async_commit:
+                       mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+                       break;
+-              case Opt_nocluster:
+-                      mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
+-                      break;
+               default:
+                       mlog(ML_ERROR,
+                            "Unrecognized mount option \"%s\" "
+@@ -1574,9 +1556,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
+       if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+               seq_printf(s, ",journal_async_commit");
+ 
+-      if (opts & OCFS2_MOUNT_NOCLUSTER)
+-              seq_printf(s, ",nocluster");
+-
+       return 0;
+ }
+ 
+diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
+index 8bd00da6d2a40..2f46ef3800aa2 100644
+--- a/fs/xfs/libxfs/xfs_log_format.h
++++ b/fs/xfs/libxfs/xfs_log_format.h
+@@ -414,7 +414,16 @@ struct xfs_log_dinode {
+       /* start of the extended dinode, writable fields */
+       uint32_t        di_crc;         /* CRC of the inode */
+       uint64_t        di_changecount; /* number of attribute changes */
+-      xfs_lsn_t       di_lsn;         /* flush sequence */
++
++      /*
++       * The LSN we write to this field during formatting is not a reflection
++       * of the current on-disk LSN. It should never be used for recovery
++       * sequencing, nor should it be recovered into the on-disk inode at all.
++       * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk()
++       * for details.
++       */
++      xfs_lsn_t       di_lsn;
++
+       uint64_t        di_flags2;      /* more random flags */
+       uint32_t        di_cowextsize;  /* basic cow extent size for file */
+       uint8_t         di_pad2[12];    /* more padding for future expansion */
+diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
+index 397d94775440d..1ce06173c2f55 100644
+--- a/fs/xfs/libxfs/xfs_types.h
++++ b/fs/xfs/libxfs/xfs_types.h
+@@ -21,6 +21,7 @@ typedef int32_t              xfs_suminfo_t;  /* type of bitmap summary info */
+ typedef uint32_t      xfs_rtword_t;   /* word type for bitmap manipulations */
+ 
+ typedef int64_t               xfs_lsn_t;      /* log sequence number */
++typedef int64_t               xfs_csn_t;      /* CIL sequence number */
+ 
+ typedef uint32_t      xfs_dablk_t;    /* dir/attr block number (in file) */
+ typedef uint32_t      xfs_dahash_t;   /* dir/attr hash value */
+diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
+index 8c6e26d62ef28..a3d5ecccfc2cc 100644
+--- a/fs/xfs/xfs_buf_item.c
++++ b/fs/xfs/xfs_buf_item.c
+@@ -393,17 +393,8 @@ xfs_buf_item_pin(
+ }
+ 
+ /*
+- * This is called to unpin the buffer associated with the buf log
+- * item which was previously pinned with a call to xfs_buf_item_pin().
+- *
+- * Also drop the reference to the buf item for the current transaction.
+- * If the XFS_BLI_STALE flag is set and we are the last reference,
+- * then free up the buf log item and unlock the buffer.
+- *
+- * If the remove flag is set we are called from uncommit in the
+- * forced-shutdown path.  If that is true and the reference count on
+- * the log item is going to drop to zero we need to free the item's
+- * descriptor in the transaction.
++ * This is called to unpin the buffer associated with the buf log item which
++ * was previously pinned with a call to xfs_buf_item_pin().
+  */
+ STATIC void
+ xfs_buf_item_unpin(
+@@ -420,38 +411,35 @@ xfs_buf_item_unpin(
+ 
+       trace_xfs_buf_item_unpin(bip);
+ 
++      /*
++       * Drop the bli ref associated with the pin and grab the hold required
++       * for the I/O simulation failure in the abort case. We have to do this
++       * before the pin count drops because the AIL doesn't acquire a bli
++       * reference. Therefore if the refcount drops to zero, the bli could
++       * still be AIL resident and the buffer submitted for I/O (and freed on
++       * completion) at any point before we return. This can be removed once
++       * the AIL properly holds a reference on the bli.
++       */
+       freed = atomic_dec_and_test(&bip->bli_refcount);
+-
++      if (freed && !stale && remove)
++              xfs_buf_hold(bp);
+       if (atomic_dec_and_test(&bp->b_pin_count))
+               wake_up_all(&bp->b_waiters);
+ 
+-      if (freed && stale) {
++       /* nothing to do but drop the pin count if the bli is active */
++      if (!freed)
++              return;
++
++      if (stale) {
+               ASSERT(bip->bli_flags & XFS_BLI_STALE);
+               ASSERT(xfs_buf_islocked(bp));
+               ASSERT(bp->b_flags & XBF_STALE);
+               ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
++              ASSERT(list_empty(&lip->li_trans));
++              ASSERT(!bp->b_transp);
+ 
+               trace_xfs_buf_item_unpin_stale(bip);
+ 
+-              if (remove) {
+-                      /*
+-                       * If we are in a transaction context, we have to
+-                       * remove the log item from the transaction as we are
+-                       * about to release our reference to the buffer.  If we
+-                       * don't, the unlock that occurs later in
+-                       * xfs_trans_uncommit() will try to reference the
+-                       * buffer which we no longer have a hold on.
+-                       */
+-                      if (!list_empty(&lip->li_trans))
+-                              xfs_trans_del_item(lip);
+-
+-                      /*
+-                       * Since the transaction no longer refers to the buffer,
+-                       * the buffer should no longer refer to the transaction.
+-                       */
+-                      bp->b_transp = NULL;
+-              }
+-
+               /*
+                * If we get called here because of an IO error, we may or may
+                * not have the item on the AIL. xfs_trans_ail_delete() will
+@@ -468,13 +456,13 @@ xfs_buf_item_unpin(
+                       ASSERT(bp->b_log_item == NULL);
+               }
+               xfs_buf_relse(bp);
+-      } else if (freed && remove) {
++      } else if (remove) {
+               /*
+                * The buffer must be locked and held by the caller to simulate
+-               * an async I/O failure.
++               * an async I/O failure. We acquired the hold for this case
++               * before the buffer was unpinned.
+                */
+               xfs_buf_lock(bp);
+-              xfs_buf_hold(bp);
+               bp->b_flags |= XBF_ASYNC;
+               xfs_buf_ioend_fail(bp);
+       }
+@@ -632,7 +620,7 @@ xfs_buf_item_release(
+ STATIC void
+ xfs_buf_item_committing(
+       struct xfs_log_item     *lip,
+-      xfs_lsn_t               commit_lsn)
++      xfs_csn_t               seq)
+ {
+       return xfs_buf_item_release(lip);
+ }
+diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
+index 1d649462d731a..b374c9cee1177 100644
+--- a/fs/xfs/xfs_buf_item_recover.c
++++ b/fs/xfs/xfs_buf_item_recover.c
+@@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn(
+       switch (magicda) {
+       case XFS_DIR3_LEAF1_MAGIC:
+       case XFS_DIR3_LEAFN_MAGIC:
++      case XFS_ATTR3_LEAF_MAGIC:
+       case XFS_DA3_NODE_MAGIC:
+               lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn);
+               uuid = &((struct xfs_da3_blkinfo *)blk)->uuid;
+diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
+index 8c1fdf37ee8f0..8ed47b739b6cc 100644
+--- a/fs/xfs/xfs_dquot_item.c
++++ b/fs/xfs/xfs_dquot_item.c
+@@ -188,7 +188,7 @@ xfs_qm_dquot_logitem_release(
+ STATIC void
+ xfs_qm_dquot_logitem_committing(
+       struct xfs_log_item     *lip,
+-      xfs_lsn_t               commit_lsn)
++      xfs_csn_t               seq)
+ {
+       return xfs_qm_dquot_logitem_release(lip);
+ }
+diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
+index 5b0f93f738372..4d6bf8d4974fe 100644
+--- a/fs/xfs/xfs_file.c
++++ b/fs/xfs/xfs_file.c
+@@ -118,6 +118,54 @@ xfs_dir_fsync(
+       return xfs_log_force_inode(ip);
+ }
+ 
++static xfs_csn_t
++xfs_fsync_seq(
++      struct xfs_inode        *ip,
++      bool                    datasync)
++{
++      if (!xfs_ipincount(ip))
++              return 0;
++      if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
++              return 0;
++      return ip->i_itemp->ili_commit_seq;
++}
++
++/*
++ * All metadata updates are logged, which means that we just have to flush the
++ * log up to the latest LSN that touched the inode.
++ *
++ * If we have concurrent fsync/fdatasync() calls, we need them to all block on
++ * the log force before we clear the ili_fsync_fields field. This ensures that
++ * we don't get a racing sync operation that does not wait for the metadata to
++ * hit the journal before returning.  If we race with clearing ili_fsync_fields,
++ * then all that will happen is the log force will do nothing as the lsn will
++ * already be on disk.  We can't race with setting ili_fsync_fields because that
++ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
++ * shared until after the ili_fsync_fields is cleared.
++ */
++static  int
++xfs_fsync_flush_log(
++      struct xfs_inode        *ip,
++      bool                    datasync,
++      int                     *log_flushed)
++{
++      int                     error = 0;
++      xfs_csn_t               seq;
++
++      xfs_ilock(ip, XFS_ILOCK_SHARED);
++      seq = xfs_fsync_seq(ip, datasync);
++      if (seq) {
++              error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
++                                        log_flushed);
++
++              spin_lock(&ip->i_itemp->ili_lock);
++              ip->i_itemp->ili_fsync_fields = 0;
++              spin_unlock(&ip->i_itemp->ili_lock);
++      }
++      xfs_iunlock(ip, XFS_ILOCK_SHARED);
++      return error;
++}
++
+ STATIC int
+ xfs_file_fsync(
+       struct file             *file,
+@@ -125,13 +173,10 @@ xfs_file_fsync(
+       loff_t                  end,
+       int                     datasync)
+ {
+-      struct inode            *inode = file->f_mapping->host;
+-      struct xfs_inode        *ip = XFS_I(inode);
+-      struct xfs_inode_log_item *iip = ip->i_itemp;
++      struct xfs_inode        *ip = XFS_I(file->f_mapping->host);
+       struct xfs_mount        *mp = ip->i_mount;
+       int                     error = 0;
+       int                     log_flushed = 0;
+-      xfs_lsn_t               lsn = 0;
+ 
+       trace_xfs_file_fsync(ip);
+ 
+@@ -155,33 +200,7 @@ xfs_file_fsync(
+       else if (mp->m_logdev_targp != mp->m_ddev_targp)
+               xfs_blkdev_issue_flush(mp->m_ddev_targp);
+ 
+-      /*
+-       * All metadata updates are logged, which means that we just have to
+-       * flush the log up to the latest LSN that touched the inode. If we have
+-       * concurrent fsync/fdatasync() calls, we need them to all block on the
+-       * log force before we clear the ili_fsync_fields field. This ensures
+-       * that we don't get a racing sync operation that does not wait for the
+-       * metadata to hit the journal before returning. If we race with
+-       * clearing the ili_fsync_fields, then all that will happen is the log
+-       * force will do nothing as the lsn will already be on disk. We can't
+-       * race with setting ili_fsync_fields because that is done under
+-       * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
+-       * until after the ili_fsync_fields is cleared.
+-       */
+-      xfs_ilock(ip, XFS_ILOCK_SHARED);
+-      if (xfs_ipincount(ip)) {
+-              if (!datasync ||
+-                  (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
+-                      lsn = iip->ili_last_lsn;
+-      }
+-
+-      if (lsn) {
+-              error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
+-              spin_lock(&iip->ili_lock);
+-              iip->ili_fsync_fields = 0;
+-              spin_unlock(&iip->ili_lock);
+-      }
+-      xfs_iunlock(ip, XFS_ILOCK_SHARED);
++      error = xfs_fsync_flush_log(ip, datasync, &log_flushed);
+ 
+       /*
+        * If we only have a single device, and the log force about was
+diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
+index 03497741aef74..1f61e085676b3 100644
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2754,7 +2754,7 @@ xfs_iunpin(
+       trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
+ 
+       /* Give the log a push to start the unpinning I/O */
+-      xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL);
++      xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
+ 
+ }
+ 
+@@ -3716,16 +3716,16 @@ int
+ xfs_log_force_inode(
+       struct xfs_inode        *ip)
+ {
+-      xfs_lsn_t               lsn = 0;
++      xfs_csn_t               seq = 0;
+ 
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_ipincount(ip))
+-              lsn = ip->i_itemp->ili_last_lsn;
++              seq = ip->i_itemp->ili_commit_seq;
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ 
+-      if (!lsn)
++      if (!seq)
+               return 0;
+-      return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
++      return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
+ }
+ 
+ /*
+diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
+index 6ff91e5bf3cd7..3aba4559469f1 100644
+--- a/fs/xfs/xfs_inode_item.c
++++ b/fs/xfs/xfs_inode_item.c
+@@ -617,9 +617,9 @@ xfs_inode_item_committed(
+ STATIC void
+ xfs_inode_item_committing(
+       struct xfs_log_item     *lip,
+-      xfs_lsn_t               commit_lsn)
++      xfs_csn_t               seq)
+ {
+-      INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
++      INODE_ITEM(lip)->ili_commit_seq = seq;
+       return xfs_inode_item_release(lip);
+ }
+ 
+diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
+index 4b926e32831c0..403b45ab9aa28 100644
+--- a/fs/xfs/xfs_inode_item.h
++++ b/fs/xfs/xfs_inode_item.h
+@@ -33,7 +33,7 @@ struct xfs_inode_log_item {
+       unsigned int            ili_fields;        /* fields to be logged */
+       unsigned int            ili_fsync_fields;  /* logged since last fsync */
+       xfs_lsn_t               ili_flush_lsn;     /* lsn at last flush */
+-      xfs_lsn_t               ili_last_lsn;      /* lsn at last transaction */
++      xfs_csn_t               ili_commit_seq;    /* last transaction commit */
+ };
+ 
+ static inline int xfs_inode_clean(struct xfs_inode *ip)
+diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
+index cb44f7653f03b..538724f9f85ca 100644
+--- a/fs/xfs/xfs_inode_item_recover.c
++++ b/fs/xfs/xfs_inode_item_recover.c
+@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts(
+ STATIC void
+ xfs_log_dinode_to_disk(
+       struct xfs_log_dinode   *from,
+-      struct xfs_dinode       *to)
++      struct xfs_dinode       *to,
++      xfs_lsn_t               lsn)
+ {
+       to->di_magic = cpu_to_be16(from->di_magic);
+       to->di_mode = cpu_to_be16(from->di_mode);
+@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk(
+               to->di_flags2 = cpu_to_be64(from->di_flags2);
+               to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
+               to->di_ino = cpu_to_be64(from->di_ino);
+-              to->di_lsn = cpu_to_be64(from->di_lsn);
++              to->di_lsn = cpu_to_be64(lsn);
+               memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
+               uuid_copy(&to->di_uuid, &from->di_uuid);
+               to->di_flushiter = 0;
+@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2(
+       }
+ 
+       /*
+-       * If the inode has an LSN in it, recover the inode only if it's less
+-       * than the lsn of the transaction we are replaying. Note: we still
+-       * need to replay an owner change even though the inode is more recent
+-       * than the transaction as there is no guarantee that all the btree
+-       * blocks are more recent than this transaction, too.
++       * If the inode has an LSN in it, recover the inode only if the on-disk
++       * inode's LSN is older than the lsn of the transaction we are
++       * replaying. We can have multiple checkpoints with the same start LSN,
++       * so the current LSN being equal to the on-disk LSN doesn't necessarily
++       * mean that the on-disk inode is more recent than the change being
++       * replayed.
++       *
++       * We must check the current_lsn against the on-disk inode
++       * here because the we can't trust the log dinode to contain a valid LSN
++       * (see comment below before replaying the log dinode for details).
++       *
++       * Note: we still need to replay an owner change even though the inode
++       * is more recent than the transaction as there is no guarantee that all
++       * the btree blocks are more recent than this transaction, too.
+        */
+       if (dip->di_version >= 3) {
+               xfs_lsn_t       lsn = be64_to_cpu(dip->di_lsn);
+ 
+-              if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
++              if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) {
+                       trace_xfs_log_recover_inode_skip(log, in_f);
+                       error = 0;
+                       goto out_owner_change;
+@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2(
+               goto out_release;
+       }
+ 
+-      /* recover the log dinode inode into the on disk inode */
+-      xfs_log_dinode_to_disk(ldip, dip);
++      /*
++       * Recover the log dinode inode into the on disk inode.
++       *
++       * The LSN in the log dinode is garbage - it can be zero or reflect
++       * stale in-memory runtime state that isn't coherent with the changes
++       * logged in this transaction or the changes written to the on-disk
++       * inode.  Hence we write the current lSN into the inode because that
++       * matches what xfs_iflush() would write inode the inode when flushing
++       * the changes in this transaction.
++       */
++      xfs_log_dinode_to_disk(ldip, dip, current_lsn);
+ 
+       fields = in_f->ilf_fields;
+       if (fields & XFS_ILOG_DEV)
+diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
+index b445e63cbc3c7..22d7d74231d42 100644
+--- a/fs/xfs/xfs_log.c
++++ b/fs/xfs/xfs_log.c
+@@ -765,6 +765,9 @@ xfs_log_mount_finish(
+       if (readonly)
+               mp->m_flags |= XFS_MOUNT_RDONLY;
+ 
++      /* Make sure the log is dead if we're returning failure. */
++      ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR));
++
+       return error;
+ }
+ 
+@@ -3210,14 +3213,13 @@ out_error:
+ }
+ 
+ static int
+-__xfs_log_force_lsn(
+-      struct xfs_mount        *mp,
++xlog_force_lsn(
++      struct xlog             *log,
+       xfs_lsn_t               lsn,
+       uint                    flags,
+       int                     *log_flushed,
+       bool                    already_slept)
+ {
+-      struct xlog             *log = mp->m_log;
+       struct xlog_in_core     *iclog;
+ 
+       spin_lock(&log->l_icloglock);
+@@ -3250,8 +3252,6 @@ __xfs_log_force_lsn(
+               if (!already_slept &&
+                   (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
+                    iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
+-                      XFS_STATS_INC(mp, xs_log_force_sleep);
+-
+                       xlog_wait(&iclog->ic_prev->ic_write_wait,
+                                       &log->l_icloglock);
+                       return -EAGAIN;
+@@ -3289,25 +3289,29 @@ out_error:
+  * to disk, that thread will wake up all threads waiting on the queue.
+  */
+ int
+-xfs_log_force_lsn(
++xfs_log_force_seq(
+       struct xfs_mount        *mp,
+-      xfs_lsn_t               lsn,
++      xfs_csn_t               seq,
+       uint                    flags,
+       int                     *log_flushed)
+ {
++      struct xlog             *log = mp->m_log;
++      xfs_lsn_t               lsn;
+       int                     ret;
+-      ASSERT(lsn != 0);
++      ASSERT(seq != 0);
+ 
+       XFS_STATS_INC(mp, xs_log_force);
+-      trace_xfs_log_force(mp, lsn, _RET_IP_);
++      trace_xfs_log_force(mp, seq, _RET_IP_);
+ 
+-      lsn = xlog_cil_force_lsn(mp->m_log, lsn);
++      lsn = xlog_cil_force_seq(log, seq);
+       if (lsn == NULLCOMMITLSN)
+               return 0;
+ 
+-      ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false);
+-      if (ret == -EAGAIN)
+-              ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true);
++      ret = xlog_force_lsn(log, lsn, flags, log_flushed, false);
++      if (ret == -EAGAIN) {
++              XFS_STATS_INC(mp, xs_log_force_sleep);
++              ret = xlog_force_lsn(log, lsn, flags, log_flushed, true);
++      }
+       return ret;
+ }
+ 
+diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
+index 98c913da7587e..a1089f8b7169b 100644
+--- a/fs/xfs/xfs_log.h
++++ b/fs/xfs/xfs_log.h
+@@ -106,7 +106,7 @@ struct xfs_item_ops;
+ struct xfs_trans;
+ 
+ int     xfs_log_force(struct xfs_mount *mp, uint flags);
+-int     xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
++int     xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags,
+               int *log_forced);
+ int     xfs_log_mount(struct xfs_mount        *mp,
+                       struct xfs_buftarg      *log_target,
+@@ -132,8 +132,6 @@ bool       xfs_log_writable(struct xfs_mount *mp);
+ struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
+ void    xfs_log_ticket_put(struct xlog_ticket *ticket);
+ 
+-void  xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+-                              xfs_lsn_t *commit_lsn, bool regrant);
+ void  xlog_cil_process_committed(struct list_head *list);
+ bool  xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
+ 
+diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
+index cd5c04dabe2e1..fbe160d5e9b96 100644
+--- a/fs/xfs/xfs_log_cil.c
++++ b/fs/xfs/xfs_log_cil.c
+@@ -777,7 +777,7 @@ xlog_cil_push_work(
+        * that higher sequences will wait for us to write out a commit record
+        * before they do.
+        *
+-       * xfs_log_force_lsn requires us to mirror the new sequence into the cil
++       * xfs_log_force_seq requires us to mirror the new sequence into the cil
+        * structure atomically with the addition of this sequence to the
+        * committing list. This also ensures that we can do unlocked checks
+        * against the current sequence in log forces without risking
+@@ -1020,16 +1020,14 @@ xlog_cil_empty(
+  * allowed again.
+  */
+ void
+-xfs_log_commit_cil(
+-      struct xfs_mount        *mp,
++xlog_cil_commit(
++      struct xlog             *log,
+       struct xfs_trans        *tp,
+-      xfs_lsn_t               *commit_lsn,
++      xfs_csn_t               *commit_seq,
+       bool                    regrant)
+ {
+-      struct xlog             *log = mp->m_log;
+       struct xfs_cil          *cil = log->l_cilp;
+       struct xfs_log_item     *lip, *next;
+-      xfs_lsn_t               xc_commit_lsn;
+ 
+       /*
+        * Do all necessary memory allocation before we lock the CIL.
+@@ -1043,10 +1041,6 @@ xfs_log_commit_cil(
+ 
+       xlog_cil_insert_items(log, tp);
+ 
+-      xc_commit_lsn = cil->xc_ctx->sequence;
+-      if (commit_lsn)
+-              *commit_lsn = xc_commit_lsn;
+-
+       if (regrant && !XLOG_FORCED_SHUTDOWN(log))
+               xfs_log_ticket_regrant(log, tp->t_ticket);
+       else
+@@ -1069,8 +1063,10 @@ xfs_log_commit_cil(
+       list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+               xfs_trans_del_item(lip);
+               if (lip->li_ops->iop_committing)
+-                      lip->li_ops->iop_committing(lip, xc_commit_lsn);
++                      lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence);
+       }
++      if (commit_seq)
++              *commit_seq = cil->xc_ctx->sequence;
+ 
+       /* xlog_cil_push_background() releases cil->xc_ctx_lock */
+       xlog_cil_push_background(log);
+@@ -1087,9 +1083,9 @@ xfs_log_commit_cil(
+  * iclog flush is necessary following this call.
+  */
+ xfs_lsn_t
+-xlog_cil_force_lsn(
++xlog_cil_force_seq(
+       struct xlog     *log,
+-      xfs_lsn_t       sequence)
++      xfs_csn_t       sequence)
+ {
+       struct xfs_cil          *cil = log->l_cilp;
+       struct xfs_cil_ctx      *ctx;
+@@ -1183,23 +1179,19 @@ out_shutdown:
+  */
+ bool
+ xfs_log_item_in_current_chkpt(
+-      struct xfs_log_item *lip)
++      struct xfs_log_item     *lip)
+ {
+-      struct xfs_cil_ctx *ctx;
++      struct xfs_cil          *cil = lip->li_mountp->m_log->l_cilp;
+ 
+       if (list_empty(&lip->li_cil))
+               return false;
+ 
+-      ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
+-
+       /*
+        * li_seq is written on the first commit of a log item to record the
+        * first checkpoint it is written to. Hence if it is different to the
+        * current sequence, we're in a new checkpoint.
+        */
+-      if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
+-              return false;
+-      return true;
++      return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
+ }
+ 
+ /*
+diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
+index 1c6fdbf3d5066..42cd1602ac256 100644
+--- a/fs/xfs/xfs_log_priv.h
++++ b/fs/xfs/xfs_log_priv.h
+@@ -230,7 +230,7 @@ struct xfs_cil;
+ 
+ struct xfs_cil_ctx {
+       struct xfs_cil          *cil;
+-      xfs_lsn_t               sequence;       /* chkpt sequence # */
++      xfs_csn_t               sequence;       /* chkpt sequence # */
+       xfs_lsn_t               start_lsn;      /* first LSN of chkpt commit */
+       xfs_lsn_t               commit_lsn;     /* chkpt commit record lsn */
+       struct xlog_ticket      *ticket;        /* chkpt ticket */
+@@ -268,10 +268,10 @@ struct xfs_cil {
+       struct xfs_cil_ctx      *xc_ctx;
+ 
+       spinlock_t              xc_push_lock ____cacheline_aligned_in_smp;
+-      xfs_lsn_t               xc_push_seq;
++      xfs_csn_t               xc_push_seq;
+       struct list_head        xc_committing;
+       wait_queue_head_t       xc_commit_wait;
+-      xfs_lsn_t               xc_current_sequence;
++      xfs_csn_t               xc_current_sequence;
+       struct work_struct      xc_push_work;
+       wait_queue_head_t       xc_push_wait;   /* background push throttle */
+ } ____cacheline_aligned_in_smp;
+@@ -547,19 +547,18 @@ int      xlog_cil_init(struct xlog *log);
+ void  xlog_cil_init_post_recovery(struct xlog *log);
+ void  xlog_cil_destroy(struct xlog *log);
+ bool  xlog_cil_empty(struct xlog *log);
++void  xlog_cil_commit(struct xlog *log, struct xfs_trans *tp,
++                      xfs_csn_t *commit_seq, bool regrant);
+ 
+ /*
+  * CIL force routines
+  */
+-xfs_lsn_t
+-xlog_cil_force_lsn(
+-      struct xlog *log,
+-      xfs_lsn_t sequence);
++xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence);
+ 
+ static inline void
+ xlog_cil_force(struct xlog *log)
+ {
+-      xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
++      xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence);
+ }
+ 
+ /*
+diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
+index 87886b7f77dad..69408782019eb 100644
+--- a/fs/xfs/xfs_log_recover.c
++++ b/fs/xfs/xfs_log_recover.c
+@@ -2457,8 +2457,10 @@ xlog_finish_defer_ops(
+ 
+               error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres,
+                               dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp);
+-              if (error)
++              if (error) {
++                      xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+                       return error;
++              }
+ 
+               /*
+                * Transfer to this new transaction all the dfops we captured
+@@ -3454,6 +3456,7 @@ xlog_recover_finish(
+                        * this) before we get around to xfs_log_mount_cancel.
+                        */
+                       xlog_recover_cancel_intents(log);
++                      xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
+                       xfs_alert(log->l_mp, "Failed to recover intents");
+                       return error;
+               }
+diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
+index 44b05e1d5d327..a2a5a0fd92334 100644
+--- a/fs/xfs/xfs_mount.c
++++ b/fs/xfs/xfs_mount.c
+@@ -968,9 +968,17 @@ xfs_mountfs(
+       /*
+        * Finish recovering the file system.  This part needed to be delayed
+        * until after the root and real-time bitmap inodes were consistently
+-       * read in.
++       * read in.  Temporarily create per-AG space reservations for metadata
++       * btree shape changes because space freeing transactions (for inode
++       * inactivation) require the per-AG reservation in lieu of reserving
++       * blocks.
+        */
++      error = xfs_fs_reserve_ag_blocks(mp);
++      if (error && error == -ENOSPC)
++              xfs_warn(mp,
++      "ENOSPC reserving per-AG metadata pool, log recovery may fail.");
+       error = xfs_log_mount_finish(mp);
++      xfs_fs_unreserve_ag_blocks(mp);
+       if (error) {
+               xfs_warn(mp, "log mount finish failed");
+               goto out_rtunmount;
+diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
+index 36166bae24a6f..73a1de7ceefc9 100644
+--- a/fs/xfs/xfs_trans.c
++++ b/fs/xfs/xfs_trans.c
+@@ -832,7 +832,7 @@ __xfs_trans_commit(
+       bool                    regrant)
+ {
+       struct xfs_mount        *mp = tp->t_mountp;
+-      xfs_lsn_t               commit_lsn = -1;
++      xfs_csn_t               commit_seq = 0;
+       int                     error = 0;
+       int                     sync = tp->t_flags & XFS_TRANS_SYNC;
+ 
+@@ -874,7 +874,7 @@ __xfs_trans_commit(
+               xfs_trans_apply_sb_deltas(tp);
+       xfs_trans_apply_dquot_deltas(tp);
+ 
+-      xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
++      xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant);
+ 
+       xfs_trans_free(tp);
+ 
+@@ -883,7 +883,7 @@ __xfs_trans_commit(
+        * log out now and wait for it.
+        */
+       if (sync) {
+-              error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
++              error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
+               XFS_STATS_INC(mp, xs_trans_sync);
+       } else {
+               XFS_STATS_INC(mp, xs_trans_async);
+diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
+index 075eeade4f7d5..97485559008bb 100644
+--- a/fs/xfs/xfs_trans.h
++++ b/fs/xfs/xfs_trans.h
+@@ -43,7 +43,7 @@ struct xfs_log_item {
+       struct list_head                li_cil;         /* CIL pointers */
+       struct xfs_log_vec              *li_lv;         /* active log vector */
+       struct xfs_log_vec              *li_lv_shadow;  /* standby vector */
+-      xfs_lsn_t                       li_seq;         /* CIL commit seq */
++      xfs_csn_t                       li_seq;         /* CIL commit seq */
+ };
+ 
+ /*
+@@ -69,7 +69,7 @@ struct xfs_item_ops {
+       void (*iop_pin)(struct xfs_log_item *);
+       void (*iop_unpin)(struct xfs_log_item *, int remove);
+       uint (*iop_push)(struct xfs_log_item *, struct list_head *);
+-      void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
++      void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq);
+       void (*iop_release)(struct xfs_log_item *);
+       xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
+       int (*iop_recover)(struct xfs_log_item *lip,
+diff --git a/include/linux/bpf.h b/include/linux/bpf.h
+index f21bc441e3fa8..b010d45a1ecd5 100644
+--- a/include/linux/bpf.h
++++ b/include/linux/bpf.h
+@@ -1457,6 +1457,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
+                            const union bpf_attr *kattr,
+                            union bpf_attr __user *uattr);
++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
++                              const union bpf_attr *kattr,
++                              union bpf_attr __user *uattr);
+ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
+                   const struct bpf_prog *prog,
+                   struct bpf_insn_access_aux *info);
+@@ -1671,6 +1674,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+       return -ENOTSUPP;
+ }
+ 
++static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
++                                            const union bpf_attr *kattr,
++                                            union bpf_attr __user *uattr)
++{
++      return -ENOTSUPP;
++}
++
+ static inline void bpf_map_put(struct bpf_map *map)
+ {
+ }
+diff --git a/include/net/addrconf.h b/include/net/addrconf.h
+index e7ce719838b5e..edba74a536839 100644
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev)
+ {
+       const struct inet6_dev *idev = __in6_dev_get(dev);
+ 
++      if (unlikely(!idev))
++              return true;
++
+       return !!idev->cnf.ignore_routes_with_linkdown;
+ }
+ 
+diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
+index 1d1232917de72..9b8000869b078 100644
+--- a/include/net/bluetooth/l2cap.h
++++ b/include/net/bluetooth/l2cap.h
+@@ -845,6 +845,7 @@ enum {
+ };
+ 
+ void l2cap_chan_hold(struct l2cap_chan *c);
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
+ void l2cap_chan_put(struct l2cap_chan *c);
+ 
+ static inline void l2cap_chan_lock(struct l2cap_chan *chan)
+diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
+index 0b1864a82d4ad..ff901aade442f 100644
+--- a/include/net/inet_connection_sock.h
++++ b/include/net/inet_connection_sock.h
+@@ -317,7 +317,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+ 
+ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+ 
+-#define TCP_PINGPONG_THRESH   3
++#define TCP_PINGPONG_THRESH   1
+ 
+ static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
+ {
+@@ -334,14 +334,6 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+       return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+ }
+ 
+-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+-{
+-      struct inet_connection_sock *icsk = inet_csk(sk);
+-
+-      if (icsk->icsk_ack.pingpong < U8_MAX)
+-              icsk->icsk_ack.pingpong++;
+-}
+-
+ static inline bool inet_csk_has_ulp(struct sock *sk)
+ {
+       return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 44bfb22069c1f..8129ce9a07719 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1396,7 +1396,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
+ 
+ static inline int tcp_win_from_space(const struct sock *sk, int space)
+ {
+-      int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
++      int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+ 
+       return tcp_adv_win_scale <= 0 ?
+               (space>>(-tcp_adv_win_scale)) :
+diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
+index 0f39fdcb2273c..2a234023821e3 100644
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -5007,7 +5007,10 @@ struct bpf_pidns_info {
+ 
+ /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+ struct bpf_sk_lookup {
+-      __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++      union {
++              __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++              __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
++      };
+ 
+       __u32 family;           /* Protocol family (AF_INET, AF_INET6) */
+       __u32 protocol;         /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c
+index e5d22af43fa0b..d29731a30b8e1 100644
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+       rcu_assign_pointer(watch->queue, wqueue);
+ }
+ 
++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
++{
++      const struct cred *cred;
++      struct watch *w;
++
++      hlist_for_each_entry(w, &wlist->watchers, list_node) {
++              struct watch_queue *wq = rcu_access_pointer(w->queue);
++              if (wqueue == wq && watch->id == w->id)
++                      return -EBUSY;
++      }
++
++      cred = current_cred();
++      if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
++              atomic_dec(&cred->user->nr_watches);
++              return -EAGAIN;
++      }
++
++      watch->cred = get_cred(cred);
++      rcu_assign_pointer(watch->watch_list, wlist);
++
++      kref_get(&wqueue->usage);
++      kref_get(&watch->usage);
++      hlist_add_head(&watch->queue_node, &wqueue->watches);
++      hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
++      return 0;
++}
++
+ /**
+  * add_watch_to_object - Add a watch on an object to a watch list
+  * @watch: The watch to add
+@@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue)
+  */
+ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+ {
+-      struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
+-      struct watch *w;
+-
+-      hlist_for_each_entry(w, &wlist->watchers, list_node) {
+-              struct watch_queue *wq = rcu_access_pointer(w->queue);
+-              if (wqueue == wq && watch->id == w->id)
+-                      return -EBUSY;
+-      }
+-
+-      watch->cred = get_current_cred();
+-      rcu_assign_pointer(watch->watch_list, wlist);
++      struct watch_queue *wqueue;
++      int ret = -ENOENT;
+ 
+-      if (atomic_inc_return(&watch->cred->user->nr_watches) >
+-          task_rlimit(current, RLIMIT_NOFILE)) {
+-              atomic_dec(&watch->cred->user->nr_watches);
+-              put_cred(watch->cred);
+-              return -EAGAIN;
+-      }
++      rcu_read_lock();
+ 
++      wqueue = rcu_access_pointer(watch->queue);
+       if (lock_wqueue(wqueue)) {
+-              kref_get(&wqueue->usage);
+-              kref_get(&watch->usage);
+-              hlist_add_head(&watch->queue_node, &wqueue->watches);
++              spin_lock(&wlist->lock);
++              ret = add_one_watch(watch, wlist, wqueue);
++              spin_unlock(&wlist->lock);
+               unlock_wqueue(wqueue);
+       }
+ 
+-      hlist_add_head(&watch->list_node, &wlist->watchers);
+-      return 0;
++      rcu_read_unlock();
++      return ret;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
+ 
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index f3418edb136be..43ff22ce76324 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -3679,11 +3679,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order,
+        * need to be calculated.
+        */
+       if (!order) {
+-              long fast_free;
++              long usable_free;
++              long reserved;
+ 
+-              fast_free = free_pages;
+-              fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
+-              if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
++              usable_free = free_pages;
++              reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
++
++              /* reserved may over estimate high-atomic reserves. */
++              usable_free -= min(usable_free, reserved);
++              if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
+                       return true;
+       }
+ 
+diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
+index 2557cd917f5ed..6a5ff5dcc09a9 100644
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+ }
+ 
+ /* Find channel with given SCID.
+- * Returns locked channel. */
++ * Returns a reference locked channel.
++ */
+ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+                                                u16 cid)
+ {
+@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+ 
+       mutex_lock(&conn->chan_lock);
+       c = __l2cap_get_chan_by_scid(conn, cid);
+-      if (c)
+-              l2cap_chan_lock(c);
++      if (c) {
++              /* Only lock if chan reference is not 0 */
++              c = l2cap_chan_hold_unless_zero(c);
++              if (c)
++                      l2cap_chan_lock(c);
++      }
+       mutex_unlock(&conn->chan_lock);
+ 
+       return c;
+ }
+ 
+ /* Find channel with given DCID.
+- * Returns locked channel.
++ * Returns a reference locked channel.
+  */
+ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
+                                                u16 cid)
+@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
+ 
+       mutex_lock(&conn->chan_lock);
+       c = __l2cap_get_chan_by_dcid(conn, cid);
+-      if (c)
+-              l2cap_chan_lock(c);
++      if (c) {
++              /* Only lock if chan reference is not 0 */
++              c = l2cap_chan_hold_unless_zero(c);
++              if (c)
++                      l2cap_chan_lock(c);
++      }
+       mutex_unlock(&conn->chan_lock);
+ 
+       return c;
+@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
+ 
+       mutex_lock(&conn->chan_lock);
+       c = __l2cap_get_chan_by_ident(conn, ident);
+-      if (c)
+-              l2cap_chan_lock(c);
++      if (c) {
++              /* Only lock if chan reference is not 0 */
++              c = l2cap_chan_hold_unless_zero(c);
++              if (c)
++                      l2cap_chan_lock(c);
++      }
+       mutex_unlock(&conn->chan_lock);
+ 
+       return c;
+@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
+       kref_get(&c->kref);
+ }
+ 
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
++{
++      BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
++
++      if (!kref_get_unless_zero(&c->kref))
++              return NULL;
++
++      return c;
++}
++
+ void l2cap_chan_put(struct l2cap_chan *c)
+ {
+       BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
+@@ -1965,7 +1988,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+                       src_match = !bacmp(&c->src, src);
+                       dst_match = !bacmp(&c->dst, dst);
+                       if (src_match && dst_match) {
+-                              l2cap_chan_hold(c);
++                              c = l2cap_chan_hold_unless_zero(c);
++                              if (!c)
++                                      continue;
++
+                               read_unlock(&chan_list_lock);
+                               return c;
+                       }
+@@ -1980,7 +2006,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
+       }
+ 
+       if (c1)
+-              l2cap_chan_hold(c1);
++              c1 = l2cap_chan_hold_unless_zero(c1);
+ 
+       read_unlock(&chan_list_lock);
+ 
+@@ -4460,6 +4486,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
+ 
+ unlock:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return err;
+ }
+ 
+@@ -4573,6 +4600,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
+ 
+ done:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return err;
+ }
+ 
+@@ -5300,6 +5328,7 @@ send_move_response:
+       l2cap_send_move_chan_rsp(chan, result);
+ 
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ 
+       return 0;
+ }
+@@ -5392,6 +5421,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
+       }
+ 
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ }
+ 
+ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
+@@ -5421,6 +5451,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
+       l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
+ 
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ }
+ 
+ static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
+@@ -5484,6 +5515,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
+       l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
+ 
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ 
+       return 0;
+ }
+@@ -5519,6 +5551,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
+       }
+ 
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ 
+       return 0;
+ }
+@@ -5891,12 +5924,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
+       if (credits > max_credits) {
+               BT_ERR("LE credits overflow");
+               l2cap_send_disconn_req(chan, ECONNRESET);
+-              l2cap_chan_unlock(chan);
+ 
+               /* Return 0 so that we don't trigger an unnecessary
+                * command reject packet.
+                */
+-              return 0;
++              goto unlock;
+       }
+ 
+       chan->tx_credits += credits;
+@@ -5907,7 +5939,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
+       if (chan->tx_credits)
+               chan->ops->resume(chan);
+ 
++unlock:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ 
+       return 0;
+ }
+@@ -7587,6 +7621,7 @@ drop:
+ 
+ done:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ }
+ 
+ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
+@@ -8074,7 +8109,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
+               if (src_type != c->src_type)
+                       continue;
+ 
+-              l2cap_chan_hold(c);
++              c = l2cap_chan_hold_unless_zero(c);
+               read_unlock(&chan_list_lock);
+               return c;
+       }
+diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
+index eb684f31fd698..f8b231bbbe381 100644
+--- a/net/bpf/test_run.c
++++ b/net/bpf/test_run.c
+@@ -10,20 +10,86 @@
+ #include <net/bpf_sk_storage.h>
+ #include <net/sock.h>
+ #include <net/tcp.h>
++#include <net/net_namespace.h>
+ #include <linux/error-injection.h>
+ #include <linux/smp.h>
++#include <linux/sock_diag.h>
+ 
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/bpf_test_run.h>
+ 
++struct bpf_test_timer {
++      enum { NO_PREEMPT, NO_MIGRATE } mode;
++      u32 i;
++      u64 time_start, time_spent;
++};
++
++static void bpf_test_timer_enter(struct bpf_test_timer *t)
++      __acquires(rcu)
++{
++      rcu_read_lock();
++      if (t->mode == NO_PREEMPT)
++              preempt_disable();
++      else
++              migrate_disable();
++
++      t->time_start = ktime_get_ns();
++}
++
++static void bpf_test_timer_leave(struct bpf_test_timer *t)
++      __releases(rcu)
++{
++      t->time_start = 0;
++
++      if (t->mode == NO_PREEMPT)
++              preempt_enable();
++      else
++              migrate_enable();
++      rcu_read_unlock();
++}
++
++static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
++      __must_hold(rcu)
++{
++      t->i++;
++      if (t->i >= repeat) {
++              /* We're done. */
++              t->time_spent += ktime_get_ns() - t->time_start;
++              do_div(t->time_spent, t->i);
++              *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
++              *err = 0;
++              goto reset;
++      }
++
++      if (signal_pending(current)) {
++              /* During iteration: we've been cancelled, abort. */
++              *err = -EINTR;
++              goto reset;
++      }
++
++      if (need_resched()) {
++              /* During iteration: we need to reschedule between runs. */
++              t->time_spent += ktime_get_ns() - t->time_start;
++              bpf_test_timer_leave(t);
++              cond_resched();
++              bpf_test_timer_enter(t);
++      }
++
++      /* Do another round. */
++      return true;
++
++reset:
++      t->i = 0;
++      return false;
++}
++
+ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+                       u32 *retval, u32 *time, bool xdp)
+ {
+       struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
++      struct bpf_test_timer t = { NO_MIGRATE };
+       enum bpf_cgroup_storage_type stype;
+-      u64 time_start, time_spent = 0;
+-      int ret = 0;
+-      u32 i;
++      int ret;
+ 
+       for_each_cgroup_storage_type(stype) {
+               storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+@@ -38,10 +104,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+       if (!repeat)
+               repeat = 1;
+ 
+-      rcu_read_lock();
+-      migrate_disable();
+-      time_start = ktime_get_ns();
+-      for (i = 0; i < repeat; i++) {
++      bpf_test_timer_enter(&t);
++      do {
+               ret = bpf_cgroup_storage_set(storage);
+               if (ret)
+                       break;
+@@ -53,29 +117,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
+ 
+               bpf_cgroup_storage_unset();
+ 
+-              if (signal_pending(current)) {
+-                      ret = -EINTR;
+-                      break;
+-              }
+-
+-              if (need_resched()) {
+-                      time_spent += ktime_get_ns() - time_start;
+-                      migrate_enable();
+-                      rcu_read_unlock();
+-
+-                      cond_resched();
+-
+-                      rcu_read_lock();
+-                      migrate_disable();
+-                      time_start = ktime_get_ns();
+-              }
+-      }
+-      time_spent += ktime_get_ns() - time_start;
+-      migrate_enable();
+-      rcu_read_unlock();
+-
+-      do_div(time_spent, repeat);
+-      *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
++      } while (bpf_test_timer_continue(&t, repeat, &ret, time));
++      bpf_test_timer_leave(&t);
+ 
+       for_each_cgroup_storage_type(stype)
+               bpf_cgroup_storage_free(storage[stype]);
+@@ -688,18 +731,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+                                    const union bpf_attr *kattr,
+                                    union bpf_attr __user *uattr)
+ {
++      struct bpf_test_timer t = { NO_PREEMPT };
+       u32 size = kattr->test.data_size_in;
+       struct bpf_flow_dissector ctx = {};
+       u32 repeat = kattr->test.repeat;
+       struct bpf_flow_keys *user_ctx;
+       struct bpf_flow_keys flow_keys;
+-      u64 time_start, time_spent = 0;
+       const struct ethhdr *eth;
+       unsigned int flags = 0;
+       u32 retval, duration;
+       void *data;
+       int ret;
+-      u32 i;
+ 
+       if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
+               return -EINVAL;
+@@ -735,48 +777,127 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+       ctx.data = data;
+       ctx.data_end = (__u8 *)data + size;
+ 
+-      rcu_read_lock();
+-      preempt_disable();
+-      time_start = ktime_get_ns();
+-      for (i = 0; i < repeat; i++) {
++      bpf_test_timer_enter(&t);
++      do {
+               retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
+                                         size, flags);
++      } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
++      bpf_test_timer_leave(&t);
+ 
+-              if (signal_pending(current)) {
+-                      preempt_enable();
+-                      rcu_read_unlock();
++      if (ret < 0)
++              goto out;
+ 
+-                      ret = -EINTR;
+-                      goto out;
+-              }
++      ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
++                            retval, duration);
++      if (!ret)
++              ret = bpf_ctx_finish(kattr, uattr, user_ctx,
++                                   sizeof(struct bpf_flow_keys));
+ 
+-              if (need_resched()) {
+-                      time_spent += ktime_get_ns() - time_start;
+-                      preempt_enable();
+-                      rcu_read_unlock();
++out:
++      kfree(user_ctx);
++      kfree(data);
++      return ret;
++}
+ 
+-                      cond_resched();
++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
++                              union bpf_attr __user *uattr)
++{
++      struct bpf_test_timer t = { NO_PREEMPT };
++      struct bpf_prog_array *progs = NULL;
++      struct bpf_sk_lookup_kern ctx = {};
++      u32 repeat = kattr->test.repeat;
++      struct bpf_sk_lookup *user_ctx;
++      u32 retval, duration;
++      int ret = -EINVAL;
+ 
+-                      rcu_read_lock();
+-                      preempt_disable();
+-                      time_start = ktime_get_ns();
+-              }
++      if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
++              return -EINVAL;
++
++      if (kattr->test.flags || kattr->test.cpu)
++              return -EINVAL;
++
++      if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
++          kattr->test.data_size_out)
++              return -EINVAL;
++
++      if (!repeat)
++              repeat = 1;
++
++      user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
++      if (IS_ERR(user_ctx))
++              return PTR_ERR(user_ctx);
++
++      if (!user_ctx)
++              return -EINVAL;
++
++      if (user_ctx->sk)
++              goto out;
++
++      if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
++              goto out;
++
++      if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
++              ret = -ERANGE;
++              goto out;
+       }
+-      time_spent += ktime_get_ns() - time_start;
+-      preempt_enable();
+-      rcu_read_unlock();
+ 
+-      do_div(time_spent, repeat);
+-      duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
++      ctx.family = (u16)user_ctx->family;
++      ctx.protocol = (u16)user_ctx->protocol;
++      ctx.dport = (u16)user_ctx->local_port;
++      ctx.sport = (__force __be16)user_ctx->remote_port;
+ 
+-      ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
+-                            retval, duration);
++      switch (ctx.family) {
++      case AF_INET:
++              ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
++              ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
++              break;
++
++#if IS_ENABLED(CONFIG_IPV6)
++      case AF_INET6:
++              ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
++              ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
++              break;
++#endif
++
++      default:
++              ret = -EAFNOSUPPORT;
++              goto out;
++      }
++
++      progs = bpf_prog_array_alloc(1, GFP_KERNEL);
++      if (!progs) {
++              ret = -ENOMEM;
++              goto out;
++      }
++
++      progs->items[0].prog = prog;
++
++      bpf_test_timer_enter(&t);
++      do {
++              ctx.selected_sk = NULL;
++              retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
++      } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
++      bpf_test_timer_leave(&t);
++
++      if (ret < 0)
++              goto out;
++
++      user_ctx->cookie = 0;
++      if (ctx.selected_sk) {
++              if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
++                      ret = -EOPNOTSUPP;
++                      goto out;
++              }
++
++              user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
++      }
++
++      ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
+       if (!ret)
+-              ret = bpf_ctx_finish(kattr, uattr, user_ctx,
+-                                   sizeof(struct bpf_flow_keys));
++              ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
+ 
+ out:
++      bpf_prog_array_free(progs);
+       kfree(user_ctx);
+-      kfree(data);
+       return ret;
+ }
+diff --git a/net/core/filter.c b/net/core/filter.c
+index e2b491665775f..815edf7bc4390 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -10334,6 +10334,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
+ }
+ 
+ const struct bpf_prog_ops sk_lookup_prog_ops = {
++      .test_run = bpf_prog_test_run_sk_lookup,
+ };
+ 
+ const struct bpf_verifier_ops sk_lookup_verifier_ops = {
+diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+index 428cc3a4c36f1..c71b863093ace 100644
+--- a/net/ipv4/igmp.c
++++ b/net/ipv4/igmp.c
+@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
+       struct net *net = dev_net(in_dev->dev);
+       if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
+               return;
+-      WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
++      WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
+       igmp_ifc_start_timer(in_dev, 1);
+ }
+ 
+@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
+                * received value was zero, use the default or statically
+                * configured value.
+                */
+-              in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
++              in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+ 
+               /* RFC3376, 8.3. Query Response Interval:
+@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+       pmc->interface = im->interface;
+       in_dev_hold(in_dev);
+       pmc->multiaddr = im->multiaddr;
+-      pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++      pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+       pmc->sfmode = im->sfmode;
+       if (pmc->sfmode == MCAST_INCLUDE) {
+               struct ip_sf_list *psf;
+@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+                       swap(im->tomb, pmc->tomb);
+                       swap(im->sources, pmc->sources);
+                       for (psf = im->sources; psf; psf = psf->sf_next)
+-                              psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                              psf->sf_crcount = in_dev->mr_qrv ?:
++                                      READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               } else {
+-                      im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                      im->crcount = in_dev->mr_qrv ?:
++                              READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               }
+               in_dev_put(pmc->interface);
+               kfree_pmc(pmc);
+@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+       if (in_dev->dead)
+               return;
+ 
+-      im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
++      im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+       if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
+               spin_lock_bh(&im->lock);
+               igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
+@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im)
+        * IN() to IN(A).
+        */
+       if (im->sfmode == MCAST_EXCLUDE)
+-              im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ 
+       igmp_ifc_event(in_dev);
+ #endif
+@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev)
+ 
+       in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+       in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+-      in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
++      in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ }
+ #else
+ static void ip_mc_reset(struct in_device *in_dev)
+@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+               if (psf->sf_oldin &&
+                   !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
+-                      psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++                      psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+                       psf->sf_next = pmc->tomb;
+                       pmc->tomb = psf;
+                       rv = 1;
+@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+               /* filter mode change */
+               pmc->sfmode = MCAST_INCLUDE;
+ #ifdef CONFIG_IP_MULTICAST
+-              pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+               for (psf = pmc->sources; psf; psf = psf->sf_next)
+                       psf->sf_crcount = 0;
+@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
+ #ifdef CONFIG_IP_MULTICAST
+               /* else no filters; keep old mode for reports */
+ 
+-              pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
++              pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+               WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
+               for (psf = pmc->sources; psf; psf = psf->sf_next)
+                       psf->sf_crcount = 0;
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index f1fd26bb199ce..78460eb39b3af 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -698,7 +698,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
+                               int size_goal)
+ {
+       return skb->len < size_goal &&
+-             sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
++             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
+              !tcp_rtx_queue_empty(sk) &&
+              refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
+ }
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index d817f8c31c9ce..d35e88b5ffcbe 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -503,7 +503,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
+  */
+ static void tcp_init_buffer_space(struct sock *sk)
+ {
+-      int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
++      int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
+       struct tcp_sock *tp = tcp_sk(sk);
+       int maxwin;
+ 
+@@ -693,7 +693,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
+        * <prev RTT . ><current RTT .. ><next RTT .... >
+        */
+ 
+-      if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+               int rcvmem, rcvbuf;
+               u64 rcvwin, grow;
+@@ -2135,7 +2135,7 @@ void tcp_enter_loss(struct sock *sk)
+        * loss recovery is underway except recurring timeout(s) on
+        * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+        */
+-      tp->frto = net->ipv4.sysctl_tcp_frto &&
++      tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
+                  (new_recovery || icsk->icsk_retransmits) &&
+                  !inet_csk(sk)->icsk_mtup.probe_size;
+ }
+@@ -3004,7 +3004,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
+ 
+ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
+ {
+-      u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
++      u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
+       struct tcp_sock *tp = tcp_sk(sk);
+ 
+       if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
+@@ -3528,7 +3528,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
+       if (*last_oow_ack_time) {
+               s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+ 
+-              if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
++              if (0 <= elapsed &&
++                  elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
+                       NET_INC_STATS(net, mib_idx);
+                       return true;    /* rate-limited: don't send yet! */
+               }
+@@ -3576,7 +3577,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
+       /* Then check host-wide RFC 5961 rate limit. */
+       now = jiffies / HZ;
+       if (now != challenge_timestamp) {
+-              u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
++              u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
+               u32 half = (ack_limit + 1) >> 1;
+ 
+               challenge_timestamp = now;
+@@ -4367,7 +4368,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+ 
+-      if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++      if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+               int mib_idx;
+ 
+               if (before(seq, tp->rcv_nxt))
+@@ -4414,7 +4415,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+               tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+ 
+-              if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++              if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+                       u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+ 
+                       tcp_rcv_spurious_retrans(sk, skb);
+@@ -5439,7 +5440,7 @@ send_now:
+       }
+ 
+       if (!tcp_is_sack(tp) ||
+-          tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
++          tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
+               goto send_now;
+ 
+       if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+@@ -5460,11 +5461,12 @@ send_now:
+       if (tp->srtt_us && tp->srtt_us < rtt)
+               rtt = tp->srtt_us;
+ 
+-      delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
++      delay = min_t(unsigned long,
++                    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
+                     rtt * (NSEC_PER_USEC >> 3)/20);
+       sock_hold(sk);
+       hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+-                             sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
++                             READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
+                              HRTIMER_MODE_REL_PINNED_SOFT);
+ }
+ 
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index d5f13ff7d9004..0d165ce2d80a7 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -983,7 +983,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
+       if (skb) {
+               __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
+ 
+-              tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++              tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+                               (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+                               (inet_sk(sk)->tos & INET_ECN_MASK) :
+                               inet_sk(sk)->tos;
+@@ -1558,7 +1558,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
+       /* Set ToS of the new socket based upon the value of incoming SYN.
+        * ECT bits are set later in tcp_init_transfer().
+        */
+-      if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+               newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+ 
+       if (!dst) {
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 8d7e32f4abf67..f3ca6eea2ca39 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
+       int m;
+ 
+       sk_dst_confirm(sk);
+-      if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
++      if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
+               return;
+ 
+       rcu_read_lock();
+@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
+ 
+       if (tcp_in_initial_slowstart(tp)) {
+               /* Slow start still did not finish. */
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+                       val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+                       if (val && (tp->snd_cwnd >> 1) > val)
+@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
+       } else if (!tcp_in_slow_start(tp) &&
+                  icsk->icsk_ca_state == TCP_CA_Open) {
+               /* Cong. avoidance phase, cwnd is reliable. */
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+                       tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+                                      max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
+@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
+                       tcp_metric_set(tm, TCP_METRIC_CWND,
+                                      (val + tp->snd_ssthresh) >> 1);
+               }
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+                       val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+                       if (val && tp->snd_ssthresh > val)
+@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
+       if (tcp_metric_locked(tm, TCP_METRIC_CWND))
+               tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
+ 
+-      val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
++      val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
+             0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+       if (val) {
+               tp->snd_ssthresh = val;
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 9b67c61576e4c..657b0a4d93599 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
+       if (tcp_packets_in_flight(tp) == 0)
+               tcp_ca_event(sk, CA_EVENT_TX_START);
+ 
+-      /* If this is the first data packet sent in response to the
+-       * previous received data,
+-       * and it is a reply for ato after last received packet,
+-       * increase pingpong count.
+-       */
+-      if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+-          (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+-              inet_csk_inc_pingpong_cnt(sk);
+-
+       tp->lsndtime = now;
++
++      /* If it is a reply for ato after last received
++       * packet, enter pingpong mode.
++       */
++      if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
++              inet_csk_enter_pingpong_mode(sk);
+ }
+ 
+ /* Account for an ACK we sent. */
+@@ -1987,7 +1984,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+ 
+       min_tso = ca_ops->min_tso_segs ?
+                       ca_ops->min_tso_segs(sk) :
+-                      sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
++                      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
+ 
+       tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
+       return min_t(u32, tso_segs, sk->sk_gso_max_segs);
+@@ -2502,7 +2499,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+                     sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+       if (sk->sk_pacing_status == SK_PACING_NONE)
+               limit = min_t(unsigned long, limit,
+-                            sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
++                            READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
+       limit <<= factor;
+ 
+       if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index 6ac88fe24a8e0..135e3a060caa8 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -22,6 +22,11 @@
+ #include <linux/proc_fs.h>
+ #include <net/ping.h>
+ 
++static void ping_v6_destroy(struct sock *sk)
++{
++      inet6_destroy_sock(sk);
++}
++
+ /* Compatibility glue so we can support IPv6 when it's compiled as a module */
+ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
+                                int *addr_len)
+@@ -166,6 +171,7 @@ struct proto pingv6_prot = {
+       .owner =        THIS_MODULE,
+       .init =         ping_init_sock,
+       .close =        ping_close,
++      .destroy =      ping_v6_destroy,
+       .connect =      ip6_datagram_connect_v6_only,
+       .disconnect =   __udp_disconnect,
+       .setsockopt =   ipv6_setsockopt,
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 303b54414a6cc..8d91f36cb11bc 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+               if (np->repflow && ireq->pktopts)
+                       fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+ 
+-              tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
++              tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+                               (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+                               (np->tclass & INET_ECN_MASK) :
+                               np->tclass;
+@@ -1344,7 +1344,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
+       /* Set ToS of the new socket based upon the value of incoming SYN.
+        * ECT bits are set later in tcp_init_transfer().
+        */
+-      if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+               newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+ 
+       /* Clone native IPv6 options from listening socket (if any)
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 8123c79e27913..d0e91aa7b30e5 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -1421,7 +1421,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
+       if (msk->rcvq_space.copied <= msk->rcvq_space.space)
+               goto new_measure;
+ 
+-      if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
++      if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
+           !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
+               int rcvmem, rcvbuf;
+               u64 rcvwin, grow;
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index 1640da5c50776..72d30922ed290 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -838,11 +838,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
+ }
+ 
+ static int
+-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
+ {
+       struct sk_buff *nskb;
+ 
+       if (diff < 0) {
++              unsigned int min_len = skb_transport_offset(e->skb);
++
++              if (data_len < min_len)
++                      return -EINVAL;
++
+               if (pskb_trim(e->skb, data_len))
+                       return -ENOMEM;
+       } else if (diff > 0) {
+diff --git a/net/sctp/associola.c b/net/sctp/associola.c
+index fdb69d46276d6..2d4ec61877553 100644
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init(
+       if (!sctp_ulpq_init(&asoc->ulpq, asoc))
+               goto fail_init;
+ 
+-      if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
+-                           0, gfp))
+-              goto fail_init;
++      if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
++              goto stream_free;
+ 
+       /* Initialize default path MTU. */
+       asoc->pathmtu = sp->pathmtu;
+diff --git a/net/sctp/stream.c b/net/sctp/stream.c
+index 6dc95dcc0ff4f..ef9fceadef8d5 100644
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ 
+       ret = sctp_stream_alloc_out(stream, outcnt, gfp);
+       if (ret)
+-              goto out_err;
++              return ret;
+ 
+       for (i = 0; i < stream->outcnt; i++)
+               SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
+ handle_in:
+       sctp_stream_interleave_init(stream);
+       if (!incnt)
+-              goto out;
+-
+-      ret = sctp_stream_alloc_in(stream, incnt, gfp);
+-      if (ret)
+-              goto in_err;
+-
+-      goto out;
++              return 0;
+ 
+-in_err:
+-      sched->free(stream);
+-      genradix_free(&stream->in);
+-out_err:
+-      genradix_free(&stream->out);
+-      stream->outcnt = 0;
+-out:
+-      return ret;
++      return sctp_stream_alloc_in(stream, incnt, gfp);
+ }
+ 
+ int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
+index 99e5f69fbb742..a2e1d34f52c5b 100644
+--- a/net/sctp/stream_sched.c
++++ b/net/sctp/stream_sched.c
+@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
+               if (!SCTP_SO(&asoc->stream, i)->ext)
+                       continue;
+ 
+-              ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
++              ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
+               if (ret)
+                       goto err;
+       }
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index 23eab7ac43ee5..5cb6846544cc7 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -1349,8 +1349,13 @@ static int tls_device_down(struct net_device *netdev)
+                * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+                * Now release the ref taken above.
+                */
+-              if (refcount_dec_and_test(&ctx->refcount))
++              if (refcount_dec_and_test(&ctx->refcount)) {
++                      /* sk_destruct ran after tls_device_down took a ref, and
++                       * it returned early. Complete the destruction here.
++                       */
++                      list_del(&ctx->list);
+                       tls_device_free_ctx(ctx);
++              }
+       }
+ 
+       up_write(&device_offload_lock);
+diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
+index e440cd7f32a6f..b9ee2ded381ab 100644
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -5006,7 +5006,10 @@ struct bpf_pidns_info {
+ 
+ /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
+ struct bpf_sk_lookup {
+-      __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++      union {
++              __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
++              __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
++      };
+ 
+       __u32 family;           /* Protocol family (AF_INET, AF_INET6) */
+       __u32 protocol;         /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
+diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
+index 94809aed8b447..1cab29d45bfb3 100644
+--- a/tools/perf/util/symbol-elf.c
++++ b/tools/perf/util/symbol-elf.c
+@@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+       return NULL;
+ }
+ 
++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
++{
++      size_t i, phdrnum;
++      u64 sz;
++
++      if (elf_getphdrnum(elf, &phdrnum))
++              return -1;
++
++      for (i = 0; i < phdrnum; i++) {
++              if (gelf_getphdr(elf, i, phdr) == NULL)
++                      return -1;
++
++              if (phdr->p_type != PT_LOAD)
++                      continue;
++
++              sz = max(phdr->p_memsz, phdr->p_filesz);
++              if (!sz)
++                      continue;
++
++              if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
++                      return 0;
++      }
++
++      /* Not found any valid program header */
++      return -1;
++}
++
+ static bool want_demangle(bool is_kernel_sym)
+ {
+       return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+@@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+                                       sym.st_value);
+                       used_opd = true;
+               }
++
+               /*
+                * When loading symbols in a data mapping, ABS symbols (which
+                * has a value of SHN_ABS in its st_shndx) failed at
+@@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+                               goto out_elf_end;
+               } else if ((used_opd && runtime_ss->adjust_symbols) ||
+                          (!used_opd && syms_ss->adjust_symbols)) {
++                      GElf_Phdr phdr;
++
++                      if (elf_read_program_header(syms_ss->elf,
++                                                  (u64)sym.st_value, &phdr)) {
++                              pr_warning("%s: failed to find program header for "
++                                         "symbol: %s st_value: %#" PRIx64 "\n",
++                                         __func__, elf_name, (u64)sym.st_value);
++                              continue;
++                      }
+                       pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+-                                "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+-                                (u64)sym.st_value, (u64)shdr.sh_addr,
+-                                (u64)shdr.sh_offset);
+-                      sym.st_value -= shdr.sh_addr - shdr.sh_offset;
++                                "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
++                                __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
++                                (u64)phdr.p_offset);
++                      sym.st_value -= phdr.p_vaddr - phdr.p_offset;
+               }
+ 
+               demangled = demangle_sym(dso, kmodule, elf_name);
+diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
+index a4c55fcb0e7b1..0fb92d9a319b7 100644
+--- a/tools/testing/selftests/bpf/test_verifier.c
++++ b/tools/testing/selftests/bpf/test_verifier.c
+@@ -100,7 +100,7 @@ struct bpf_test {
+       enum bpf_prog_type prog_type;
+       uint8_t flags;
+       void (*fill_helper)(struct bpf_test *self);
+-      uint8_t runs;
++      int runs;
+ #define bpf_testdata_struct_t                                 \
+       struct {                                                \
+               uint32_t retval, retval_unpriv;                 \
+@@ -1054,7 +1054,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
+ 
+       run_errs = 0;
+       run_successes = 0;
+-      if (!alignment_prevented_execution && fd_prog >= 0) {
++      if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
+               uint32_t expected_val;
+               int i;
+ 
+diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+index 2ad5f974451c3..fd3b62a084b9f 100644
+--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
++++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+@@ -239,6 +239,7 @@
+       .result = ACCEPT,
+       .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+       .expected_attach_type = BPF_SK_LOOKUP,
++      .runs = -1,
+ },
+ /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
+ {
