commit: 206a5e2746ef7fe6e5960e2af948e1eedef7e208
Author: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
AuthorDate: Wed Aug 3 14:12:37 2022 +0000
Commit: Alice Ferrazzi <alicef <AT> gentoo <DOT> org>
CommitDate: Wed Aug 3 14:12:44 2022 +0000
URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=206a5e27
Linux patch 5.10.135 Signed-off-by: Alice Ferrazzi <alicef <AT> gentoo.org> 0000_README | 4 + 1134_linux-5.10.135.patch | 2841 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 2845 insertions(+) diff --git a/0000_README b/0000_README index 7292c57d..19bd6321 100644 --- a/0000_README +++ b/0000_README @@ -579,6 +579,10 @@ Patch: 1133_linux-5.10.134.patch From: http://www.kernel.org Desc: Linux 5.10.134 +Patch: 1134_linux-5.10.135.patch +From: http://www.kernel.org +Desc: Linux 5.10.135 + Patch: 1500_XATTR_USER_PREFIX.patch From: https://bugs.gentoo.org/show_bug.cgi?id=470644 Desc: Support for namespace user.pax.* on tmpfs. diff --git a/1134_linux-5.10.135.patch b/1134_linux-5.10.135.patch new file mode 100644 index 00000000..435afe17 --- /dev/null +++ b/1134_linux-5.10.135.patch @@ -0,0 +1,2841 @@ +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 1a58c580b2366..8b7c26d090459 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -2873,6 +2873,7 @@ + no_entry_flush [PPC] + no_uaccess_flush [PPC] + mmio_stale_data=off [X86] ++ retbleed=off [X86] + + Exceptions: + This does not have any effect on +@@ -2895,6 +2896,7 @@ + mds=full,nosmt [X86] + tsx_async_abort=full,nosmt [X86] + mmio_stale_data=full,nosmt [X86] ++ retbleed=auto,nosmt [X86] + + mminit_loglevel= + [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this +diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst +index 0b1f3235aa773..0158dff638873 100644 +--- a/Documentation/networking/ip-sysctl.rst ++++ b/Documentation/networking/ip-sysctl.rst +@@ -2629,7 +2629,14 @@ sctp_rmem - vector of 3 INTEGERs: min, default, max + Default: 4K + + sctp_wmem - vector of 3 INTEGERs: min, default, max +- Currently this tunable has no effect. ++ Only the first value ("min") is used, "default" and "max" are ++ ignored. ++ ++ min: Minimum size of send buffer that can be used by SCTP sockets. ++ It is guaranteed to each SCTP socket (but not association) even ++ under moderate memory pressure. ++ ++ Default: 4K + + addr_scope_policy - INTEGER + Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00 +diff --git a/Makefile b/Makefile +index 00dddc2ac804a..5f4dbcb433075 100644 +--- a/Makefile ++++ b/Makefile +@@ -1,7 +1,7 @@ + # SPDX-License-Identifier: GPL-2.0 + VERSION = 5 + PATCHLEVEL = 10 +-SUBLEVEL = 134 ++SUBLEVEL = 135 + EXTRAVERSION = + NAME = Dare mighty things + +diff --git a/arch/arm/include/asm/dma.h b/arch/arm/include/asm/dma.h +index a81dda65c5762..45180a2cc47cb 100644 +--- a/arch/arm/include/asm/dma.h ++++ b/arch/arm/include/asm/dma.h +@@ -10,7 +10,7 @@ + #else + #define MAX_DMA_ADDRESS ({ \ + extern phys_addr_t arm_dma_zone_size; \ +- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \ ++ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \ + (PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; }) + #endif + +diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c +index b99dd8e1c93f1..7ba6cf8261626 100644 +--- a/arch/arm/lib/xor-neon.c ++++ b/arch/arm/lib/xor-neon.c +@@ -26,8 +26,9 @@ MODULE_LICENSE("GPL"); + * While older versions of GCC do not generate incorrect code, they fail to + * recognize the parallel nature of these functions, and emit plain ARM code, + * which is known to be slower than the optimized ARM code in asm-arm/xor.h. 
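A quick illustration of the arch/arm/include/asm/dma.h hunk above: the old MAX_DMA_ADDRESS bound used the 32-bit literal 0x10000000 (256 MiB), which wraps around once PAGE_OFFSET is subtracted, while the new 0x100000000ULL (4 GiB) literal forces the arithmetic into 64 bits and yields the intended limit. The sketch below is a stand-alone userspace model, not the kernel code; the PAGE_OFFSET value and the example zone size are assumed for illustration.

#include <stdio.h>
#include <stdint.h>

#define PAGE_OFFSET 0xC0000000u		/* assumed 3G/1G split */

int main(void)
{
	/* Model ARM's 32-bit unsigned long with uint32_t. */
	uint32_t old_bound = 0x10000000u - PAGE_OFFSET;	/* wraps to 0x50000000 */
	uint64_t new_bound = 0x100000000ULL - PAGE_OFFSET;	/* 0x40000000 (4 GiB - PAGE_OFFSET) */
	uint64_t zone = 0x48000000ULL;	/* example DMA zone size, 1.125 GiB */

	printf("old bound %#x, new bound %#llx\n",
	       old_bound, (unsigned long long)new_bound);
	printf("zone fits below 4 GiB? old says %d, new says %d\n",
	       zone < old_bound, zone < new_bound);
	return 0;
}

With the wrapped 32-bit bound the oversized zone appears to fit; with the 64-bit bound it correctly falls back to the 0xffffffffUL limit.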
++ * ++ * #warning This code requires at least version 4.6 of GCC + */ +-#warning This code requires at least version 4.6 of GCC + #endif + + #pragma GCC diagnostic ignored "-Wunused-variable" +diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h +index 2c6e1c6ecbe78..4120c428dc378 100644 +--- a/arch/s390/include/asm/archrandom.h ++++ b/arch/s390/include/asm/archrandom.h +@@ -2,7 +2,7 @@ + /* + * Kernel interface for the s390 arch_random_* functions + * +- * Copyright IBM Corp. 2017, 2020 ++ * Copyright IBM Corp. 2017, 2022 + * + * Author: Harald Freudenberger <[email protected]> + * +@@ -14,6 +14,7 @@ + #ifdef CONFIG_ARCH_RANDOM + + #include <linux/static_key.h> ++#include <linux/preempt.h> + #include <linux/atomic.h> + #include <asm/cpacf.h> + +@@ -32,7 +33,8 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) + + static inline bool __must_check arch_get_random_seed_long(unsigned long *v) + { +- if (static_branch_likely(&s390_arch_random_available)) { ++ if (static_branch_likely(&s390_arch_random_available) && ++ in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; +@@ -42,7 +44,8 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) + + static inline bool __must_check arch_get_random_seed_int(unsigned int *v) + { +- if (static_branch_likely(&s390_arch_random_available)) { ++ if (static_branch_likely(&s390_arch_random_available) && ++ in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 7896b67dda420..2e5762faf7740 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1476,6 +1476,7 @@ static void __init spectre_v2_select_mitigation(void) + * enable IBRS around firmware calls. + */ + if (boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ boot_cpu_has(X86_FEATURE_IBPB) && + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) { + +diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c +index a918ca93e4f7d..df5897c90becc 100644 +--- a/drivers/edac/ghes_edac.c ++++ b/drivers/edac/ghes_edac.c +@@ -101,9 +101,14 @@ static void dimm_setup_label(struct dimm_info *dimm, u16 handle) + + dmi_memdev_name(handle, &bank, &device); + +- /* both strings must be non-zero */ +- if (bank && *bank && device && *device) +- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device); ++ /* ++ * Set to a NULL string when both bank and device are zero. In this case, ++ * the label assigned by default will be preserved. ++ */ ++ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s", ++ (bank && *bank) ? bank : "", ++ (bank && *bank && device && *device) ? " " : "", ++ (device && *device) ? 
device : ""); + } + + static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry) +diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c +index 92987daa5e17d..5e72e6cb2f840 100644 +--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c ++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c +@@ -679,7 +679,11 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + goto out_free_dma; + + for (i = 0; i < npages; i += max) { +- args.end = start + (max << PAGE_SHIFT); ++ if (args.start + (max << PAGE_SHIFT) > end) ++ args.end = end; ++ else ++ args.end = args.start + (max << PAGE_SHIFT); ++ + ret = migrate_vma_setup(&args); + if (ret) + goto out_free_pfns; +diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c +index 11d4e3ba9af4c..1dad62ecb8a3a 100644 +--- a/drivers/net/ethernet/intel/i40e/i40e_main.c ++++ b/drivers/net/ethernet/intel/i40e/i40e_main.c +@@ -1907,11 +1907,15 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes ++ * We need at least one queue pair for the interface ++ * to be usable as we see in else statement. + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + vsi->num_queue_pairs = pf->num_lan_msix; ++ else ++ vsi->num_queue_pairs = 1; + } + + /* Number of queues per enabled TC */ +diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c +index 060897eb9cabe..7f1bf71844bce 100644 +--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c ++++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c +@@ -652,7 +652,8 @@ static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) + rx_desc = ICE_RX_DESC(rx_ring, i); + + if (!(rx_desc->wb.status_error0 & +- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS))) ++ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) | ++ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))) + continue; + + rx_buf = &rx_ring->rx_buf[i]; +diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c +index aae79fdd51727..810f2bdb91645 100644 +--- a/drivers/net/ethernet/intel/ice/ice_main.c ++++ b/drivers/net/ethernet/intel/ice/ice_main.c +@@ -5203,10 +5203,12 @@ int ice_vsi_cfg(struct ice_vsi *vsi) + if (vsi->netdev) { + ice_set_rx_mode(vsi->netdev); + +- err = ice_vsi_vlan_setup(vsi); ++ if (vsi->type != ICE_VSI_LB) { ++ err = ice_vsi_vlan_setup(vsi); + +- if (err) +- return err; ++ if (err) ++ return err; ++ } + } + ice_vsi_cfg_dcb_rings(vsi); + +diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c +index 725b0f38813a9..a2b4e3befa591 100644 +--- a/drivers/net/ethernet/sfc/ptp.c ++++ b/drivers/net/ethernet/sfc/ptp.c +@@ -1100,7 +1100,29 @@ static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb) + + tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type); + if (tx_queue && tx_queue->timestamping) { ++ /* This code invokes normal driver TX code which is always ++ * protected from softirqs when called from generic TX code, ++ * which in turn disables preemption. Look at __dev_queue_xmit ++ * which uses rcu_read_lock_bh disabling preemption for RCU ++ * plus disabling softirqs. We do not need RCU reader ++ * protection here. 
++ * ++ * Although it is theoretically safe for current PTP TX/RX code ++ * running without disabling softirqs, there are three good ++ * reasond for doing so: ++ * ++ * 1) The code invoked is mainly implemented for non-PTP ++ * packets and it is always executed with softirqs ++ * disabled. ++ * 2) This being a single PTP packet, better to not ++ * interrupt its processing by softirqs which can lead ++ * to high latencies. ++ * 3) netdev_xmit_more checks preemption is disabled and ++ * triggers a BUG_ON if not. ++ */ ++ local_bh_disable(); + efx_enqueue_skb(tx_queue, skb); ++ local_bh_enable(); + } else { + WARN_ONCE(1, "PTP channel has no timestamped tx queue\n"); + dev_kfree_skb_any(skb); +diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c +index 789a124809e3c..70c5905a916b9 100644 +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -240,6 +240,7 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) + #define DEFAULT_SEND_SCI true + #define DEFAULT_ENCRYPT false + #define DEFAULT_ENCODING_SA 0 ++#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1)) + + static bool send_sci(const struct macsec_secy *secy) + { +@@ -1694,7 +1695,7 @@ static bool validate_add_rxsa(struct nlattr **attrs) + return false; + + if (attrs[MACSEC_SA_ATTR_PN] && +- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +@@ -1750,7 +1751,8 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + } + + pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; +- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { ++ if (tb_sa[MACSEC_SA_ATTR_PN] && ++ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { + pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); + rtnl_unlock(); +@@ -1766,7 +1768,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), +- MACSEC_SA_ATTR_SALT); ++ MACSEC_SALT_LEN); + rtnl_unlock(); + return -EINVAL; + } +@@ -1839,7 +1841,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) + return 0; + + cleanup: +- kfree(rx_sa); ++ macsec_rxsa_put(rx_sa); + rtnl_unlock(); + return err; + } +@@ -1936,7 +1938,7 @@ static bool validate_add_txsa(struct nlattr **attrs) + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) + return false; + +- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +@@ -2008,7 +2010,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) + if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) { + pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n", + nla_len(tb_sa[MACSEC_SA_ATTR_SALT]), +- MACSEC_SA_ATTR_SALT); ++ MACSEC_SALT_LEN); + rtnl_unlock(); + return -EINVAL; + } +@@ -2082,7 +2084,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) + + cleanup: + secy->operational = was_operational; +- kfree(tx_sa); ++ macsec_txsa_put(tx_sa); + rtnl_unlock(); + return err; + } +@@ -2290,7 +2292,7 @@ static bool validate_upd_sa(struct nlattr **attrs) + if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) + return false; + +- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0) ++ if 
(attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) + return false; + + if (attrs[MACSEC_SA_ATTR_ACTIVE]) { +@@ -3737,9 +3739,6 @@ static int macsec_changelink_common(struct net_device *dev, + secy->operational = tx_sa && tx_sa->active; + } + +- if (data[IFLA_MACSEC_WINDOW]) +- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); +- + if (data[IFLA_MACSEC_ENCRYPT]) + tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]); + +@@ -3785,6 +3784,16 @@ static int macsec_changelink_common(struct net_device *dev, + } + } + ++ if (data[IFLA_MACSEC_WINDOW]) { ++ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]); ++ ++ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window ++ * for XPN cipher suites */ ++ if (secy->xpn && ++ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW) ++ return -EINVAL; ++ } ++ + return 0; + } + +@@ -3814,7 +3823,7 @@ static int macsec_changelink(struct net_device *dev, struct nlattr *tb[], + + ret = macsec_changelink_common(dev, data); + if (ret) +- return ret; ++ goto cleanup; + + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { +diff --git a/drivers/net/sungem_phy.c b/drivers/net/sungem_phy.c +index 291fa449993fb..45f295403cb55 100644 +--- a/drivers/net/sungem_phy.c ++++ b/drivers/net/sungem_phy.c +@@ -454,6 +454,7 @@ static int bcm5421_init(struct mii_phy* phy) + int can_low_power = 1; + if (np == NULL || of_get_property(np, "no-autolowpower", NULL)) + can_low_power = 0; ++ of_node_put(np); + if (can_low_power) { + /* Enable automatic low-power */ + sungem_phy_write(phy, 0x1c, 0x9002); +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index 37178b078ee37..0a07c05a610d1 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -213,9 +213,15 @@ struct virtnet_info { + /* Packet virtio header size */ + u8 hdr_len; + +- /* Work struct for refilling if we run low on memory. */ ++ /* Work struct for delayed refilling if we run low on memory. */ + struct delayed_work refill; + ++ /* Is delayed refill enabled? 
*/ ++ bool refill_enabled; ++ ++ /* The lock to synchronize the access to refill_enabled */ ++ spinlock_t refill_lock; ++ + /* Work struct for config space updates */ + struct work_struct config_work; + +@@ -319,6 +325,20 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask) + return p; + } + ++static void enable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = true; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ ++static void disable_delayed_refill(struct virtnet_info *vi) ++{ ++ spin_lock_bh(&vi->refill_lock); ++ vi->refill_enabled = false; ++ spin_unlock_bh(&vi->refill_lock); ++} ++ + static void virtqueue_napi_schedule(struct napi_struct *napi, + struct virtqueue *vq) + { +@@ -1403,8 +1423,12 @@ static int virtnet_receive(struct receive_queue *rq, int budget, + } + + if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) { +- if (!try_fill_recv(vi, rq, GFP_ATOMIC)) +- schedule_delayed_work(&vi->refill, 0); ++ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) { ++ spin_lock(&vi->refill_lock); ++ if (vi->refill_enabled) ++ schedule_delayed_work(&vi->refill, 0); ++ spin_unlock(&vi->refill_lock); ++ } + } + + u64_stats_update_begin(&rq->stats.syncp); +@@ -1523,6 +1547,8 @@ static int virtnet_open(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i, err; + ++ enable_delayed_refill(vi); ++ + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + /* Make sure we have some buffers: if oom use wq. */ +@@ -1893,6 +1919,8 @@ static int virtnet_close(struct net_device *dev) + struct virtnet_info *vi = netdev_priv(dev); + int i; + ++ /* Make sure NAPI doesn't schedule refill work */ ++ disable_delayed_refill(vi); + /* Make sure refill_work doesn't re-enable napi! */ + cancel_delayed_work_sync(&vi->refill); + +@@ -2390,6 +2418,8 @@ static int virtnet_restore_up(struct virtio_device *vdev) + + virtio_device_ready(vdev); + ++ enable_delayed_refill(vi); ++ + if (netif_running(vi->dev)) { + err = virtnet_open(vi->dev); + if (err) +@@ -3092,6 +3122,7 @@ static int virtnet_probe(struct virtio_device *vdev) + vdev->priv = vi; + + INIT_WORK(&vi->config_work, virtnet_config_changed_work); ++ spin_lock_init(&vi->refill_lock); + + /* If we can receive ANY GSO packets, we must allocate large ones. 
*/ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || +diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c +index 6bcc4a13ae6c7..cc772045d526f 100644 +--- a/drivers/net/wireless/mediatek/mt7601u/usb.c ++++ b/drivers/net/wireless/mediatek/mt7601u/usb.c +@@ -26,6 +26,7 @@ static const struct usb_device_id mt7601u_device_table[] = { + { USB_DEVICE(0x2717, 0x4106) }, + { USB_DEVICE(0x2955, 0x0001) }, + { USB_DEVICE(0x2955, 0x1001) }, ++ { USB_DEVICE(0x2955, 0x1003) }, + { USB_DEVICE(0x2a5f, 0x1000) }, + { USB_DEVICE(0x7392, 0x7710) }, + { 0, } +diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c +index 0f2430fb398db..576cc39077f32 100644 +--- a/drivers/scsi/ufs/ufshcd-pltfrm.c ++++ b/drivers/scsi/ufs/ufshcd-pltfrm.c +@@ -107,9 +107,20 @@ out: + return ret; + } + ++static bool phandle_exists(const struct device_node *np, ++ const char *phandle_name, int index) ++{ ++ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index); ++ ++ if (parse_np) ++ of_node_put(parse_np); ++ ++ return parse_np != NULL; ++} ++ + #define MAX_PROP_SIZE 32 + static int ufshcd_populate_vreg(struct device *dev, const char *name, +- struct ufs_vreg **out_vreg) ++ struct ufs_vreg **out_vreg) + { + int ret = 0; + char prop_name[MAX_PROP_SIZE]; +@@ -122,7 +133,7 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, + } + + snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name); +- if (!of_parse_phandle(np, prop_name, 0)) { ++ if (!phandle_exists(np, prop_name, 0)) { + dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n", + __func__, prop_name); + goto out; +diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c +index d563abc3e1364..914e991731300 100644 +--- a/fs/ntfs/attrib.c ++++ b/fs/ntfs/attrib.c +@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYPE type, const ntfschar *name, + a = (ATTR_RECORD*)((u8*)ctx->attr + + le32_to_cpu(ctx->attr->length)); + for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { +- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + +- le32_to_cpu(ctx->mrec->bytes_allocated)) ++ u8 *mrec_end = (u8 *)ctx->mrec + ++ le32_to_cpu(ctx->mrec->bytes_allocated); ++ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) + ++ a->name_length * sizeof(ntfschar); ++ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || ++ name_end > mrec_end) + break; + ctx->attr = a; + if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || +diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h +index 7993d527edae9..0a8cd8e59a92c 100644 +--- a/fs/ocfs2/ocfs2.h ++++ b/fs/ocfs2/ocfs2.h +@@ -279,7 +279,6 @@ enum ocfs2_mount_options + OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ + OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ + OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ +- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ + }; + + #define OCFS2_OSB_SOFT_RO 0x0001 +@@ -675,8 +674,7 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) + + static inline int ocfs2_mount_local(struct ocfs2_super *osb) + { +- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) +- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); ++ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); + } + + static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) +diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c 
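Stepping back to the virtio_net hunks a little earlier: the new refill_enabled flag and refill_lock exist so the NAPI receive path only schedules the delayed refill work while the device is open, and virtnet_close() can clear the flag before cancel_delayed_work_sync() without the work being re-armed behind its back. Below is a small userspace model of that ordering using a pthread mutex; every name in it is made up for illustration and none of it is the driver's code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t refill_lock = PTHREAD_MUTEX_INITIALIZER;
static bool refill_enabled;
static int scheduled;			/* stands in for the queued delayed work */

static void try_schedule_refill(void)	/* the poll/receive path */
{
	pthread_mutex_lock(&refill_lock);
	if (refill_enabled)
		scheduled++;		/* schedule_delayed_work(&vi->refill, 0) */
	pthread_mutex_unlock(&refill_lock);
}

static void open_path(void)		/* enable_delayed_refill() */
{
	pthread_mutex_lock(&refill_lock);
	refill_enabled = true;
	pthread_mutex_unlock(&refill_lock);
}

static void close_path(void)
{
	pthread_mutex_lock(&refill_lock);
	refill_enabled = false;		/* disable_delayed_refill() */
	pthread_mutex_unlock(&refill_lock);
	scheduled = 0;			/* cancel_delayed_work_sync(&vi->refill) */
}

int main(void)
{
	open_path();
	try_schedule_refill();		/* allowed: device is open */
	close_path();
	try_schedule_refill();		/* ignored: refill is disabled */
	printf("work still pending after close: %d\n", scheduled);	/* 0 */
	return 0;
}

The key point is that the flag flips under the same lock the scheduling path checks, so "disable, then drain" cannot race with a late schedule.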
+index 4da0e4b1e79bf..8caeceeaeda7c 100644 +--- a/fs/ocfs2/slot_map.c ++++ b/fs/ocfs2/slot_map.c +@@ -254,16 +254,14 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, + int i, ret = -ENOSPC; + + if ((preferred >= 0) && (preferred < si->si_num_slots)) { +- if (!si->si_slots[preferred].sl_valid || +- !si->si_slots[preferred].sl_node_num) { ++ if (!si->si_slots[preferred].sl_valid) { + ret = preferred; + goto out; + } + } + + for(i = 0; i < si->si_num_slots; i++) { +- if (!si->si_slots[i].sl_valid || +- !si->si_slots[i].sl_node_num) { ++ if (!si->si_slots[i].sl_valid) { + ret = i; + break; + } +@@ -458,30 +456,24 @@ int ocfs2_find_slot(struct ocfs2_super *osb) + spin_lock(&osb->osb_lock); + ocfs2_update_slot_info(si); + +- if (ocfs2_mount_local(osb)) +- /* use slot 0 directly in local mode */ +- slot = 0; +- else { +- /* search for ourselves first and take the slot if it already +- * exists. Perhaps we need to mark this in a variable for our +- * own journal recovery? Possibly not, though we certainly +- * need to warn to the user */ +- slot = __ocfs2_node_num_to_slot(si, osb->node_num); ++ /* search for ourselves first and take the slot if it already ++ * exists. Perhaps we need to mark this in a variable for our ++ * own journal recovery? Possibly not, though we certainly ++ * need to warn to the user */ ++ slot = __ocfs2_node_num_to_slot(si, osb->node_num); ++ if (slot < 0) { ++ /* if no slot yet, then just take 1st available ++ * one. */ ++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); + if (slot < 0) { +- /* if no slot yet, then just take 1st available +- * one. */ +- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); +- if (slot < 0) { +- spin_unlock(&osb->osb_lock); +- mlog(ML_ERROR, "no free slots available!\n"); +- status = -EINVAL; +- goto bail; +- } +- } else +- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " +- "already allocated to this node!\n", +- slot, osb->dev_str); +- } ++ spin_unlock(&osb->osb_lock); ++ mlog(ML_ERROR, "no free slots available!\n"); ++ status = -EINVAL; ++ goto bail; ++ } ++ } else ++ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " ++ "allocated to this node!\n", slot, osb->dev_str); + + ocfs2_set_slot(si, slot, osb->node_num); + osb->slot_num = slot; +diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c +index 477ad05a34ea2..c0e5f1bad499f 100644 +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -175,7 +175,6 @@ enum { + Opt_dir_resv_level, + Opt_journal_async_commit, + Opt_err_cont, +- Opt_nocluster, + Opt_err, + }; + +@@ -209,7 +208,6 @@ static const match_table_t tokens = { + {Opt_dir_resv_level, "dir_resv_level=%u"}, + {Opt_journal_async_commit, "journal_async_commit"}, + {Opt_err_cont, "errors=continue"}, +- {Opt_nocluster, "nocluster"}, + {Opt_err, NULL} + }; + +@@ -621,13 +619,6 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) + goto out; + } + +- tmp = OCFS2_MOUNT_NOCLUSTER; +- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { +- ret = -EINVAL; +- mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); +- goto out; +- } +- + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE; + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { +@@ -868,7 +859,6 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, + } + + if (ocfs2_userspace_stack(osb) && +- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && + strncmp(osb->osb_cluster_stack, mopt->cluster_stack, + OCFS2_STACK_LABEL_LEN)) { + mlog(ML_ERROR, +@@ 
-1149,11 +1139,6 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) + osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : + "ordered"); + +- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && +- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) +- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " +- "without cluster aware mode.\n", osb->dev_str); +- + atomic_set(&osb->vol_state, VOLUME_MOUNTED); + wake_up(&osb->osb_mount_event); + +@@ -1460,9 +1445,6 @@ static int ocfs2_parse_options(struct super_block *sb, + case Opt_journal_async_commit: + mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; + break; +- case Opt_nocluster: +- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; +- break; + default: + mlog(ML_ERROR, + "Unrecognized mount option \"%s\" " +@@ -1574,9 +1556,6 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) + if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) + seq_printf(s, ",journal_async_commit"); + +- if (opts & OCFS2_MOUNT_NOCLUSTER) +- seq_printf(s, ",nocluster"); +- + return 0; + } + +diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h +index 8bd00da6d2a40..2f46ef3800aa2 100644 +--- a/fs/xfs/libxfs/xfs_log_format.h ++++ b/fs/xfs/libxfs/xfs_log_format.h +@@ -414,7 +414,16 @@ struct xfs_log_dinode { + /* start of the extended dinode, writable fields */ + uint32_t di_crc; /* CRC of the inode */ + uint64_t di_changecount; /* number of attribute changes */ +- xfs_lsn_t di_lsn; /* flush sequence */ ++ ++ /* ++ * The LSN we write to this field during formatting is not a reflection ++ * of the current on-disk LSN. It should never be used for recovery ++ * sequencing, nor should it be recovered into the on-disk inode at all. ++ * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() ++ * for details. ++ */ ++ xfs_lsn_t di_lsn; ++ + uint64_t di_flags2; /* more random flags */ + uint32_t di_cowextsize; /* basic cow extent size for file */ + uint8_t di_pad2[12]; /* more padding for future expansion */ +diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h +index 397d94775440d..1ce06173c2f55 100644 +--- a/fs/xfs/libxfs/xfs_types.h ++++ b/fs/xfs/libxfs/xfs_types.h +@@ -21,6 +21,7 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */ + typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */ + + typedef int64_t xfs_lsn_t; /* log sequence number */ ++typedef int64_t xfs_csn_t; /* CIL sequence number */ + + typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ + typedef uint32_t xfs_dahash_t; /* dir/attr hash value */ +diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c +index 8c6e26d62ef28..a3d5ecccfc2cc 100644 +--- a/fs/xfs/xfs_buf_item.c ++++ b/fs/xfs/xfs_buf_item.c +@@ -393,17 +393,8 @@ xfs_buf_item_pin( + } + + /* +- * This is called to unpin the buffer associated with the buf log +- * item which was previously pinned with a call to xfs_buf_item_pin(). +- * +- * Also drop the reference to the buf item for the current transaction. +- * If the XFS_BLI_STALE flag is set and we are the last reference, +- * then free up the buf log item and unlock the buffer. +- * +- * If the remove flag is set we are called from uncommit in the +- * forced-shutdown path. If that is true and the reference count on +- * the log item is going to drop to zero we need to free the item's +- * descriptor in the transaction. 
++ * This is called to unpin the buffer associated with the buf log item which ++ * was previously pinned with a call to xfs_buf_item_pin(). + */ + STATIC void + xfs_buf_item_unpin( +@@ -420,38 +411,35 @@ xfs_buf_item_unpin( + + trace_xfs_buf_item_unpin(bip); + ++ /* ++ * Drop the bli ref associated with the pin and grab the hold required ++ * for the I/O simulation failure in the abort case. We have to do this ++ * before the pin count drops because the AIL doesn't acquire a bli ++ * reference. Therefore if the refcount drops to zero, the bli could ++ * still be AIL resident and the buffer submitted for I/O (and freed on ++ * completion) at any point before we return. This can be removed once ++ * the AIL properly holds a reference on the bli. ++ */ + freed = atomic_dec_and_test(&bip->bli_refcount); +- ++ if (freed && !stale && remove) ++ xfs_buf_hold(bp); + if (atomic_dec_and_test(&bp->b_pin_count)) + wake_up_all(&bp->b_waiters); + +- if (freed && stale) { ++ /* nothing to do but drop the pin count if the bli is active */ ++ if (!freed) ++ return; ++ ++ if (stale) { + ASSERT(bip->bli_flags & XFS_BLI_STALE); + ASSERT(xfs_buf_islocked(bp)); + ASSERT(bp->b_flags & XBF_STALE); + ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); ++ ASSERT(list_empty(&lip->li_trans)); ++ ASSERT(!bp->b_transp); + + trace_xfs_buf_item_unpin_stale(bip); + +- if (remove) { +- /* +- * If we are in a transaction context, we have to +- * remove the log item from the transaction as we are +- * about to release our reference to the buffer. If we +- * don't, the unlock that occurs later in +- * xfs_trans_uncommit() will try to reference the +- * buffer which we no longer have a hold on. +- */ +- if (!list_empty(&lip->li_trans)) +- xfs_trans_del_item(lip); +- +- /* +- * Since the transaction no longer refers to the buffer, +- * the buffer should no longer refer to the transaction. +- */ +- bp->b_transp = NULL; +- } +- + /* + * If we get called here because of an IO error, we may or may + * not have the item on the AIL. xfs_trans_ail_delete() will +@@ -468,13 +456,13 @@ xfs_buf_item_unpin( + ASSERT(bp->b_log_item == NULL); + } + xfs_buf_relse(bp); +- } else if (freed && remove) { ++ } else if (remove) { + /* + * The buffer must be locked and held by the caller to simulate +- * an async I/O failure. ++ * an async I/O failure. We acquired the hold for this case ++ * before the buffer was unpinned. 
+ */ + xfs_buf_lock(bp); +- xfs_buf_hold(bp); + bp->b_flags |= XBF_ASYNC; + xfs_buf_ioend_fail(bp); + } +@@ -632,7 +620,7 @@ xfs_buf_item_release( + STATIC void + xfs_buf_item_committing( + struct xfs_log_item *lip, +- xfs_lsn_t commit_lsn) ++ xfs_csn_t seq) + { + return xfs_buf_item_release(lip); + } +diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c +index 1d649462d731a..b374c9cee1177 100644 +--- a/fs/xfs/xfs_buf_item_recover.c ++++ b/fs/xfs/xfs_buf_item_recover.c +@@ -796,6 +796,7 @@ xlog_recover_get_buf_lsn( + switch (magicda) { + case XFS_DIR3_LEAF1_MAGIC: + case XFS_DIR3_LEAFN_MAGIC: ++ case XFS_ATTR3_LEAF_MAGIC: + case XFS_DA3_NODE_MAGIC: + lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); + uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; +diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c +index 8c1fdf37ee8f0..8ed47b739b6cc 100644 +--- a/fs/xfs/xfs_dquot_item.c ++++ b/fs/xfs/xfs_dquot_item.c +@@ -188,7 +188,7 @@ xfs_qm_dquot_logitem_release( + STATIC void + xfs_qm_dquot_logitem_committing( + struct xfs_log_item *lip, +- xfs_lsn_t commit_lsn) ++ xfs_csn_t seq) + { + return xfs_qm_dquot_logitem_release(lip); + } +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 5b0f93f738372..4d6bf8d4974fe 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -118,6 +118,54 @@ xfs_dir_fsync( + return xfs_log_force_inode(ip); + } + ++static xfs_csn_t ++xfs_fsync_seq( ++ struct xfs_inode *ip, ++ bool datasync) ++{ ++ if (!xfs_ipincount(ip)) ++ return 0; ++ if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) ++ return 0; ++ return ip->i_itemp->ili_commit_seq; ++} ++ ++/* ++ * All metadata updates are logged, which means that we just have to flush the ++ * log up to the latest LSN that touched the inode. ++ * ++ * If we have concurrent fsync/fdatasync() calls, we need them to all block on ++ * the log force before we clear the ili_fsync_fields field. This ensures that ++ * we don't get a racing sync operation that does not wait for the metadata to ++ * hit the journal before returning. If we race with clearing ili_fsync_fields, ++ * then all that will happen is the log force will do nothing as the lsn will ++ * already be on disk. We can't race with setting ili_fsync_fields because that ++ * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock ++ * shared until after the ili_fsync_fields is cleared. 
++ */ ++static int ++xfs_fsync_flush_log( ++ struct xfs_inode *ip, ++ bool datasync, ++ int *log_flushed) ++{ ++ int error = 0; ++ xfs_csn_t seq; ++ ++ xfs_ilock(ip, XFS_ILOCK_SHARED); ++ seq = xfs_fsync_seq(ip, datasync); ++ if (seq) { ++ error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, ++ log_flushed); ++ ++ spin_lock(&ip->i_itemp->ili_lock); ++ ip->i_itemp->ili_fsync_fields = 0; ++ spin_unlock(&ip->i_itemp->ili_lock); ++ } ++ xfs_iunlock(ip, XFS_ILOCK_SHARED); ++ return error; ++} ++ + STATIC int + xfs_file_fsync( + struct file *file, +@@ -125,13 +173,10 @@ xfs_file_fsync( + loff_t end, + int datasync) + { +- struct inode *inode = file->f_mapping->host; +- struct xfs_inode *ip = XFS_I(inode); +- struct xfs_inode_log_item *iip = ip->i_itemp; ++ struct xfs_inode *ip = XFS_I(file->f_mapping->host); + struct xfs_mount *mp = ip->i_mount; + int error = 0; + int log_flushed = 0; +- xfs_lsn_t lsn = 0; + + trace_xfs_file_fsync(ip); + +@@ -155,33 +200,7 @@ xfs_file_fsync( + else if (mp->m_logdev_targp != mp->m_ddev_targp) + xfs_blkdev_issue_flush(mp->m_ddev_targp); + +- /* +- * All metadata updates are logged, which means that we just have to +- * flush the log up to the latest LSN that touched the inode. If we have +- * concurrent fsync/fdatasync() calls, we need them to all block on the +- * log force before we clear the ili_fsync_fields field. This ensures +- * that we don't get a racing sync operation that does not wait for the +- * metadata to hit the journal before returning. If we race with +- * clearing the ili_fsync_fields, then all that will happen is the log +- * force will do nothing as the lsn will already be on disk. We can't +- * race with setting ili_fsync_fields because that is done under +- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared +- * until after the ili_fsync_fields is cleared. 
+- */ +- xfs_ilock(ip, XFS_ILOCK_SHARED); +- if (xfs_ipincount(ip)) { +- if (!datasync || +- (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) +- lsn = iip->ili_last_lsn; +- } +- +- if (lsn) { +- error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); +- spin_lock(&iip->ili_lock); +- iip->ili_fsync_fields = 0; +- spin_unlock(&iip->ili_lock); +- } +- xfs_iunlock(ip, XFS_ILOCK_SHARED); ++ error = xfs_fsync_flush_log(ip, datasync, &log_flushed); + + /* + * If we only have a single device, and the log force about was +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index 03497741aef74..1f61e085676b3 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -2754,7 +2754,7 @@ xfs_iunpin( + trace_xfs_inode_unpin_nowait(ip, _RET_IP_); + + /* Give the log a push to start the unpinning I/O */ +- xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0, NULL); ++ xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL); + + } + +@@ -3716,16 +3716,16 @@ int + xfs_log_force_inode( + struct xfs_inode *ip) + { +- xfs_lsn_t lsn = 0; ++ xfs_csn_t seq = 0; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_ipincount(ip)) +- lsn = ip->i_itemp->ili_last_lsn; ++ seq = ip->i_itemp->ili_commit_seq; + xfs_iunlock(ip, XFS_ILOCK_SHARED); + +- if (!lsn) ++ if (!seq) + return 0; +- return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL); ++ return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL); + } + + /* +diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c +index 6ff91e5bf3cd7..3aba4559469f1 100644 +--- a/fs/xfs/xfs_inode_item.c ++++ b/fs/xfs/xfs_inode_item.c +@@ -617,9 +617,9 @@ xfs_inode_item_committed( + STATIC void + xfs_inode_item_committing( + struct xfs_log_item *lip, +- xfs_lsn_t commit_lsn) ++ xfs_csn_t seq) + { +- INODE_ITEM(lip)->ili_last_lsn = commit_lsn; ++ INODE_ITEM(lip)->ili_commit_seq = seq; + return xfs_inode_item_release(lip); + } + +diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h +index 4b926e32831c0..403b45ab9aa28 100644 +--- a/fs/xfs/xfs_inode_item.h ++++ b/fs/xfs/xfs_inode_item.h +@@ -33,7 +33,7 @@ struct xfs_inode_log_item { + unsigned int ili_fields; /* fields to be logged */ + unsigned int ili_fsync_fields; /* logged since last fsync */ + xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ +- xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ ++ xfs_csn_t ili_commit_seq; /* last transaction commit */ + }; + + static inline int xfs_inode_clean(struct xfs_inode *ip) +diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c +index cb44f7653f03b..538724f9f85ca 100644 +--- a/fs/xfs/xfs_inode_item_recover.c ++++ b/fs/xfs/xfs_inode_item_recover.c +@@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts( + STATIC void + xfs_log_dinode_to_disk( + struct xfs_log_dinode *from, +- struct xfs_dinode *to) ++ struct xfs_dinode *to, ++ xfs_lsn_t lsn) + { + to->di_magic = cpu_to_be16(from->di_magic); + to->di_mode = cpu_to_be16(from->di_mode); +@@ -182,7 +183,7 @@ xfs_log_dinode_to_disk( + to->di_flags2 = cpu_to_be64(from->di_flags2); + to->di_cowextsize = cpu_to_be32(from->di_cowextsize); + to->di_ino = cpu_to_be64(from->di_ino); +- to->di_lsn = cpu_to_be64(from->di_lsn); ++ to->di_lsn = cpu_to_be64(lsn); + memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); + uuid_copy(&to->di_uuid, &from->di_uuid); + to->di_flushiter = 0; +@@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2( + } + + /* +- * If the inode has an LSN in it, recover the inode only if it's less +- * than the lsn of the transaction we are replaying. 
Note: we still +- * need to replay an owner change even though the inode is more recent +- * than the transaction as there is no guarantee that all the btree +- * blocks are more recent than this transaction, too. ++ * If the inode has an LSN in it, recover the inode only if the on-disk ++ * inode's LSN is older than the lsn of the transaction we are ++ * replaying. We can have multiple checkpoints with the same start LSN, ++ * so the current LSN being equal to the on-disk LSN doesn't necessarily ++ * mean that the on-disk inode is more recent than the change being ++ * replayed. ++ * ++ * We must check the current_lsn against the on-disk inode ++ * here because the we can't trust the log dinode to contain a valid LSN ++ * (see comment below before replaying the log dinode for details). ++ * ++ * Note: we still need to replay an owner change even though the inode ++ * is more recent than the transaction as there is no guarantee that all ++ * the btree blocks are more recent than this transaction, too. + */ + if (dip->di_version >= 3) { + xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); + +- if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { ++ if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) { + trace_xfs_log_recover_inode_skip(log, in_f); + error = 0; + goto out_owner_change; +@@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2( + goto out_release; + } + +- /* recover the log dinode inode into the on disk inode */ +- xfs_log_dinode_to_disk(ldip, dip); ++ /* ++ * Recover the log dinode inode into the on disk inode. ++ * ++ * The LSN in the log dinode is garbage - it can be zero or reflect ++ * stale in-memory runtime state that isn't coherent with the changes ++ * logged in this transaction or the changes written to the on-disk ++ * inode. Hence we write the current lSN into the inode because that ++ * matches what xfs_iflush() would write inode the inode when flushing ++ * the changes in this transaction. ++ */ ++ xfs_log_dinode_to_disk(ldip, dip, current_lsn); + + fields = in_f->ilf_fields; + if (fields & XFS_ILOG_DEV) +diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c +index b445e63cbc3c7..22d7d74231d42 100644 +--- a/fs/xfs/xfs_log.c ++++ b/fs/xfs/xfs_log.c +@@ -765,6 +765,9 @@ xfs_log_mount_finish( + if (readonly) + mp->m_flags |= XFS_MOUNT_RDONLY; + ++ /* Make sure the log is dead if we're returning failure. */ ++ ASSERT(!error || (mp->m_log->l_flags & XLOG_IO_ERROR)); ++ + return error; + } + +@@ -3210,14 +3213,13 @@ out_error: + } + + static int +-__xfs_log_force_lsn( +- struct xfs_mount *mp, ++xlog_force_lsn( ++ struct xlog *log, + xfs_lsn_t lsn, + uint flags, + int *log_flushed, + bool already_slept) + { +- struct xlog *log = mp->m_log; + struct xlog_in_core *iclog; + + spin_lock(&log->l_icloglock); +@@ -3250,8 +3252,6 @@ __xfs_log_force_lsn( + if (!already_slept && + (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC || + iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) { +- XFS_STATS_INC(mp, xs_log_force_sleep); +- + xlog_wait(&iclog->ic_prev->ic_write_wait, + &log->l_icloglock); + return -EAGAIN; +@@ -3289,25 +3289,29 @@ out_error: + * to disk, that thread will wake up all threads waiting on the queue. 
+ */ + int +-xfs_log_force_lsn( ++xfs_log_force_seq( + struct xfs_mount *mp, +- xfs_lsn_t lsn, ++ xfs_csn_t seq, + uint flags, + int *log_flushed) + { ++ struct xlog *log = mp->m_log; ++ xfs_lsn_t lsn; + int ret; +- ASSERT(lsn != 0); ++ ASSERT(seq != 0); + + XFS_STATS_INC(mp, xs_log_force); +- trace_xfs_log_force(mp, lsn, _RET_IP_); ++ trace_xfs_log_force(mp, seq, _RET_IP_); + +- lsn = xlog_cil_force_lsn(mp->m_log, lsn); ++ lsn = xlog_cil_force_seq(log, seq); + if (lsn == NULLCOMMITLSN) + return 0; + +- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, false); +- if (ret == -EAGAIN) +- ret = __xfs_log_force_lsn(mp, lsn, flags, log_flushed, true); ++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, false); ++ if (ret == -EAGAIN) { ++ XFS_STATS_INC(mp, xs_log_force_sleep); ++ ret = xlog_force_lsn(log, lsn, flags, log_flushed, true); ++ } + return ret; + } + +diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h +index 98c913da7587e..a1089f8b7169b 100644 +--- a/fs/xfs/xfs_log.h ++++ b/fs/xfs/xfs_log.h +@@ -106,7 +106,7 @@ struct xfs_item_ops; + struct xfs_trans; + + int xfs_log_force(struct xfs_mount *mp, uint flags); +-int xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags, ++int xfs_log_force_seq(struct xfs_mount *mp, xfs_csn_t seq, uint flags, + int *log_forced); + int xfs_log_mount(struct xfs_mount *mp, + struct xfs_buftarg *log_target, +@@ -132,8 +132,6 @@ bool xfs_log_writable(struct xfs_mount *mp); + struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); + void xfs_log_ticket_put(struct xlog_ticket *ticket); + +-void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, +- xfs_lsn_t *commit_lsn, bool regrant); + void xlog_cil_process_committed(struct list_head *list); + bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); + +diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c +index cd5c04dabe2e1..fbe160d5e9b96 100644 +--- a/fs/xfs/xfs_log_cil.c ++++ b/fs/xfs/xfs_log_cil.c +@@ -777,7 +777,7 @@ xlog_cil_push_work( + * that higher sequences will wait for us to write out a commit record + * before they do. + * +- * xfs_log_force_lsn requires us to mirror the new sequence into the cil ++ * xfs_log_force_seq requires us to mirror the new sequence into the cil + * structure atomically with the addition of this sequence to the + * committing list. This also ensures that we can do unlocked checks + * against the current sequence in log forces without risking +@@ -1020,16 +1020,14 @@ xlog_cil_empty( + * allowed again. + */ + void +-xfs_log_commit_cil( +- struct xfs_mount *mp, ++xlog_cil_commit( ++ struct xlog *log, + struct xfs_trans *tp, +- xfs_lsn_t *commit_lsn, ++ xfs_csn_t *commit_seq, + bool regrant) + { +- struct xlog *log = mp->m_log; + struct xfs_cil *cil = log->l_cilp; + struct xfs_log_item *lip, *next; +- xfs_lsn_t xc_commit_lsn; + + /* + * Do all necessary memory allocation before we lock the CIL. 
+@@ -1043,10 +1041,6 @@ xfs_log_commit_cil( + + xlog_cil_insert_items(log, tp); + +- xc_commit_lsn = cil->xc_ctx->sequence; +- if (commit_lsn) +- *commit_lsn = xc_commit_lsn; +- + if (regrant && !XLOG_FORCED_SHUTDOWN(log)) + xfs_log_ticket_regrant(log, tp->t_ticket); + else +@@ -1069,8 +1063,10 @@ xfs_log_commit_cil( + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { + xfs_trans_del_item(lip); + if (lip->li_ops->iop_committing) +- lip->li_ops->iop_committing(lip, xc_commit_lsn); ++ lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence); + } ++ if (commit_seq) ++ *commit_seq = cil->xc_ctx->sequence; + + /* xlog_cil_push_background() releases cil->xc_ctx_lock */ + xlog_cil_push_background(log); +@@ -1087,9 +1083,9 @@ xfs_log_commit_cil( + * iclog flush is necessary following this call. + */ + xfs_lsn_t +-xlog_cil_force_lsn( ++xlog_cil_force_seq( + struct xlog *log, +- xfs_lsn_t sequence) ++ xfs_csn_t sequence) + { + struct xfs_cil *cil = log->l_cilp; + struct xfs_cil_ctx *ctx; +@@ -1183,23 +1179,19 @@ out_shutdown: + */ + bool + xfs_log_item_in_current_chkpt( +- struct xfs_log_item *lip) ++ struct xfs_log_item *lip) + { +- struct xfs_cil_ctx *ctx; ++ struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp; + + if (list_empty(&lip->li_cil)) + return false; + +- ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; +- + /* + * li_seq is written on the first commit of a log item to record the + * first checkpoint it is written to. Hence if it is different to the + * current sequence, we're in a new checkpoint. + */ +- if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0) +- return false; +- return true; ++ return lip->li_seq == READ_ONCE(cil->xc_current_sequence); + } + + /* +diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h +index 1c6fdbf3d5066..42cd1602ac256 100644 +--- a/fs/xfs/xfs_log_priv.h ++++ b/fs/xfs/xfs_log_priv.h +@@ -230,7 +230,7 @@ struct xfs_cil; + + struct xfs_cil_ctx { + struct xfs_cil *cil; +- xfs_lsn_t sequence; /* chkpt sequence # */ ++ xfs_csn_t sequence; /* chkpt sequence # */ + xfs_lsn_t start_lsn; /* first LSN of chkpt commit */ + xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ + struct xlog_ticket *ticket; /* chkpt ticket */ +@@ -268,10 +268,10 @@ struct xfs_cil { + struct xfs_cil_ctx *xc_ctx; + + spinlock_t xc_push_lock ____cacheline_aligned_in_smp; +- xfs_lsn_t xc_push_seq; ++ xfs_csn_t xc_push_seq; + struct list_head xc_committing; + wait_queue_head_t xc_commit_wait; +- xfs_lsn_t xc_current_sequence; ++ xfs_csn_t xc_current_sequence; + struct work_struct xc_push_work; + wait_queue_head_t xc_push_wait; /* background push throttle */ + } ____cacheline_aligned_in_smp; +@@ -547,19 +547,18 @@ int xlog_cil_init(struct xlog *log); + void xlog_cil_init_post_recovery(struct xlog *log); + void xlog_cil_destroy(struct xlog *log); + bool xlog_cil_empty(struct xlog *log); ++void xlog_cil_commit(struct xlog *log, struct xfs_trans *tp, ++ xfs_csn_t *commit_seq, bool regrant); + + /* + * CIL force routines + */ +-xfs_lsn_t +-xlog_cil_force_lsn( +- struct xlog *log, +- xfs_lsn_t sequence); ++xfs_lsn_t xlog_cil_force_seq(struct xlog *log, xfs_csn_t sequence); + + static inline void + xlog_cil_force(struct xlog *log) + { +- xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence); ++ xlog_cil_force_seq(log, log->l_cilp->xc_current_sequence); + } + + /* +diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c +index 87886b7f77dad..69408782019eb 100644 +--- a/fs/xfs/xfs_log_recover.c ++++ b/fs/xfs/xfs_log_recover.c +@@ -2457,8 +2457,10 @@ xlog_finish_defer_ops( + + 
error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, + dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); +- if (error) ++ if (error) { ++ xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); + return error; ++ } + + /* + * Transfer to this new transaction all the dfops we captured +@@ -3454,6 +3456,7 @@ xlog_recover_finish( + * this) before we get around to xfs_log_mount_cancel. + */ + xlog_recover_cancel_intents(log); ++ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); + xfs_alert(log->l_mp, "Failed to recover intents"); + return error; + } +diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c +index 44b05e1d5d327..a2a5a0fd92334 100644 +--- a/fs/xfs/xfs_mount.c ++++ b/fs/xfs/xfs_mount.c +@@ -968,9 +968,17 @@ xfs_mountfs( + /* + * Finish recovering the file system. This part needed to be delayed + * until after the root and real-time bitmap inodes were consistently +- * read in. ++ * read in. Temporarily create per-AG space reservations for metadata ++ * btree shape changes because space freeing transactions (for inode ++ * inactivation) require the per-AG reservation in lieu of reserving ++ * blocks. + */ ++ error = xfs_fs_reserve_ag_blocks(mp); ++ if (error && error == -ENOSPC) ++ xfs_warn(mp, ++ "ENOSPC reserving per-AG metadata pool, log recovery may fail."); + error = xfs_log_mount_finish(mp); ++ xfs_fs_unreserve_ag_blocks(mp); + if (error) { + xfs_warn(mp, "log mount finish failed"); + goto out_rtunmount; +diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c +index 36166bae24a6f..73a1de7ceefc9 100644 +--- a/fs/xfs/xfs_trans.c ++++ b/fs/xfs/xfs_trans.c +@@ -832,7 +832,7 @@ __xfs_trans_commit( + bool regrant) + { + struct xfs_mount *mp = tp->t_mountp; +- xfs_lsn_t commit_lsn = -1; ++ xfs_csn_t commit_seq = 0; + int error = 0; + int sync = tp->t_flags & XFS_TRANS_SYNC; + +@@ -874,7 +874,7 @@ __xfs_trans_commit( + xfs_trans_apply_sb_deltas(tp); + xfs_trans_apply_dquot_deltas(tp); + +- xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); ++ xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant); + + xfs_trans_free(tp); + +@@ -883,7 +883,7 @@ __xfs_trans_commit( + * log out now and wait for it. 
+ */ + if (sync) { +- error = xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); ++ error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL); + XFS_STATS_INC(mp, xs_trans_sync); + } else { + XFS_STATS_INC(mp, xs_trans_async); +diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h +index 075eeade4f7d5..97485559008bb 100644 +--- a/fs/xfs/xfs_trans.h ++++ b/fs/xfs/xfs_trans.h +@@ -43,7 +43,7 @@ struct xfs_log_item { + struct list_head li_cil; /* CIL pointers */ + struct xfs_log_vec *li_lv; /* active log vector */ + struct xfs_log_vec *li_lv_shadow; /* standby vector */ +- xfs_lsn_t li_seq; /* CIL commit seq */ ++ xfs_csn_t li_seq; /* CIL commit seq */ + }; + + /* +@@ -69,7 +69,7 @@ struct xfs_item_ops { + void (*iop_pin)(struct xfs_log_item *); + void (*iop_unpin)(struct xfs_log_item *, int remove); + uint (*iop_push)(struct xfs_log_item *, struct list_head *); +- void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn); ++ void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq); + void (*iop_release)(struct xfs_log_item *); + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); + int (*iop_recover)(struct xfs_log_item *lip, +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index f21bc441e3fa8..b010d45a1ecd5 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -1457,6 +1457,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); ++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, ++ const union bpf_attr *kattr, ++ union bpf_attr __user *uattr); + bool btf_ctx_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info); +@@ -1671,6 +1674,13 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + return -ENOTSUPP; + } + ++static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, ++ const union bpf_attr *kattr, ++ union bpf_attr __user *uattr) ++{ ++ return -ENOTSUPP; ++} ++ + static inline void bpf_map_put(struct bpf_map *map) + { + } +diff --git a/include/net/addrconf.h b/include/net/addrconf.h +index e7ce719838b5e..edba74a536839 100644 +--- a/include/net/addrconf.h ++++ b/include/net/addrconf.h +@@ -405,6 +405,9 @@ static inline bool ip6_ignore_linkdown(const struct net_device *dev) + { + const struct inet6_dev *idev = __in6_dev_get(dev); + ++ if (unlikely(!idev)) ++ return true; ++ + return !!idev->cnf.ignore_routes_with_linkdown; + } + +diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h +index 1d1232917de72..9b8000869b078 100644 +--- a/include/net/bluetooth/l2cap.h ++++ b/include/net/bluetooth/l2cap.h +@@ -845,6 +845,7 @@ enum { + }; + + void l2cap_chan_hold(struct l2cap_chan *c); ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); + void l2cap_chan_put(struct l2cap_chan *c); + + static inline void l2cap_chan_lock(struct l2cap_chan *chan) +diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h +index 0b1864a82d4ad..ff901aade442f 100644 +--- a/include/net/inet_connection_sock.h ++++ b/include/net/inet_connection_sock.h +@@ -317,7 +317,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb, + + struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu); + +-#define TCP_PINGPONG_THRESH 3 ++#define TCP_PINGPONG_THRESH 1 + + static inline void inet_csk_enter_pingpong_mode(struct sock *sk) + { +@@ -334,14 +334,6 @@ static inline bool 
inet_csk_in_pingpong_mode(struct sock *sk) + return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; + } + +-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk) +-{ +- struct inet_connection_sock *icsk = inet_csk(sk); +- +- if (icsk->icsk_ack.pingpong < U8_MAX) +- icsk->icsk_ack.pingpong++; +-} +- + static inline bool inet_csk_has_ulp(struct sock *sk) + { + return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops; +diff --git a/include/net/tcp.h b/include/net/tcp.h +index 44bfb22069c1f..8129ce9a07719 100644 +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1396,7 +1396,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, + + static inline int tcp_win_from_space(const struct sock *sk, int space) + { +- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; ++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); + + return tcp_adv_win_scale <= 0 ? + (space>>(-tcp_adv_win_scale)) : +diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h +index 0f39fdcb2273c..2a234023821e3 100644 +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -5007,7 +5007,10 @@ struct bpf_pidns_info { + + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ + struct bpf_sk_lookup { +- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ ++ union { ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ ++ }; + + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ +diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c +index e5d22af43fa0b..d29731a30b8e1 100644 +--- a/kernel/watch_queue.c ++++ b/kernel/watch_queue.c +@@ -457,6 +457,33 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) + rcu_assign_pointer(watch->queue, wqueue); + } + ++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) ++{ ++ const struct cred *cred; ++ struct watch *w; ++ ++ hlist_for_each_entry(w, &wlist->watchers, list_node) { ++ struct watch_queue *wq = rcu_access_pointer(w->queue); ++ if (wqueue == wq && watch->id == w->id) ++ return -EBUSY; ++ } ++ ++ cred = current_cred(); ++ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { ++ atomic_dec(&cred->user->nr_watches); ++ return -EAGAIN; ++ } ++ ++ watch->cred = get_cred(cred); ++ rcu_assign_pointer(watch->watch_list, wlist); ++ ++ kref_get(&wqueue->usage); ++ kref_get(&watch->usage); ++ hlist_add_head(&watch->queue_node, &wqueue->watches); ++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers); ++ return 0; ++} ++ + /** + * add_watch_to_object - Add a watch on an object to a watch list + * @watch: The watch to add +@@ -471,34 +498,21 @@ void init_watch(struct watch *watch, struct watch_queue *wqueue) + */ + int add_watch_to_object(struct watch *watch, struct watch_list *wlist) + { +- struct watch_queue *wqueue = rcu_access_pointer(watch->queue); +- struct watch *w; +- +- hlist_for_each_entry(w, &wlist->watchers, list_node) { +- struct watch_queue *wq = rcu_access_pointer(w->queue); +- if (wqueue == wq && watch->id == w->id) +- return -EBUSY; +- } +- +- watch->cred = get_current_cred(); +- rcu_assign_pointer(watch->watch_list, wlist); ++ struct watch_queue *wqueue; ++ int ret = -ENOENT; + +- if (atomic_inc_return(&watch->cred->user->nr_watches) > +- task_rlimit(current, RLIMIT_NOFILE)) { +- 
atomic_dec(&watch->cred->user->nr_watches); +- put_cred(watch->cred); +- return -EAGAIN; +- } ++ rcu_read_lock(); + ++ wqueue = rcu_access_pointer(watch->queue); + if (lock_wqueue(wqueue)) { +- kref_get(&wqueue->usage); +- kref_get(&watch->usage); +- hlist_add_head(&watch->queue_node, &wqueue->watches); ++ spin_lock(&wlist->lock); ++ ret = add_one_watch(watch, wlist, wqueue); ++ spin_unlock(&wlist->lock); + unlock_wqueue(wqueue); + } + +- hlist_add_head(&watch->list_node, &wlist->watchers); +- return 0; ++ rcu_read_unlock(); ++ return ret; + } + EXPORT_SYMBOL(add_watch_to_object); + +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index f3418edb136be..43ff22ce76324 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -3679,11 +3679,15 @@ static inline bool zone_watermark_fast(struct zone *z, unsigned int order, + * need to be calculated. + */ + if (!order) { +- long fast_free; ++ long usable_free; ++ long reserved; + +- fast_free = free_pages; +- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags); +- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx]) ++ usable_free = free_pages; ++ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags); ++ ++ /* reserved may over estimate high-atomic reserves. */ ++ usable_free -= min(usable_free, reserved); ++ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx]) + return true; + } + +diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c +index 2557cd917f5ed..6a5ff5dcc09a9 100644 +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn, + } + + /* Find channel with given SCID. +- * Returns locked channel. */ ++ * Returns a reference locked channel. ++ */ + static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, + u16 cid) + { +@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, + + mutex_lock(&conn->chan_lock); + c = __l2cap_get_chan_by_scid(conn, cid); +- if (c) +- l2cap_chan_lock(c); ++ if (c) { ++ /* Only lock if chan reference is not 0 */ ++ c = l2cap_chan_hold_unless_zero(c); ++ if (c) ++ l2cap_chan_lock(c); ++ } + mutex_unlock(&conn->chan_lock); + + return c; + } + + /* Find channel with given DCID. +- * Returns locked channel. ++ * Returns a reference locked channel. 
+ */ + static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, + u16 cid) +@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, + + mutex_lock(&conn->chan_lock); + c = __l2cap_get_chan_by_dcid(conn, cid); +- if (c) +- l2cap_chan_lock(c); ++ if (c) { ++ /* Only lock if chan reference is not 0 */ ++ c = l2cap_chan_hold_unless_zero(c); ++ if (c) ++ l2cap_chan_lock(c); ++ } + mutex_unlock(&conn->chan_lock); + + return c; +@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn, + + mutex_lock(&conn->chan_lock); + c = __l2cap_get_chan_by_ident(conn, ident); +- if (c) +- l2cap_chan_lock(c); ++ if (c) { ++ /* Only lock if chan reference is not 0 */ ++ c = l2cap_chan_hold_unless_zero(c); ++ if (c) ++ l2cap_chan_lock(c); ++ } + mutex_unlock(&conn->chan_lock); + + return c; +@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c) + kref_get(&c->kref); + } + ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) ++{ ++ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); ++ ++ if (!kref_get_unless_zero(&c->kref)) ++ return NULL; ++ ++ return c; ++} ++ + void l2cap_chan_put(struct l2cap_chan *c) + { + BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref)); +@@ -1965,7 +1988,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, + src_match = !bacmp(&c->src, src); + dst_match = !bacmp(&c->dst, dst); + if (src_match && dst_match) { +- l2cap_chan_hold(c); ++ c = l2cap_chan_hold_unless_zero(c); ++ if (!c) ++ continue; ++ + read_unlock(&chan_list_lock); + return c; + } +@@ -1980,7 +2006,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, + } + + if (c1) +- l2cap_chan_hold(c1); ++ c1 = l2cap_chan_hold_unless_zero(c1); + + read_unlock(&chan_list_lock); + +@@ -4460,6 +4486,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, + + unlock: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + return err; + } + +@@ -4573,6 +4600,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, + + done: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + return err; + } + +@@ -5300,6 +5328,7 @@ send_move_response: + l2cap_send_move_chan_rsp(chan, result); + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -5392,6 +5421,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result) + } + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + } + + static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, +@@ -5421,6 +5451,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, + l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + } + + static int l2cap_move_channel_rsp(struct l2cap_conn *conn, +@@ -5484,6 +5515,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn, + l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -5519,6 +5551,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn, + } + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -5891,12 +5924,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, + if (credits > max_credits) { + BT_ERR("LE credits overflow"); + l2cap_send_disconn_req(chan, ECONNRESET); +- l2cap_chan_unlock(chan); + + /* Return 0 so that we don't trigger an unnecessary + * command reject packet. 
+ */ +- return 0; ++ goto unlock; + } + + chan->tx_credits += credits; +@@ -5907,7 +5939,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn, + if (chan->tx_credits) + chan->ops->resume(chan); + ++unlock: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -7587,6 +7621,7 @@ drop: + + done: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + } + + static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, +@@ -8074,7 +8109,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c, + if (src_type != c->src_type) + continue; + +- l2cap_chan_hold(c); ++ c = l2cap_chan_hold_unless_zero(c); + read_unlock(&chan_list_lock); + return c; + } +diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c +index eb684f31fd698..f8b231bbbe381 100644 +--- a/net/bpf/test_run.c ++++ b/net/bpf/test_run.c +@@ -10,20 +10,86 @@ + #include <net/bpf_sk_storage.h> + #include <net/sock.h> + #include <net/tcp.h> ++#include <net/net_namespace.h> + #include <linux/error-injection.h> + #include <linux/smp.h> ++#include <linux/sock_diag.h> + + #define CREATE_TRACE_POINTS + #include <trace/events/bpf_test_run.h> + ++struct bpf_test_timer { ++ enum { NO_PREEMPT, NO_MIGRATE } mode; ++ u32 i; ++ u64 time_start, time_spent; ++}; ++ ++static void bpf_test_timer_enter(struct bpf_test_timer *t) ++ __acquires(rcu) ++{ ++ rcu_read_lock(); ++ if (t->mode == NO_PREEMPT) ++ preempt_disable(); ++ else ++ migrate_disable(); ++ ++ t->time_start = ktime_get_ns(); ++} ++ ++static void bpf_test_timer_leave(struct bpf_test_timer *t) ++ __releases(rcu) ++{ ++ t->time_start = 0; ++ ++ if (t->mode == NO_PREEMPT) ++ preempt_enable(); ++ else ++ migrate_enable(); ++ rcu_read_unlock(); ++} ++ ++static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration) ++ __must_hold(rcu) ++{ ++ t->i++; ++ if (t->i >= repeat) { ++ /* We're done. */ ++ t->time_spent += ktime_get_ns() - t->time_start; ++ do_div(t->time_spent, t->i); ++ *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent; ++ *err = 0; ++ goto reset; ++ } ++ ++ if (signal_pending(current)) { ++ /* During iteration: we've been cancelled, abort. */ ++ *err = -EINTR; ++ goto reset; ++ } ++ ++ if (need_resched()) { ++ /* During iteration: we need to reschedule between runs. */ ++ t->time_spent += ktime_get_ns() - t->time_start; ++ bpf_test_timer_leave(t); ++ cond_resched(); ++ bpf_test_timer_enter(t); ++ } ++ ++ /* Do another round. 
*/ ++ return true; ++ ++reset: ++ t->i = 0; ++ return false; ++} ++ + static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, + u32 *retval, u32 *time, bool xdp) + { + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; ++ struct bpf_test_timer t = { NO_MIGRATE }; + enum bpf_cgroup_storage_type stype; +- u64 time_start, time_spent = 0; +- int ret = 0; +- u32 i; ++ int ret; + + for_each_cgroup_storage_type(stype) { + storage[stype] = bpf_cgroup_storage_alloc(prog, stype); +@@ -38,10 +104,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, + if (!repeat) + repeat = 1; + +- rcu_read_lock(); +- migrate_disable(); +- time_start = ktime_get_ns(); +- for (i = 0; i < repeat; i++) { ++ bpf_test_timer_enter(&t); ++ do { + ret = bpf_cgroup_storage_set(storage); + if (ret) + break; +@@ -53,29 +117,8 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, + + bpf_cgroup_storage_unset(); + +- if (signal_pending(current)) { +- ret = -EINTR; +- break; +- } +- +- if (need_resched()) { +- time_spent += ktime_get_ns() - time_start; +- migrate_enable(); +- rcu_read_unlock(); +- +- cond_resched(); +- +- rcu_read_lock(); +- migrate_disable(); +- time_start = ktime_get_ns(); +- } +- } +- time_spent += ktime_get_ns() - time_start; +- migrate_enable(); +- rcu_read_unlock(); +- +- do_div(time_spent, repeat); +- *time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; ++ } while (bpf_test_timer_continue(&t, repeat, &ret, time)); ++ bpf_test_timer_leave(&t); + + for_each_cgroup_storage_type(stype) + bpf_cgroup_storage_free(storage[stype]); +@@ -688,18 +731,17 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) + { ++ struct bpf_test_timer t = { NO_PREEMPT }; + u32 size = kattr->test.data_size_in; + struct bpf_flow_dissector ctx = {}; + u32 repeat = kattr->test.repeat; + struct bpf_flow_keys *user_ctx; + struct bpf_flow_keys flow_keys; +- u64 time_start, time_spent = 0; + const struct ethhdr *eth; + unsigned int flags = 0; + u32 retval, duration; + void *data; + int ret; +- u32 i; + + if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) + return -EINVAL; +@@ -735,48 +777,127 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + ctx.data = data; + ctx.data_end = (__u8 *)data + size; + +- rcu_read_lock(); +- preempt_disable(); +- time_start = ktime_get_ns(); +- for (i = 0; i < repeat; i++) { ++ bpf_test_timer_enter(&t); ++ do { + retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, + size, flags); ++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); ++ bpf_test_timer_leave(&t); + +- if (signal_pending(current)) { +- preempt_enable(); +- rcu_read_unlock(); ++ if (ret < 0) ++ goto out; + +- ret = -EINTR; +- goto out; +- } ++ ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), ++ retval, duration); ++ if (!ret) ++ ret = bpf_ctx_finish(kattr, uattr, user_ctx, ++ sizeof(struct bpf_flow_keys)); + +- if (need_resched()) { +- time_spent += ktime_get_ns() - time_start; +- preempt_enable(); +- rcu_read_unlock(); ++out: ++ kfree(user_ctx); ++ kfree(data); ++ return ret; ++} + +- cond_resched(); ++int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, ++ union bpf_attr __user *uattr) ++{ ++ struct bpf_test_timer t = { NO_PREEMPT }; ++ struct bpf_prog_array *progs = NULL; ++ struct bpf_sk_lookup_kern ctx = {}; ++ u32 repeat = kattr->test.repeat; ++ struct bpf_sk_lookup *user_ctx; ++ u32 retval, duration; ++ int 
ret = -EINVAL; + +- rcu_read_lock(); +- preempt_disable(); +- time_start = ktime_get_ns(); +- } ++ if (prog->type != BPF_PROG_TYPE_SK_LOOKUP) ++ return -EINVAL; ++ ++ if (kattr->test.flags || kattr->test.cpu) ++ return -EINVAL; ++ ++ if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out || ++ kattr->test.data_size_out) ++ return -EINVAL; ++ ++ if (!repeat) ++ repeat = 1; ++ ++ user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx)); ++ if (IS_ERR(user_ctx)) ++ return PTR_ERR(user_ctx); ++ ++ if (!user_ctx) ++ return -EINVAL; ++ ++ if (user_ctx->sk) ++ goto out; ++ ++ if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) ++ goto out; ++ ++ if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { ++ ret = -ERANGE; ++ goto out; + } +- time_spent += ktime_get_ns() - time_start; +- preempt_enable(); +- rcu_read_unlock(); + +- do_div(time_spent, repeat); +- duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent; ++ ctx.family = (u16)user_ctx->family; ++ ctx.protocol = (u16)user_ctx->protocol; ++ ctx.dport = (u16)user_ctx->local_port; ++ ctx.sport = (__force __be16)user_ctx->remote_port; + +- ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), +- retval, duration); ++ switch (ctx.family) { ++ case AF_INET: ++ ctx.v4.daddr = (__force __be32)user_ctx->local_ip4; ++ ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4; ++ break; ++ ++#if IS_ENABLED(CONFIG_IPV6) ++ case AF_INET6: ++ ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6; ++ ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6; ++ break; ++#endif ++ ++ default: ++ ret = -EAFNOSUPPORT; ++ goto out; ++ } ++ ++ progs = bpf_prog_array_alloc(1, GFP_KERNEL); ++ if (!progs) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ progs->items[0].prog = prog; ++ ++ bpf_test_timer_enter(&t); ++ do { ++ ctx.selected_sk = NULL; ++ retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN); ++ } while (bpf_test_timer_continue(&t, repeat, &ret, &duration)); ++ bpf_test_timer_leave(&t); ++ ++ if (ret < 0) ++ goto out; ++ ++ user_ctx->cookie = 0; ++ if (ctx.selected_sk) { ++ if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) { ++ ret = -EOPNOTSUPP; ++ goto out; ++ } ++ ++ user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); ++ } ++ ++ ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration); + if (!ret) +- ret = bpf_ctx_finish(kattr, uattr, user_ctx, +- sizeof(struct bpf_flow_keys)); ++ ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); + + out: ++ bpf_prog_array_free(progs); + kfree(user_ctx); +- kfree(data); + return ret; + } +diff --git a/net/core/filter.c b/net/core/filter.c +index e2b491665775f..815edf7bc4390 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -10334,6 +10334,7 @@ static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type, + } + + const struct bpf_prog_ops sk_lookup_prog_ops = { ++ .test_run = bpf_prog_test_run_sk_lookup, + }; + + const struct bpf_verifier_ops sk_lookup_verifier_ops = { +diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c +index 428cc3a4c36f1..c71b863093ace 100644 +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -827,7 +827,7 @@ static void igmp_ifc_event(struct in_device *in_dev) + struct net *net = dev_net(in_dev->dev); + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) + return; +- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); ++ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv)); + igmp_ifc_start_timer(in_dev, 1); + } 
+ +@@ -1009,7 +1009,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, + * received value was zero, use the default or statically + * configured value. + */ +- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv; ++ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL; + + /* RFC3376, 8.3. Query Response Interval: +@@ -1189,7 +1189,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im, + pmc->interface = im->interface; + in_dev_hold(in_dev); + pmc->multiaddr = im->multiaddr; +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + pmc->sfmode = im->sfmode; + if (pmc->sfmode == MCAST_INCLUDE) { + struct ip_sf_list *psf; +@@ -1240,9 +1240,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im) + swap(im->tomb, pmc->tomb); + swap(im->sources, pmc->sources); + for (psf = im->sources; psf; psf = psf->sf_next) +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ psf->sf_crcount = in_dev->mr_qrv ?: ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } else { +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ im->crcount = in_dev->mr_qrv ?: ++ READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } + in_dev_put(pmc->interface); + kfree_pmc(pmc); +@@ -1349,7 +1351,7 @@ static void igmp_group_added(struct ip_mc_list *im) + if (in_dev->dead) + return; + +- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv; ++ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv); + if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { + spin_lock_bh(&im->lock); + igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY); +@@ -1363,7 +1365,7 @@ static void igmp_group_added(struct ip_mc_list *im) + * IN() to IN(A). 
+ */ + if (im->sfmode == MCAST_EXCLUDE) +- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + + igmp_ifc_event(in_dev); + #endif +@@ -1754,7 +1756,7 @@ static void ip_mc_reset(struct in_device *in_dev) + + in_dev->mr_qi = IGMP_QUERY_INTERVAL; + in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL; +- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv; ++ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv); + } + #else + static void ip_mc_reset(struct in_device *in_dev) +@@ -1888,7 +1890,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, + #ifdef CONFIG_IP_MULTICAST + if (psf->sf_oldin && + !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { +- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + psf->sf_next = pmc->tomb; + pmc->tomb = psf; + rv = 1; +@@ -1952,7 +1954,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + /* filter mode change */ + pmc->sfmode = MCAST_INCLUDE; + #ifdef CONFIG_IP_MULTICAST +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); + for (psf = pmc->sources; psf; psf = psf->sf_next) + psf->sf_crcount = 0; +@@ -2131,7 +2133,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, + #ifdef CONFIG_IP_MULTICAST + /* else no filters; keep old mode for reports */ + +- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; ++ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv); + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); + for (psf = pmc->sources; psf; psf = psf->sf_next) + psf->sf_crcount = 0; +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index f1fd26bb199ce..78460eb39b3af 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -698,7 +698,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, + int size_goal) + { + return skb->len < size_goal && +- sock_net(sk)->ipv4.sysctl_tcp_autocorking && ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && + !tcp_rtx_queue_empty(sk) && + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; + } +diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c +index d817f8c31c9ce..d35e88b5ffcbe 100644 +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -503,7 +503,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) + */ + static void tcp_init_buffer_space(struct sock *sk) + { +- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; ++ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); + struct tcp_sock *tp = tcp_sk(sk); + int maxwin; + +@@ -693,7 +693,7 @@ void tcp_rcv_space_adjust(struct sock *sk) + * <prev RTT . ><current RTT .. ><next RTT .... > + */ + +- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + int rcvmem, rcvbuf; + u64 rcvwin, grow; +@@ -2135,7 +2135,7 @@ void tcp_enter_loss(struct sock *sk) + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). 
Disable F-RTO on path MTU probing + */ +- tp->frto = net->ipv4.sysctl_tcp_frto && ++ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; + } +@@ -3004,7 +3004,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, + + static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag) + { +- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; ++ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ; + struct tcp_sock *tp = tcp_sk(sk); + + if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) { +@@ -3528,7 +3528,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); + +- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { ++ if (0 <= elapsed && ++ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) { + NET_INC_STATS(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } +@@ -3576,7 +3577,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) + /* Then check host-wide RFC 5961 rate limit. */ + now = jiffies / HZ; + if (now != challenge_timestamp) { +- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; ++ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); + u32 half = (ack_limit + 1) >> 1; + + challenge_timestamp = now; +@@ -4367,7 +4368,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) + { + struct tcp_sock *tp = tcp_sk(sk); + +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { + int mib_idx; + + if (before(seq, tp->rcv_nxt)) +@@ -4414,7 +4415,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); + +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { + u32 end_seq = TCP_SKB_CB(skb)->end_seq; + + tcp_rcv_spurious_retrans(sk, skb); +@@ -5439,7 +5440,7 @@ send_now: + } + + if (!tcp_is_sack(tp) || +- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr) ++ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)) + goto send_now; + + if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) { +@@ -5460,11 +5461,12 @@ send_now: + if (tp->srtt_us && tp->srtt_us < rtt) + rtt = tp->srtt_us; + +- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns, ++ delay = min_t(unsigned long, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns), + rtt * (NSEC_PER_USEC >> 3)/20); + sock_hold(sk); + hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay), +- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns, ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns), + HRTIMER_MODE_REL_PINNED_SOFT); + } + +diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c +index d5f13ff7d9004..0d165ce2d80a7 100644 +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -983,7 +983,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, + if (skb) { + __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + +- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? ++ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (inet_sk(sk)->tos & INET_ECN_MASK) : + inet_sk(sk)->tos; +@@ -1558,7 +1558,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; + + if (!dst) { +diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c +index 8d7e32f4abf67..f3ca6eea2ca39 100644 +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) + int m; + + sk_dst_confirm(sk); +- if (net->ipv4.sysctl_tcp_nometrics_save || !dst) ++ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst) + return; + + rcu_read_lock(); +@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk) + + if (tcp_in_initial_slowstart(tp)) { + /* Slow start still did not finish. */ +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && (tp->snd_cwnd >> 1) > val) +@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk) + } else if (!tcp_in_slow_start(tp) && + icsk->icsk_ca_state == TCP_CA_Open) { + /* Cong. avoidance phase, cwnd is reliable. */ +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); +@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk) + tcp_metric_set(tm, TCP_METRIC_CWND, + (val + tp->snd_ssthresh) >> 1); + } +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && tp->snd_ssthresh > val) +@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk) + if (tcp_metric_locked(tm, TCP_METRIC_CWND)) + tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); + +- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ? ++ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? + 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val) { + tp->snd_ssthresh = val; +diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c +index 9b67c61576e4c..657b0a4d93599 100644 +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -167,16 +167,13 @@ static void tcp_event_data_sent(struct tcp_sock *tp, + if (tcp_packets_in_flight(tp) == 0) + tcp_ca_event(sk, CA_EVENT_TX_START); + +- /* If this is the first data packet sent in response to the +- * previous received data, +- * and it is a reply for ato after last received packet, +- * increase pingpong count. +- */ +- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) && +- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) +- inet_csk_inc_pingpong_cnt(sk); +- + tp->lsndtime = now; ++ ++ /* If it is a reply for ato after last received ++ * packet, enter pingpong mode. ++ */ ++ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) ++ inet_csk_enter_pingpong_mode(sk); + } + + /* Account for an ACK we sent. 
*/ +@@ -1987,7 +1984,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) + + min_tso = ca_ops->min_tso_segs ? + ca_ops->min_tso_segs(sk) : +- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + return min_t(u32, tso_segs, sk->sk_gso_max_segs); +@@ -2502,7 +2499,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, + sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift)); + if (sk->sk_pacing_status == SK_PACING_NONE) + limit = min_t(unsigned long, limit, +- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); ++ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes)); + limit <<= factor; + + if (static_branch_unlikely(&tcp_tx_delay_enabled) && +diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c +index 6ac88fe24a8e0..135e3a060caa8 100644 +--- a/net/ipv6/ping.c ++++ b/net/ipv6/ping.c +@@ -22,6 +22,11 @@ + #include <linux/proc_fs.h> + #include <net/ping.h> + ++static void ping_v6_destroy(struct sock *sk) ++{ ++ inet6_destroy_sock(sk); ++} ++ + /* Compatibility glue so we can support IPv6 when it's compiled as a module */ + static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) +@@ -166,6 +171,7 @@ struct proto pingv6_prot = { + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, ++ .destroy = ping_v6_destroy, + .connect = ip6_datagram_connect_v6_only, + .disconnect = __udp_disconnect, + .setsockopt = ipv6_setsockopt, +diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c +index 303b54414a6cc..8d91f36cb11bc 100644 +--- a/net/ipv6/tcp_ipv6.c ++++ b/net/ipv6/tcp_ipv6.c +@@ -542,7 +542,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, + if (np->repflow && ireq->pktopts) + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + +- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? ++ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : + np->tclass; +@@ -1344,7 +1344,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). 
+ */ +- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) + newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; + + /* Clone native IPv6 options from listening socket (if any) +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 8123c79e27913..d0e91aa7b30e5 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -1421,7 +1421,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) + if (msk->rcvq_space.copied <= msk->rcvq_space.space) + goto new_measure; + +- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && ++ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && + !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { + int rcvmem, rcvbuf; + u64 rcvwin, grow; +diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c +index 1640da5c50776..72d30922ed290 100644 +--- a/net/netfilter/nfnetlink_queue.c ++++ b/net/netfilter/nfnetlink_queue.c +@@ -838,11 +838,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) + } + + static int +-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) ++nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff) + { + struct sk_buff *nskb; + + if (diff < 0) { ++ unsigned int min_len = skb_transport_offset(e->skb); ++ ++ if (data_len < min_len) ++ return -EINVAL; ++ + if (pskb_trim(e->skb, data_len)) + return -ENOMEM; + } else if (diff > 0) { +diff --git a/net/sctp/associola.c b/net/sctp/associola.c +index fdb69d46276d6..2d4ec61877553 100644 +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -226,9 +226,8 @@ static struct sctp_association *sctp_association_init( + if (!sctp_ulpq_init(&asoc->ulpq, asoc)) + goto fail_init; + +- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, +- 0, gfp)) +- goto fail_init; ++ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp)) ++ goto stream_free; + + /* Initialize default path MTU. 
*/ + asoc->pathmtu = sp->pathmtu; +diff --git a/net/sctp/stream.c b/net/sctp/stream.c +index 6dc95dcc0ff4f..ef9fceadef8d5 100644 +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -137,7 +137,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + + ret = sctp_stream_alloc_out(stream, outcnt, gfp); + if (ret) +- goto out_err; ++ return ret; + + for (i = 0; i < stream->outcnt; i++) + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; +@@ -145,22 +145,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, + handle_in: + sctp_stream_interleave_init(stream); + if (!incnt) +- goto out; +- +- ret = sctp_stream_alloc_in(stream, incnt, gfp); +- if (ret) +- goto in_err; +- +- goto out; ++ return 0; + +-in_err: +- sched->free(stream); +- genradix_free(&stream->in); +-out_err: +- genradix_free(&stream->out); +- stream->outcnt = 0; +-out: +- return ret; ++ return sctp_stream_alloc_in(stream, incnt, gfp); + } + + int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c +index 99e5f69fbb742..a2e1d34f52c5b 100644 +--- a/net/sctp/stream_sched.c ++++ b/net/sctp/stream_sched.c +@@ -163,7 +163,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc, + if (!SCTP_SO(&asoc->stream, i)->ext) + continue; + +- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); ++ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); + if (ret) + goto err; + } +diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c +index 23eab7ac43ee5..5cb6846544cc7 100644 +--- a/net/tls/tls_device.c ++++ b/net/tls/tls_device.c +@@ -1349,8 +1349,13 @@ static int tls_device_down(struct net_device *netdev) + * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. + * Now release the ref taken above. + */ +- if (refcount_dec_and_test(&ctx->refcount)) ++ if (refcount_dec_and_test(&ctx->refcount)) { ++ /* sk_destruct ran after tls_device_down took a ref, and ++ * it returned early. Complete the destruction here. ++ */ ++ list_del(&ctx->list); + tls_device_free_ctx(ctx); ++ } + } + + up_write(&device_offload_lock); +diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h +index e440cd7f32a6f..b9ee2ded381ab 100644 +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -5006,7 +5006,10 @@ struct bpf_pidns_info { + + /* User accessible data for SK_LOOKUP programs. Add new fields at the end. 
*/ + struct bpf_sk_lookup { +- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ ++ union { ++ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ ++ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ ++ }; + + __u32 family; /* Protocol family (AF_INET, AF_INET6) */ + __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ +diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c +index 94809aed8b447..1cab29d45bfb3 100644 +--- a/tools/perf/util/symbol-elf.c ++++ b/tools/perf/util/symbol-elf.c +@@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + return NULL; + } + ++static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) ++{ ++ size_t i, phdrnum; ++ u64 sz; ++ ++ if (elf_getphdrnum(elf, &phdrnum)) ++ return -1; ++ ++ for (i = 0; i < phdrnum; i++) { ++ if (gelf_getphdr(elf, i, phdr) == NULL) ++ return -1; ++ ++ if (phdr->p_type != PT_LOAD) ++ continue; ++ ++ sz = max(phdr->p_memsz, phdr->p_filesz); ++ if (!sz) ++ continue; ++ ++ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) ++ return 0; ++ } ++ ++ /* Not found any valid program header */ ++ return -1; ++} ++ + static bool want_demangle(bool is_kernel_sym) + { + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +@@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + sym.st_value); + used_opd = true; + } ++ + /* + * When loading symbols in a data mapping, ABS symbols (which + * has a value of SHN_ABS in its st_shndx) failed at +@@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + goto out_elf_end; + } else if ((used_opd && runtime_ss->adjust_symbols) || + (!used_opd && syms_ss->adjust_symbols)) { ++ GElf_Phdr phdr; ++ ++ if (elf_read_program_header(syms_ss->elf, ++ (u64)sym.st_value, &phdr)) { ++ pr_warning("%s: failed to find program header for " ++ "symbol: %s st_value: %#" PRIx64 "\n", ++ __func__, elf_name, (u64)sym.st_value); ++ continue; ++ } + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " +- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, +- (u64)sym.st_value, (u64)shdr.sh_addr, +- (u64)shdr.sh_offset); +- sym.st_value -= shdr.sh_addr - shdr.sh_offset; ++ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", ++ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, ++ (u64)phdr.p_offset); ++ sym.st_value -= phdr.p_vaddr - phdr.p_offset; + } + + demangled = demangle_sym(dso, kmodule, elf_name); +diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c +index a4c55fcb0e7b1..0fb92d9a319b7 100644 +--- a/tools/testing/selftests/bpf/test_verifier.c ++++ b/tools/testing/selftests/bpf/test_verifier.c +@@ -100,7 +100,7 @@ struct bpf_test { + enum bpf_prog_type prog_type; + uint8_t flags; + void (*fill_helper)(struct bpf_test *self); +- uint8_t runs; ++ int runs; + #define bpf_testdata_struct_t \ + struct { \ + uint32_t retval, retval_unpriv; \ +@@ -1054,7 +1054,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, + + run_errs = 0; + run_successes = 0; +- if (!alignment_prevented_execution && fd_prog >= 0) { ++ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) { + uint32_t expected_val; + int i; + +diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +index 2ad5f974451c3..fd3b62a084b9f 100644 +--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c ++++ 
b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c +@@ -239,6 +239,7 @@ + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_LOOKUP, + .expected_attach_type = BPF_SK_LOOKUP, ++ .runs = -1, + }, + /* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */ + {
