commit:     0cd4c4c48a094b7baeb008455b70fa37a9287345
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sun Aug  4 16:16:01 2019 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sun Aug  4 16:16:01 2019 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=0cd4c4c4

Linux patch 5.2.6

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README            |    4 +
 1005_linux-5.2.6.patch | 1297 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1301 insertions(+)

diff --git a/0000_README b/0000_README
index 01e534c..3a50bfb 100644
--- a/0000_README
+++ b/0000_README
@@ -63,6 +63,10 @@ Patch:  1004_linux-5.2.5.patch
 From:   https://www.kernel.org
 Desc:   Linux 5.2.5
 
+Patch:  1005_linux-5.2.6.patch
+From:   https://www.kernel.org
+Desc:   Linux 5.2.6
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1005_linux-5.2.6.patch b/1005_linux-5.2.6.patch
new file mode 100644
index 0000000..4a18db6
--- /dev/null
+++ b/1005_linux-5.2.6.patch
@@ -0,0 +1,1297 @@
+diff --git a/Makefile b/Makefile
+index 78bd926c8439..3cd40f1a8f75 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 5
+ PATCHLEVEL = 2
+-SUBLEVEL = 5
++SUBLEVEL = 6
+ EXTRAVERSION =
+ NAME = Bobtail Squid
+ 
+diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig
+index b9a37057b77a..cee24c308337 100644
+--- a/arch/sh/boards/Kconfig
++++ b/arch/sh/boards/Kconfig
+@@ -8,27 +8,19 @@ config SH_ALPHA_BOARD
+       bool
+ 
+ config SH_DEVICE_TREE
+-      bool "Board Described by Device Tree"
++      bool
+       select OF
+       select OF_EARLY_FLATTREE
+       select TIMER_OF
+       select COMMON_CLK
+       select GENERIC_CALIBRATE_DELAY
+-      help
+-        Select Board Described by Device Tree to build a kernel that
+-        does not hard-code any board-specific knowledge but instead uses
+-        a device tree blob provided by the boot-loader. You must enable
+-        drivers for any hardware you want to use separately. At this
+-        time, only boards based on the open-hardware J-Core processors
+-        have sufficient driver coverage to use this option; do not
+-        select it if you are using original SuperH hardware.
+ 
+ config SH_JCORE_SOC
+       bool "J-Core SoC"
+-      depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2)
++      select SH_DEVICE_TREE
+       select CLKSRC_JCORE_PIT
+       select JCORE_AIC
+-      default y if CPU_J2
++      depends on CPU_J2
+       help
+         Select this option to include drivers core components of the
+         J-Core SoC, including interrupt controllers and timers.
+diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c
+index a55be205b91a..dbfe34664633 100644
+--- a/drivers/bluetooth/hci_ath.c
++++ b/drivers/bluetooth/hci_ath.c
+@@ -98,6 +98,9 @@ static int ath_open(struct hci_uart *hu)
+ 
+       BT_DBG("hu %p", hu);
+ 
++      if (!hci_uart_has_flow_control(hu))
++              return -EOPNOTSUPP;
++
+       ath = kzalloc(sizeof(*ath), GFP_KERNEL);
+       if (!ath)
+               return -ENOMEM;
+diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
+index 8905ad2edde7..ae2624fce913 100644
+--- a/drivers/bluetooth/hci_bcm.c
++++ b/drivers/bluetooth/hci_bcm.c
+@@ -406,6 +406,9 @@ static int bcm_open(struct hci_uart *hu)
+ 
+       bt_dev_dbg(hu->hdev, "hu %p", hu);
+ 
++      if (!hci_uart_has_flow_control(hu))
++              return -EOPNOTSUPP;
++
+       bcm = kzalloc(sizeof(*bcm), GFP_KERNEL);
+       if (!bcm)
+               return -ENOMEM;
+diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
+index 207bae5e0d46..31f25153087d 100644
+--- a/drivers/bluetooth/hci_intel.c
++++ b/drivers/bluetooth/hci_intel.c
+@@ -391,6 +391,9 @@ static int intel_open(struct hci_uart *hu)
+ 
+       BT_DBG("hu %p", hu);
+ 
++      if (!hci_uart_has_flow_control(hu))
++              return -EOPNOTSUPP;
++
+       intel = kzalloc(sizeof(*intel), GFP_KERNEL);
+       if (!intel)
+               return -ENOMEM;
+diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
+index c84f985f348d..c953f14656b5 100644
+--- a/drivers/bluetooth/hci_ldisc.c
++++ b/drivers/bluetooth/hci_ldisc.c
+@@ -284,6 +284,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
+       return 0;
+ }
+ 
++/* Check the underlying device or tty has flow control support */
++bool hci_uart_has_flow_control(struct hci_uart *hu)
++{
++      /* serdev nodes check if the needed operations are present */
++      if (hu->serdev)
++              return true;
++
++      if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset)
++              return true;
++
++      return false;
++}
++
+ /* Flow control or un-flow control the device */
+ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable)
+ {
+diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c
+index 50212ac629e3..49dcf198ffd8 100644
+--- a/drivers/bluetooth/hci_mrvl.c
++++ b/drivers/bluetooth/hci_mrvl.c
+@@ -52,6 +52,9 @@ static int mrvl_open(struct hci_uart *hu)
+ 
+       BT_DBG("hu %p", hu);
+ 
++      if (!hci_uart_has_flow_control(hu))
++              return -EOPNOTSUPP;
++
+       mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL);
+       if (!mrvl)
+               return -ENOMEM;
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index 9d273cdde563..f41fb2c02e4f 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -458,6 +458,9 @@ static int qca_open(struct hci_uart *hu)
+ 
+       BT_DBG("hu %p qca_open", hu);
+ 
++      if (!hci_uart_has_flow_control(hu))
++              return -EOPNOTSUPP;
++
+       qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL);
+       if (!qca)
+               return -ENOMEM;
+diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
+index d8cf005e3c5d..22c278b13ab9 100644
+--- a/drivers/bluetooth/hci_uart.h
++++ b/drivers/bluetooth/hci_uart.h
+@@ -103,6 +103,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu);
+ int hci_uart_init_ready(struct hci_uart *hu);
+ void hci_uart_init_work(struct work_struct *work);
+ void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed);
++bool hci_uart_has_flow_control(struct hci_uart *hu);
+ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable);
+ void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed,
+                        unsigned int oper_speed);
+diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
+index 4c99739b937e..0e224232f746 100644
+--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
++++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
+@@ -1955,6 +1955,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id)
+ 
+                               /* get endpoint base */
+                               idx = ((ep_addr & 0x7f) - 1) * 2;
++                              if (idx > 15)
++                                      return -EIO;
++
+                               if (ep_addr & 0x80)
+                                       idx++;
+                               attr = ep->desc.bmAttributes;
+diff --git a/drivers/media/radio/radio-raremono.c b/drivers/media/radio/radio-raremono.c
+index 5e782b3c2fa9..bf1ee654df80 100644
+--- a/drivers/media/radio/radio-raremono.c
++++ b/drivers/media/radio/radio-raremono.c
+@@ -271,6 +271,14 @@ static int vidioc_g_frequency(struct file *file, void *priv,
+       return 0;
+ }
+ 
++static void raremono_device_release(struct v4l2_device *v4l2_dev)
++{
++      struct raremono_device *radio = to_raremono_dev(v4l2_dev);
++
++      kfree(radio->buffer);
++      kfree(radio);
++}
++
+ /* File system interface */
+ static const struct v4l2_file_operations usb_raremono_fops = {
+       .owner          = THIS_MODULE,
+@@ -295,12 +303,14 @@ static int usb_raremono_probe(struct usb_interface *intf,
+       struct raremono_device *radio;
+       int retval = 0;
+ 
+-      radio = devm_kzalloc(&intf->dev, sizeof(struct raremono_device), GFP_KERNEL);
+-      if (radio)
+-              radio->buffer = devm_kmalloc(&intf->dev, BUFFER_LENGTH, GFP_KERNEL);
+-
+-      if (!radio || !radio->buffer)
++      radio = kzalloc(sizeof(*radio), GFP_KERNEL);
++      if (!radio)
++              return -ENOMEM;
++      radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL);
++      if (!radio->buffer) {
++              kfree(radio);
+               return -ENOMEM;
++      }
+ 
+       radio->usbdev = interface_to_usbdev(intf);
+       radio->intf = intf;
+@@ -324,7 +334,8 @@ static int usb_raremono_probe(struct usb_interface *intf,
+       if (retval != 3 ||
+           (get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) {
+               dev_info(&intf->dev, "this is not Thanko's Raremono.\n");
+-              return -ENODEV;
++              retval = -ENODEV;
++              goto free_mem;
+       }
+ 
+       dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n",
+@@ -333,7 +344,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
+       retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev);
+       if (retval < 0) {
+               dev_err(&intf->dev, "couldn't register v4l2_device\n");
+-              return retval;
++              goto free_mem;
+       }
+ 
+       mutex_init(&radio->lock);
+@@ -345,6 +356,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
+       radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops;
+       radio->vdev.lock = &radio->lock;
+       radio->vdev.release = video_device_release_empty;
++      radio->v4l2_dev.release = raremono_device_release;
+ 
+       usb_set_intfdata(intf, &radio->v4l2_dev);
+ 
+@@ -360,6 +372,10 @@ static int usb_raremono_probe(struct usb_interface *intf,
+       }
+       dev_err(&intf->dev, "could not register video device\n");
+       v4l2_device_unregister(&radio->v4l2_dev);
++
++free_mem:
++      kfree(radio->buffer);
++      kfree(radio);
+       return retval;
+ }
+ 
+diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c
+index f746f6e2f686..a8a72d5fbd12 100644
+--- a/drivers/media/usb/au0828/au0828-core.c
++++ b/drivers/media/usb/au0828/au0828-core.c
+@@ -719,6 +719,12 @@ static int au0828_usb_probe(struct usb_interface *interface,
+       /* Setup */
+       au0828_card_setup(dev);
+ 
++      /*
++       * Store the pointer to the au0828_dev so it can be accessed in
++       * au0828_usb_disconnect
++       */
++      usb_set_intfdata(interface, dev);
++
+       /* Analog TV */
+       retval = au0828_analog_register(dev, interface);
+       if (retval) {
+@@ -737,12 +743,6 @@ static int au0828_usb_probe(struct usb_interface *interface,
+       /* Remote controller */
+       au0828_rc_register(dev);
+ 
+-      /*
+-       * Store the pointer to the au0828_dev so it can be accessed in
+-       * au0828_usb_disconnect
+-       */
+-      usb_set_intfdata(interface, dev);
+-
+       pr_info("Registered device AU0828 [%s]\n",
+               dev->board.name == NULL ? "Unset" : dev->board.name);
+ 
+diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c
+index b2268981c963..17468f7d78ed 100644
+--- a/drivers/media/usb/cpia2/cpia2_usb.c
++++ b/drivers/media/usb/cpia2/cpia2_usb.c
+@@ -893,7 +893,6 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
+       cpia2_unregister_camera(cam);
+       v4l2_device_disconnect(&cam->v4l2_dev);
+       mutex_unlock(&cam->v4l2_lock);
+-      v4l2_device_put(&cam->v4l2_dev);
+ 
+       if(cam->buffers) {
+               DBG("Wakeup waiting processes\n");
+@@ -902,6 +901,8 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
+               wake_up_interruptible(&cam->wq_stream);
+       }
+ 
++      v4l2_device_put(&cam->v4l2_dev);
++
+       LOG("CPiA2 camera disconnected.\n");
+ }
+ 
+diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
+index 70b5cb08d65b..bbf361ce0bd0 100644
+--- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
++++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
+@@ -1670,7 +1670,7 @@ static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl)
+       }
+       if (!hdw->flag_decoder_missed) {
+               pvr2_trace(PVR2_TRACE_ERROR_LEGS,
+-                         "WARNING: No decoder present");
++                         "***WARNING*** No decoder present");
+               hdw->flag_decoder_missed = !0;
+               trace_stbit("flag_decoder_missed",
+                           hdw->flag_decoder_missed);
+@@ -2356,7 +2356,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
+       if (hdw_desc->flag_is_experimental) {
+               pvr2_trace(PVR2_TRACE_INFO, "**********");
+               pvr2_trace(PVR2_TRACE_INFO,
+-                         "WARNING: Support for this device (%s) is experimental.",
++                         "***WARNING*** Support for this device (%s) is experimental.",
+                                                             hdw_desc->description);
+               pvr2_trace(PVR2_TRACE_INFO,
+                          "Important functionality might not be entirely working.");
+diff --git a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
+index 68e323f8d9cf..275394bafe7d 100644
+--- a/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
++++ b/drivers/media/usb/pvrusb2/pvrusb2-i2c-core.c
+@@ -333,11 +333,11 @@ static int i2c_hack_cx25840(struct pvr2_hdw *hdw,
+ 
+       if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) {
+               pvr2_trace(PVR2_TRACE_ERROR_LEGS,
+-                         "WARNING: Detected a wedged cx25840 chip; the device will not work.");
++                         "***WARNING*** Detected a wedged cx25840 chip; the device will not work.");
+               pvr2_trace(PVR2_TRACE_ERROR_LEGS,
+-                         "WARNING: Try power cycling the pvrusb2 device.");
++                         "***WARNING*** Try power cycling the pvrusb2 device.");
+               pvr2_trace(PVR2_TRACE_ERROR_LEGS,
+-                         "WARNING: Disabling further access to the device to prevent other foul-ups.");
++                         "***WARNING*** Disabling further access to the device to prevent other foul-ups.");
+               // This blocks all further communication with the part.
+               hdw->i2c_func[0x44] = NULL;
+               pvr2_hdw_render_useless(hdw);
+diff --git a/drivers/media/usb/pvrusb2/pvrusb2-std.c b/drivers/media/usb/pvrusb2/pvrusb2-std.c
+index 447279b4a545..e7ab41401577 100644
+--- a/drivers/media/usb/pvrusb2/pvrusb2-std.c
++++ b/drivers/media/usb/pvrusb2/pvrusb2-std.c
+@@ -343,7 +343,7 @@ struct v4l2_standard *pvr2_std_create_enum(unsigned int *countptr,
+               bcnt = pvr2_std_id_to_str(buf,sizeof(buf),fmsk);
+               pvr2_trace(
+                       PVR2_TRACE_ERROR_LEGS,
+-                      "WARNING: Failed to classify the following standard(s): %.*s",
++                      "***WARNING*** Failed to classify the following standard(s): %.*s",
+                       bcnt,buf);
+       }
+ 
+diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c
+index 970cf69ac35f..a3ecf7d77949 100644
+--- a/drivers/net/wireless/ath/ath10k/usb.c
++++ b/drivers/net/wireless/ath/ath10k/usb.c
+@@ -1016,7 +1016,7 @@ static int ath10k_usb_probe(struct usb_interface *interface,
+       }
+ 
+       /* TODO: remove this once USB support is fully implemented */
+-      ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n");
++      ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n");
+ 
+       return 0;
+ 
+diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
+index 499acf07d61a..e942b3e84068 100644
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -12,11 +12,6 @@ module_param(multipath, bool, 0444);
+ MODULE_PARM_DESC(multipath,
+       "turn on native support for multiple controllers per subsystem");
+ 
+-inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+-{
+-      return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3));
+-}
+-
+ /*
+  * If multipathing is enabled we need to always use the subsystem instance
+  * number for numbering our devices to avoid conflicts between subsystems that
+@@ -614,7 +609,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+ {
+       int error;
+ 
+-      if (!nvme_ctrl_use_ana(ctrl))
++      /* check if multipath is enabled and we have the capability */
++      if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
+               return 0;
+ 
+       ctrl->anacap = id->anacap;
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index 55553d293a98..7391cd0a7739 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -472,7 +472,11 @@ extern const struct attribute_group *nvme_ns_id_attr_groups[];
+ extern const struct block_device_operations nvme_ns_head_ops;
+ 
+ #ifdef CONFIG_NVME_MULTIPATH
+-bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
++static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
++{
++      return ctrl->ana_log_buf != NULL;
++}
++
+ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+                       struct nvme_ctrl *ctrl, int *flags);
+ void nvme_failover_req(struct request *req);
+diff --git a/drivers/pps/pps.c b/drivers/pps/pps.c
+index 3a546ec10d90..22a65ad4e46e 100644
+--- a/drivers/pps/pps.c
++++ b/drivers/pps/pps.c
+@@ -152,6 +152,14 @@ static long pps_cdev_ioctl(struct file *file,
+                       pps->params.mode |= PPS_CANWAIT;
+               pps->params.api_version = PPS_API_VERS;
+ 
++              /*
++               * Clear unused fields of pps_kparams to avoid leaking
++               * uninitialized data of the PPS_SETPARAMS caller via
++               * PPS_GETPARAMS
++               */
++              pps->params.assert_off_tu.flags = 0;
++              pps->params.clear_off_tu.flags = 0;
++
+               spin_unlock_irq(&pps->lock);
+ 
+               break;
+diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
+index 0176241eaea7..7754d7679122 100644
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -1263,20 +1263,22 @@ static int send_cap_msg(struct cap_msg_args *arg)
+ }
+ 
+ /*
+- * Queue cap releases when an inode is dropped from our cache.  Since
+- * inode is about to be destroyed, there is no need for i_ceph_lock.
++ * Queue cap releases when an inode is dropped from our cache.
+  */
+-void __ceph_remove_caps(struct inode *inode)
++void __ceph_remove_caps(struct ceph_inode_info *ci)
+ {
+-      struct ceph_inode_info *ci = ceph_inode(inode);
+       struct rb_node *p;
+ 
++      /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU)
++       * may call __ceph_caps_issued_mask() on a freeing inode. */
++      spin_lock(&ci->i_ceph_lock);
+       p = rb_first(&ci->i_caps);
+       while (p) {
+               struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+               p = rb_next(p);
+               __ceph_remove_cap(cap, true);
+       }
++      spin_unlock(&ci->i_ceph_lock);
+ }
+ 
+ /*
+diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
+index 5b7d4881a4f8..3c7a32779574 100644
+--- a/fs/ceph/inode.c
++++ b/fs/ceph/inode.c
+@@ -536,7 +536,7 @@ void ceph_evict_inode(struct inode *inode)
+ 
+       ceph_fscache_unregister_inode_cookie(ci);
+ 
+-      __ceph_remove_caps(inode);
++      __ceph_remove_caps(ci);
+ 
+       if (__ceph_has_any_quota(ci))
+               ceph_adjust_quota_realms_count(inode, false);
+diff --git a/fs/ceph/super.h b/fs/ceph/super.h
+index 048409fba1a8..edec39aa5ce2 100644
+--- a/fs/ceph/super.h
++++ b/fs/ceph/super.h
+@@ -1000,7 +1000,7 @@ extern void ceph_add_cap(struct inode *inode,
+                        unsigned cap, unsigned seq, u64 realmino, int flags,
+                        struct ceph_cap **new_cap);
+ extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
+-extern void __ceph_remove_caps(struct inode* inode);
++extern void __ceph_remove_caps(struct ceph_inode_info *ci);
+ extern void ceph_put_cap(struct ceph_mds_client *mdsc,
+                        struct ceph_cap *cap);
+ extern int ceph_is_any_caps(struct inode *inode);
+diff --git a/fs/exec.c b/fs/exec.c
+index 89a500bb897a..39902cc9eb6f 100644
+--- a/fs/exec.c
++++ b/fs/exec.c
+@@ -1828,7 +1828,7 @@ static int __do_execve_file(int fd, struct filename *filename,
+       membarrier_execve(current);
+       rseq_execve(current);
+       acct_update_integrals(current);
+-      task_numa_free(current);
++      task_numa_free(current, false);
+       free_bprm(bprm);
+       kfree(pathbuf);
+       if (filename)
+diff --git a/fs/nfs/client.c b/fs/nfs/client.c
+index d7e4f0848e28..4d90f5bf0b0a 100644
+--- a/fs/nfs/client.c
++++ b/fs/nfs/client.c
+@@ -406,10 +406,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
+               clp = nfs_match_client(cl_init);
+               if (clp) {
+                       spin_unlock(&nn->nfs_client_lock);
+-                      if (IS_ERR(clp))
+-                              return clp;
+                       if (new)
+                               new->rpc_ops->free_client(new);
++                      if (IS_ERR(clp))
++                              return clp;
+                       return nfs_found_client(cl_init, clp);
+               }
+               if (new) {
+diff --git a/fs/proc/base.c b/fs/proc/base.c
+index 03517154fe0f..e781af70d10d 100644
+--- a/fs/proc/base.c
++++ b/fs/proc/base.c
+@@ -209,12 +209,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
+       return result;
+ }
+ 
++/*
++ * If the user used setproctitle(), we just get the string from
++ * user space at arg_start, and limit it to a maximum of one page.
++ */
++static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
++                              size_t count, unsigned long pos,
++                              unsigned long arg_start)
++{
++      char *page;
++      int ret, got;
++
++      if (pos >= PAGE_SIZE)
++              return 0;
++
++      page = (char *)__get_free_page(GFP_KERNEL);
++      if (!page)
++              return -ENOMEM;
++
++      ret = 0;
++      got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
++      if (got > 0) {
++              int len = strnlen(page, got);
++
++              /* Include the NUL character if it was found */
++              if (len < got)
++                      len++;
++
++              if (len > pos) {
++                      len -= pos;
++                      if (len > count)
++                              len = count;
++                      len -= copy_to_user(buf, page+pos, len);
++                      if (!len)
++                              len = -EFAULT;
++                      ret = len;
++              }
++      }
++      free_page((unsigned long)page);
++      return ret;
++}
++
+ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
+                             size_t count, loff_t *ppos)
+ {
+       unsigned long arg_start, arg_end, env_start, env_end;
+       unsigned long pos, len;
+-      char *page;
++      char *page, c;
+ 
+       /* Check if process spawned far enough to have cmdline. */
+       if (!mm->env_end)
+@@ -231,28 +272,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
+               return 0;
+ 
+       /*
+-       * We have traditionally allowed the user to re-write
+-       * the argument strings and overflow the end result
+-       * into the environment section. But only do that if
+-       * the environment area is contiguous to the arguments.
++       * We allow setproctitle() to overwrite the argument
++       * strings, and overflow past the original end. But
++       * only when it overflows into the environment area.
+        */
+-      if (env_start != arg_end || env_start >= env_end)
++      if (env_start != arg_end || env_end < env_start)
+               env_start = env_end = arg_end;
+-
+-      /* .. and limit it to a maximum of one page of slop */
+-      if (env_end >= arg_end + PAGE_SIZE)
+-              env_end = arg_end + PAGE_SIZE - 1;
++      len = env_end - arg_start;
+ 
+       /* We're not going to care if "*ppos" has high bits set */
+-      pos = arg_start + *ppos;
+-
+-      /* .. but we do check the result is in the proper range */
+-      if (pos < arg_start || pos >= env_end)
++      pos = *ppos;
++      if (pos >= len)
+               return 0;
++      if (count > len - pos)
++              count = len - pos;
++      if (!count)
++              return 0;
++
++      /*
++       * Magical special case: if the argv[] end byte is not
++       * zero, the user has overwritten it with setproctitle(3).
++       *
++       * Possible future enhancement: do this only once when
++       * pos is 0, and set a flag in the 'struct file'.
++       */
++      if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
++              return get_mm_proctitle(mm, buf, count, pos, arg_start);
+ 
+-      /* .. and we never go past env_end */
+-      if (env_end - pos < count)
+-              count = env_end - pos;
++      /*
++       * For the non-setproctitle() case we limit things strictly
++       * to the [arg_start, arg_end[ range.
++       */
++      pos += arg_start;
++      if (pos < arg_start || pos >= arg_end)
++              return 0;
++      if (count > arg_end - pos)
++              count = arg_end - pos;
+ 
+       page = (char *)__get_free_page(GFP_KERNEL);
+       if (!page)
+@@ -262,48 +317,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
+       while (count) {
+               int got;
+               size_t size = min_t(size_t, PAGE_SIZE, count);
+-              long offset;
+ 
+-              /*
+-               * Are we already starting past the official end?
+-               * We always include the last byte that is *supposed*
+-               * to be NUL
+-               */
+-              offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
+-
+-              got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
+-              if (got <= offset)
++              got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
++              if (got <= 0)
+                       break;
+-              got -= offset;
+-
+-              /* Don't walk past a NUL character once you hit arg_end */
+-              if (pos + got >= arg_end) {
+-                      int n = 0;
+-
+-                      /*
+-                       * If we started before 'arg_end' but ended up
+-                       * at or after it, we start the NUL character
+-                       * check at arg_end-1 (where we expect the normal
+-                       * EOF to be).
+-                       *
+-                       * NOTE! This is smaller than 'got', because
+-                       * pos + got >= arg_end
+-                       */
+-                      if (pos < arg_end)
+-                              n = arg_end - pos - 1;
+-
+-                      /* Cut off at first NUL after 'n' */
+-                      got = n + strnlen(page+n, offset+got-n);
+-                      if (got < offset)
+-                              break;
+-                      got -= offset;
+-
+-                      /* Include the NUL if it existed */
+-                      if (got < size)
+-                              got++;
+-              }
+-
+-              got -= copy_to_user(buf, page+offset, got);
++              got -= copy_to_user(buf, page, got);
+               if (unlikely(!got)) {
+                       if (!len)
+                               len = -EFAULT;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 11837410690f..1157f6e245af 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1026,7 +1026,15 @@ struct task_struct {
+       u64                             last_sum_exec_runtime;
+       struct callback_head            numa_work;
+ 
+-      struct numa_group               *numa_group;
++      /*
++       * This pointer is only modified for current in syscall and
++       * pagefault context (and for tasks being destroyed), so it can be read
++       * from any of the following contexts:
++       *  - RCU read-side critical section
++       *  - current->numa_group from everywhere
++       *  - task's runqueue locked, task not running
++       */
++      struct numa_group __rcu         *numa_group;
+ 
+       /*
+        * numa_faults is an array split into four regions:
+diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
+index e7dd04a84ba8..3988762efe15 100644
+--- a/include/linux/sched/numa_balancing.h
++++ b/include/linux/sched/numa_balancing.h
+@@ -19,7 +19,7 @@
+ extern void task_numa_fault(int last_node, int node, int pages, int flags);
+ extern pid_t task_numa_group_id(struct task_struct *p);
+ extern void set_numabalancing_state(bool enabled);
+-extern void task_numa_free(struct task_struct *p);
++extern void task_numa_free(struct task_struct *p, bool final);
+ extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
+                                       int src_nid, int dst_cpu);
+ #else
+@@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
+ static inline void set_numabalancing_state(bool enabled)
+ {
+ }
+-static inline void task_numa_free(struct task_struct *p)
++static inline void task_numa_free(struct task_struct *p, bool final)
+ {
+ }
+ static inline bool should_numa_migrate_memory(struct task_struct *p,
+diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
+index cad09858a5f2..546ebee39e2a 100644
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -1928,8 +1928,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
+       /* Check array->index_type */
+       index_type_id = array->index_type;
+       index_type = btf_type_by_id(btf, index_type_id);
+-      if (btf_type_is_resolve_source_only(index_type) ||
+-          btf_type_nosize_or_null(index_type)) {
++      if (btf_type_nosize_or_null(index_type) ||
++          btf_type_is_resolve_source_only(index_type)) {
+               btf_verifier_log_type(env, v->t, "Invalid index");
+               return -EINVAL;
+       }
+@@ -1948,8 +1948,8 @@ static int btf_array_resolve(struct btf_verifier_env *env,
+       /* Check array->type */
+       elem_type_id = array->type;
+       elem_type = btf_type_by_id(btf, elem_type_id);
+-      if (btf_type_is_resolve_source_only(elem_type) ||
+-          btf_type_nosize_or_null(elem_type)) {
++      if (btf_type_nosize_or_null(elem_type) ||
++          btf_type_is_resolve_source_only(elem_type)) {
+               btf_verifier_log_type(env, v->t,
+                                     "Invalid elem");
+               return -EINVAL;
+@@ -2170,8 +2170,8 @@ static int btf_struct_resolve(struct btf_verifier_env *env,
+               const struct btf_type *member_type = btf_type_by_id(env->btf,
+                                                               member_type_id);
+ 
+-              if (btf_type_is_resolve_source_only(member_type) ||
+-                  btf_type_nosize_or_null(member_type)) {
++              if (btf_type_nosize_or_null(member_type) ||
++                  btf_type_is_resolve_source_only(member_type)) {
+                       btf_verifier_log_member(env, v->t, member,
+                                               "Invalid member");
+                       return -EINVAL;
+diff --git a/kernel/fork.c b/kernel/fork.c
+index fe83343da24b..d3f006ed2f9d 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -727,7 +727,7 @@ void __put_task_struct(struct task_struct *tsk)
+       WARN_ON(tsk == current);
+ 
+       cgroup_free(tsk);
+-      task_numa_free(tsk);
++      task_numa_free(tsk, true);
+       security_task_free(tsk);
+       exit_creds(tsk);
+       delayacct_tsk_free(tsk);
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index f35930f5e528..9ecf1e4c624b 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -1067,6 +1067,21 @@ struct numa_group {
+       unsigned long faults[0];
+ };
+ 
++/*
++ * For functions that can be called in multiple contexts that permit reading
++ * ->numa_group (see struct task_struct for locking rules).
++ */
++static struct numa_group *deref_task_numa_group(struct task_struct *p)
++{
++      return rcu_dereference_check(p->numa_group, p == current ||
++              (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
++}
++
++static struct numa_group *deref_curr_numa_group(struct task_struct *p)
++{
++      return rcu_dereference_protected(p->numa_group, p == current);
++}
++
+ static inline unsigned long group_faults_priv(struct numa_group *ng);
+ static inline unsigned long group_faults_shared(struct numa_group *ng);
+ 
+@@ -1110,10 +1125,12 @@ static unsigned int task_scan_start(struct task_struct *p)
+ {
+       unsigned long smin = task_scan_min(p);
+       unsigned long period = smin;
++      struct numa_group *ng;
+ 
+       /* Scale the maximum scan period with the amount of shared memory. */
+-      if (p->numa_group) {
+-              struct numa_group *ng = p->numa_group;
++      rcu_read_lock();
++      ng = rcu_dereference(p->numa_group);
++      if (ng) {
+               unsigned long shared = group_faults_shared(ng);
+               unsigned long private = group_faults_priv(ng);
+ 
+@@ -1121,6 +1138,7 @@ static unsigned int task_scan_start(struct task_struct *p)
+               period *= shared + 1;
+               period /= private + shared + 1;
+       }
++      rcu_read_unlock();
+ 
+       return max(smin, period);
+ }
+@@ -1129,13 +1147,14 @@ static unsigned int task_scan_max(struct task_struct *p)
+ {
+       unsigned long smin = task_scan_min(p);
+       unsigned long smax;
++      struct numa_group *ng;
+ 
+       /* Watch for min being lower than max due to floor calculations */
+       smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
+ 
+       /* Scale the maximum scan period with the amount of shared memory. */
+-      if (p->numa_group) {
+-              struct numa_group *ng = p->numa_group;
++      ng = deref_curr_numa_group(p);
++      if (ng) {
+               unsigned long shared = group_faults_shared(ng);
+               unsigned long private = group_faults_priv(ng);
+               unsigned long period = smax;
+@@ -1167,7 +1186,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
+       p->numa_scan_period             = sysctl_numa_balancing_scan_delay;
+       p->numa_work.next               = &p->numa_work;
+       p->numa_faults                  = NULL;
+-      p->numa_group                   = NULL;
++      RCU_INIT_POINTER(p->numa_group, NULL);
+       p->last_task_numa_placement     = 0;
+       p->last_sum_exec_runtime        = 0;
+ 
+@@ -1214,7 +1233,16 @@ static void account_numa_dequeue(struct rq *rq, struct 
task_struct *p)
+ 
+ pid_t task_numa_group_id(struct task_struct *p)
+ {
+-      return p->numa_group ? p->numa_group->gid : 0;
++      struct numa_group *ng;
++      pid_t gid = 0;
++
++      rcu_read_lock();
++      ng = rcu_dereference(p->numa_group);
++      if (ng)
++              gid = ng->gid;
++      rcu_read_unlock();
++
++      return gid;
+ }
+ 
+ /*
+@@ -1239,11 +1267,13 @@ static inline unsigned long task_faults(struct 
task_struct *p, int nid)
+ 
+ static inline unsigned long group_faults(struct task_struct *p, int nid)
+ {
+-      if (!p->numa_group)
++      struct numa_group *ng = deref_task_numa_group(p);
++
++      if (!ng)
+               return 0;
+ 
+-      return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
+-              p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
++      return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
++              ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
+ }
+ 
+ static inline unsigned long group_faults_cpu(struct numa_group *group, int 
nid)
+@@ -1381,12 +1411,13 @@ static inline unsigned long task_weight(struct 
task_struct *p, int nid,
+ static inline unsigned long group_weight(struct task_struct *p, int nid,
+                                        int dist)
+ {
++      struct numa_group *ng = deref_task_numa_group(p);
+       unsigned long faults, total_faults;
+ 
+-      if (!p->numa_group)
++      if (!ng)
+               return 0;
+ 
+-      total_faults = p->numa_group->total_faults;
++      total_faults = ng->total_faults;
+ 
+       if (!total_faults)
+               return 0;
+@@ -1400,7 +1431,7 @@ static inline unsigned long group_weight(struct 
task_struct *p, int nid,
+ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
+                               int src_nid, int dst_cpu)
+ {
+-      struct numa_group *ng = p->numa_group;
++      struct numa_group *ng = deref_curr_numa_group(p);
+       int dst_nid = cpu_to_node(dst_cpu);
+       int last_cpupid, this_cpupid;
+ 
+@@ -1583,13 +1614,14 @@ static bool load_too_imbalanced(long src_load, long 
dst_load,
+ static void task_numa_compare(struct task_numa_env *env,
+                             long taskimp, long groupimp, bool maymove)
+ {
++      struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
+       struct rq *dst_rq = cpu_rq(env->dst_cpu);
++      long imp = p_ng ? groupimp : taskimp;
+       struct task_struct *cur;
+       long src_load, dst_load;
+-      long load;
+-      long imp = env->p->numa_group ? groupimp : taskimp;
+-      long moveimp = imp;
+       int dist = env->dist;
++      long moveimp = imp;
++      long load;
+ 
+       if (READ_ONCE(dst_rq->numa_migrate_on))
+               return;
+@@ -1628,21 +1660,22 @@ static void task_numa_compare(struct task_numa_env 
*env,
+        * If dst and source tasks are in the same NUMA group, or not
+        * in any group then look only at task weights.
+        */
+-      if (cur->numa_group == env->p->numa_group) {
++      cur_ng = rcu_dereference(cur->numa_group);
++      if (cur_ng == p_ng) {
+               imp = taskimp + task_weight(cur, env->src_nid, dist) -
+                     task_weight(cur, env->dst_nid, dist);
+               /*
+                * Add some hysteresis to prevent swapping the
+                * tasks within a group over tiny differences.
+                */
+-              if (cur->numa_group)
++              if (cur_ng)
+                       imp -= imp / 16;
+       } else {
+               /*
+                * Compare the group weights. If a task is all by itself
+                * (not part of a group), use the task weight instead.
+                */
+-              if (cur->numa_group && env->p->numa_group)
++              if (cur_ng && p_ng)
+                       imp += group_weight(cur, env->src_nid, dist) -
+                              group_weight(cur, env->dst_nid, dist);
+               else
+@@ -1740,11 +1773,12 @@ static int task_numa_migrate(struct task_struct *p)
+               .best_imp = 0,
+               .best_cpu = -1,
+       };
++      unsigned long taskweight, groupweight;
+       struct sched_domain *sd;
++      long taskimp, groupimp;
++      struct numa_group *ng;
+       struct rq *best_rq;
+-      unsigned long taskweight, groupweight;
+       int nid, ret, dist;
+-      long taskimp, groupimp;
+ 
+       /*
+        * Pick the lowest SD_NUMA domain, as that would have the smallest
+@@ -1790,7 +1824,8 @@ static int task_numa_migrate(struct task_struct *p)
+        *   multiple NUMA nodes; in order to better consolidate the group,
+        *   we need to check other locations.
+        */
+-      if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes 
> 1)) {
++      ng = deref_curr_numa_group(p);
++      if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
+               for_each_online_node(nid) {
+                       if (nid == env.src_nid || nid == p->numa_preferred_nid)
+                               continue;
+@@ -1823,7 +1858,7 @@ static int task_numa_migrate(struct task_struct *p)
+        * A task that migrated to a second choice node will be better off
+        * trying for a better one later. Do not set the preferred node here.
+        */
+-      if (p->numa_group) {
++      if (ng) {
+               if (env.best_cpu == -1)
+                       nid = env.src_nid;
+               else
+@@ -2118,6 +2153,7 @@ static void task_numa_placement(struct task_struct *p)
+       unsigned long total_faults;
+       u64 runtime, period;
+       spinlock_t *group_lock = NULL;
++      struct numa_group *ng;
+ 
+       /*
+        * The p->mm->numa_scan_seq field gets updated without
+@@ -2135,8 +2171,9 @@ static void task_numa_placement(struct task_struct *p)
+       runtime = numa_get_avg_runtime(p, &period);
+ 
+       /* If the task is part of a group prevent parallel updates to group 
stats */
+-      if (p->numa_group) {
+-              group_lock = &p->numa_group->lock;
++      ng = deref_curr_numa_group(p);
++      if (ng) {
++              group_lock = &ng->lock;
+               spin_lock_irq(group_lock);
+       }
+ 
+@@ -2177,7 +2214,7 @@ static void task_numa_placement(struct task_struct *p)
+                       p->numa_faults[cpu_idx] += f_diff;
+                       faults += p->numa_faults[mem_idx];
+                       p->total_numa_faults += diff;
+-                      if (p->numa_group) {
++                      if (ng) {
+                               /*
+                                * safe because we can only change our own group
+                                *
+@@ -2185,14 +2222,14 @@ static void task_numa_placement(struct task_struct *p)
+                                * nid and priv in a specific region because it
+                                * is at the beginning of the numa_faults array.
+                                */
+-                              p->numa_group->faults[mem_idx] += diff;
+-                              p->numa_group->faults_cpu[mem_idx] += f_diff;
+-                              p->numa_group->total_faults += diff;
+-                              group_faults += p->numa_group->faults[mem_idx];
++                              ng->faults[mem_idx] += diff;
++                              ng->faults_cpu[mem_idx] += f_diff;
++                              ng->total_faults += diff;
++                              group_faults += ng->faults[mem_idx];
+                       }
+               }
+ 
+-              if (!p->numa_group) {
++              if (!ng) {
+                       if (faults > max_faults) {
+                               max_faults = faults;
+                               max_nid = nid;
+@@ -2203,8 +2240,8 @@ static void task_numa_placement(struct task_struct *p)
+               }
+       }
+ 
+-      if (p->numa_group) {
+-              numa_group_count_active_nodes(p->numa_group);
++      if (ng) {
++              numa_group_count_active_nodes(ng);
+               spin_unlock_irq(group_lock);
+               max_nid = preferred_group_nid(p, max_nid);
+       }
+@@ -2238,7 +2275,7 @@ static void task_numa_group(struct task_struct *p, int 
cpupid, int flags,
+       int cpu = cpupid_to_cpu(cpupid);
+       int i;
+ 
+-      if (unlikely(!p->numa_group)) {
++      if (unlikely(!deref_curr_numa_group(p))) {
+               unsigned int size = sizeof(struct numa_group) +
+                                   4*nr_node_ids*sizeof(unsigned long);
+ 
+@@ -2274,7 +2311,7 @@ static void task_numa_group(struct task_struct *p, int 
cpupid, int flags,
+       if (!grp)
+               goto no_join;
+ 
+-      my_grp = p->numa_group;
++      my_grp = deref_curr_numa_group(p);
+       if (grp == my_grp)
+               goto no_join;
+ 
+@@ -2336,13 +2373,24 @@ no_join:
+       return;
+ }
+ 
+-void task_numa_free(struct task_struct *p)
++/*
++ * Get rid of NUMA staticstics associated with a task (either current or 
dead).
++ * If @final is set, the task is dead and has reached refcount zero, so we can
++ * safely free all relevant data structures. Otherwise, there might be
++ * concurrent reads from places like load balancing and procfs, and we should
++ * reset the data back to default state without freeing ->numa_faults.
++ */
++void task_numa_free(struct task_struct *p, bool final)
+ {
+-      struct numa_group *grp = p->numa_group;
+-      void *numa_faults = p->numa_faults;
++      /* safe: p either is current or is being freed by current */
++      struct numa_group *grp = rcu_dereference_raw(p->numa_group);
++      unsigned long *numa_faults = p->numa_faults;
+       unsigned long flags;
+       int i;
+ 
++      if (!numa_faults)
++              return;
++
+       if (grp) {
+               spin_lock_irqsave(&grp->lock, flags);
+               for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
+@@ -2355,8 +2403,14 @@ void task_numa_free(struct task_struct *p)
+               put_numa_group(grp);
+       }
+ 
+-      p->numa_faults = NULL;
+-      kfree(numa_faults);
++      if (final) {
++              p->numa_faults = NULL;
++              kfree(numa_faults);
++      } else {
++              p->total_numa_faults = 0;
++              for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
++                      numa_faults[i] = 0;
++      }
+ }
+ 
+ /*
+@@ -2409,7 +2463,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int 
pages, int flags)
+        * actively using should be counted as local. This allows the
+        * scan rate to slow down when a workload has settled down.
+        */
+-      ng = p->numa_group;
++      ng = deref_curr_numa_group(p);
+       if (!priv && !local && ng && ng->active_nodes > 1 &&
+                               numa_is_active_node(cpu_node, ng) &&
+                               numa_is_active_node(mem_node, ng))
+@@ -10708,18 +10762,22 @@ void show_numa_stats(struct task_struct *p, struct 
seq_file *m)
+ {
+       int node;
+       unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
++      struct numa_group *ng;
+ 
++      rcu_read_lock();
++      ng = rcu_dereference(p->numa_group);
+       for_each_online_node(node) {
+               if (p->numa_faults) {
+                       tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 
0)];
+                       tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 
1)];
+               }
+-              if (p->numa_group) {
+-                      gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, 
node, 0)],
+-                      gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, 
node, 1)];
++              if (ng) {
++                      gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
++                      gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
+               }
+               print_numa_stats(m, node, tsf, tpf, gsf, gpf);
+       }
++      rcu_read_unlock();
+ }
+ #endif /* CONFIG_NUMA_BALANCING */
+ #endif /* CONFIG_SCHED_DEBUG */
+diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
+index 169112f8aa1e..ab47bf3ab66e 100644
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -274,7 +274,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected);
+ void vsock_remove_bound(struct vsock_sock *vsk)
+ {
+       spin_lock_bh(&vsock_table_lock);
+-      __vsock_remove_bound(vsk);
++      if (__vsock_in_bound_table(vsk))
++              __vsock_remove_bound(vsk);
+       spin_unlock_bh(&vsock_table_lock);
+ }
+ EXPORT_SYMBOL_GPL(vsock_remove_bound);
+@@ -282,7 +283,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound);
+ void vsock_remove_connected(struct vsock_sock *vsk)
+ {
+       spin_lock_bh(&vsock_table_lock);
+-      __vsock_remove_connected(vsk);
++      if (__vsock_in_connected_table(vsk))
++              __vsock_remove_connected(vsk);
+       spin_unlock_bh(&vsock_table_lock);
+ }
+ EXPORT_SYMBOL_GPL(vsock_remove_connected);
+@@ -318,35 +320,10 @@ struct sock *vsock_find_connected_socket(struct 
sockaddr_vm *src,
+ }
+ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
+ 
+-static bool vsock_in_bound_table(struct vsock_sock *vsk)
+-{
+-      bool ret;
+-
+-      spin_lock_bh(&vsock_table_lock);
+-      ret = __vsock_in_bound_table(vsk);
+-      spin_unlock_bh(&vsock_table_lock);
+-
+-      return ret;
+-}
+-
+-static bool vsock_in_connected_table(struct vsock_sock *vsk)
+-{
+-      bool ret;
+-
+-      spin_lock_bh(&vsock_table_lock);
+-      ret = __vsock_in_connected_table(vsk);
+-      spin_unlock_bh(&vsock_table_lock);
+-
+-      return ret;
+-}
+-
+ void vsock_remove_sock(struct vsock_sock *vsk)
+ {
+-      if (vsock_in_bound_table(vsk))
+-              vsock_remove_bound(vsk);
+-
+-      if (vsock_in_connected_table(vsk))
+-              vsock_remove_connected(vsk);
++      vsock_remove_bound(vsk);
++      vsock_remove_connected(vsk);
+ }
+ EXPORT_SYMBOL_GPL(vsock_remove_sock);
+ 
+@@ -477,8 +454,7 @@ static void vsock_pending_work(struct work_struct *work)
+        * incoming packets can't find this socket, and to reduce the reference
+        * count.
+        */
+-      if (vsock_in_connected_table(vsk))
+-              vsock_remove_connected(vsk);
++      vsock_remove_connected(vsk);
+ 
+       sk->sk_state = TCP_CLOSE;
+ 
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index b1694d5d15d3..82be7780bbe8 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -1280,13 +1280,17 @@ static void xfrm_hash_rebuild(struct work_struct *work)
+ 
+               hlist_for_each_entry_safe(policy, n,
+                                         &net->xfrm.policy_inexact[dir],
+-                                        bydst_inexact_list)
++                                        bydst_inexact_list) {
++                      hlist_del_rcu(&policy->bydst);
+                       hlist_del_init(&policy->bydst_inexact_list);
++              }
+ 
+               hmask = net->xfrm.policy_bydst[dir].hmask;
+               odst = net->xfrm.policy_bydst[dir].table;
+-              for (i = hmask; i >= 0; i--)
+-                      INIT_HLIST_HEAD(odst + i);
++              for (i = hmask; i >= 0; i--) {
++                      hlist_for_each_entry_safe(policy, n, odst + i, bydst)
++                              hlist_del_rcu(&policy->bydst);
++              }
+               if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
+                       /* dir out => dst = remote, src = local */
+                       net->xfrm.policy_bydst[dir].dbits4 = rbits4;
+@@ -1315,8 +1319,6 @@ static void xfrm_hash_rebuild(struct work_struct *work)
+               chain = policy_hash_bysel(net, &policy->selector,
+                                         policy->family, dir);
+ 
+-              hlist_del_rcu(&policy->bydst);
+-
+               if (!chain) {
+                       void *p = xfrm_policy_inexact_insert(policy, dir, 0);
+ 
+diff --git a/tools/testing/selftests/net/xfrm_policy.sh 
b/tools/testing/selftests/net/xfrm_policy.sh
+index 71d7fdc513c1..5445943bf07f 100755
+--- a/tools/testing/selftests/net/xfrm_policy.sh
++++ b/tools/testing/selftests/net/xfrm_policy.sh
+@@ -257,6 +257,29 @@ check_exceptions()
+       return $lret
+ }
+ 
++check_hthresh_repeat()
++{
++      local log=$1
++      i=0
++
++      for i in $(seq 1 10);do
++              ip -net ns1 xfrm policy update src e000:0001::0000 dst 
ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 
100 action allow || break
++              ip -net ns1 xfrm policy set hthresh6 0 28 || break
++
++              ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 
dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || 
break
++              ip -net ns1 xfrm policy set hthresh6 0 28 || break
++      done
++
++      if [ $i -ne 10 ] ;then
++              echo "FAIL: $log" 1>&2
++              ret=1
++              return 1
++      fi
++
++      echo "PASS: $log"
++      return 0
++}
++
+ #check for needed privileges
+ if [ "$(id -u)" -ne 0 ];then
+       echo "SKIP: Need root privileges"
+@@ -404,7 +427,9 @@ for n in ns3 ns4;do
+       ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
+       sleep $((RANDOM%5))
+ done
+-check_exceptions "exceptions and block policies after hresh change to normal"
++check_exceptions "exceptions and block policies after htresh change to normal"
++
++check_hthresh_repeat "policies with repeated htresh change"
+ 
+ for i in 1 2 3 4;do ip netns del ns$i;done
+ 

Reply via email to