commit:     5d6d793f8ed82a72cedf644601ba72883f9ab36f
Author:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
AuthorDate: Sun Jul 11 14:45:27 2021 +0000
Commit:     Mike Pagano <mpagano <AT> gentoo <DOT> org>
CommitDate: Sun Jul 11 14:45:27 2021 +0000
URL:        https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=5d6d793f

Linux patch 4.19.197

Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>

 0000_README               |    4 +
 1196_linux-4.19.197.patch | 1701 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1705 insertions(+)

diff --git a/0000_README b/0000_README
index 2009bef..dd8ede9 100644
--- a/0000_README
+++ b/0000_README
@@ -823,6 +823,10 @@ Patch:  1195_linux-4.19.196.patch
 From:   https://www.kernel.org
 Desc:   Linux 4.19.196
 
+Patch:  1196_linux-4.19.197.patch
+From:   https://www.kernel.org
+Desc:   Linux 4.19.197
+
 Patch:  1500_XATTR_USER_PREFIX.patch
 From:   https://bugs.gentoo.org/show_bug.cgi?id=470644
 Desc:   Support for namespace user.pax.* on tmpfs.

diff --git a/1196_linux-4.19.197.patch b/1196_linux-4.19.197.patch
new file mode 100644
index 0000000..e8a9416
--- /dev/null
+++ b/1196_linux-4.19.197.patch
@@ -0,0 +1,1701 @@
+diff --git a/Makefile b/Makefile
+index 63b0bc92a0fa6..42073a4c6e2e3 100644
+--- a/Makefile
++++ b/Makefile
+@@ -1,7 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
+ VERSION = 4
+ PATCHLEVEL = 19
+-SUBLEVEL = 196
++SUBLEVEL = 197
+ EXTRAVERSION =
+ NAME = "People's Front"
+ 
+diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
+index 0c0781a37c5a7..7f1fe4a724472 100644
+--- a/arch/arm/boot/dts/dra7.dtsi
++++ b/arch/arm/boot/dts/dra7.dtsi
+@@ -48,6 +48,7 @@
+ 
+       timer {
+               compatible = "arm,armv7-timer";
++              status = "disabled";    /* See ARM architected timer wrap erratum i940 */
+               interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+                            <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+@@ -910,6 +911,8 @@
+                       reg = <0x48032000 0x80>;
+                       interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+                       ti,hwmods = "timer2";
++                      clock-names = "fck";
++                      clocks = <&l4per_clkctrl DRA7_TIMER2_CLKCTRL 24>;
+               };
+ 
+               timer3: timer@48034000 {
+@@ -917,6 +920,10 @@
+                       reg = <0x48034000 0x80>;
+                       interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+                       ti,hwmods = "timer3";
++                      clock-names = "fck";
++                      clocks = <&l4per_clkctrl DRA7_TIMER3_CLKCTRL 24>;
++                      assigned-clocks = <&l4per_clkctrl DRA7_TIMER3_CLKCTRL 24>;
++                      assigned-clock-parents = <&timer_sys_clk_div>;
+               };
+ 
+               timer4: timer@48036000 {
+@@ -924,6 +931,10 @@
+                       reg = <0x48036000 0x80>;
+                       interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
+                       ti,hwmods = "timer4";
++                      clock-names = "fck";
++                      clocks = <&l4per_clkctrl DRA7_TIMER4_CLKCTRL 24>;
++                      assigned-clocks = <&l4per_clkctrl DRA7_TIMER4_CLKCTRL 24>;
++                      assigned-clock-parents = <&timer_sys_clk_div>;
+               };
+ 
+               timer5: timer@48820000 {
+diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+index 41384bbd2f60c..03357d39870ee 100644
+--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
++++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+@@ -675,10 +675,6 @@
+       vin-supply = <&vgen5_reg>;
+ };
+ 
+-&reg_vdd3p0 {
+-      vin-supply = <&sw2_reg>;
+-};
+-
+ &reg_vdd2p5 {
+       vin-supply = <&vgen5_reg>;
+ };
+diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
+index 3e1de14805e48..65b91a8379c90 100644
+--- a/arch/arm/mach-omap1/pm.c
++++ b/arch/arm/mach-omap1/pm.c
+@@ -610,11 +610,6 @@ static irqreturn_t omap_wakeup_interrupt(int irq, void *dev)
+       return IRQ_HANDLED;
+ }
+ 
+-static struct irqaction omap_wakeup_irq = {
+-      .name           = "peripheral wakeup",
+-      .handler        = omap_wakeup_interrupt
+-};
+-
+ 
+ 
+ static const struct platform_suspend_ops omap_pm_ops = {
+@@ -627,6 +622,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
+ static int __init omap_pm_init(void)
+ {
+       int error = 0;
++      int irq;
+ 
+       if (!cpu_class_is_omap1())
+               return -ENODEV;
+@@ -670,9 +666,12 @@ static int __init omap_pm_init(void)
+       arm_pm_idle = omap1_pm_idle;
+ 
+       if (cpu_is_omap7xx())
+-              setup_irq(INT_7XX_WAKE_UP_REQ, &omap_wakeup_irq);
++              irq = INT_7XX_WAKE_UP_REQ;
+       else if (cpu_is_omap16xx())
+-              setup_irq(INT_1610_WAKE_UP_REQ, &omap_wakeup_irq);
++              irq = INT_1610_WAKE_UP_REQ;
++      if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup",
++                      NULL))
++              pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
+ 
+       /* Program new power ramp-up time
+        * (0 for most boards since we don't lower voltage when in deep sleep)
+diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
+index 524977a31a49c..de590a85a42b3 100644
+--- a/arch/arm/mach-omap1/time.c
++++ b/arch/arm/mach-omap1/time.c
+@@ -155,15 +155,11 @@ static irqreturn_t omap_mpu_timer1_interrupt(int irq, void *dev_id)
+       return IRQ_HANDLED;
+ }
+ 
+-static struct irqaction omap_mpu_timer1_irq = {
+-      .name           = "mpu_timer1",
+-      .flags          = IRQF_TIMER | IRQF_IRQPOLL,
+-      .handler        = omap_mpu_timer1_interrupt,
+-};
+-
+ static __init void omap_init_mpu_timer(unsigned long rate)
+ {
+-      setup_irq(INT_TIMER1, &omap_mpu_timer1_irq);
++      if (request_irq(INT_TIMER1, omap_mpu_timer1_interrupt,
++                      IRQF_TIMER | IRQF_IRQPOLL, "mpu_timer1", NULL))
++              pr_err("Failed to request irq %d (mpu_timer1)\n", INT_TIMER1);
+       omap_mpu_timer_start(0, (rate / HZ) - 1, 1);
+ 
+       clockevent_mpu_timer1.cpumask = cpumask_of(0);
+diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
+index 0ae6c52a7d70b..780fdf03c3cee 100644
+--- a/arch/arm/mach-omap1/timer32k.c
++++ b/arch/arm/mach-omap1/timer32k.c
+@@ -148,15 +148,11 @@ static irqreturn_t omap_32k_timer_interrupt(int irq, void *dev_id)
+       return IRQ_HANDLED;
+ }
+ 
+-static struct irqaction omap_32k_timer_irq = {
+-      .name           = "32KHz timer",
+-      .flags          = IRQF_TIMER | IRQF_IRQPOLL,
+-      .handler        = omap_32k_timer_interrupt,
+-};
+-
+ static __init void omap_init_32k_timer(void)
+ {
+-      setup_irq(INT_OS_TIMER, &omap_32k_timer_irq);
++      if (request_irq(INT_OS_TIMER, omap_32k_timer_interrupt,
++                      IRQF_TIMER | IRQF_IRQPOLL, "32KHz timer", NULL))
++              pr_err("Failed to request irq %d(32KHz timer)\n", INT_OS_TIMER);
+ 
+       clockevent_32k_timer.cpumask = cpumask_of(0);
+       clockevents_config_and_register(&clockevent_32k_timer,
+diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
+index 6b4f4975cf7a6..6e59c11131c48 100644
+--- a/arch/arm/mach-omap2/board-generic.c
++++ b/arch/arm/mach-omap2/board-generic.c
+@@ -330,7 +330,7 @@ DT_MACHINE_START(DRA74X_DT, "Generic DRA74X (Flattened Device Tree)")
+       .init_late      = dra7xx_init_late,
+       .init_irq       = omap_gic_of_init,
+       .init_machine   = omap_generic_init,
+-      .init_time      = omap5_realtime_timer_init,
++      .init_time      = omap3_gptimer_timer_init,
+       .dt_compat      = dra74x_boards_compat,
+       .restart        = omap44xx_restart,
+ MACHINE_END
+@@ -353,7 +353,7 @@ DT_MACHINE_START(DRA72X_DT, "Generic DRA72X (Flattened Device Tree)")
+       .init_late      = dra7xx_init_late,
+       .init_irq       = omap_gic_of_init,
+       .init_machine   = omap_generic_init,
+-      .init_time      = omap5_realtime_timer_init,
++      .init_time      = omap3_gptimer_timer_init,
+       .dt_compat      = dra72x_boards_compat,
+       .restart        = omap44xx_restart,
+ MACHINE_END
+diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
+index 98ed5ac073bc1..c4ba848e8af62 100644
+--- a/arch/arm/mach-omap2/timer.c
++++ b/arch/arm/mach-omap2/timer.c
+@@ -42,6 +42,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/platform_data/dmtimer-omap.h>
+ #include <linux/sched_clock.h>
++#include <linux/cpu.h>
+ 
+ #include <asm/mach/time.h>
+ #include <asm/smp_twd.h>
+@@ -64,15 +65,28 @@
+ 
+ /* Clockevent code */
+ 
+-static struct omap_dm_timer clkev;
+-static struct clock_event_device clockevent_gpt;
+-
+ /* Clockevent hwmod for am335x and am437x suspend */
+ static struct omap_hwmod *clockevent_gpt_hwmod;
+ 
+ /* Clockesource hwmod for am437x suspend */
+ static struct omap_hwmod *clocksource_gpt_hwmod;
+ 
++struct dmtimer_clockevent {
++      struct clock_event_device dev;
++      struct omap_dm_timer timer;
++};
++
++static struct dmtimer_clockevent clockevent;
++
++static struct omap_dm_timer *to_dmtimer(struct clock_event_device *clockevent)
++{
++      struct dmtimer_clockevent *clkevt =
++              container_of(clockevent, struct dmtimer_clockevent, dev);
++      struct omap_dm_timer *timer = &clkevt->timer;
++
++      return timer;
++}
++
+ #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
+ static unsigned long arch_timer_freq;
+ 
+@@ -84,24 +98,21 @@ void set_cntfreq(void)
+ 
+ static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id)
+ {
+-      struct clock_event_device *evt = &clockevent_gpt;
+-
+-      __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW);
++      struct dmtimer_clockevent *clkevt = dev_id;
++      struct clock_event_device *evt = &clkevt->dev;
++      struct omap_dm_timer *timer = &clkevt->timer;
+ 
++      __omap_dm_timer_write_status(timer, OMAP_TIMER_INT_OVERFLOW);
+       evt->event_handler(evt);
+       return IRQ_HANDLED;
+ }
+ 
+-static struct irqaction omap2_gp_timer_irq = {
+-      .name           = "gp_timer",
+-      .flags          = IRQF_TIMER | IRQF_IRQPOLL,
+-      .handler        = omap2_gp_timer_interrupt,
+-};
+-
+ static int omap2_gp_timer_set_next_event(unsigned long cycles,
+                                        struct clock_event_device *evt)
+ {
+-      __omap_dm_timer_load_start(&clkev, OMAP_TIMER_CTRL_ST,
++      struct omap_dm_timer *timer = to_dmtimer(evt);
++
++      __omap_dm_timer_load_start(timer, OMAP_TIMER_CTRL_ST,
+                                  0xffffffff - cycles, OMAP_TIMER_POSTED);
+ 
+       return 0;
+@@ -109,22 +120,26 @@ static int omap2_gp_timer_set_next_event(unsigned long cycles,
+ 
+ static int omap2_gp_timer_shutdown(struct clock_event_device *evt)
+ {
+-      __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate);
++      struct omap_dm_timer *timer = to_dmtimer(evt);
++
++      __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate);
++
+       return 0;
+ }
+ 
+ static int omap2_gp_timer_set_periodic(struct clock_event_device *evt)
+ {
++      struct omap_dm_timer *timer = to_dmtimer(evt);
+       u32 period;
+ 
+-      __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate);
++      __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate);
+ 
+-      period = clkev.rate / HZ;
++      period = timer->rate / HZ;
+       period -= 1;
+       /* Looks like we need to first set the load value separately */
+-      __omap_dm_timer_write(&clkev, OMAP_TIMER_LOAD_REG, 0xffffffff - period,
++      __omap_dm_timer_write(timer, OMAP_TIMER_LOAD_REG, 0xffffffff - period,
+                             OMAP_TIMER_POSTED);
+-      __omap_dm_timer_load_start(&clkev,
++      __omap_dm_timer_load_start(timer,
+                                  OMAP_TIMER_CTRL_AR | OMAP_TIMER_CTRL_ST,
+                                  0xffffffff - period, OMAP_TIMER_POSTED);
+       return 0;
+@@ -138,25 +153,16 @@ static void omap_clkevt_idle(struct clock_event_device *unused)
+       omap_hwmod_idle(clockevent_gpt_hwmod);
+ }
+ 
+-static void omap_clkevt_unidle(struct clock_event_device *unused)
++static void omap_clkevt_unidle(struct clock_event_device *evt)
+ {
++      struct omap_dm_timer *timer = to_dmtimer(evt);
++
+       if (!clockevent_gpt_hwmod)
+               return;
+ 
+       omap_hwmod_enable(clockevent_gpt_hwmod);
+-      __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW);
+-}
+-
+-static struct clock_event_device clockevent_gpt = {
+-      .features               = CLOCK_EVT_FEAT_PERIODIC |
+-                                CLOCK_EVT_FEAT_ONESHOT,
+-      .rating                 = 300,
+-      .set_next_event         = omap2_gp_timer_set_next_event,
+-      .set_state_shutdown     = omap2_gp_timer_shutdown,
+-      .set_state_periodic     = omap2_gp_timer_set_periodic,
+-      .set_state_oneshot      = omap2_gp_timer_shutdown,
+-      .tick_resume            = omap2_gp_timer_shutdown,
+-};
++      __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW);
++}
+ 
+ static const struct of_device_id omap_timer_match[] __initconst = {
+       { .compatible = "ti,omap2420-timer", },
+@@ -363,47 +369,104 @@ void tick_broadcast(const struct cpumask *mask)
+ }
+ #endif
+ 
+-static void __init omap2_gp_clockevent_init(int gptimer_id,
+-                                              const char *fck_source,
+-                                              const char *property)
++static void __init dmtimer_clkevt_init_common(struct dmtimer_clockevent *clkevt,
++                                            int gptimer_id,
++                                            const char *fck_source,
++                                            unsigned int features,
++                                            const struct cpumask *cpumask,
++                                            const char *property,
++                                            int rating, const char *name)
+ {
++      struct omap_dm_timer *timer = &clkevt->timer;
+       int res;
+ 
+-      clkev.id = gptimer_id;
+-      clkev.errata = omap_dm_timer_get_errata();
++      timer->id = gptimer_id;
++      timer->errata = omap_dm_timer_get_errata();
++      clkevt->dev.features = features;
++      clkevt->dev.rating = rating;
++      clkevt->dev.set_next_event = omap2_gp_timer_set_next_event;
++      clkevt->dev.set_state_shutdown = omap2_gp_timer_shutdown;
++      clkevt->dev.set_state_periodic = omap2_gp_timer_set_periodic;
++      clkevt->dev.set_state_oneshot = omap2_gp_timer_shutdown;
++      clkevt->dev.tick_resume = omap2_gp_timer_shutdown;
+ 
+       /*
+        * For clock-event timers we never read the timer counter and
+        * so we are not impacted by errata i103 and i767. Therefore,
+        * we can safely ignore this errata for clock-event timers.
+        */
+-      __omap_dm_timer_override_errata(&clkev, OMAP_TIMER_ERRATA_I103_I767);
++      __omap_dm_timer_override_errata(timer, OMAP_TIMER_ERRATA_I103_I767);
+ 
+-      res = omap_dm_timer_init_one(&clkev, fck_source, property,
+-                                   &clockevent_gpt.name, OMAP_TIMER_POSTED);
++      res = omap_dm_timer_init_one(timer, fck_source, property,
++                                   &clkevt->dev.name, OMAP_TIMER_POSTED);
+       BUG_ON(res);
+ 
+-      omap2_gp_timer_irq.dev_id = &clkev;
+-      setup_irq(clkev.irq, &omap2_gp_timer_irq);
++      clkevt->dev.cpumask = cpumask;
++      clkevt->dev.irq = omap_dm_timer_get_irq(timer);
+ 
+-      __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW);
++      if (request_irq(clkevt->dev.irq, omap2_gp_timer_interrupt,
++                      IRQF_TIMER | IRQF_IRQPOLL, name, clkevt))
++              pr_err("Failed to request irq %d (gp_timer)\n", clkevt->dev.irq);
+ 
+-      clockevent_gpt.cpumask = cpu_possible_mask;
+-      clockevent_gpt.irq = omap_dm_timer_get_irq(&clkev);
+-      clockevents_config_and_register(&clockevent_gpt, clkev.rate,
+-                                      3, /* Timer internal resynch latency */
+-                                      0xffffffff);
++      __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW);
+ 
+       if (soc_is_am33xx() || soc_is_am43xx()) {
+-              clockevent_gpt.suspend = omap_clkevt_idle;
+-              clockevent_gpt.resume = omap_clkevt_unidle;
++              clkevt->dev.suspend = omap_clkevt_idle;
++              clkevt->dev.resume = omap_clkevt_unidle;
+ 
+               clockevent_gpt_hwmod =
+-                      omap_hwmod_lookup(clockevent_gpt.name);
++                      omap_hwmod_lookup(clkevt->dev.name);
++      }
++
++      pr_info("OMAP clockevent source: %s at %lu Hz\n", clkevt->dev.name,
++              timer->rate);
++}
++
++static DEFINE_PER_CPU(struct dmtimer_clockevent, dmtimer_percpu_timer);
++
++static int omap_gptimer_starting_cpu(unsigned int cpu)
++{
++      struct dmtimer_clockevent *clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu);
++      struct clock_event_device *dev = &clkevt->dev;
++      struct omap_dm_timer *timer = &clkevt->timer;
++
++      clockevents_config_and_register(dev, timer->rate, 3, ULONG_MAX);
++      irq_force_affinity(dev->irq, cpumask_of(cpu));
++
++      return 0;
++}
++
++static int __init dmtimer_percpu_quirk_init(void)
++{
++      struct dmtimer_clockevent *clkevt;
++      struct clock_event_device *dev;
++      struct device_node *arm_timer;
++      struct omap_dm_timer *timer;
++      int cpu = 0;
++
++      arm_timer = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
++      if (of_device_is_available(arm_timer)) {
++              pr_warn_once("ARM architected timer wrap issue i940 detected\n");
++              return 0;
++      }
++
++      for_each_possible_cpu(cpu) {
++              clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu);
++              dev = &clkevt->dev;
++              timer = &clkevt->timer;
++
++              dmtimer_clkevt_init_common(clkevt, 0, "timer_sys_ck",
++                                         CLOCK_EVT_FEAT_ONESHOT,
++                                         cpumask_of(cpu),
++                                         "assigned-clock-parents",
++                                         500, "percpu timer");
+       }
+ 
+-      pr_info("OMAP clockevent source: %s at %lu Hz\n", clockevent_gpt.name,
+-              clkev.rate);
++      cpuhp_setup_state(CPUHP_AP_OMAP_DM_TIMER_STARTING,
++                        "clockevents/omap/gptimer:starting",
++                        omap_gptimer_starting_cpu, NULL);
++
++      return 0;
+ }
+ 
+ /* Clocksource code */
+@@ -543,7 +606,15 @@ static void __init __omap_sync32k_timer_init(int clkev_nr, const char *clkev_src
+ {
+       omap_clk_init();
+       omap_dmtimer_init();
+-      omap2_gp_clockevent_init(clkev_nr, clkev_src, clkev_prop);
++      dmtimer_clkevt_init_common(&clockevent, clkev_nr, clkev_src,
++                                 CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
++                                 cpu_possible_mask, clkev_prop, 300, "clockevent");
++      clockevents_config_and_register(&clockevent.dev, clockevent.timer.rate,
++                                      3, /* Timer internal resynch latency */
++                                      0xffffffff);
++
++      if (soc_is_dra7xx())
++              dmtimer_percpu_quirk_init();
+ 
+       /* Enable the use of clocksource="gp_timer" kernel parameter */
+       if (use_gptimer_clksrc || gptimer)
+@@ -572,7 +643,7 @@ void __init omap3_secure_sync32k_timer_init(void)
+ #endif /* CONFIG_ARCH_OMAP3 */
+ 
+ #if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) || \
+-      defined(CONFIG_SOC_AM43XX)
++      defined(CONFIG_SOC_AM43XX) || defined(CONFIG_SOC_DRA7XX)
+ void __init omap3_gptimer_timer_init(void)
+ {
+       __omap_sync32k_timer_init(2, "timer_sys_ck", NULL,
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index ad24e67772778..bd463d6842370 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1791,9 +1791,25 @@ static void sev_asid_free(struct kvm *kvm)
+       __sev_asid_free(sev->asid);
+ }
+ 
+-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
++static void sev_decommission(unsigned int handle)
+ {
+       struct sev_data_decommission *decommission;
++
++      if (!handle)
++              return;
++
++      decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
++      if (!decommission)
++              return;
++
++      decommission->handle = handle;
++      sev_guest_decommission(decommission, NULL);
++
++      kfree(decommission);
++}
++
++static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
++{
+       struct sev_data_deactivate *data;
+ 
+       if (!handle)
+@@ -1811,15 +1827,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+       sev_guest_df_flush(NULL);
+       kfree(data);
+ 
+-      decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+-      if (!decommission)
+-              return;
+-
+-      /* decommission handle */
+-      decommission->handle = handle;
+-      sev_guest_decommission(decommission, NULL);
+-
+-      kfree(decommission);
++      sev_decommission(handle);
+ }
+ 
+ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
+@@ -1954,6 +1962,7 @@ static void sev_vm_destroy(struct kvm *kvm)
+               list_for_each_safe(pos, q, head) {
+                       __unregister_enc_region_locked(kvm,
+                               list_entry(pos, struct enc_region, list));
++                      cond_resched();
+               }
+       }
+ 
+@@ -6468,8 +6477,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ 
+       /* Bind ASID to this guest */
+       ret = sev_bind_asid(kvm, start->handle, error);
+-      if (ret)
++      if (ret) {
++              sev_decommission(start->handle);
+               goto e_free_session;
++      }
+ 
+       /* return handle to userspace */
+       params.handle = start->handle;
+diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c
+index 71a122b2dc67e..b6d1ec49fa01a 100644
+--- a/drivers/clk/ti/clk-7xx.c
++++ b/drivers/clk/ti/clk-7xx.c
+@@ -733,6 +733,7 @@ const struct omap_clkctrl_data dra7_clkctrl_data[] __initconst = {
+ static struct ti_dt_clk dra7xx_clks[] = {
+       DT_CLK(NULL, "timer_32k_ck", "sys_32k_ck"),
+       DT_CLK(NULL, "sys_clkin_ck", "timer_sys_clk_div"),
++      DT_CLK(NULL, "timer_sys_ck", "timer_sys_clk_div"),
+       DT_CLK(NULL, "sys_clkin", "sys_clkin1"),
+       DT_CLK(NULL, "atl_dpll_clk_mux", "atl_cm:0000:24"),
+       DT_CLK(NULL, "atl_gfclk_mux", "atl_cm:0000:26"),
+diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
+index 7214022dfb911..d230536e7086d 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
++++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
+@@ -512,7 +512,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
+       struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
+       int i;
+ 
+-      if (!ttm_dma)
++      if (!ttm_dma || !ttm_dma->dma_address)
+               return;
+ 
+       /* Don't waste time looping if the object is coherent */
+@@ -532,7 +532,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
+       struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
+       int i;
+ 
+-      if (!ttm_dma)
++      if (!ttm_dma || !ttm_dma->dma_address)
+               return;
+ 
+       /* Don't waste time looping if the object is coherent */
+diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
+index 45c8bf39ad238..acf0c244141f4 100644
+--- a/drivers/scsi/sr.c
++++ b/drivers/scsi/sr.c
+@@ -216,6 +216,8 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
+               return DISK_EVENT_EJECT_REQUEST;
+       else if (med->media_event_code == 2)
+               return DISK_EVENT_MEDIA_CHANGE;
++      else if (med->media_event_code == 3)
++              return DISK_EVENT_EJECT_REQUEST;
+       return 0;
+ }
+ 
+diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
+index b370144682ed5..a2f8130e18fec 100644
+--- a/drivers/xen/events/events_base.c
++++ b/drivers/xen/events/events_base.c
+@@ -524,6 +524,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
+       }
+ 
+       info->eoi_time = 0;
++
++      /* is_active hasn't been reset yet, do it now. */
++      smp_store_release(&info->is_active, 0);
+       do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
+ }
+ 
+@@ -1780,10 +1783,22 @@ static void lateeoi_ack_dynirq(struct irq_data *data)
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
+ 
+-      if (VALID_EVTCHN(evtchn)) {
+-              do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+-              ack_dynirq(data);
+-      }
++      if (!VALID_EVTCHN(evtchn))
++              return;
++
++      do_mask(info, EVT_MASK_REASON_EOI_PENDING);
++
++      if (unlikely(irqd_is_setaffinity_pending(data)) &&
++          likely(!irqd_irq_disabled(data))) {
++              do_mask(info, EVT_MASK_REASON_TEMPORARY);
++
++              clear_evtchn(evtchn);
++
++              irq_move_masked_irq(data);
++
++              do_unmask(info, EVT_MASK_REASON_TEMPORARY);
++      } else
++              clear_evtchn(evtchn);
+ }
+ 
+ static void lateeoi_mask_ack_dynirq(struct irq_data *data)
+diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
+index 1ea8fc9ff048f..1bc65ecd4bd6d 100644
+--- a/fs/ext4/block_validity.c
++++ b/fs/ext4/block_validity.c
+@@ -171,8 +171,10 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
+               else if (start_blk >= (entry->start_blk + entry->count))
+                       n = n->rb_right;
+               else {
++                      if (entry->ino == ino)
++                              return 1;
+                       sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
+-                      return entry->ino == ino;
++                      return 0;
+               }
+       }
+       return 1;
+diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
+index d67c0035165c2..3d323c6c85260 100644
+--- a/include/linux/cpuhotplug.h
++++ b/include/linux/cpuhotplug.h
+@@ -118,6 +118,7 @@ enum cpuhp_state {
+       CPUHP_AP_ARM_L2X0_STARTING,
+       CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
+       CPUHP_AP_ARM_ARCH_TIMER_STARTING,
++      CPUHP_AP_OMAP_DM_TIMER_STARTING,
+       CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
+       CPUHP_AP_JCORE_TIMER_STARTING,
+       CPUHP_AP_ARM_TWD_STARTING,
+diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
+index e375f2249f520..becf9b1eae5a1 100644
+--- a/include/linux/huge_mm.h
++++ b/include/linux/huge_mm.h
+@@ -224,6 +224,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
+ extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+ 
+ extern struct page *huge_zero_page;
++extern unsigned long huge_zero_pfn;
+ 
+ static inline bool is_huge_zero_page(struct page *page)
+ {
+@@ -232,7 +233,7 @@ static inline bool is_huge_zero_page(struct page *page)
+ 
+ static inline bool is_huge_zero_pmd(pmd_t pmd)
+ {
+-      return is_huge_zero_page(pmd_page(pmd));
++      return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
+ }
+ 
+ static inline bool is_huge_zero_pud(pud_t pud)
+@@ -342,6 +343,11 @@ static inline bool is_huge_zero_page(struct page *page)
+       return false;
+ }
+ 
++static inline bool is_huge_zero_pmd(pmd_t pmd)
++{
++      return false;
++}
++
+ static inline bool is_huge_zero_pud(pud_t pud)
+ {
+       return false;
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index c129c1c14c5f2..2df83a6598182 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -477,17 +477,6 @@ static inline int hstate_index(struct hstate *h)
+       return h - hstates;
+ }
+ 
+-pgoff_t __basepage_index(struct page *page);
+-
+-/* Return page->index in PAGE_SIZE units */
+-static inline pgoff_t basepage_index(struct page *page)
+-{
+-      if (!PageCompound(page))
+-              return page->index;
+-
+-      return __basepage_index(page);
+-}
+-
+ extern int dissolve_free_huge_page(struct page *page);
+ extern int dissolve_free_huge_pages(unsigned long start_pfn,
+                                   unsigned long end_pfn);
+@@ -582,11 +571,6 @@ static inline int hstate_index(struct hstate *h)
+       return 0;
+ }
+ 
+-static inline pgoff_t basepage_index(struct page *page)
+-{
+-      return page->index;
+-}
+-
+ static inline int dissolve_free_huge_page(struct page *page)
+ {
+       return 0;
+diff --git a/include/linux/mm.h b/include/linux/mm.h
+index f6ecf41aea83d..c736c677b876d 100644
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1338,6 +1338,7 @@ struct zap_details {
+       struct address_space *check_mapping;    /* Check page->mapping if set */
+       pgoff_t first_index;                    /* Lowest page->index to unmap */
+       pgoff_t last_index;                     /* Highest page->index to unmap */
++      struct page *single_page;               /* Locked page to be unmapped */
+ };
+ 
+ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
+@@ -1428,6 +1429,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
+ extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
+                           unsigned long address, unsigned int fault_flags,
+                           bool *unlocked);
++void unmap_mapping_page(struct page *page);
+ void unmap_mapping_pages(struct address_space *mapping,
+               pgoff_t start, pgoff_t nr, bool even_cows);
+ void unmap_mapping_range(struct address_space *mapping,
+@@ -1448,6 +1450,7 @@ static inline int fixup_user_fault(struct task_struct *tsk,
+       BUG();
+       return -EFAULT;
+ }
++static inline void unmap_mapping_page(struct page *page) { }
+ static inline void unmap_mapping_pages(struct address_space *mapping,
+               pgoff_t start, pgoff_t nr, bool even_cows) { }
+ static inline void unmap_mapping_range(struct address_space *mapping,
+diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
+index 2ad72d2c8cc52..5d0767cb424aa 100644
+--- a/include/linux/mmdebug.h
++++ b/include/linux/mmdebug.h
+@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm);
+                       BUG();                                          \
+               }                                                       \
+       } while (0)
++#define VM_WARN_ON_ONCE_PAGE(cond, page)      ({                      \
++      static bool __section(".data.once") __warned;                   \
++      int __ret_warn_once = !!(cond);                                 \
++                                                                      \
++      if (unlikely(__ret_warn_once && !__warned)) {                   \
++              dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
++              __warned = true;                                        \
++              WARN_ON(1);                                             \
++      }                                                               \
++      unlikely(__ret_warn_once);                                      \
++})
++
+ #define VM_WARN_ON(cond) (void)WARN_ON(cond)
+ #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
+ #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
+@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
+ #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
+ #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
++#define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
+ #endif
+diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
+index b1bd2186e6d2b..33b63b2a163f9 100644
+--- a/include/linux/pagemap.h
++++ b/include/linux/pagemap.h
+@@ -403,7 +403,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
+ }
+ 
+ /*
+- * Get index of the page with in radix-tree
++ * Get index of the page within radix-tree (but not for hugetlb pages).
+  * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
+  */
+ static inline pgoff_t page_to_index(struct page *page)
+@@ -422,15 +422,16 @@ static inline pgoff_t page_to_index(struct page *page)
+       return pgoff;
+ }
+ 
++extern pgoff_t hugetlb_basepage_index(struct page *page);
++
+ /*
+- * Get the offset in PAGE_SIZE.
+- * (TODO: hugepage should have ->index in PAGE_SIZE)
++ * Get the offset in PAGE_SIZE (even for hugetlb pages).
++ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
+  */
+ static inline pgoff_t page_to_pgoff(struct page *page)
+ {
+-      if (unlikely(PageHeadHuge(page)))
+-              return page->index << compound_order(page);
+-
++      if (unlikely(PageHuge(page)))
++              return hugetlb_basepage_index(page);
+       return page_to_index(page);
+ }
+ 
+diff --git a/include/linux/rmap.h b/include/linux/rmap.h
+index d7d6d4eb17949..91ccae9467164 100644
+--- a/include/linux/rmap.h
++++ b/include/linux/rmap.h
+@@ -98,7 +98,8 @@ enum ttu_flags {
+                                        * do a final flush if necessary */
+       TTU_RMAP_LOCKED         = 0x80, /* do not grab rmap lock:
+                                        * caller holds it */
+-      TTU_SPLIT_FREEZE        = 0x100,                /* freeze pte under splitting thp */
++      TTU_SPLIT_FREEZE        = 0x100, /* freeze pte under splitting thp */
++      TTU_SYNC                = 0x200, /* avoid racy checks with PVMW_SYNC */
+ };
+ 
+ #ifdef CONFIG_MMU
+diff --git a/kernel/futex.c b/kernel/futex.c
+index 526ebcff5a0a9..3c67da9b84086 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -719,7 +719,7 @@ again:
+ 
+               key->both.offset |= FUT_OFF_INODE; /* inode-based key */
+               key->shared.i_seq = get_inode_sequence_number(inode);
+-              key->shared.pgoff = basepage_index(tail);
++              key->shared.pgoff = page_to_pgoff(tail);
+               rcu_read_unlock();
+       }
+ 
+diff --git a/kernel/kthread.c b/kernel/kthread.c
+index 81abfac351272..9750f4f7f9010 100644
+--- a/kernel/kthread.c
++++ b/kernel/kthread.c
+@@ -1010,8 +1010,38 @@ void kthread_flush_work(struct kthread_work *work)
+ EXPORT_SYMBOL_GPL(kthread_flush_work);
+ 
+ /*
+- * This function removes the work from the worker queue. Also it makes sure
+- * that it won't get queued later via the delayed work's timer.
++ * Make sure that the timer is neither set nor running and could
++ * not manipulate the work list_head any longer.
++ *
++ * The function is called under worker->lock. The lock is temporary
++ * released but the timer can't be set again in the meantime.
++ */
++static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
++                                            unsigned long *flags)
++{
++      struct kthread_delayed_work *dwork =
++              container_of(work, struct kthread_delayed_work, work);
++      struct kthread_worker *worker = work->worker;
++
++      /*
++       * del_timer_sync() must be called to make sure that the timer
++       * callback is not running. The lock must be temporary released
++       * to avoid a deadlock with the callback. In the meantime,
++       * any queuing is blocked by setting the canceling counter.
++       */
++      work->canceling++;
++      spin_unlock_irqrestore(&worker->lock, *flags);
++      del_timer_sync(&dwork->timer);
++      spin_lock_irqsave(&worker->lock, *flags);
++      work->canceling--;
++}
++
++/*
++ * This function removes the work from the worker queue.
++ *
++ * It is called under worker->lock. The caller must make sure that
++ * the timer used by delayed work is not running, e.g. by calling
++ * kthread_cancel_delayed_work_timer().
+  *
+  * The work might still be in use when this function finishes. See the
+  * current_work proceed by the worker.
+@@ -1019,28 +1049,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work);
+  * Return: %true if @work was pending and successfully canceled,
+  *    %false if @work was not pending
+  */
+-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
+-                                unsigned long *flags)
++static bool __kthread_cancel_work(struct kthread_work *work)
+ {
+-      /* Try to cancel the timer if exists. */
+-      if (is_dwork) {
+-              struct kthread_delayed_work *dwork =
+-                      container_of(work, struct kthread_delayed_work, work);
+-              struct kthread_worker *worker = work->worker;
+-
+-              /*
+-               * del_timer_sync() must be called to make sure that the timer
+-               * callback is not running. The lock must be temporary released
+-               * to avoid a deadlock with the callback. In the meantime,
+-               * any queuing is blocked by setting the canceling counter.
+-               */
+-              work->canceling++;
+-              spin_unlock_irqrestore(&worker->lock, *flags);
+-              del_timer_sync(&dwork->timer);
+-              spin_lock_irqsave(&worker->lock, *flags);
+-              work->canceling--;
+-      }
+-
+       /*
+        * Try to remove the work from a worker list. It might either
+        * be from worker->work_list or from worker->delayed_work_list.
+@@ -1093,11 +1103,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
+       /* Work must not be used with >1 worker, see kthread_queue_work() */
+       WARN_ON_ONCE(work->worker != worker);
+ 
+-      /* Do not fight with another command that is canceling this work. */
++      /*
++       * Temporary cancel the work but do not fight with another command
++       * that is canceling the work as well.
++       *
++       * It is a bit tricky because of possible races with another
++       * mod_delayed_work() and cancel_delayed_work() callers.
++       *
++       * The timer must be canceled first because worker->lock is released
++       * when doing so. But the work can be removed from the queue (list)
++       * only when it can be queued again so that the return value can
++       * be used for reference counting.
++       */
++      kthread_cancel_delayed_work_timer(work, &flags);
+       if (work->canceling)
+               goto out;
++      ret = __kthread_cancel_work(work);
+ 
+-      ret = __kthread_cancel_work(work, true, &flags);
+ fast_queue:
+       __kthread_queue_delayed_work(worker, dwork, delay);
+ out:
+@@ -1119,7 +1141,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
+       /* Work must not be used with >1 worker, see kthread_queue_work(). */
+       WARN_ON_ONCE(work->worker != worker);
+ 
+-      ret = __kthread_cancel_work(work, is_dwork, &flags);
++      if (is_dwork)
++              kthread_cancel_delayed_work_timer(work, &flags);
++
++      ret = __kthread_cancel_work(work);
+ 
+       if (worker->current_work != work)
+               goto out_fast;
+diff --git a/mm/huge_memory.c b/mm/huge_memory.c
+index 7c374c0fcf0c7..4400957d8e4e2 100644
+--- a/mm/huge_memory.c
++++ b/mm/huge_memory.c
+@@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker;
+ 
+ static atomic_t huge_zero_refcount;
+ struct page *huge_zero_page __read_mostly;
++unsigned long huge_zero_pfn __read_mostly = ~0UL;
+ 
+ bool transparent_hugepage_enabled(struct vm_area_struct *vma)
+ {
+@@ -93,6 +94,7 @@ retry:
+               __free_pages(zero_page, compound_order(zero_page));
+               goto retry;
+       }
++      WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
+ 
+       /* We take additional reference here. It will be put back by shrinker */
+       atomic_set(&huge_zero_refcount, 2);
+@@ -142,6 +144,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
+       if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
+               struct page *zero_page = xchg(&huge_zero_page, NULL);
+               BUG_ON(zero_page == NULL);
++              WRITE_ONCE(huge_zero_pfn, ~0UL);
+               __free_pages(zero_page, compound_order(zero_page));
+               return HPAGE_PMD_NR;
+       }
+@@ -2125,7 +2128,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+       count_vm_event(THP_SPLIT_PMD);
+ 
+       if (!vma_is_anonymous(vma)) {
+-              _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
++              old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+               /*
+                * We are going to unmap this huge page. So
+                * just go ahead and zap it
+@@ -2134,16 +2137,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
+                       zap_deposited_table(mm, pmd);
+               if (vma_is_dax(vma))
+                       return;
+-              page = pmd_page(_pmd);
+-              if (!PageDirty(page) && pmd_dirty(_pmd))
+-                      set_page_dirty(page);
+-              if (!PageReferenced(page) && pmd_young(_pmd))
+-                      SetPageReferenced(page);
+-              page_remove_rmap(page, true);
+-              put_page(page);
++              if (unlikely(is_pmd_migration_entry(old_pmd))) {
++                      swp_entry_t entry;
++
++                      entry = pmd_to_swp_entry(old_pmd);
++                      page = migration_entry_to_page(entry);
++              } else {
++                      page = pmd_page(old_pmd);
++                      if (!PageDirty(page) && pmd_dirty(old_pmd))
++                              set_page_dirty(page);
++                      if (!PageReferenced(page) && pmd_young(old_pmd))
++                              SetPageReferenced(page);
++                      page_remove_rmap(page, true);
++                      put_page(page);
++              }
+               add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
+               return;
+-      } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
++      }
++
++      if (is_huge_zero_pmd(*pmd)) {
+               /*
+                * FIXME: Do we want to invalidate secondary mmu by calling
+                * mmu_notifier_invalidate_range() see comments below inside
+@@ -2418,16 +2430,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
+ static void unmap_page(struct page *page)
+ {
+       enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
+-              TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+-      bool unmap_success;
++              TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;
+ 
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+ 
+       if (PageAnon(page))
+               ttu_flags |= TTU_SPLIT_FREEZE;
+ 
+-      unmap_success = try_to_unmap(page, ttu_flags);
+-      VM_BUG_ON_PAGE(!unmap_success, page);
++      try_to_unmap(page, ttu_flags);
++
++      VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
+ }
+ 
+ static void remap_page(struct page *page)
+@@ -2686,7 +2698,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+       struct anon_vma *anon_vma = NULL;
+       struct address_space *mapping = NULL;
+-      int count, mapcount, extra_pins, ret;
++      int extra_pins, ret;
+       bool mlocked;
+       unsigned long flags;
+       pgoff_t end;
+@@ -2748,7 +2760,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 
+       mlocked = PageMlocked(page);
+       unmap_page(head);
+-      VM_BUG_ON_PAGE(compound_mapcount(head), head);
+ 
+       /* Make sure the page is not on per-CPU pagevec as it takes pin */
+       if (mlocked)
+@@ -2774,9 +2785,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+ 
+       /* Prevent deferred_split_scan() touching ->_refcount */
+       spin_lock(&pgdata->split_queue_lock);
+-      count = page_count(head);
+-      mapcount = total_mapcount(head);
+-      if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
++      if (page_ref_freeze(head, 1 + extra_pins)) {
+               if (!list_empty(page_deferred_list(head))) {
+                       pgdata->split_queue_len--;
+                       list_del(page_deferred_list(head));
+@@ -2792,16 +2801,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
+               } else
+                       ret = 0;
+       } else {
+-              if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
+-                      pr_alert("total_mapcount: %u, page_count(): %u\n",
+-                                      mapcount, count);
+-                      if (PageTail(page))
+-                              dump_page(head, NULL);
+-                      dump_page(page, "total_mapcount(head) > 0");
+-                      BUG();
+-              }
+               spin_unlock(&pgdata->split_queue_lock);
+-fail:         if (mapping)
++fail:
++              if (mapping)
+                       xa_unlock(&mapping->i_pages);
+               spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+               remap_page(head);
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index c69f12e4c1499..ebcf26bc4cd4b 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -1391,15 +1391,12 @@ int PageHeadHuge(struct page *page_head)
+       return get_compound_page_dtor(page_head) == free_huge_page;
+ }
+ 
+-pgoff_t __basepage_index(struct page *page)
++pgoff_t hugetlb_basepage_index(struct page *page)
+ {
+       struct page *page_head = compound_head(page);
+       pgoff_t index = page_index(page_head);
+       unsigned long compound_idx;
+ 
+-      if (!PageHuge(page_head))
+-              return page_index(page);
+-
+       if (compound_order(page_head) >= MAX_ORDER)
+               compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
+       else
+diff --git a/mm/internal.h b/mm/internal.h
+index 397183c8fe47b..3a2e973138d36 100644
+--- a/mm/internal.h
++++ b/mm/internal.h
+@@ -331,27 +331,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
+ extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
+ 
+ /*
+- * At what user virtual address is page expected in @vma?
++ * At what user virtual address is page expected in vma?
++ * Returns -EFAULT if all of the page is outside the range of vma.
++ * If page is a compound head, the entire compound page is considered.
+  */
+ static inline unsigned long
+-__vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address(struct page *page, struct vm_area_struct *vma)
+ {
+-      pgoff_t pgoff = page_to_pgoff(page);
+-      return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++      pgoff_t pgoff;
++      unsigned long address;
++
++      VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
++      pgoff = page_to_pgoff(page);
++      if (pgoff >= vma->vm_pgoff) {
++              address = vma->vm_start +
++                      ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++              /* Check for address beyond vma (or wrapped through 0?) */
++              if (address < vma->vm_start || address >= vma->vm_end)
++                      address = -EFAULT;
++      } else if (PageHead(page) &&
++                 pgoff + (1UL << compound_order(page)) - 1 >= vma->vm_pgoff) {
++              /* Test above avoids possibility of wrap to 0 on 32-bit */
++              address = vma->vm_start;
++      } else {
++              address = -EFAULT;
++      }
++      return address;
+ }
+ 
++/*
++ * Then at what user virtual address will none of the page be found in vma?
++ * Assumes that vma_address() already returned a good starting address.
++ * If page is a compound head, the entire compound page is considered.
++ */
+ static inline unsigned long
+-vma_address(struct page *page, struct vm_area_struct *vma)
++vma_address_end(struct page *page, struct vm_area_struct *vma)
+ {
+-      unsigned long start, end;
+-
+-      start = __vma_address(page, vma);
+-      end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+-
+-      /* page should be within @vma mapping range */
+-      VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
+-
+-      return max(start, vma->vm_start);
++      pgoff_t pgoff;
++      unsigned long address;
++
++      VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
++      pgoff = page_to_pgoff(page) + (1UL << compound_order(page));
++      address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
++      /* Check for address beyond vma (or wrapped through 0?) */
++      if (address < vma->vm_start || address > vma->vm_end)
++              address = vma->vm_end;
++      return address;
+ }
+ 
+ #else /* !CONFIG_MMU */
+diff --git a/mm/memory.c b/mm/memory.c
+index c2011c51f15de..49b546cdce0d2 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -1439,7 +1439,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
+                       else if (zap_huge_pmd(tlb, vma, pmd, addr))
+                               goto next;
+                       /* fall through */
++              } else if (details && details->single_page &&
++                         PageTransCompound(details->single_page) &&
++                         next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
++                      spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
++                      /*
++                       * Take and drop THP pmd lock so that we cannot return
++                       * prematurely, while zap_huge_pmd() has cleared *pmd,
++                       * but not yet decremented compound_mapcount().
++                       */
++                      spin_unlock(ptl);
+               }
++
+               /*
+                * Here there can be other concurrent MADV_DONTNEED or
+                * trans huge page faults running, and if the pmd is
+@@ -2924,6 +2935,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
+       }
+ }
+ 
++/**
++ * unmap_mapping_page() - Unmap single page from processes.
++ * @page: The locked page to be unmapped.
++ *
++ * Unmap this page from any userspace process which still has it mmaped.
++ * Typically, for efficiency, the range of nearby pages has already been
++ * unmapped by unmap_mapping_pages() or unmap_mapping_range().  But once
++ * truncation or invalidation holds the lock on a page, it may find that
++ * the page has been remapped again: and then uses unmap_mapping_page()
++ * to unmap it finally.
++ */
++void unmap_mapping_page(struct page *page)
++{
++      struct address_space *mapping = page->mapping;
++      struct zap_details details = { };
++
++      VM_BUG_ON(!PageLocked(page));
++      VM_BUG_ON(PageTail(page));
++
++      details.check_mapping = mapping;
++      details.first_index = page->index;
++      details.last_index = page->index + hpage_nr_pages(page) - 1;
++      details.single_page = page;
++
++      i_mmap_lock_write(mapping);
++      if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
++              unmap_mapping_range_tree(&mapping->i_mmap, &details);
++      i_mmap_unlock_write(mapping);
++}
++
+ /**
+  * unmap_mapping_pages() - Unmap pages from processes.
+  * @mapping: The address space containing pages to be unmapped.
+diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
+index 11df03e71288c..edca786093187 100644
+--- a/mm/page_vma_mapped.c
++++ b/mm/page_vma_mapped.c
+@@ -111,6 +111,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
+       return pfn_in_hpage(pvmw->page, pfn);
+ }
+ 
++static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
++{
++      pvmw->address = (pvmw->address + size) & ~(size - 1);
++      if (!pvmw->address)
++              pvmw->address = ULONG_MAX;
++}
++
+ /**
+  * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
+  * @pvmw->address
+@@ -139,6 +146,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+ {
+       struct mm_struct *mm = pvmw->vma->vm_mm;
+       struct page *page = pvmw->page;
++      unsigned long end;
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+@@ -148,10 +156,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+       if (pvmw->pmd && !pvmw->pte)
+               return not_found(pvmw);
+ 
+-      if (pvmw->pte)
+-              goto next_pte;
++      if (unlikely(PageHuge(page))) {
++              /* The only possible mapping was handled on last iteration */
++              if (pvmw->pte)
++                      return not_found(pvmw);
+ 
+-      if (unlikely(PageHuge(pvmw->page))) {
+               /* when pud is not present, pte will be NULL */
+               pvmw->pte = huge_pte_offset(mm, pvmw->address,
+                                           PAGE_SIZE << compound_order(page));
+@@ -164,78 +173,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
+                       return not_found(pvmw);
+               return true;
+       }
+-restart:
+-      pgd = pgd_offset(mm, pvmw->address);
+-      if (!pgd_present(*pgd))
+-              return false;
+-      p4d = p4d_offset(pgd, pvmw->address);
+-      if (!p4d_present(*p4d))
+-              return false;
+-      pud = pud_offset(p4d, pvmw->address);
+-      if (!pud_present(*pud))
+-              return false;
+-      pvmw->pmd = pmd_offset(pud, pvmw->address);
++
+       /*
+-       * Make sure the pmd value isn't cached in a register by the
+-       * compiler and used as a stale value after we've observed a
+-       * subsequent update.
++       * Seek to next pte only makes sense for THP.
++       * But more important than that optimization, is to filter out
++       * any PageKsm page: whose page->index misleads vma_address()
++       * and vma_address_end() to disaster.
+        */
+-      pmde = READ_ONCE(*pvmw->pmd);
+-      if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+-              pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+-              if (likely(pmd_trans_huge(*pvmw->pmd))) {
+-                      if (pvmw->flags & PVMW_MIGRATION)
+-                              return not_found(pvmw);
+-                      if (pmd_page(*pvmw->pmd) != page)
+-                              return not_found(pvmw);
+-                      return true;
+-              } else if (!pmd_present(*pvmw->pmd)) {
+-                      if (thp_migration_supported()) {
+-                              if (!(pvmw->flags & PVMW_MIGRATION))
++      end = PageTransCompound(page) ?
++              vma_address_end(page, pvmw->vma) :
++              pvmw->address + PAGE_SIZE;
++      if (pvmw->pte)
++              goto next_pte;
++restart:
++      do {
++              pgd = pgd_offset(mm, pvmw->address);
++              if (!pgd_present(*pgd)) {
++                      step_forward(pvmw, PGDIR_SIZE);
++                      continue;
++              }
++              p4d = p4d_offset(pgd, pvmw->address);
++              if (!p4d_present(*p4d)) {
++                      step_forward(pvmw, P4D_SIZE);
++                      continue;
++              }
++              pud = pud_offset(p4d, pvmw->address);
++              if (!pud_present(*pud)) {
++                      step_forward(pvmw, PUD_SIZE);
++                      continue;
++              }
++
++              pvmw->pmd = pmd_offset(pud, pvmw->address);
++              /*
++               * Make sure the pmd value isn't cached in a register by the
++               * compiler and used as a stale value after we've observed a
++               * subsequent update.
++               */
++              pmde = READ_ONCE(*pvmw->pmd);
++
++              if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
++                      pvmw->ptl = pmd_lock(mm, pvmw->pmd);
++                      pmde = *pvmw->pmd;
++                      if (likely(pmd_trans_huge(pmde))) {
++                              if (pvmw->flags & PVMW_MIGRATION)
+                                       return not_found(pvmw);
+-                              if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
+-                                      swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
++                              if (pmd_page(pmde) != page)
++                                      return not_found(pvmw);
++                              return true;
++                      }
++                      if (!pmd_present(pmde)) {
++                              swp_entry_t entry;
+ 
+-                                      if (migration_entry_to_page(entry) != page)
+-                                              return not_found(pvmw);
+-                                      return true;
+-                              }
++                              if (!thp_migration_supported() ||
++                                  !(pvmw->flags & PVMW_MIGRATION))
++                                      return not_found(pvmw);
++                              entry = pmd_to_swp_entry(pmde);
++                              if (!is_migration_entry(entry) ||
++                                  migration_entry_to_page(entry) != page)
++                                      return not_found(pvmw);
++                              return true;
+                       }
+-                      return not_found(pvmw);
+-              } else {
+                       /* THP pmd was split under us: handle on pte level */
+                       spin_unlock(pvmw->ptl);
+                       pvmw->ptl = NULL;
++              } else if (!pmd_present(pmde)) {
++                      /*
++                       * If PVMW_SYNC, take and drop THP pmd lock so that we
++                       * cannot return prematurely, while zap_huge_pmd() has
++                       * cleared *pmd but not decremented compound_mapcount().
++                       */
++                      if ((pvmw->flags & PVMW_SYNC) &&
++                          PageTransCompound(page)) {
++                              spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
++
++                              spin_unlock(ptl);
++                      }
++                      step_forward(pvmw, PMD_SIZE);
++                      continue;
+               }
+-      } else if (!pmd_present(pmde)) {
+-              return false;
+-      }
+-      if (!map_pte(pvmw))
+-              goto next_pte;
+-      while (1) {
++              if (!map_pte(pvmw))
++                      goto next_pte;
++this_pte:
+               if (check_pte(pvmw))
+                       return true;
+ next_pte:
+-              /* Seek to next pte only makes sense for THP */
+-              if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+-                      return not_found(pvmw);
+               do {
+                       pvmw->address += PAGE_SIZE;
+-                      if (pvmw->address >= pvmw->vma->vm_end ||
+-                          pvmw->address >=
+-                                      __vma_address(pvmw->page, pvmw->vma) +
+-                                      hpage_nr_pages(pvmw->page) * PAGE_SIZE)
++                      if (pvmw->address >= end)
+                               return not_found(pvmw);
+                       /* Did we cross page table boundary? */
+-                      if (pvmw->address % PMD_SIZE == 0) {
+-                              pte_unmap(pvmw->pte);
++                      if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
+                               if (pvmw->ptl) {
+                                       spin_unlock(pvmw->ptl);
+                                       pvmw->ptl = NULL;
+                               }
++                              pte_unmap(pvmw->pte);
++                              pvmw->pte = NULL;
+                               goto restart;
+-                      } else {
+-                              pvmw->pte++;
++                      }
++                      pvmw->pte++;
++                      if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
++                              pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
++                              spin_lock(pvmw->ptl);
+                       }
+               } while (pte_none(*pvmw->pte));
+ 
+@@ -243,7 +282,10 @@ next_pte:
+                       pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+                       spin_lock(pvmw->ptl);
+               }
+-      }
++              goto this_pte;
++      } while (pvmw->address < end);
++
++      return false;
+ }
+ 
+ /**
+@@ -262,14 +304,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
+               .vma = vma,
+               .flags = PVMW_SYNC,
+       };
+-      unsigned long start, end;
+-
+-      start = __vma_address(page, vma);
+-      end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
+ 
+-      if (unlikely(end < vma->vm_start || start >= vma->vm_end))
++      pvmw.address = vma_address(page, vma);
++      if (pvmw.address == -EFAULT)
+               return 0;
+-      pvmw.address = max(start, vma->vm_start);
+       if (!page_vma_mapped_walk(&pvmw))
+               return 0;
+       page_vma_mapped_walk_done(&pvmw);
+diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
+index cf2af04b34b97..36770fcdc3582 100644
+--- a/mm/pgtable-generic.c
++++ b/mm/pgtable-generic.c
+@@ -125,8 +125,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
+ {
+       pmd_t pmd;
+       VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+-      VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+-                         !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
++      VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
++                         !pmd_devmap(*pmdp));
+       pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
+       flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+       return pmd;
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 1bd94ea62f7f1..699f445e3e78c 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -686,7 +686,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
+  */
+ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+ {
+-      unsigned long address;
+       if (PageAnon(page)) {
+               struct anon_vma *page__anon_vma = page_anon_vma(page);
+               /*
+@@ -696,15 +695,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
+               if (!vma->anon_vma || !page__anon_vma ||
+                   vma->anon_vma->root != page__anon_vma->root)
+                       return -EFAULT;
+-      } else if (page->mapping) {
+-              if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
+-                      return -EFAULT;
+-      } else
++      } else if (!vma->vm_file) {
+               return -EFAULT;
+-      address = __vma_address(page, vma);
+-      if (unlikely(address < vma->vm_start || address >= vma->vm_end))
++      } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
+               return -EFAULT;
+-      return address;
++      }
++
++      return vma_address(page, vma);
+ }
+ 
+ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
+@@ -896,7 +893,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
+        * We have to assume the worse case ie pmd for invalidation. Note that
+        * the page can not be free from this function.
+        */
+-      end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
++      end = vma_address_end(page, vma);
+       mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
+ 
+       while (page_vma_mapped_walk(&pvmw)) {
+@@ -1348,6 +1345,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+       unsigned long start = address, end;
+       enum ttu_flags flags = (enum ttu_flags)arg;
+ 
++      /*
++       * When racing against e.g. zap_pte_range() on another cpu,
++       * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++       * try_to_unmap() may return false when it is about to become true,
++       * if page table locking is skipped: use TTU_SYNC to wait for that.
++       */
++      if (flags & TTU_SYNC)
++              pvmw.flags = PVMW_SYNC;
++
+       /* munlock has nothing to gain from examining un-locked vmas */
+       if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
+               return true;
+@@ -1369,7 +1375,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+        * Note that the page can not be free in this function as call of
+        * try_to_unmap() must hold a reference on the page.
+        */
+-      end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
++      end = PageKsm(page) ?
++                      address + PAGE_SIZE : vma_address_end(page, vma);
+       if (PageHuge(page)) {
+               /*
+                * If sharing is possible, start and end will be adjusted
+@@ -1682,9 +1689,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
+       return is_vma_temporary_stack(vma);
+ }
+ 
+-static int page_mapcount_is_zero(struct page *page)
++static int page_not_mapped(struct page *page)
+ {
+-      return !total_mapcount(page);
++      return !page_mapped(page);
+ }
+ 
+ /**
+@@ -1702,7 +1709,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)flags,
+-              .done = page_mapcount_is_zero,
++              .done = page_not_mapped,
+               .anon_lock = page_lock_anon_vma_read,
+       };
+ 
+@@ -1723,14 +1730,15 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
+       else
+               rmap_walk(page, &rwc);
+ 
+-      return !page_mapcount(page) ? true : false;
++      /*
++       * When racing against e.g. zap_pte_range() on another cpu,
++       * in between its ptep_get_and_clear_full() and page_remove_rmap(),
++       * try_to_unmap() may return false when it is about to become true,
++       * if page table locking is skipped: use TTU_SYNC to wait for that.
++       */
++      return !page_mapcount(page);
+ }
+ 
+-static int page_not_mapped(struct page *page)
+-{
+-      return !page_mapped(page);
+-};
+-
+ /**
+  * try_to_munlock - try to munlock a page
+  * @page: the page to be munlocked
+@@ -1825,6 +1833,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
+               struct vm_area_struct *vma = avc->vma;
+               unsigned long address = vma_address(page, vma);
+ 
++              VM_BUG_ON_VMA(address == -EFAULT, vma);
+               cond_resched();
+ 
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+@@ -1879,6 +1888,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
+                       pgoff_start, pgoff_end) {
+               unsigned long address = vma_address(page, vma);
+ 
++              VM_BUG_ON_VMA(address == -EFAULT, vma);
+               cond_resched();
+ 
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+diff --git a/mm/truncate.c b/mm/truncate.c
+index 71b65aab80775..43c73db17a0a6 100644
+--- a/mm/truncate.c
++++ b/mm/truncate.c
+@@ -175,13 +175,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
+  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
+  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
+  */
+-static void
+-truncate_cleanup_page(struct address_space *mapping, struct page *page)
++static void truncate_cleanup_page(struct page *page)
+ {
+-      if (page_mapped(page)) {
+-              pgoff_t nr = PageTransHuge(page) ? HPAGE_PMD_NR : 1;
+-              unmap_mapping_pages(mapping, page->index, nr, false);
+-      }
++      if (page_mapped(page))
++              unmap_mapping_page(page);
+ 
+       if (page_has_private(page))
+               do_invalidatepage(page, 0, PAGE_SIZE);
+@@ -226,7 +223,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
+       if (page->mapping != mapping)
+               return -EIO;
+ 
+-      truncate_cleanup_page(mapping, page);
++      truncate_cleanup_page(page);
+       delete_from_page_cache(page);
+       return 0;
+ }
+@@ -364,7 +361,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
+                       pagevec_add(&locked_pvec, page);
+               }
+               for (i = 0; i < pagevec_count(&locked_pvec); i++)
+-                      truncate_cleanup_page(mapping, locked_pvec.pages[i]);
++                      truncate_cleanup_page(locked_pvec.pages[i]);
+               delete_from_page_cache_batch(mapping, &locked_pvec);
+               for (i = 0; i < pagevec_count(&locked_pvec); i++)
+                       unlock_page(locked_pvec.pages[i]);
+@@ -703,6 +700,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
+                               continue;
+                       }
+ 
++                      if (!did_range_unmap && page_mapped(page)) {
++                              /*
++                               * If page is mapped, before taking its lock,
++                               * zap the rest of the file in one hit.
++                               */
++                              unmap_mapping_pages(mapping, index,
++                                              (1 + end - index), false);
++                              did_range_unmap = 1;
++                      }
++
+                       lock_page(page);
+                       WARN_ON(page_to_index(page) != index);
+                       if (page->mapping != mapping) {
+@@ -710,23 +717,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
+                               continue;
+                       }
+                       wait_on_page_writeback(page);
+-                      if (page_mapped(page)) {
+-                              if (!did_range_unmap) {
+-                                      /*
+-                                       * Zap the rest of the file in one hit.
+-                                       */
+-                                      unmap_mapping_pages(mapping, index,
+-                                              (1 + end - index), false);
+-                                      did_range_unmap = 1;
+-                              } else {
+-                                      /*
+-                                       * Just zap this page
+-                                       */
+-                                      unmap_mapping_pages(mapping, index,
+-                                                              1, false);
+-                              }
+-                      }
++
++                      if (page_mapped(page))
++                              unmap_mapping_page(page);
+                       BUG_ON(page_mapped(page));
++
+                       ret2 = do_launder_page(mapping, page);
+                       if (ret2 == 0) {
+                               if (!invalidate_complete_page2(mapping, page))

Reply via email to