dv@ suggested coming to the list to request testing for the pvclock(4)
driver.  Attached is a patch that corrects several bugs.  Most of
these changes will only matter in the non-TSC_STABLE case on a
multiprocessor VM.

Ideally, nothing should break.

- pvclock yields a 64-bit value.  The BSD timecounter layer can only
  use the lower 32 bits, but internally we need to track the full
  64-bit value so we can compare successive readings in the
  non-TSC_STABLE case.  So make pvclock_lastcount a 64-bit quantity;
  the 32-bit masking is sketched in the first snippet after this list.

- In pvclock_get_timecount(), move the rdtsc() call up into the
  lockless read loop so the TSC is read in the same pass as the
  parameters it is compared against, giving a more accurate timestamp.

- In pvclock_get_timecount(), use rdtsc_lfence(), not rdtsc(), so the
  TSC read cannot be reordered ahead of the other loads in the read
  loop; see the second snippet after this list.

- In pvclock_get_timecount(), check that our TSC value doesn't predate
  ti->ti_tsc_timestamp, otherwise the unsigned subtraction wraps around
  and we produce an enormous delta.

- In pvclock_get_timecount(), update pvclock_lastcount in the
  non-TSC_STABLE case with more care.  On amd64 we can do this with an
  atomic_cas_ulong(9) loop because u_long is 64 bits.  On i386 u_long
  is only 32 bits, so we need to introduce a mutex to protect the
  comparison and update.
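
For context, the timecounter layer only keeps tc_counter_mask bits of
whatever tc_get_timecount() returns.  Roughly (a sketch from memory,
not the verbatim kern_tc.c code), the consumer does:

static u_int
tc_delta(struct timehands *th)
{
        struct timecounter *tc = th->th_counter;

        /* Only the low tc_counter_mask bits of the count survive. */
        return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
            tc->tc_counter_mask);
}

So truncation to 32 bits is fine for the timecounter itself, but the
monotonicity check in the non-TSC_STABLE case needs the full 64-bit
values, hence the wider pvclock_lastcount.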
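
For reference, rdtsc_lfence() just puts an LFENCE in front of the TSC
read so it cannot execute before the loads that precede it; something
like this (a sketch of the amd64 cpufunc.h inline, from memory):

static __inline uint64_t
rdtsc_lfence(void)
{
        uint32_t hi, lo;

        /* LFENCE orders the TSC read after all earlier loads. */
        __asm volatile("lfence; rdtsc" : "=d" (hi), "=a" (lo));
        return (((uint64_t)hi << 32) | lo);
}

A plain rdtsc() has no such fence, so it can be reordered relative to
the reads of the pvclock parameters it is meant to pair with.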

Index: pvclock.c
===================================================================
RCS file: /cvs/src/sys/dev/pv/pvclock.c,v
retrieving revision 1.8
diff -u -p -r1.8 pvclock.c
--- pvclock.c   5 Nov 2021 11:38:29 -0000       1.8
+++ pvclock.c   2 Sep 2022 22:54:08 -0000
@@ -27,6 +27,10 @@
 #include <sys/timeout.h>
 #include <sys/malloc.h>
 #include <sys/atomic.h>
+#include <sys/stdint.h>
+#if defined(__i386__)
+#include <sys/mutex.h>
+#endif
 
 #include <machine/cpu.h>
 #include <machine/atomic.h>
@@ -35,7 +39,12 @@
 #include <dev/pv/pvvar.h>
 #include <dev/pv/pvreg.h>
 
-uint pvclock_lastcount;
+#if defined(__amd64__)
+volatile u_long pvclock_lastcount;
+#elif defined(__i386__)
+struct mutex pvclock_mtx = MUTEX_INITIALIZER(IPL_HIGH);
+uint64_t pvclock_lastcount;
+#endif
 
 struct pvclock_softc {
        struct device            sc_dev;
@@ -212,7 +221,7 @@ pvclock_get_timecount(struct timecounter
 {
        struct pvclock_softc            *sc = tc->tc_priv;
        struct pvclock_time_info        *ti;
-       uint64_t                         tsc_timestamp, system_time, delta, ctr;
+       uint64_t                         system_time, delta, ctr, tsc;
        uint32_t                         version, mul_frac;
        int8_t                           shift;
        uint8_t                          flags;
@@ -220,8 +229,12 @@ pvclock_get_timecount(struct timecounter
        ti = sc->sc_time;
        do {
                version = pvclock_read_begin(ti);
+               tsc = rdtsc_lfence();
+               if (ti->ti_tsc_timestamp < tsc)
+                       delta = tsc - ti->ti_tsc_timestamp;
+               else
+                       delta = 0;
                system_time = ti->ti_system_time;
-               tsc_timestamp = ti->ti_tsc_timestamp;
                mul_frac = ti->ti_tsc_to_system_mul;
                shift = ti->ti_tsc_shift;
                flags = ti->ti_flags;
@@ -231,7 +244,6 @@ pvclock_get_timecount(struct timecounter
         * The algorithm is described in
         * linux/Documentation/virtual/kvm/msr.txt
         */
-       delta = rdtsc() - tsc_timestamp;
        if (shift < 0)
                delta >>= -shift;
        else
@@ -241,10 +253,20 @@ pvclock_get_timecount(struct timecounter
        if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
                return (ctr);
 
-       if (ctr < pvclock_lastcount)
-               return (pvclock_lastcount);
-
-       atomic_swap_uint(&pvclock_lastcount, ctr);
-
+#if defined(__amd64__)
+       u_long last;
+       do {
+               last = pvclock_lastcount;
+               if (ctr < last)
+                       return (last);
+       } while (atomic_cas_ulong(&pvclock_lastcount, last, ctr) != last);
+#elif defined(__i386__)
+       mtx_enter(&pvclock_mtx);
+       if (pvclock_lastcount < ctr)
+               pvclock_lastcount = ctr;
+       else
+               ctr = pvclock_lastcount;
+       mtx_leave(&pvclock_mtx);
+#endif
        return (ctr);
 }
