On Fri, Sep 02, 2022 at 06:00:25PM -0500, Scott Cheloha wrote: > dv@ suggested coming to the list to request testing for the pvclock(4) > driver. Attached is a patch that corrects several bugs. Most of > these changes will only matter in the non-TSC_STABLE case on a > multiprocessor VM. > > Ideally, nothing should break. > > - pvclock yields a 64-bit value. The BSD timecounter layer can only > use the lower 32 bits, but internally we need to track the full > 64-bit value to allow comparisons with the full value in the > non-TSC_STABLE case. So make pvclock_lastcount a 64-bit quantity. > > - In pvclock_get_timecount(), move rdtsc() up into the lockless read > loop to get a more accurate timestamp. > > - In pvclock_get_timecount(), use rdtsc_lfence(), not rdtsc(). > > - In pvclock_get_timecount(), check that our TSC value doesn't predate > ti->ti_tsc_timestamp, otherwise we will produce an enormous value. > > - In pvclock_get_timecount(), update pvclock_lastcount in the > non-TSC_STABLE case with more care. On amd64 we can do this with an > atomic_cas_ulong(9) loop because u_long is 64 bits. On i386 we need > to introduce a mutex to protect our comparison and read/write. >
I tested on an 8 core ESXi VM, nothing broke. But it doesn't even have pvclock as a timesource, so I'm not sure the test is meaningful or useful. -ml > Index: pvclock.c > =================================================================== > RCS file: /cvs/src/sys/dev/pv/pvclock.c,v > retrieving revision 1.8 > diff -u -p -r1.8 pvclock.c > --- pvclock.c 5 Nov 2021 11:38:29 -0000 1.8 > +++ pvclock.c 2 Sep 2022 22:54:08 -0000 > @@ -27,6 +27,10 @@ > #include <sys/timeout.h> > #include <sys/malloc.h> > #include <sys/atomic.h> > +#include <sys/stdint.h> > +#if defined(__i386__) > +#include <sys/mutex.h> > +#endif > > #include <machine/cpu.h> > #include <machine/atomic.h> > @@ -35,7 +39,12 @@ > #include <dev/pv/pvvar.h> > #include <dev/pv/pvreg.h> > > -uint pvclock_lastcount; > +#if defined(__amd64__) > +volatile u_long pvclock_lastcount; > +#elif defined(__i386__) > +struct mutex pvclock_mtx = MUTEX_INITIALIZER(IPL_HIGH); > +uint64_t pvclock_lastcount; > +#endif > > struct pvclock_softc { > struct device sc_dev; > @@ -212,7 +221,7 @@ pvclock_get_timecount(struct timecounter > { > struct pvclock_softc *sc = tc->tc_priv; > struct pvclock_time_info *ti; > - uint64_t tsc_timestamp, system_time, delta, ctr; > + uint64_t system_time, delta, ctr, tsc; > uint32_t version, mul_frac; > int8_t shift; > uint8_t flags; > @@ -220,8 +229,12 @@ pvclock_get_timecount(struct timecounter > ti = sc->sc_time; > do { > version = pvclock_read_begin(ti); > + tsc = rdtsc_lfence(); > + if (ti->ti_tsc_timestamp < tsc) > + delta = tsc - ti->ti_tsc_timestamp; > + else > + delta = 0; > system_time = ti->ti_system_time; > - tsc_timestamp = ti->ti_tsc_timestamp; > mul_frac = ti->ti_tsc_to_system_mul; > shift = ti->ti_tsc_shift; > flags = ti->ti_flags; > @@ -231,7 +244,6 @@ pvclock_get_timecount(struct timecounter > * The algorithm is described in > * linux/Documentation/virtual/kvm/msr.txt > */ > - delta = rdtsc() - tsc_timestamp; > if (shift < 0) > delta >>= -shift; > else > @@ -241,10 +253,20 @@ pvclock_get_timecount(struct timecounter > if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0) > return (ctr); > > - if (ctr < pvclock_lastcount) > - return (pvclock_lastcount); > - > - atomic_swap_uint(&pvclock_lastcount, ctr); > - > +#if defined(__amd64__) > + u_long last; > + do { > + last = pvclock_lastcount; > + if (ctr < last) > + return last; > + } while (atomic_cas_ulong(&pvclock_lastcount, last, ctr) != last); > +#elif defined(__i386__) > + mtx_enter(&pvclock_mtx); > + if (pvclock_lastcount < ctr) > + pvclock_lastcount = ctr; > + else > + ctr = pvclock_lastcount; > + mtx_leave(&pvclock_mtx); > +#endif > return (ctr); > }