On Fri, Sep 02, 2022 at 06:00:25PM -0500, Scott Cheloha wrote:
> dv@ suggested coming to the list to request testing for the pvclock(4)
> driver.  Attached is a patch that corrects several bugs.  Most of
> these changes will only matter in the non-TSC_STABLE case on a
> multiprocessor VM.
>
> Ideally, nothing should break.
>
> - pvclock yields a 64-bit value.  The BSD timecounter layer can only
>   use the lower 32 bits, but internally we need to track the full
>   64-bit value to allow comparisons with the full value in the
>   non-TSC_STABLE case.  So make pvclock_lastcount a 64-bit quantity.
>
> - In pvclock_get_timecount(), move rdtsc() up into the lockless read
>   loop to get a more accurate timestamp.
>
> - In pvclock_get_timecount(), use rdtsc_lfence(), not rdtsc().
>
> - In pvclock_get_timecount(), check that our TSC value doesn't predate
>   ti->ti_tsc_timestamp, otherwise we will produce an enormous value.
>
> - In pvclock_get_timecount(), update pvclock_lastcount in the
>   non-TSC_STABLE case with more care.  On amd64 we can do this with an
>   atomic_cas_ulong(9) loop because u_long is 64 bits.  On i386 we need
>   to introduce a mutex to protect our comparison and read/write.
>

I tested on an 8 core ESXi VM, nothing broke. But it doesn't even
have pvclock as a timesource, so I'm not sure the test is meaningful or
useful.

-ml

> Index: pvclock.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pv/pvclock.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 pvclock.c
> --- pvclock.c 5 Nov 2021 11:38:29 -0000       1.8
> +++ pvclock.c 2 Sep 2022 22:54:08 -0000
> @@ -27,6 +27,10 @@
>  #include <sys/timeout.h>
>  #include <sys/malloc.h>
>  #include <sys/atomic.h>
> +#include <sys/stdint.h>
> +#if defined(__i386__)
> +#include <sys/mutex.h>
> +#endif
>
>  #include <machine/cpu.h>
>  #include <machine/atomic.h>
> @@ -35,7 +39,12 @@
>  #include <dev/pv/pvvar.h>
>  #include <dev/pv/pvreg.h>
>
> -uint pvclock_lastcount;
> +#if defined(__amd64__)
> +volatile u_long pvclock_lastcount;
> +#elif defined(__i386__)
> +struct mutex pvclock_mtx = MUTEX_INITIALIZER(IPL_HIGH);
> +uint64_t pvclock_lastcount;
> +#endif
>
>  struct pvclock_softc {
>       struct device            sc_dev;
> @@ -212,7 +221,7 @@ pvclock_get_timecount(struct timecounter
>  {
>       struct pvclock_softc            *sc = tc->tc_priv;
>       struct pvclock_time_info        *ti;
> -     uint64_t                         tsc_timestamp, system_time, delta, ctr;
> +     uint64_t                         system_time, delta, ctr, tsc;
>       uint32_t                         version, mul_frac;
>       int8_t                           shift;
>       uint8_t                          flags;
> @@ -220,8 +229,12 @@ pvclock_get_timecount(struct timecounter
>       ti = sc->sc_time;
>       do {
>               version = pvclock_read_begin(ti);
> +             tsc = rdtsc_lfence();
> +             if (ti->ti_tsc_timestamp < tsc)
> +                     delta = tsc - ti->ti_tsc_timestamp;
> +             else
> +                     delta = 0;
>               system_time = ti->ti_system_time;
> -             tsc_timestamp = ti->ti_tsc_timestamp;
>               mul_frac = ti->ti_tsc_to_system_mul;
>               shift = ti->ti_tsc_shift;
>               flags = ti->ti_flags;
> @@ -231,7 +244,6 @@ pvclock_get_timecount(struct timecounter
>        * The algorithm is described in
>        * linux/Documentation/virtual/kvm/msr.txt
>        */
> -     delta = rdtsc() - tsc_timestamp;
>       if (shift < 0)
>               delta >>= -shift;
>       else
> @@ -241,10 +253,20 @@ pvclock_get_timecount(struct timecounter
>       if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
>               return (ctr);
>
> -     if (ctr < pvclock_lastcount)
> -             return (pvclock_lastcount);
> -
> -     atomic_swap_uint(&pvclock_lastcount, ctr);
> -
> +#if defined(__amd64__)
> +     u_long last;
> +     do {
> +             last = pvclock_lastcount;
> +             if (ctr < last)
> +                     return last;
> +     } while (atomic_cas_ulong(&pvclock_lastcount, last, ctr) != last);
> +#elif defined(__i386__)
> +     mtx_enter(&pvclock_mtx);
> +     if (pvclock_lastcount < ctr)
> +             pvclock_lastcount = ctr;
> +     else
> +             ctr = pvclock_lastcount;
> +     mtx_leave(&pvclock_mtx);
> +#endif
>       return (ctr);
>  }

Reply via email to