On Thu, Nov 22, 2018 at 04:37:49PM +0100, Reyk Floeter wrote:
> On Mon, Nov 19, 2018 at 01:12:46PM +0100, Reyk Floeter wrote:
> > the attached diff is another attempt at implementing a pvclock(4)
> > guest driver.  This improves the clock on KVM and replaces the need
> > for using the VM-expensive acpihpet(4).
> > 
> 
> So far I only got positive reports.  Where are the problems? ;)
> 
> Otherwise: OK?
> 
> Reyk
> 

Reads ok. One question - you mention in pvclock.c that this is supported
on i386 and amd64 but I only see GENERIC changes for amd64?

ok mlarkin in any case, but I'd either add the GENERIC changes for i386 as
well or make this explicitly amd64-only.

-ml

> > Index: share/man/man4/pvclock.4
> > ===================================================================
> > RCS file: share/man/man4/pvclock.4
> > diff -N share/man/man4/pvclock.4
> > --- /dev/null       1 Jan 1970 00:00:00 -0000
> > +++ share/man/man4/pvclock.4        19 Nov 2018 11:48:33 -0000
> > @@ -0,0 +1,45 @@
> > +.\"        $OpenBSD$
> > +.\"
> > +.\" Copyright (c) 2018 Reyk Floeter <r...@openbsd.org>
> > +.\"
> > +.\" Permission to use, copy, modify, and distribute this software for any
> > +.\" purpose with or without fee is hereby granted, provided that the above
> > +.\" copyright notice and this permission notice appear in all copies.
> > +.\"
> > +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > +.\"
> > +.Dd $Mdocdate$
> > +.Dt PVCLOCK 4
> > +.Os
> > +.Sh NAME
> > +.Nm pvclock
> > +.Nd paravirtual clock driver
> > +.Sh SYNOPSIS
> > +.Cd "pvclock* at pvbus?"
> > +.Sh DESCRIPTION
> > +The
> > +.Nm
> > +driver supports the paravirtual clock that is available in KVM and
> > +other hypervisors.
> > +.Nm
> > +uses a shared page between the guest and the hypervisor to synchronize
> > +the TSC clock in an efficient way.
> > +.Sh SEE ALSO
> > +.Xr pvbus 4
> > +.Sh HISTORY
> > +The
> > +.Nm
> > +driver first appeared in
> > +.Ox 6.5 .
> > +.Sh AUTHORS
> > +.An -nosplit
> > +The
> > +.Nm
> > +driver was written by
> > +.An Reyk Floeter Aq Mt r...@openbsd.org .
> > Index: sys/arch/amd64/conf/GENERIC
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/conf/GENERIC,v
> > retrieving revision 1.464
> > diff -u -p -u -p -r1.464 GENERIC
> > --- sys/arch/amd64/conf/GENERIC     26 Oct 2018 20:26:19 -0000      1.464
> > +++ sys/arch/amd64/conf/GENERIC     19 Nov 2018 11:48:33 -0000
> > @@ -79,6 +79,8 @@ ipmi0     at mainbus? disable     # IPMI
> >  
> >  vmt0       at pvbus?               # VMware Tools
> >  
> > +pvclock0 at pvbus?         # KVM pvclock
> > +
> >  xen0       at pvbus?               # Xen HVM domU
> >  xnf*       at xen?                 # Xen Netfront
> >  xbf*       at xen?                 # Xen Blkfront
> > Index: sys/dev/pv/files.pv
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pv/files.pv,v
> > retrieving revision 1.14
> > diff -u -p -u -p -r1.14 files.pv
> > --- sys/dev/pv/files.pv     24 Aug 2018 16:07:01 -0000      1.14
> > +++ sys/dev/pv/files.pv     19 Nov 2018 11:48:33 -0000
> > @@ -8,6 +8,11 @@ device     pvbus
> >  attach     pvbus at mainbus
> >  file       dev/pv/pvbus.c                  pvbus   needs-flag
> >  
> > +# KVM clock
> > +device     pvclock
> > +attach     pvclock at pvbus
> > +file       dev/pv/pvclock.c                pvclock needs-flag
> > +
> >  # VMware Tools
> >  device     vmt
> >  attach     vmt at pvbus
> > Index: sys/dev/pv/pvclock.c
> > ===================================================================
> > RCS file: sys/dev/pv/pvclock.c
> > diff -N sys/dev/pv/pvclock.c
> > --- /dev/null       1 Jan 1970 00:00:00 -0000
> > +++ sys/dev/pv/pvclock.c    19 Nov 2018 11:48:33 -0000
> > @@ -0,0 +1,229 @@
> > +/* $OpenBSD$       */
> > +
> > +/*
> > + * Copyright (c) 2018 Reyk Floeter <r...@openbsd.org>
> > + *
> > + * Permission to use, copy, modify, and distribute this software for any
> > + * purpose with or without fee is hereby granted, provided that the above
> > + * copyright notice and this permission notice appear in all copies.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> > + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> > + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> > + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> > + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> > + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> > + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> > + */
> > +
> > +#if !defined(__i386__) && !defined(__amd64__)
> > +#error pvclock(4) is only supported on i386 and amd64
> > +#endif
> > +
> > +#include <sys/param.h>
> > +#include <sys/systm.h>
> > +#include <sys/kernel.h>
> > +#include <sys/timetc.h>
> > +#include <sys/timeout.h>
> > +#include <sys/malloc.h>
> > +#include <sys/atomic.h>
> > +
> > +#include <machine/cpu.h>
> > +#include <uvm/uvm_extern.h>
> > +
> > +#include <dev/pv/pvvar.h>
> > +#include <dev/pv/pvreg.h>
> > +
> > +struct pvclock_softc {
> > +   struct device            sc_dev;
> > +   void                    *sc_time;
> > +   paddr_t                  sc_paddr;
> > +   struct timecounter      *sc_tc;
> > +};
> > +
> > +struct pvclock_wall_clock {
> > +   uint32_t                 wc_version;
> > +   uint32_t                 wc_sec;
> > +   uint32_t                 wc_nsec;
> > +} __packed;
> > +
> > +struct pvclock_time_info {
> > +   uint32_t                 ti_version;
> > +   uint32_t                 ti_pad0;
> > +   uint64_t                 ti_tsc_timestamp;
> > +   uint64_t                 ti_system_time;
> > +   uint32_t                 ti_tsc_to_system_mul;
> > +   int8_t                   ti_tsc_shift;
> > +   uint8_t                  ti_flags;
> > +   uint8_t                  ti_pad[2];
> > +} __packed;
> > +
> > +#define PVCLOCK_FLAG_TSC_STABLE            0x01
> > +#define PVCLOCK_SYSTEM_TIME_ENABLE 0x01
> > +#define DEVNAME(_s)                        ((_s)->sc_dev.dv_xname)
> > +
> > +int         pvclock_match(struct device *, void *, void *);
> > +void        pvclock_attach(struct device *, struct device *, void *);
> > +int         pvclock_activate(struct device *, int);
> > +
> > +uint        pvclock_get_timecount(struct timecounter *);
> > +void        pvclock_read_time_info(struct pvclock_softc *,
> > +       struct pvclock_time_info *);
> > +
> > +struct cfattach pvclock_ca = {
> > +   sizeof(struct pvclock_softc),
> > +   pvclock_match,
> > +   pvclock_attach,
> > +   NULL,
> > +   pvclock_activate
> > +};
> > +
> > +struct cfdriver pvclock_cd = {
> > +   NULL,
> > +   "pvclock",
> > +   DV_DULL
> > +};
> > +
> > +struct timecounter pvclock_timecounter = {
> > +   pvclock_get_timecount, NULL, ~0u, 0, NULL, -2000, NULL
> > +};
> > +
> > +int
> > +pvclock_match(struct device *parent, void *match, void *aux)
> > +{
> > +   struct pv_attach_args   *pva = aux;
> > +   struct pvbus_hv         *hv;
> > +
> > +   /*
> > +    * pvclock is provided by different hypervisors, we currently
> > +    * only support the "kvmclock".
> > +    */
> > +   hv = &pva->pva_hv[PVBUS_KVM];
> > +   if (hv->hv_base != 0) {
> > +           /*
> > +            * We only implement support for the 2nd version of pvclock.
> > +            * The first version is basically the same but with different
> > +            * non-standard MSRs and it is deprecated.
> > +            */
> > +           if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
> > +                   return (0);
> > +
> > +           /*
> > +            * Only the "stable" clock with a sync'ed TSC is supported.
> > +            * In this case the host guarantees that the TSC is constant
> > +            * and invariant, either by the underlying TSC or by passing
> > +            * on a synchronized value.
> > +            */
> > +           if ((hv->hv_features &
> > +               (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
> > +                   return (0);
> > +   }
> > +
> > +   return (1);
> > +}
> > +
> > +void
> > +pvclock_attach(struct device *parent, struct device *self, void *aux)
> > +{
> > +   struct pvclock_softc    *sc = (struct pvclock_softc *)self;
> > +   paddr_t                  pa;
> > +
> > +   if ((sc->sc_time = km_alloc(PAGE_SIZE,
> > +       &kv_any, &kp_zero, &kd_nowait)) == NULL) {
> > +           printf(": time page allocation failed\n");
> > +           return;
> > +   }
> > +   if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
> > +           printf(": time page PA extraction failed\n");
> > +           km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
> > +           return;
> > +   }
> > +
> > +   wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
> > +   sc->sc_paddr = pa;
> > +
> > +   sc->sc_tc = &pvclock_timecounter;
> > +   sc->sc_tc->tc_name = DEVNAME(sc);
> > +   sc->sc_tc->tc_frequency = 1000000000ULL;
> > +   sc->sc_tc->tc_priv = sc;
> > +
> > +   /* Better than HPET but below TSC */
> > +   sc->sc_tc->tc_quality = 1500;
> > +
> > +   tc_init(sc->sc_tc);
> > +
> > +   printf("\n");
> > +}
> > +
> > +int
> > +pvclock_activate(struct device *self, int act)
> > +{
> > +   struct pvclock_softc    *sc = (struct pvclock_softc *)self;
> > +   int                      rv = 0;
> > +   paddr_t                  pa = sc->sc_paddr;
> > +
> > +   switch (act) {
> > +   case DVACT_POWERDOWN:
> > +           wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
> > +           break;
> > +   case DVACT_RESUME:
> > +           wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
> > +           break;
> > +   }
> > +
> > +   return (rv);
> > +}
> > +
> > +static inline uint32_t
> > +pvclock_read_begin(const struct pvclock_time_info *ti)
> > +{
> > +   uint32_t version = ti->ti_version & ~0x1;
> > +   virtio_membar_sync();
> > +   return (version);
> > +}
> > +
> > +static inline int
> > +pvclock_read_done(const struct pvclock_time_info *ti,
> > +    uint32_t version)
> > +{
> > +   virtio_membar_sync();
> > +   return (ti->ti_version == version);
> > +}
> > +
> > +uint
> > +pvclock_get_timecount(struct timecounter *tc)
> > +{
> > +   struct pvclock_softc            *sc = tc->tc_priv;
> > +   struct pvclock_time_info        *ti;
> > +   uint64_t                         tsc_timestamp, system_time, delta, ctr;
> > +   uint32_t                         version, mul_frac;
> > +   int8_t                           shift;
> > +   uint8_t                          flags;
> > +
> > +   ti = sc->sc_time;
> > +   do {
> > +           version = pvclock_read_begin(ti);
> > +           system_time = ti->ti_system_time;
> > +           tsc_timestamp = ti->ti_tsc_timestamp;
> > +           mul_frac = ti->ti_tsc_to_system_mul;
> > +           shift = ti->ti_tsc_shift;
> > +           flags = ti->ti_flags;
> > +   } while (!pvclock_read_done(ti, version));
> > +
> > +   /* This bit must be set as we attached based on the stable flag */
> > +   if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0)
> > +           panic("%s: unstable result on stable clock", DEVNAME(sc));
> > +
> > +   /*
> > +    * The algorithm is described in
> > +    * linux/Documentation/virtual/kvm/msr.txt
> > +    */
> > +   delta = rdtsc() - tsc_timestamp;
> > +   if (shift < 0)
> > +           delta >>= -shift;
> > +   else
> > +           delta <<= shift;
> > +   ctr = ((delta * mul_frac) >> 32) + system_time;
> > +
> > +   return (ctr);
> > +}
> > Index: sys/dev/pv/pvreg.h
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pv/pvreg.h,v
> > retrieving revision 1.4
> > diff -u -p -u -p -r1.4 pvreg.h
> > --- sys/dev/pv/pvreg.h      12 Dec 2015 12:33:49 -0000      1.4
> > +++ sys/dev/pv/pvreg.h      19 Nov 2018 11:48:33 -0000
> > @@ -43,6 +43,9 @@
> >  #define    KVM_MSR_EOI_EN                          0x4b564d04
> >  #define KVM_PV_EOI_BIT                             0
> >  
> > +#define KVM_MSR_WALL_CLOCK                 0x4b564d00
> > +#define KVM_MSR_SYSTEM_TIME                        0x4b564d01
> > +
> >  /*
> >   * Hyper-V
> >   */
> 

Reply via email to