On Sun, Apr 24, 2022 at 02:30:37PM -0400, Bryan Steele wrote:
> On Sun, Apr 24, 2022 at 07:06:19PM +0200, Claudio Jeker wrote:
> > On Ryzen CPUs each CCD has a temp sensor. If the CPU has CCDs (which
> > excludes Zen APU CPUs) this should show additional temp info. This is
> > based on info from the Linux k10temp driver.
> > 
> > Additionally use the MSRs defined in "Open-Source Register Reference For
> > AMD Family 17h Processors" to measure the CPU core frequency.
> > That should be the actuall speed of the CPU core during the measuring
> > interval.
> > 
> > On my T14g2 the output is now for example:
> > ksmn0.temp0                       63.88 degC          Tctl
> > ksmn0.frequency0            3553141515.00 Hz          CPU0
> > ksmn0.frequency1            3549080315.00 Hz          CPU2
> > ksmn0.frequency2            3552369937.00 Hz          CPU4
> > ksmn0.frequency3            3546055048.00 Hz          CPU6
> > ksmn0.frequency4            3546854449.00 Hz          CPU8
> > ksmn0.frequency5            3543869698.00 Hz          CPU10
> > ksmn0.frequency6            3542551127.00 Hz          CPU12
> > ksmn0.frequency7            4441623647.00 Hz          CPU14
> > 
> > It is intresting to watch turbo kick in and how temp causes the CPU to
> > throttle.
> > 
> > I only tested this on systems with APUs so I could not thest the Tccd temp
> > reporting.
> > -- 
> > :wq Claudio
> 
> Awesome! :-)
> 
> I can see this adding a bunch of extra frequency sensors on higher-end
> Ryzen/Threadripper/EPYC CPUs, considering it's displayed in "Hz" that
> might be a bit overwhelming to look at.. but that's just a cosmetic
> nit for systat.
> 
> I'll test on some of my AMD machines later, if someone doesn't beat me
> to it.
> 
> Some comments below..
> 
> > Index: ksmn.c
> > ===================================================================
> > RCS file: /cvs/src/sys/dev/pci/ksmn.c,v
> > retrieving revision 1.6
> > diff -u -p -r1.6 ksmn.c
> > --- ksmn.c  11 Mar 2022 18:00:50 -0000      1.6
> > +++ ksmn.c  24 Apr 2022 16:47:08 -0000
> > @@ -20,6 +20,7 @@
> >  #include <sys/systm.h>
> >  #include <sys/device.h>
> >  #include <sys/sensors.h>
> > +#include <sys/timeout.h>
> >  
> >  #include <machine/bus.h>
> >  
> > @@ -42,6 +43,7 @@
> >    *     [31:21]  Current reported temperature.
> >    */
> >  #define SMU_17H_THM        0x59800
> > +#define SMU_17H_CCD_THM(o, x)      (SMU_17H_THM + (o) + ((x) * 4))
> >  #define GET_CURTMP(r)      (((r) >> 21) & 0x7ff)
> >  
> >  /*
> > @@ -50,6 +52,8 @@
> >   */
> >  #define CURTMP_17H_RANGE_SEL       (1 << 19)
> >  #define CURTMP_17H_RANGE_ADJUST    490
> > +#define CURTMP_CCD_VALID   (1 << 11)
> > +#define CURTMP_CCD_MASK            0x7ff
> >  
> >  /*
> >   * Undocumented tCTL offsets gleamed from Linux k10temp driver.
> > @@ -75,13 +79,24 @@ struct ksmn_softc {
> >     pcitag_t                sc_pcitag;
> >  
> >     int                     sc_tctl_offset;
> > +   unsigned int            sc_ccd_valid;           /* available Tccds */
> > +   unsigned int            sc_ccd_offset;
> > +   int                     sc_hz_count;
> >  
> > -   struct ksensor          sc_sensor;
> >     struct ksensordev       sc_sensordev;
> > +   struct ksensor          sc_sensor;              /* Tctl */
> > +   struct ksensor          sc_ccd_sensor[12];      /* Tccd */
> > +
> > +   struct timeout          sc_hz_timeout;
> > +   struct ksensor          *sc_hz_sensor;
> > +   struct cpu_info         **sc_hz_cpu_info;
> >  };
> >  
> >  int        ksmn_match(struct device *, void *, void *);
> >  void       ksmn_attach(struct device *, struct device *, void *);
> > +uint32_t   ksmn_read_reg(struct ksmn_softc *, uint32_t);
> > +void       ksmn_ccd_attach(struct ksmn_softc *, int);
> > +void       ksmn_hz_task(void *);
> >  void       ksmn_refresh(void *);
> >  
> >  const struct cfattach ksmn_ca = {
> > @@ -113,7 +128,12 @@ ksmn_attach(struct device *parent, struc
> >     struct ksmn_softc       *sc = (struct ksmn_softc *)self;
> >     struct pci_attach_args  *pa = aux;
> >     struct curtmp_offset    *p;
> > -   extern char             cpu_model[];
> > +   CPU_INFO_ITERATOR        cii;
> > +   struct cpu_info         *ci = curcpu();
> > +   struct ksensor          *s;
> > +   extern char              cpu_model[];
> > +   int                      i;
> > +
> >  
> >     sc->sc_pc = pa->pa_pc;
> >     sc->sc_pcitag = pa->pa_tag;
> > @@ -122,6 +142,7 @@ ksmn_attach(struct device *parent, struc
> >         sizeof(sc->sc_sensordev.xname));
> >  
> >     sc->sc_sensor.type = SENSOR_TEMP;
> > +   snprintf(sc->sc_sensor.desc, sizeof(sc->sc_sensor.desc), "Tctl");
> >     sensor_attach(&sc->sc_sensordev, &sc->sc_sensor);
> >  
> >     /*
> > @@ -136,6 +157,80 @@ ksmn_attach(struct device *parent, struc
> >                     sc->sc_tctl_offset = p->tctl_offset;
> >     }
> >  
> > +   sc->sc_ccd_offset = 0x154;
> > +
> > +   if (ci->ci_family == 0x17 || ci->ci_family == 0x18) {
> > +           switch (ci->ci_model) {
> > +           case 0x1:       /* Zen */
> > +           case 0x8:       /* Zen+ */
> > +           case 0x11:      /* Zen APU */
> > +           case 0x18:      /* Zen+ APU */
> > +                   ksmn_ccd_attach(sc, 4);
> > +                   break;
> > +           case 0x31:      /* Zen2 Threadripper */
> > +           case 0x60:      /* Renoir */
> > +           case 0x68:      /* Lucienne */
> > +           case 0x71:      /* Zen2 */
> > +                   ksmn_ccd_attach(sc, 8);
> > +                   break;
> > +           }
> > +   } else if (ci->ci_family == 0x19) {
> > +           uint32_t m = ci->ci_model;
> > +
> > +           if ((m >= 0x40 && m <= 0x4f) ||
> > +               (m >= 0x10 && m <= 0x1f) ||
> > +               (m >= 0xa0 && m <= 0xaf))
> > +                   sc->sc_ccd_offset = 0x300;
> > +
> > +           if ((m >= 0x10 && m <= 0x1f) ||
> > +               (m >= 0xa0 && m <= 0xaf))
> > +                   ksmn_ccd_attach(sc, 12);
> > +           else
> > +                   ksmn_ccd_attach(sc, 8);
> > +   }
> > +
> > +   CPU_INFO_FOREACH(cii, ci) {
> > +           if (ci->ci_smt_id != 0)
> > +                   continue;
> > +           sc->sc_hz_count++;
> > +   }
> > +   sc->sc_hz_sensor = mallocarray(sc->sc_hz_count,
> > +        sizeof(*sc->sc_hz_sensor), M_DEVBUF, M_WAITOK | M_ZERO);
> > +   sc->sc_hz_cpu_info = mallocarray(sc->sc_hz_count,
> > +        sizeof(*sc->sc_hz_cpu_info), M_DEVBUF, M_WAITOK | M_ZERO);
> > +
> > +   i = 0;
> > +   CPU_INFO_FOREACH(cii, ci) {
> > +           if (ci->ci_smt_id != 0)
> > +                   continue;
> > +           sc->sc_hz_cpu_info[i] = ci;
> > +           i++;
> > +   }
> > +   /* sort list of CPUs */
> > +   for (i = 1; i < sc->sc_hz_count; i++) {
> > +           int j;
> > +           for (j = 0; j < sc->sc_hz_count - i; j++) {
> > +                   if (CPU_INFO_UNIT(sc->sc_hz_cpu_info[j]) >
> > +                       CPU_INFO_UNIT(sc->sc_hz_cpu_info[j + 1])) {
> > +                           ci = sc->sc_hz_cpu_info[j];
> > +                           sc->sc_hz_cpu_info[j] =
> > +                               sc->sc_hz_cpu_info[j + 1];
> > +                           sc->sc_hz_cpu_info[j + 1] = ci;
> > +                   }
> > +           }
> > +   }
> > +
> > +   for (i = 0; i < sc->sc_hz_count; i++) {
> > +           ci = sc->sc_hz_cpu_info[i];
> > +           s = &sc->sc_hz_sensor[i];
> > +           s->type = SENSOR_FREQ;
> > +           snprintf(s->desc, sizeof(s->desc), "CPU%d", CPU_INFO_UNIT(ci));
> > +           sensor_attach(&sc->sc_sensordev, &sc->sc_hz_sensor[i]);
> > +   }
> > +
> > +   timeout_set_proc(&sc->sc_hz_timeout, ksmn_hz_task, sc);
> > +   timeout_add_sec(&sc->sc_hz_timeout, 1);
> > +
> >     if (sensor_task_register(sc, ksmn_refresh, 5) == NULL) {
> >             printf(": unable to register update task\n");
> >             return;
> > @@ -146,18 +241,78 @@ ksmn_attach(struct device *parent, struc
> >     printf("\n");
> >  }
> >  
> > +uint32_t
> > +ksmn_read_reg(struct ksmn_softc *sc, uint32_t addr)
> > +{
> > +   uint32_t reg;
> > +   int s;
> > +
> > +   s = splhigh();
> > +   pci_conf_write(sc->sc_pc, sc->sc_pcitag, SMN_17H_ADDR_R, addr);
> > +   reg = pci_conf_read(sc->sc_pc, sc->sc_pcitag, SMN_17H_DATA_R);
> > +   splx(s);
> > +   return reg;
> > +}
> > +
> > +void
> > +ksmn_ccd_attach(struct ksmn_softc *sc, int nccd)
> > +{
> > +   struct ksensor *s;
> > +   uint32_t reg;
> > +   int i;
> > +
> > +   KASSERT(nccd > 0 && nccd < nitems(sc->sc_ccd_sensor));
> > +
> > +   for (i = 0; i < nccd; i++) {
> > +           reg = ksmn_read_reg(sc, SMU_17H_CCD_THM(sc->sc_ccd_offset, i));
> > +           if (reg & CURTMP_CCD_VALID) {
> > +                   sc->sc_ccd_valid |= (1 << i);
> > +                   s = &sc->sc_ccd_sensor[i];
> > +                   s->type = SENSOR_TEMP;
> > +                   snprintf(s->desc, sizeof(s->desc), "Tccd%d", i);
> > +                   sensor_attach(&sc->sc_sensordev, s);
> > +           }
> > +   }
> > +}
> > +
> > +void
> > +ksmn_hz_task(void *arg)
> > +{
> > +   extern uint64_t          tsc_frequency;
> > +   struct ksmn_softc       *sc = arg;
> > +   struct cpu_info         *ci;
> > +   struct ksensor          *sens;
> > +   uint64_t                 mperf, aperf;
> > +   int                      i, s;
> > +
> > +   for (i = 0; i < sc->sc_hz_count; i++) {
> > +           ci = sc->sc_hz_cpu_info[i];
> > +           sens = &sc->sc_hz_sensor[i];
> > +           if (!CPU_IS_RUNNING(ci))
> > +                   continue;
> > +           sched_peg_curproc(ci);
> > +
> > +           s = splhigh();
> > +           mperf = rdmsr(0xE7);
> > +           aperf = rdmsr(0xE8);
> > +           wrmsr(0xE7, 0);
> > +           wrmsr(0xE8, 0);
> 
> Do these MSRs have names?

MPERF and APERF. Guess we could define them or even put them into one of
the cpu .h files.
 
> > +           splx(s);
> > +           sens->value = ((aperf * tsc_frequency) / mperf) * 1000000;
> > +   }
> > +
> > +   timeout_add_sec(&sc->sc_hz_timeout, 1);
> > +}
> > +
> >  void
> >  ksmn_refresh(void *arg)
> >  {
> >     struct ksmn_softc       *sc = arg;
> >     struct ksensor          *s = &sc->sc_sensor;
> >     pcireg_t                reg;
> > -   int                     raw, offset = 0;
> > -
> > -   pci_conf_write(sc->sc_pc, sc->sc_pcitag, SMN_17H_ADDR_R,
> > -       SMU_17H_THM);
> > -   reg = pci_conf_read(sc->sc_pc, sc->sc_pcitag, SMN_17H_DATA_R);
> > +   int                     i, raw, offset = 0;
> >  
> > +   reg = ksmn_read_reg(sc, SMU_17H_THM);
> >     raw = GET_CURTMP(reg);
> >     if ((reg & CURTMP_17H_RANGE_SEL) != 0)
> >             offset -= CURTMP_17H_RANGE_ADJUST;
> > @@ -166,5 +321,16 @@ ksmn_refresh(void *arg)
> >     offset *= 100000;
> >  
> >     /* convert raw (in steps of 0.125C) to uC, add offset, uC to uK. */
> > -   s->value = ((raw * 125000) + offset) + 273150000;
> > +   s->value = raw * 125000 + offset + 273150000;
> > +
> > +   offset = CURTMP_17H_RANGE_ADJUST * 100000;
> 
> Do we not need to handle the tCTL offsets for these additional CCD
> sensors on older Zen/Zen+ CPUs? We compenstate for that above, which
> I think you're losing here.

At least the linux code does not seem to do that. Need to collect some
data from various systems to better understand the output.

> > +   for (i = 0; i < nitems(sc->sc_ccd_sensor); i++) {
> > +           if (sc->sc_ccd_valid & (1 << i)) {
> > +                   s = &sc->sc_ccd_sensor[i];
> > +                   reg = ksmn_read_reg(sc,
> > +                       SMU_17H_CCD_THM(sc->sc_ccd_offset, i));
> > +                   s->value = (reg & CURTMP_CCD_MASK) * 125000 - offset +
> > +                       273150000;
> > +           }
> > +   }
> >  }
> > 
> > 
> 

-- 
:wq Claudio

Reply via email to