On Sun, Apr 24, 2022 at 02:30:37PM -0400, Bryan Steele wrote: > On Sun, Apr 24, 2022 at 07:06:19PM +0200, Claudio Jeker wrote: > > On Ryzen CPUs each CCD has a temp sensor. If the CPU has CCDs (which > > excludes Zen APU CPUs) this should show additional temp info. This is > > based on info from the Linux k10temp driver. > > > > Additionally use the MSRs defined in "Open-Source Register Reference For > > AMD Family 17h Processors" to measure the CPU core frequency. > > That should be the actuall speed of the CPU core during the measuring > > interval. > > > > On my T14g2 the output is now for example: > > ksmn0.temp0 63.88 degC Tctl > > ksmn0.frequency0 3553141515.00 Hz CPU0 > > ksmn0.frequency1 3549080315.00 Hz CPU2 > > ksmn0.frequency2 3552369937.00 Hz CPU4 > > ksmn0.frequency3 3546055048.00 Hz CPU6 > > ksmn0.frequency4 3546854449.00 Hz CPU8 > > ksmn0.frequency5 3543869698.00 Hz CPU10 > > ksmn0.frequency6 3542551127.00 Hz CPU12 > > ksmn0.frequency7 4441623647.00 Hz CPU14 > > > > It is intresting to watch turbo kick in and how temp causes the CPU to > > throttle. > > > > I only tested this on systems with APUs so I could not thest the Tccd temp > > reporting. > > -- > > :wq Claudio > > Awesome! :-) > > I can see this adding a bunch of extra frequency sensors on higher-end > Ryzen/Threadripper/EPYC CPUs, considering it's displayed in "Hz" that > might be a bit overwhelming to look at.. but that's just a cosmetic > nit for systat. > > I'll test on some of my AMD machines later, if someone doesn't beat me > to it. > > Some comments below.. > > > Index: ksmn.c > > =================================================================== > > RCS file: /cvs/src/sys/dev/pci/ksmn.c,v > > retrieving revision 1.6 > > diff -u -p -r1.6 ksmn.c > > --- ksmn.c 11 Mar 2022 18:00:50 -0000 1.6 > > +++ ksmn.c 24 Apr 2022 16:47:08 -0000 > > @@ -20,6 +20,7 @@ > > #include <sys/systm.h> > > #include <sys/device.h> > > #include <sys/sensors.h> > > +#include <sys/timeout.h> > > > > #include <machine/bus.h> > > > > @@ -42,6 +43,7 @@ > > * [31:21] Current reported temperature. > > */ > > #define SMU_17H_THM 0x59800 > > +#define SMU_17H_CCD_THM(o, x) (SMU_17H_THM + (o) + ((x) * 4)) > > #define GET_CURTMP(r) (((r) >> 21) & 0x7ff) > > > > /* > > @@ -50,6 +52,8 @@ > > */ > > #define CURTMP_17H_RANGE_SEL (1 << 19) > > #define CURTMP_17H_RANGE_ADJUST 490 > > +#define CURTMP_CCD_VALID (1 << 11) > > +#define CURTMP_CCD_MASK 0x7ff > > > > /* > > * Undocumented tCTL offsets gleamed from Linux k10temp driver. > > @@ -75,13 +79,24 @@ struct ksmn_softc { > > pcitag_t sc_pcitag; > > > > int sc_tctl_offset; > > + unsigned int sc_ccd_valid; /* available Tccds */ > > + unsigned int sc_ccd_offset; > > + int sc_hz_count; > > > > - struct ksensor sc_sensor; > > struct ksensordev sc_sensordev; > > + struct ksensor sc_sensor; /* Tctl */ > > + struct ksensor sc_ccd_sensor[12]; /* Tccd */ > > + > > + struct timeout sc_hz_timeout; > > + struct ksensor *sc_hz_sensor; > > + struct cpu_info **sc_hz_cpu_info; > > }; > > > > int ksmn_match(struct device *, void *, void *); > > void ksmn_attach(struct device *, struct device *, void *); > > +uint32_t ksmn_read_reg(struct ksmn_softc *, uint32_t); > > +void ksmn_ccd_attach(struct ksmn_softc *, int); > > +void ksmn_hz_task(void *); > > void ksmn_refresh(void *); > > > > const struct cfattach ksmn_ca = { > > @@ -113,7 +128,12 @@ ksmn_attach(struct device *parent, struc > > struct ksmn_softc *sc = (struct ksmn_softc *)self; > > struct pci_attach_args *pa = aux; > > struct curtmp_offset *p; > > - extern char cpu_model[]; > > + CPU_INFO_ITERATOR cii; > > + struct cpu_info *ci = curcpu(); > > + struct ksensor *s; > > + extern char cpu_model[]; > > + int i; > > + > > > > sc->sc_pc = pa->pa_pc; > > sc->sc_pcitag = pa->pa_tag; > > @@ -122,6 +142,7 @@ ksmn_attach(struct device *parent, struc > > sizeof(sc->sc_sensordev.xname)); > > > > sc->sc_sensor.type = SENSOR_TEMP; > > + snprintf(sc->sc_sensor.desc, sizeof(sc->sc_sensor.desc), "Tctl"); > > sensor_attach(&sc->sc_sensordev, &sc->sc_sensor); > > > > /* > > @@ -136,6 +157,80 @@ ksmn_attach(struct device *parent, struc > > sc->sc_tctl_offset = p->tctl_offset; > > } > > > > + sc->sc_ccd_offset = 0x154; > > + > > + if (ci->ci_family == 0x17 || ci->ci_family == 0x18) { > > + switch (ci->ci_model) { > > + case 0x1: /* Zen */ > > + case 0x8: /* Zen+ */ > > + case 0x11: /* Zen APU */ > > + case 0x18: /* Zen+ APU */ > > + ksmn_ccd_attach(sc, 4); > > + break; > > + case 0x31: /* Zen2 Threadripper */ > > + case 0x60: /* Renoir */ > > + case 0x68: /* Lucienne */ > > + case 0x71: /* Zen2 */ > > + ksmn_ccd_attach(sc, 8); > > + break; > > + } > > + } else if (ci->ci_family == 0x19) { > > + uint32_t m = ci->ci_model; > > + > > + if ((m >= 0x40 && m <= 0x4f) || > > + (m >= 0x10 && m <= 0x1f) || > > + (m >= 0xa0 && m <= 0xaf)) > > + sc->sc_ccd_offset = 0x300; > > + > > + if ((m >= 0x10 && m <= 0x1f) || > > + (m >= 0xa0 && m <= 0xaf)) > > + ksmn_ccd_attach(sc, 12); > > + else > > + ksmn_ccd_attach(sc, 8); > > + } > > + > > + CPU_INFO_FOREACH(cii, ci) { > > + if (ci->ci_smt_id != 0) > > + continue; > > + sc->sc_hz_count++; > > + } > > + sc->sc_hz_sensor = mallocarray(sc->sc_hz_count, > > + sizeof(*sc->sc_hz_sensor), M_DEVBUF, M_WAITOK | M_ZERO); > > + sc->sc_hz_cpu_info = mallocarray(sc->sc_hz_count, > > + sizeof(*sc->sc_hz_cpu_info), M_DEVBUF, M_WAITOK | M_ZERO); > > + > > + i = 0; > > + CPU_INFO_FOREACH(cii, ci) { > > + if (ci->ci_smt_id != 0) > > + continue; > > + sc->sc_hz_cpu_info[i] = ci; > > + i++; > > + } > > + /* sort list of CPUs */ > > + for (i = 1; i < sc->sc_hz_count; i++) { > > + int j; > > + for (j = 0; j < sc->sc_hz_count - i; j++) { > > + if (CPU_INFO_UNIT(sc->sc_hz_cpu_info[j]) > > > + CPU_INFO_UNIT(sc->sc_hz_cpu_info[j + 1])) { > > + ci = sc->sc_hz_cpu_info[j]; > > + sc->sc_hz_cpu_info[j] = > > + sc->sc_hz_cpu_info[j + 1]; > > + sc->sc_hz_cpu_info[j + 1] = ci; > > + } > > + } > > + } > > + > > + for (i = 0; i < sc->sc_hz_count; i++) { > > + ci = sc->sc_hz_cpu_info[i]; > > + s = &sc->sc_hz_sensor[i]; > > + s->type = SENSOR_FREQ; > > + snprintf(s->desc, sizeof(s->desc), "CPU%d", CPU_INFO_UNIT(ci)); > > + sensor_attach(&sc->sc_sensordev, &sc->sc_hz_sensor[i]); > > + } > > + > > + timeout_set_proc(&sc->sc_hz_timeout, ksmn_hz_task, sc); > > + timeout_add_sec(&sc->sc_hz_timeout, 1); > > + > > if (sensor_task_register(sc, ksmn_refresh, 5) == NULL) { > > printf(": unable to register update task\n"); > > return; > > @@ -146,18 +241,78 @@ ksmn_attach(struct device *parent, struc > > printf("\n"); > > } > > > > +uint32_t > > +ksmn_read_reg(struct ksmn_softc *sc, uint32_t addr) > > +{ > > + uint32_t reg; > > + int s; > > + > > + s = splhigh(); > > + pci_conf_write(sc->sc_pc, sc->sc_pcitag, SMN_17H_ADDR_R, addr); > > + reg = pci_conf_read(sc->sc_pc, sc->sc_pcitag, SMN_17H_DATA_R); > > + splx(s); > > + return reg; > > +} > > + > > +void > > +ksmn_ccd_attach(struct ksmn_softc *sc, int nccd) > > +{ > > + struct ksensor *s; > > + uint32_t reg; > > + int i; > > + > > + KASSERT(nccd > 0 && nccd < nitems(sc->sc_ccd_sensor)); > > + > > + for (i = 0; i < nccd; i++) { > > + reg = ksmn_read_reg(sc, SMU_17H_CCD_THM(sc->sc_ccd_offset, i)); > > + if (reg & CURTMP_CCD_VALID) { > > + sc->sc_ccd_valid |= (1 << i); > > + s = &sc->sc_ccd_sensor[i]; > > + s->type = SENSOR_TEMP; > > + snprintf(s->desc, sizeof(s->desc), "Tccd%d", i); > > + sensor_attach(&sc->sc_sensordev, s); > > + } > > + } > > +} > > + > > +void > > +ksmn_hz_task(void *arg) > > +{ > > + extern uint64_t tsc_frequency; > > + struct ksmn_softc *sc = arg; > > + struct cpu_info *ci; > > + struct ksensor *sens; > > + uint64_t mperf, aperf; > > + int i, s; > > + > > + for (i = 0; i < sc->sc_hz_count; i++) { > > + ci = sc->sc_hz_cpu_info[i]; > > + sens = &sc->sc_hz_sensor[i]; > > + if (!CPU_IS_RUNNING(ci)) > > + continue; > > + sched_peg_curproc(ci); > > + > > + s = splhigh(); > > + mperf = rdmsr(0xE7); > > + aperf = rdmsr(0xE8); > > + wrmsr(0xE7, 0); > > + wrmsr(0xE8, 0); > > Do these MSRs have names?
MPERF and APERF. Guess we could define them or even put them into one of the cpu .h files. > > + splx(s); > > + sens->value = ((aperf * tsc_frequency) / mperf) * 1000000; > > + } > > + > > + timeout_add_sec(&sc->sc_hz_timeout, 1); > > +} > > + > > void > > ksmn_refresh(void *arg) > > { > > struct ksmn_softc *sc = arg; > > struct ksensor *s = &sc->sc_sensor; > > pcireg_t reg; > > - int raw, offset = 0; > > - > > - pci_conf_write(sc->sc_pc, sc->sc_pcitag, SMN_17H_ADDR_R, > > - SMU_17H_THM); > > - reg = pci_conf_read(sc->sc_pc, sc->sc_pcitag, SMN_17H_DATA_R); > > + int i, raw, offset = 0; > > > > + reg = ksmn_read_reg(sc, SMU_17H_THM); > > raw = GET_CURTMP(reg); > > if ((reg & CURTMP_17H_RANGE_SEL) != 0) > > offset -= CURTMP_17H_RANGE_ADJUST; > > @@ -166,5 +321,16 @@ ksmn_refresh(void *arg) > > offset *= 100000; > > > > /* convert raw (in steps of 0.125C) to uC, add offset, uC to uK. */ > > - s->value = ((raw * 125000) + offset) + 273150000; > > + s->value = raw * 125000 + offset + 273150000; > > + > > + offset = CURTMP_17H_RANGE_ADJUST * 100000; > > Do we not need to handle the tCTL offsets for these additional CCD > sensors on older Zen/Zen+ CPUs? We compenstate for that above, which > I think you're losing here. At least the linux code does not seem to do that. Need to collect some data from various systems to better understand the output. > > + for (i = 0; i < nitems(sc->sc_ccd_sensor); i++) { > > + if (sc->sc_ccd_valid & (1 << i)) { > > + s = &sc->sc_ccd_sensor[i]; > > + reg = ksmn_read_reg(sc, > > + SMU_17H_CCD_THM(sc->sc_ccd_offset, i)); > > + s->value = (reg & CURTMP_CCD_MASK) * 125000 - offset + > > + 273150000; > > + } > > + } > > } > > > > > -- :wq Claudio