Hi, TL;DR:
I want to compute the TSC frequency on AMD CPUs using the methods laid
out in the AMD manuals instead of calibrating the TSC by hand.

If you have an AMD CPU with an invariant TSC, please apply this patch,
recompile/boot the resulting kernel, and send me the resulting dmesg.

Family 10h-16h CPUs are especially interesting.  If you've got one,
don't be shy!

Long explanation:

On AMD CPUs we calibrate the TSC with a separate timer.  This is slow
and introduces error.  I also worry about a future where legacy timers
are absent or heavily gated (read: useless).

This patch adds most of the code needed to compute the TSC frequency
on AMD family 10h+ CPUs.  CPUs prior to family 10h did not support an
invariant TSC so they are irrelevant.

I have riddled the code with printf(9) calls so I can work out what's
wrong by hand if a test result makes no sense.

The only missing piece is code to read the configuration space on
family 10h-16h CPUs to determine how many boosted P-states we need to
skip to get to the MSR describing the software P0 state.  I would
really appreciate it if someone could explain how to do this at this
very early point in boot.  jsg@ pointed me to pci_conf_read(9), but
I'm a little confused about how I get the needed pci* inputs at this
point in boot.

--

Test results?  Clues on reading the configuration space?

-Scott

Index: tsc.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.29
diff -u -p -r1.29 tsc.c
--- tsc.c	22 Sep 2022 04:57:08 -0000	1.29
+++ tsc.c	23 Sep 2022 14:04:22 -0000
@@ -100,6 +100,265 @@ tsc_freq_cpuid(struct cpu_info *ci)
 	return (0);
 }
 
+/*
+ * Decode the core P-state definition MSRs on AMD family 10h+ CPUs to
+ * work out the P0 frequency, which is the rate the TSC advances at
+ * when HWCR[TscFreqSel] is set.
+ *
+ * DEBUG: for now every P-state MSR is decoded and printed, and the
+ * function returns 0 in every case.
+ */
+uint64_t
+tsc_freq_msr(struct cpu_info *ci)
+{
+	uint64_t base, def, did, did_lsd, did_msd, divisor, fid, multiplier;
+	uint32_t msr, off = 0;
+
+	if (strcmp(cpu_vendor, "AuthenticAMD") != 0)
+		return 0;
+
+	/*
+	 * All family 10h+ CPUs have MSR_HWCR and the TscFreqSel bit.
+	 * If TscFreqSel is not set the TSC does not advance at the P0
+	 * frequency, in which case something is wrong and we need to
+	 * calibrate by hand.
+	 */
+#define HWCR_TSCFREQSEL (1 << 24)
+	if (!ISSET(rdmsr(MSR_HWCR), HWCR_TSCFREQSEL))	/* XXX specialreg.h */
+		return 0;
+#undef HWCR_TSCFREQSEL
+
+	/*
+	 * For families 10h, 12h, 14h, 15h, and 16h, we need to skip past
+	 * the boosted P-states (Pb0, Pb1, etc.) to find the MSR describing
+	 * P0, i.e. the highest performance unboosted P-state.  The number
+	 * of boosted states is kept in the "Core Performance Boost Control"
+	 * configuration space register.
+	 */
+#ifdef __not_yet__
+	uint32_t reg;
+	switch (ci->ci_family) {
+	case 0x10:
+		/* XXX How do I read config space at this point in boot? */
+		reg = read_config_space(F4x15C);
+		off = (reg >> 2) & 0x1;
+		break;
+	case 0x12:
+	case 0x14:
+	case 0x15:
+	case 0x16:
+		/* XXX How do I read config space at this point in boot? */
+		reg = read_config_space(D18F4x15C);
+		off = (reg >> 2) & 0x7;
+		break;
+	default:
+		break;
+	}
+#endif
+
+/* DEBUG Let's look at all the MSRs to check my math. */
+for (; off < 8; off++) {
+
+	/*
+	 * In family 10h+, core P-state voltage/frequency definitions
+	 * are kept in MSRs C001_006[4:B] (eight registers in total).
+	 * All MSRs in the range are readable, but if the EN bit isn't
+	 * set the register doesn't define a valid P-state.
+	 */
+	msr = 0xc0010064 + off;			/* XXX specialreg.h */
+	def = rdmsr(msr);
+	printf("%s: MSR %04X_%04X: en %d",
+	    ci->ci_dev->dv_xname, msr >> 16, msr & 0xffff,
+	    !!ISSET(def, 1ULL << 63));
+	if (!ISSET(def, 1ULL << 63)) {		/* XXX specialreg.h */
+		printf("\n");
+		continue;
+	}
+	switch (ci->ci_family) {
+	case 0x10:
+		/* AMD Family 10h Processor BKDG, Rev 3.62, p. 429 */
+		base = 100000000;	/* 100.0 MHz */
+		did = (def >> 6) & 0x7;
+		divisor = 1ULL << did;
+		fid = def & 0x3f;	/* CpuFid is bits 5:0 */
+		multiplier = fid + 0x10;
+		printf(" base %llu did %llu div %llu fid %llu mul %llu",
+		    base, did, divisor, fid, multiplier);
+		break;
+	case 0x11:
+		/* AMD Family 11h Processor BKDG, Rev 3.62, p. 236 */
+		base = 100000000;	/* 100.0 MHz */
+		did = (def >> 6) & 0x7;
+		divisor = 1ULL << did;
+		fid = def & 0x3f;	/* CpuFid is bits 5:0 */
+		multiplier = fid + 0x8;
+		printf(" base %llu did %llu div %llu fid %llu mul %llu",
+		    base, did, divisor, fid, multiplier);
+		break;
+	case 0x12:
+		/* AMD Family 12h Processor BKDG, Rev 3.02, pp. 468-469 */
+		base = 100000000;	/* 100.0 MHz */
+		fid = (def >> 4) & 0xf;
+		multiplier = fid + 0x10;
+
+		/*
+		 * A CpuDid of 1 maps to a divisor of 1.5.  To simulate
+		 * this with integer math we use a divisor of 3 and double
+		 * the multiplier, as (X * 2 / 3) equals (X / 1.5).  All
+		 * other CpuDid values map to whole number divisors
+		 * or are reserved.
+		 */
+		did = def & 0xf;
+		printf(" did %llu", did);
+		if (did > 8) {		/* the table below covers 0-8 */
+			printf("(reserved)\n");
+			continue;	/* reserved */
+		}
+		if (did == 1)
+			multiplier *= 2;
+		static const uint64_t did_divisor[] = { 1, 3, 2, 3, 4, 6, 8, 12, 16 };
+		divisor = did_divisor[did];
+		printf(" div %llu base %llu fid %llu mul %llu",
+		    divisor, base, fid, multiplier);
+		break;
+	case 0x14:
+		/*
+		 * BKDG for AMD Family 14h Models 00h-0Fh Processors,
+		 * Rev 3.13, pp. 428-429
+		 *
+		 * Family 14h doesn't have CpuFid or CpuDid.  Instead,
+		 * the CpuCOF divisor is derived from two new fields:
+		 * CpuDidMsd, the integral base, and CpuDidLsd, the
+		 * fractional multiplier.  The formula for the divisor
+		 * varies with the magnitude of CpuDidMsd:
+		 *
+		 * CpuDidMsd <= 14: CpuDidMsd + 1 + (CpuDidLsd * 0.25)
+		 * CpuDidMsd >= 15: CpuDidMsd + 1 + ((CpuDidLsd & 0x2) * 0.25)
+		 *
+		 * CpuCOF is just (base / divisor), however we need to
+		 * multiply both sides by 100 to simulate fractional
+		 * division with integer math, e.g. (X * 100 / 125) is
+		 * equivalent to (X / 1.25).
+		 */
+#ifdef __not_yet__
+		/* XXX How do I read config space at this point in boot? */
+		reg = read_config_space(D18F3xD4);
+		base = 100000000 * ((reg & 0x3f) + 0x10);
+#else
+		base = 100000000;	/* XXX guess 100.0 MHz for now... */
+#endif
+		multiplier = 100;
+		did_msd = (def >> 4) & 0x1f;	/* 5-bit field, bits 8:4 */
+		printf(" msd %llu", did_msd);
+		if (did_msd >= 27) {
+			printf("(reserved)\n");
+			continue;	/* XXX might be reserved? */
+		}
+		did_lsd = def & 0xf;
+		printf(" lsd %llu", did_lsd);
+		if (did_lsd >= 4) {
+			printf("(reserved)\n");
+			continue;	/* reserved */
+		}
+		if (did_msd >= 15)
+			did_lsd &= 0x2;	/* binary 10b: only half steps */
+		divisor = (did_msd + 1) * 100 + (did_lsd * 25);
+		printf(" div %llu base %llu mul %llu",
+		    divisor, base, multiplier);
+		break;
+	case 0x15:
+		/*
+		 * BKDG for AMD Family 15h [...]:
+		 * Models 00h-0Fh Processors, Rev 3.14, pp. 569-571
+		 * Models 10h-1Fh Processors, Rev 3.12, pp. 580-581
+		 * Models 30h-3Fh Processors, Rev 3.06, pp. 634-636
+		 * Models 60h-6Fh Processors, Rev 3.05, pp. 691-693
+		 * Models 70h-7Fh Processors, Rev 3.09, pp. 655-656
+		 */
+		base = 100000000;	/* 100.0 MHz */
+		did = (def >> 6) & 0x7;
+		printf(" base %llu did %llu", base, did);
+		if (did >= 0x5) {
+			printf("(reserved)\n");
+			continue;	/* reserved */
+		}
+		divisor = 1ULL << did;
+
+		/*
+		 * BKDG for AMD Family 15h Models 00h-0Fh, Rev 3.14, p. 571
+		 * says that "CpuFid must be less than or equal to 2Fh."
+		 * No other BKDG for family 15h limits the range of CpuFid.
+		 */
+		fid = def & 0x3f;
+		printf(" fid %llu", fid);
+		if (ci->ci_model <= 0x0f && fid >= 0x30) {
+			printf("(reserved)\n");
+			continue;	/* reserved */
+		}
+		multiplier = fid + 0x10;
+		printf(" mul %llu div %llu", multiplier, divisor);
+		break;
+	case 0x16:
+		/*
+		 * BKDG for AMD Family 16h [...]:
+		 * Models 00h-0Fh Processors, Rev 3.03, pp. 548-550
+		 * Models 30h-3Fh Processors, Rev 3.06, pp. 610-612
+		 */
+		base = 100000000;	/* 100.0 MHz */
+		did = (def >> 6) & 0x7;
+		printf(" did %llu", did);
+		if (did >= 0x5) {
+			printf("(reserved)\n");
+			continue;	/* reserved */
+		}
+		divisor = 1ULL << did;
+		fid = def & 0x3f;
+		multiplier = fid + 0x10;
+		printf(" divisor %llu base %llu fid %llu mul %llu",
+		    divisor, base, fid, multiplier);
+		break;
+	case 0x17:
+		/*
+		 * PPR for AMD Family 17h [...]:
+		 * Models 01h,08h B2, Rev 3.03, pp. 33, 139-140
+		 * Model 18h B1, Rev 3.16, pp. 36, 143-144
+		 * Model 60h A1, Rev 3.06, pp. 33, 155-157
+		 * Model 71h B0, Rev 3.06, pp. 28, 150-151
+		 *
+		 * OSRR for AMD Family 17h processors,
+		 * Models 00h-2Fh, Rev 3.03, pp. 130-131
+		 */
+		base = 200000000;	/* 200.0 MHz */
+		divisor = did = (def >> 8) & 0x3f;	/* XXX reserved vals? */
+		multiplier = fid = def & 0xff;
+		printf(" base %llu mul %llu div %llu",
+		    base, multiplier, divisor);
+		break;
+	case 0x19:
+		/*
+		 * PPR for AMD Family 19h
+		 * Model 21h B0, Rev 3.05, pp. 33, 166-167
+		 */
+		base = 200000000;	/* 200.0 MHz */
+		divisor = did = (def >> 8) & 0x3f;	/* XXX reserved vals? */
+		multiplier = fid = def & 0xff;
+		printf(" base %llu mul %llu div %llu",
+		    base, multiplier, divisor);
+		break;
+	default:
+		return 0;
+	}
+	/*
+	 * On family 17h/19h the divisor comes straight from the MSR
+	 * and may be zero; print 0 Hz instead of dividing by zero.
+	 */
+	printf(" freq %llu Hz\n",
+	    divisor != 0 ? base * multiplier / divisor : 0);
+}
+/* DEBUG for-loop ends here. */
+
+	return 0;
+}
+
 void
 tsc_identify(struct cpu_info *ci)
 {
@@ -118,6 +377,8 @@ tsc_identify(struct cpu_info *ci)
 		tsc_is_invariant = 1;
 
 	tsc_frequency = tsc_freq_cpuid(ci);
+	if (tsc_frequency == 0)
+		tsc_frequency = tsc_freq_msr(ci);
 	if (tsc_frequency > 0)
 		delay_init(tsc_delay, 5000);
 }
@@ -170,6 +431,8 @@ measure_tsc_freq(struct timecounter *tc)
 	u_long s;
 	int delay_usec, i, err1, err2, usec, success = 0;
 
+	printf("tsc: calibrating with %s: ", tc->tc_name);
+
 	/* warmup the timers */
 	for (i = 0; i < 3; i++) {
 		(void)tc->tc_get_timecount(tc);
@@ -202,6 +465,8 @@ measure_tsc_freq(struct timecounter *tc)
 		min_freq = MIN(min_freq, frequency);
 		success++;
 	}
+
+	printf("%llu Hz\n", success > 1 ? min_freq : 0);
 
 	return (success > 1 ? min_freq : 0);
 }