On Mon, Jan 08, 2024 at 04:27:19PM +0800, Zhao Liu wrote:
> From: Zhao Liu <[email protected]>
>
> Linux kernel (from v6.4, with commit edc0a2b595765 ("x86/topology: Fix
> erroneous smp_num_siblings on Intel Hybrid platforms")) is able to
> handle platforms with Module level enumerated via CPUID.1F.
>
> Expose the module level in CPUID[0x1F] if the machine has more than 1
> module.
>
> (Tested CPU topology in CPUID[0x1F] leaf with various die/cluster
> configurations in "-smp".)
>
> Signed-off-by: Zhao Liu <[email protected]>
> Tested-by: Babu Moger <[email protected]>
> Tested-by: Yongwei Ma <[email protected]>
> Acked-by: Michael S. Tsirkin <[email protected]>
> ---
> Changes since v3:
> * New patch to expose module level in 0x1F.
> * Add Tested-by tag from Yongwei.
> ---
> target/i386/cpu.c | 12 +++++++++++-
> target/i386/cpu.h | 2 ++
> target/i386/kvm/kvm.c | 2 +-
> 3 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index 294ca6b8947a..a2d39d2198b6 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -277,6 +277,8 @@ static uint32_t num_cpus_by_topo_level(X86CPUTopoInfo
> *topo_info,
> return 1;
> case CPU_TOPO_LEVEL_CORE:
> return topo_info->threads_per_core;
> + case CPU_TOPO_LEVEL_MODULE:
> + return topo_info->threads_per_core * topo_info->cores_per_module;
> case CPU_TOPO_LEVEL_DIE:
> return topo_info->threads_per_core * topo_info->cores_per_module *
> topo_info->modules_per_die;
> @@ -297,6 +299,8 @@ static uint32_t
> apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info,
> return 0;
> case CPU_TOPO_LEVEL_CORE:
> return apicid_core_offset(topo_info);
> + case CPU_TOPO_LEVEL_MODULE:
> + return apicid_module_offset(topo_info);
> case CPU_TOPO_LEVEL_DIE:
> return apicid_die_offset(topo_info);
> case CPU_TOPO_LEVEL_PACKAGE:
> @@ -316,6 +320,8 @@ static uint32_t cpuid1f_topo_type(enum CPUTopoLevel
> topo_level)
> return CPUID_1F_ECX_TOPO_LEVEL_SMT;
> case CPU_TOPO_LEVEL_CORE:
> return CPUID_1F_ECX_TOPO_LEVEL_CORE;
> + case CPU_TOPO_LEVEL_MODULE:
> + return CPUID_1F_ECX_TOPO_LEVEL_MODULE;
> case CPU_TOPO_LEVEL_DIE:
> return CPUID_1F_ECX_TOPO_LEVEL_DIE;
> default:
> @@ -347,6 +353,10 @@ static void encode_topo_cpuid1f(CPUX86State *env,
> uint32_t count,
> if (env->nr_dies > 1) {
> set_bit(CPU_TOPO_LEVEL_DIE, topo_bitmap);
> }
> +
> + if (env->nr_modules > 1) {
> + set_bit(CPU_TOPO_LEVEL_MODULE, topo_bitmap);
> + }
> }
>
> *ecx = count & 0xff;
> @@ -6394,7 +6404,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
> uint32_t count,
> break;
> case 0x1F:
> /* V2 Extended Topology Enumeration Leaf */
> - if (topo_info.dies_per_pkg < 2) {
> + if (topo_info.modules_per_die < 2 && topo_info.dies_per_pkg < 2) {
A question:
Is the original check necessary?
Leaf 0x1f exists on bare metal even on CPUs without module/die topology. I tried
on EMR:
// leaf 0
0x00000000 0x00: eax=0x00000020 ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
// leaf 0x1f
0x0000001f 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
0x0000001f 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
0x0000001f 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004
// leaf 0xb
0x0000000b 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000004
0x0000000b 0x01: eax=0x00000007 ebx=0x00000080 ecx=0x00000201 edx=0x00000004
0x0000000b 0x02: eax=0x00000000 ebx=0x00000000 ecx=0x00000002 edx=0x00000004
So this check leads to CPU behavior that differs from bare metal, even in the
case of "-cpu host".
In SDM Vol. 2, CPUID instruction section:
" CPUID leaf 1FH is a preferred superset to leaf 0BH. Intel
recommends using leaf 1FH when available rather than leaf
0BH and ensuring that any leaf 0BH algorithms are updated to
support leaf 1FH. "
My understanding: if leaf 0x1f exists (CPUID leaf 0 EAX >= 0x1f),
then it should report the same values at the logical-processor/core levels as leaf 0xb.
> *eax = *ebx = *ecx = *edx = 0;
> break;
> }
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index eecd30bde92b..97b290e10576 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1018,6 +1018,7 @@ enum CPUTopoLevel {
> CPU_TOPO_LEVEL_INVALID,
> CPU_TOPO_LEVEL_SMT,
> CPU_TOPO_LEVEL_CORE,
> + CPU_TOPO_LEVEL_MODULE,
> CPU_TOPO_LEVEL_DIE,
> CPU_TOPO_LEVEL_PACKAGE,
> CPU_TOPO_LEVEL_MAX,
> @@ -1032,6 +1033,7 @@ enum CPUTopoLevel {
> #define CPUID_1F_ECX_TOPO_LEVEL_INVALID CPUID_B_ECX_TOPO_LEVEL_INVALID
> #define CPUID_1F_ECX_TOPO_LEVEL_SMT CPUID_B_ECX_TOPO_LEVEL_SMT
> #define CPUID_1F_ECX_TOPO_LEVEL_CORE CPUID_B_ECX_TOPO_LEVEL_CORE
> +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE 3
> #define CPUID_1F_ECX_TOPO_LEVEL_DIE 5
>
> /* MSR Feature Bits */
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 4ce80555b45c..e5ddb214cb36 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1913,7 +1913,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
> break;
> }
> case 0x1f:
> - if (env->nr_dies < 2) {
> + if (env->nr_modules < 2 && env->nr_dies < 2) {
> break;
> }
> /* fallthrough */
> --
> 2.34.1
>
>