On 2/9/2026 10:57 PM, Mi, Dapeng wrote:
> 
> On 1/29/2026 7:09 AM, Zide Chen wrote:
>> Newer Intel server CPUs support a large number of PMU MSRs.  Currently,
>> QEMU allocates cpu->kvm_msr_buf as a single-page buffer, which is not
>> sufficient to hold all possible MSRs.
>>
>> Increase MSR_BUF_SIZE to 8192 bytes, providing space for up to 511 MSRs.
>> This is sufficient even for the theoretical worst case, such as
>> architectural LBR with a depth of 64.
>>
>> KVM_[GET/SET]_MSRS is limited to 255 MSRs per call.  Raising this limit
>> to 511 would require changes in KVM and would introduce backward
>> compatibility issues.  Instead, split requests into multiple
>> KVM_[GET/SET]_MSRS calls when the number of MSRs exceeds the API limit.
>>
>> Signed-off-by: Zide Chen <[email protected]>
>> ---
>> V2:
>> - No changes.
>>
>>  target/i386/kvm/kvm.c | 109 +++++++++++++++++++++++++++++++++++-------
>>  1 file changed, 92 insertions(+), 17 deletions(-)
>>
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>> index 530f50e4b218..a2cf9b5df35d 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -98,9 +98,12 @@
>>  #define KVM_APIC_BUS_CYCLE_NS       1
>>  #define KVM_APIC_BUS_FREQUENCY      (1000000000ULL / KVM_APIC_BUS_CYCLE_NS)
>>  
>> -/* A 4096-byte buffer can hold the 8-byte kvm_msrs header, plus
>> - * 255 kvm_msr_entry structs */
>> -#define MSR_BUF_SIZE 4096
>> +/* An 8192-byte buffer can hold the 8-byte kvm_msrs header, plus
>> + * 511 kvm_msr_entry structs */
>> +#define MSR_BUF_SIZE      8192
>> +
>> +/* Maximum number of MSRs in one single KVM_[GET/SET]_MSRS call. */
>> +#define KVM_MAX_IO_MSRS   255
>>  
>>  typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val);
>>  typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val);
>> @@ -3878,23 +3881,102 @@ static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f)
>>      }
>>  }
>>  
>> -static int kvm_buf_set_msrs(X86CPU *cpu)
>> +static int __kvm_buf_set_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
>>  {
>> -    int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf);
>> +    int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, msrs);
>>      if (ret < 0) {
>>          return ret;
>>      }
>>  
>> -    if (ret < cpu->kvm_msr_buf->nmsrs) {
>> -        struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret];
>> +    if (ret < msrs->nmsrs) {
>> +        struct kvm_msr_entry *e = &msrs->entries[ret];
>>          error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64,
>>                       (uint32_t)e->index, (uint64_t)e->data);
>>      }
>>  
>> -    assert(ret == cpu->kvm_msr_buf->nmsrs);
>> +    assert(ret == msrs->nmsrs);
>> +    return ret;
>> +}
>> +
>> +static int __kvm_buf_get_msrs(X86CPU *cpu, struct kvm_msrs *msrs)
>> +{
>> +    int ret;
>> +
>> +    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, msrs);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    if (ret < msrs->nmsrs) {
>> +        struct kvm_msr_entry *e = &msrs->entries[ret];
>> +        error_report("error: failed to get MSR 0x%" PRIx32,
>> +                     (uint32_t)e->index);
>> +    }
>> +
>> +    assert(ret == msrs->nmsrs);
>> +    return ret;
>> +}
>> +
>> +static int kvm_buf_set_or_get_msrs(X86CPU *cpu, bool is_write)
>> +{
>> +    struct kvm_msr_entry *entries = cpu->kvm_msr_buf->entries;
>> +    struct kvm_msrs *buf = NULL;
>> +    int current, remaining, ret = 0;
>> +    size_t buf_size;
>> +
>> +    buf_size = KVM_MAX_IO_MSRS * sizeof(struct kvm_msr_entry) +
>> +               sizeof(struct kvm_msrs);
>> +    buf = g_malloc(buf_size);
>> +
>> +    remaining = cpu->kvm_msr_buf->nmsrs;
>> +    current = 0;
>> +    while (remaining) {
>> +        size_t size;
>> +
>> +        memset(buf, 0, buf_size);
>> +
>> +        if (remaining > KVM_MAX_IO_MSRS) {
>> +            buf->nmsrs = KVM_MAX_IO_MSRS;
>> +        } else {
>> +            buf->nmsrs = remaining;
>> +        }
>> +
>> +        size = buf->nmsrs * sizeof(entries[0]);
>> +        memcpy(buf->entries, &entries[current], size);
>> +
>> +        if (is_write) {
>> +            ret = __kvm_buf_set_msrs(cpu, buf);
>> +        } else {
>> +            ret = __kvm_buf_get_msrs(cpu, buf);
>> +        }
>> +
>> +        if (ret < 0) {
>> +            goto out;
>> +        }
>> +
>> +        if (!is_write)
>> +            memcpy(&entries[current], buf->entries, size);
>> +
>> +        current += buf->nmsrs;
>> +        remaining -= buf->nmsrs;
>> +    }
>> +
>> +out:
>> +    g_free(buf);
>> +    return ret < 0 ? ret : cpu->kvm_msr_buf->nmsrs;
>> +}
>> +
>> +static int kvm_buf_set_msrs(X86CPU *cpu)
> 
> Better to add inline.

Yes, thanks.

> 
> 
>> +{
>> +    kvm_buf_set_or_get_msrs(cpu, true);
>>      return 0;
> 
> Why not directly return the return value of kvm_buf_set_or_get_msrs()?

Yes, agreed.

> 
>>  }
>>  
>> +static int kvm_buf_get_msrs(X86CPU *cpu)
> 
> inline.

Yes, thanks.

> 
> Others look good to me.

Reply via email to