On 19.11.2025 20:31, Grygorii Strashko wrote:
> From: Grygorii Strashko <[email protected]>
>
> The VMTRACE feature is depends on Platform/Arch HW and code support and now
> can be used only on x86 HVM with Intel VT-x (INTEL_VMX) enabled.
> This makes VMTRACE support optional by introducing HVM Kconfig option:
> - CONFIG_VMTRACE to enable/disable the feature.
>
> Signed-off-by: Grygorii Strashko <[email protected]>
> ---
> changes in v4:
> - format changes
>
> changes in v3:
> - drop vmtrace stubs for HVM=n case from hvm.h (VMTRACE depnds on HVM)
> - hvm_vmtrace_reset() fix return err code
> - add comment about using func declaration without definition
>
> changes in v2:
> - fix comments from Jan Beulich
> - move CONFIG_VMTRACE in HVM
> - drop HAS_VMTRACE
>
> v3:
>
> https://patchwork.kernel.org/project/xen-devel/patch/[email protected]/
> v2:
>
> https://patchwork.kernel.org/project/xen-devel/patch/[email protected]/
>
> v1:
>
> https://patchwork.kernel.org/project/xen-devel/patch/[email protected]/
>
> xen/arch/x86/domctl.c | 4 +++
> xen/arch/x86/hvm/Kconfig | 12 +++++++++
> xen/arch/x86/hvm/vmx/vmcs.c | 2 ++
> xen/arch/x86/hvm/vmx/vmx.c | 11 ++++++++
> xen/arch/x86/include/asm/guest-msr.h | 2 ++
> xen/arch/x86/include/asm/hvm/hvm.h | 36 ++++++++++---------------
> xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 2 ++
> xen/arch/x86/mm/mem_sharing.c | 2 ++
> xen/arch/x86/vm_event.c | 8 +++---
> xen/common/domain.c | 10 +++++++
> xen/common/memory.c | 6 +++++
> xen/common/vm_event.c | 3 ++-
> xen/include/xen/domain.h | 4 +++
> xen/include/xen/sched.h | 4 +++
> 14 files changed, 80 insertions(+), 26 deletions(-)
Tamas - any chance of an ack here?
Grygorii - chasing missing acks is generally the submitter's responsibility.
Jan
> diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
> index 6153e3c07e2d..d9521808dcba 100644
> --- a/xen/arch/x86/domctl.c
> +++ b/xen/arch/x86/domctl.c
> @@ -154,6 +154,7 @@ void arch_get_domain_info(const struct domain *d,
> static int do_vmtrace_op(struct domain *d, struct xen_domctl_vmtrace_op *op,
> XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
> {
> +#ifdef CONFIG_VMTRACE
> struct vcpu *v;
> int rc;
>
> @@ -198,6 +199,9 @@ static int do_vmtrace_op(struct domain *d, struct
> xen_domctl_vmtrace_op *op,
> vcpu_unpause(v);
>
> return rc;
> +#else
> + return -EOPNOTSUPP;
> +#endif
> }
>
> #define MAX_IOPORTS 0x10000
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index c1a131d1851a..f75dec5ea40a 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -35,6 +35,18 @@ config INTEL_VMX
> If your system includes a processor with Intel VT-x support, say Y.
> If in doubt, say Y.
>
> +config VMTRACE
> + bool "HW VM tracing support"
> + depends on INTEL_VMX
> + default y
> + help
> + Enables HW VM tracing support which allows to configure HW processor
> + features (vmtrace_op) to enable capturing information about software
> + execution using dedicated hardware facilities with minimal
> interference
> + to the software being traced. The trace data can be retrieved using
> buffer
> + shared between Xen and domain
> + (see XENMEM_acquire_resource(XENMEM_resource_vmtrace_buf)).
> +
> config HVM_FEP
> bool "HVM Forced Emulation Prefix support (UNSUPPORTED)" if UNSUPPORTED
> default DEBUG
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index d610988bf91b..c2e7f9aed39f 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -306,6 +306,7 @@ static int vmx_init_vmcs_config(bool bsp)
>
> rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
>
> +#ifdef CONFIG_VMTRACE
> /* Check whether IPT is supported in VMX operation. */
> if ( bsp )
> vmtrace_available = cpu_has_proc_trace &&
> @@ -317,6 +318,7 @@ static int vmx_init_vmcs_config(bool bsp)
> smp_processor_id());
> return -EINVAL;
> }
> +#endif
>
> if ( caps.cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS
> )
> {
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index d042e7fb927f..c98ec110d144 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -622,6 +622,7 @@ static void cf_check domain_creation_finished(struct
> domain *d)
>
> static void vmx_init_ipt(struct vcpu *v)
> {
> +#ifdef CONFIG_VMTRACE
> unsigned int size = v->domain->vmtrace_size;
>
> if ( !size )
> @@ -632,6 +633,7 @@ static void vmx_init_ipt(struct vcpu *v)
> ASSERT(size >= PAGE_SIZE && (size & (size - 1)) == 0);
>
> v->arch.msrs->rtit.output_limit = size - 1;
> +#endif
> }
>
> static int cf_check vmx_vcpu_initialise(struct vcpu *v)
> @@ -724,11 +726,13 @@ static void vmx_save_guest_msrs(struct vcpu *v)
> */
> v->arch.hvm.vmx.shadow_gs = read_gs_shadow();
>
> +#ifdef CONFIG_VMTRACE
> if ( v->arch.hvm.vmx.ipt_active )
> {
> rdmsrl(MSR_RTIT_OUTPUT_MASK, msrs->rtit.output_mask);
> rdmsrl(MSR_RTIT_STATUS, msrs->rtit.status);
> }
> +#endif
>
> if ( cp->feat.pks )
> msrs->pkrs = rdpkrs_and_cache();
> @@ -747,12 +751,14 @@ static void vmx_restore_guest_msrs(struct vcpu *v)
> if ( cpu_has_msr_tsc_aux )
> wrmsr_tsc_aux(msrs->tsc_aux);
>
> +#ifdef CONFIG_VMTRACE
> if ( v->arch.hvm.vmx.ipt_active )
> {
> wrmsrl(MSR_RTIT_OUTPUT_BASE, page_to_maddr(v->vmtrace.pg));
> wrmsrl(MSR_RTIT_OUTPUT_MASK, msrs->rtit.output_mask);
> wrmsrl(MSR_RTIT_STATUS, msrs->rtit.status);
> }
> +#endif
>
> if ( cp->feat.pks )
> wrpkrs(msrs->pkrs);
> @@ -2626,6 +2632,7 @@ static bool cf_check vmx_get_pending_event(
> return true;
> }
>
> +#ifdef CONFIG_VMTRACE
> /*
> * We only let vmtrace agents see and modify a subset of bits in
> MSR_RTIT_CTL.
> * These all pertain to data-emitted into the trace buffer(s). Must not
> @@ -2768,6 +2775,7 @@ static int cf_check vmtrace_reset(struct vcpu *v)
> v->arch.msrs->rtit.status = 0;
> return 0;
> }
> +#endif
>
> static uint64_t cf_check vmx_get_reg(struct vcpu *v, unsigned int reg)
> {
> @@ -2940,11 +2948,14 @@ static struct hvm_function_table
> __initdata_cf_clobber vmx_function_table = {
> .altp2m_vcpu_emulate_ve = vmx_vcpu_emulate_ve,
> .altp2m_vcpu_emulate_vmfunc = vmx_vcpu_emulate_vmfunc,
> #endif
> +
> +#ifdef CONFIG_VMTRACE
> .vmtrace_control = vmtrace_control,
> .vmtrace_output_position = vmtrace_output_position,
> .vmtrace_set_option = vmtrace_set_option,
> .vmtrace_get_option = vmtrace_get_option,
> .vmtrace_reset = vmtrace_reset,
> +#endif
>
> .get_reg = vmx_get_reg,
> .set_reg = vmx_set_reg,
> diff --git a/xen/arch/x86/include/asm/guest-msr.h
> b/xen/arch/x86/include/asm/guest-msr.h
> index 5f0cb0a93995..702f47fe1e16 100644
> --- a/xen/arch/x86/include/asm/guest-msr.h
> +++ b/xen/arch/x86/include/asm/guest-msr.h
> @@ -50,6 +50,7 @@ struct vcpu_msrs
> };
> } misc_features_enables;
>
> +#ifdef CONFIG_VMTRACE
> /*
> * 0x00000560 ... 57x - MSR_RTIT_*
> *
> @@ -81,6 +82,7 @@ struct vcpu_msrs
> };
> };
> } rtit;
> +#endif
>
> /*
> * 0x000006e1 - MSR_PKRS - Protection Key Supervisor.
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h
> b/xen/arch/x86/include/asm/hvm/hvm.h
> index 7412256a2dab..93da4dd2dc4b 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -234,12 +234,14 @@ struct hvm_function_table {
> int (*altp2m_vcpu_emulate_vmfunc)(const struct cpu_user_regs *regs);
> #endif
>
> +#ifdef CONFIG_VMTRACE
> /* vmtrace */
> int (*vmtrace_control)(struct vcpu *v, bool enable, bool reset);
> int (*vmtrace_output_position)(struct vcpu *v, uint64_t *pos);
> int (*vmtrace_set_option)(struct vcpu *v, uint64_t key, uint64_t value);
> int (*vmtrace_get_option)(struct vcpu *v, uint64_t key, uint64_t *value);
> int (*vmtrace_reset)(struct vcpu *v);
> +#endif
>
> uint64_t (*get_reg)(struct vcpu *v, unsigned int reg);
> void (*set_reg)(struct vcpu *v, unsigned int reg, uint64_t val);
> @@ -738,6 +740,7 @@ static inline bool altp2m_vcpu_emulate_ve(struct vcpu *v)
> bool altp2m_vcpu_emulate_ve(struct vcpu *v);
> #endif /* CONFIG_ALTP2M */
>
> +#ifdef CONFIG_VMTRACE
> static inline int hvm_vmtrace_control(struct vcpu *v, bool enable, bool
> reset)
> {
> if ( hvm_funcs.vmtrace_control )
> @@ -772,13 +775,24 @@ static inline int hvm_vmtrace_get_option(
>
> return -EOPNOTSUPP;
> }
> +#else
> +/*
> + * Function declaration(s) here are used without definition(s) to make
> compiler
> + * happy when VMTRACE=n, compiler DCE will eliminate unused code.
> + */
> +int hvm_vmtrace_output_position(struct vcpu *v, uint64_t *pos);
> +#endif
>
> static inline int hvm_vmtrace_reset(struct vcpu *v)
> {
> +#ifdef CONFIG_VMTRACE
> if ( hvm_funcs.vmtrace_reset )
> return alternative_call(hvm_funcs.vmtrace_reset, v);
>
> return -EOPNOTSUPP;
> +#else
> + return -EOPNOTSUPP;
> +#endif
> }
>
> /*
> @@ -934,28 +948,6 @@ static inline bool
> hvm_has_set_descriptor_access_exiting(void)
> return false;
> }
>
> -static inline int hvm_vmtrace_control(struct vcpu *v, bool enable, bool
> reset)
> -{
> - return -EOPNOTSUPP;
> -}
> -
> -static inline int hvm_vmtrace_output_position(struct vcpu *v, uint64_t *pos)
> -{
> - return -EOPNOTSUPP;
> -}
> -
> -static inline int hvm_vmtrace_set_option(
> - struct vcpu *v, uint64_t key, uint64_t value)
> -{
> - return -EOPNOTSUPP;
> -}
> -
> -static inline int hvm_vmtrace_get_option(
> - struct vcpu *v, uint64_t key, uint64_t *value)
> -{
> - return -EOPNOTSUPP;
> -}
> -
> static inline uint64_t hvm_get_reg(struct vcpu *v, unsigned int reg)
> {
> ASSERT_UNREACHABLE();
> diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> index 8ff7c8045fc6..879ec10cefd0 100644
> --- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h
> @@ -154,8 +154,10 @@ struct vmx_vcpu {
> /* Do we need to tolerate a spurious EPT_MISCONFIG VM exit? */
> bool ept_spurious_misconfig;
>
> +#ifdef CONFIG_VMTRACE
> /* Processor Trace configured and enabled for the vcpu. */
> bool ipt_active;
> +#endif
>
> /* Is the guest in real mode? */
> uint8_t vmx_realmode;
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index 4787b2796479..074f1b2562b3 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1888,7 +1888,9 @@ static int fork(struct domain *cd, struct domain *d)
> domain_pause(d);
> cd->max_pages = d->max_pages;
> *cd->arch.cpu_policy = *d->arch.cpu_policy;
> +#ifdef CONFIG_VMTRACE
> cd->vmtrace_size = d->vmtrace_size;
> +#endif
> cd->parent = d;
> }
>
> diff --git a/xen/arch/x86/vm_event.c b/xen/arch/x86/vm_event.c
> index fc349270b9c5..112d2ef66dc7 100644
> --- a/xen/arch/x86/vm_event.c
> +++ b/xen/arch/x86/vm_event.c
> @@ -253,7 +253,9 @@ void vm_event_fill_regs(vm_event_request_t *req)
> req->data.regs.x86.shadow_gs = ctxt.shadow_gs;
> req->data.regs.x86.dr6 = ctxt.dr6;
>
> - if ( hvm_vmtrace_output_position(curr, &req->data.regs.x86.vmtrace_pos)
> != 1 )
> + if ( IS_ENABLED(CONFIG_VMTRACE) &&
> + hvm_vmtrace_output_position(curr,
> + &req->data.regs.x86.vmtrace_pos) != 1 )
> req->data.regs.x86.vmtrace_pos = ~0;
> #endif
> }
> @@ -303,12 +305,12 @@ void vm_event_emulate_check(struct vcpu *v,
> vm_event_response_t *rsp)
> #endif
> }
>
> +#ifdef CONFIG_VMTRACE
> void vm_event_reset_vmtrace(struct vcpu *v)
> {
> -#ifdef CONFIG_HVM
> hvm_vmtrace_reset(v);
> -#endif
> }
> +#endif
>
> /*
> * Local variables:
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index e13e01c1d272..ce4f55339fb4 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -136,7 +136,9 @@ struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
>
> vcpu_info_t dummy_vcpu_info;
>
> +#ifdef CONFIG_VMTRACE
> bool __read_mostly vmtrace_available;
> +#endif
>
> bool __read_mostly vpmu_is_available;
>
> @@ -318,6 +320,7 @@ static void vcpu_info_reset(struct vcpu *v)
>
> static void vmtrace_free_buffer(struct vcpu *v)
> {
> +#ifdef CONFIG_VMTRACE
> const struct domain *d = v->domain;
> struct page_info *pg = v->vmtrace.pg;
> unsigned int i;
> @@ -332,10 +335,12 @@ static void vmtrace_free_buffer(struct vcpu *v)
> put_page_alloc_ref(&pg[i]);
> put_page_and_type(&pg[i]);
> }
> +#endif
> }
>
> static int vmtrace_alloc_buffer(struct vcpu *v)
> {
> +#ifdef CONFIG_VMTRACE
> struct domain *d = v->domain;
> struct page_info *pg;
> unsigned int i;
> @@ -377,6 +382,9 @@ static int vmtrace_alloc_buffer(struct vcpu *v)
> }
>
> return -ENODATA;
> +#else
> + return 0;
> +#endif
> }
>
> /*
> @@ -825,7 +833,9 @@ struct domain *domain_create(domid_t domid,
> ASSERT(!config->altp2m.nr);
> #endif
>
> +#ifdef CONFIG_VMTRACE
> d->vmtrace_size = config->vmtrace_size;
> +#endif
> }
>
> /* Sort out our idea of is_control_domain(). */
> diff --git a/xen/common/memory.c b/xen/common/memory.c
> index 3688e6dd5032..66dc7f7a0a41 100644
> --- a/xen/common/memory.c
> +++ b/xen/common/memory.c
> @@ -1155,8 +1155,10 @@ static unsigned int resource_max_frames(const struct
> domain *d,
> case XENMEM_resource_ioreq_server:
> return ioreq_server_max_frames(d);
>
> +#ifdef CONFIG_VMTRACE
> case XENMEM_resource_vmtrace_buf:
> return d->vmtrace_size >> PAGE_SHIFT;
> +#endif
>
> default:
> return 0;
> @@ -1198,6 +1200,7 @@ static int acquire_ioreq_server(struct domain *d,
> #endif
> }
>
> +#ifdef CONFIG_VMTRACE
> static int acquire_vmtrace_buf(
> struct domain *d, unsigned int id, unsigned int frame,
> unsigned int nr_frames, xen_pfn_t mfn_list[])
> @@ -1220,6 +1223,7 @@ static int acquire_vmtrace_buf(
>
> return nr_frames;
> }
> +#endif
>
> /*
> * Returns -errno on error, or positive in the range [1, nr_frames] on
> @@ -1238,8 +1242,10 @@ static int _acquire_resource(
> case XENMEM_resource_ioreq_server:
> return acquire_ioreq_server(d, id, frame, nr_frames, mfn_list);
>
> +#ifdef CONFIG_VMTRACE
> case XENMEM_resource_vmtrace_buf:
> return acquire_vmtrace_buf(d, id, frame, nr_frames, mfn_list);
> +#endif
>
> default:
> ASSERT_UNREACHABLE();
> diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c
> index b2787c010890..cf0258223f50 100644
> --- a/xen/common/vm_event.c
> +++ b/xen/common/vm_event.c
> @@ -441,7 +441,8 @@ static int vm_event_resume(struct domain *d, struct
> vm_event_domain *ved)
> if ( rsp.flags & VM_EVENT_FLAG_GET_NEXT_INTERRUPT )
> vm_event_monitor_next_interrupt(v);
>
> - if ( rsp.flags & VM_EVENT_FLAG_RESET_VMTRACE )
> + if ( IS_ENABLED(CONFIG_VMTRACE) &&
> + (rsp.flags & VM_EVENT_FLAG_RESET_VMTRACE) )
> vm_event_reset_vmtrace(v);
>
> if ( rsp.flags & VM_EVENT_FLAG_VCPU_PAUSED )
> diff --git a/xen/include/xen/domain.h b/xen/include/xen/domain.h
> index 8aab05ae93c8..aa86a9f46022 100644
> --- a/xen/include/xen/domain.h
> +++ b/xen/include/xen/domain.h
> @@ -191,7 +191,11 @@ void vnuma_destroy(struct vnuma_info *vnuma);
> static inline void vnuma_destroy(struct vnuma_info *vnuma) { ASSERT(!vnuma);
> }
> #endif
>
> +#ifdef CONFIG_VMTRACE
> extern bool vmtrace_available;
> +#else
> +#define vmtrace_available false
> +#endif
>
> extern bool vpmu_is_available;
>
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 02bdc256ce37..dcd8647e0d36 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -300,9 +300,11 @@ struct vcpu
> /* vPCI per-vCPU area, used to store data for long running operations. */
> struct vpci_vcpu vpci;
>
> +#ifdef CONFIG_VMTRACE
> struct {
> struct page_info *pg; /* One contiguous allocation of
> d->vmtrace_size */
> } vmtrace;
> +#endif
>
> struct arch_vcpu arch;
>
> @@ -623,7 +625,9 @@ struct domain
> unsigned int nr_altp2m; /* Number of altp2m tables. */
> #endif
>
> +#ifdef CONFIG_VMTRACE
> unsigned int vmtrace_size; /* Buffer size in bytes, or 0 to disable. */
> +#endif
>
> #ifdef CONFIG_ARGO
> /* Argo interdomain communication support */