> From: Lengyel, Tamas <[email protected]>
> Sent: Friday, March 25, 2022 9:33 PM
> 
> During VM forking and resetting, a failed vmentry has been observed due
> to the guest non-register state going out-of-sync with the guest register
> state. For example, a VM fork reset right after a STI instruction can trigger
> the failed entry. This is because the guest non-register state is not saved
> from the parent VM, so the reset operation only copies the register state.
> 
> Fix this by adding a new pair of hvm functions to get/set the guest
> non-register state so that the overall vCPU state remains in sync.
> 
> Signed-off-by: Tamas K Lengyel <[email protected]>

Reviewed-by: Kevin Tian <[email protected]>

> ---
> v5: Switch to internal-only hvm funcs instead of adding to hvm_hw_cpu
> ---
>  xen/arch/x86/hvm/vmx/vmx.c         | 32 ++++++++++++++++++++++++
>  xen/arch/x86/include/asm/hvm/hvm.h | 40 ++++++++++++++++++++++++++++++
>  xen/arch/x86/mm/mem_sharing.c      | 11 +++++++-
>  3 files changed, 82 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index c075370f64..2685da16c8 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -1334,6 +1334,36 @@ static void cf_check vmx_set_interrupt_shadow(
>      __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
>  }
> 
> +static void cf_check vmx_get_nonreg_state(struct vcpu *v,
> +    struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    vmx_vmcs_enter(v);
> +
> +    __vmread(GUEST_ACTIVITY_STATE, &nrs->vmx.activity_state);
> +    __vmread(GUEST_INTERRUPTIBILITY_INFO, &nrs->vmx.interruptibility_info);
> +    __vmread(GUEST_PENDING_DBG_EXCEPTIONS, &nrs->vmx.pending_dbg);
> +
> +    if ( cpu_has_vmx_virtual_intr_delivery )
> +        __vmread(GUEST_INTR_STATUS, &nrs->vmx.interrupt_status);
> +
> +    vmx_vmcs_exit(v);
> +}
> +
> +static void cf_check vmx_set_nonreg_state(struct vcpu *v,
> +    struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    vmx_vmcs_enter(v);
> +
> +    __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state);
> +    __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info);
> +    __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg);
> +
> +    if ( cpu_has_vmx_virtual_intr_delivery )
> +        __vmwrite(GUEST_INTR_STATUS, nrs->vmx.interrupt_status);
> +
> +    vmx_vmcs_exit(v);
> +}
> +
>  static void vmx_load_pdptrs(struct vcpu *v)
>  {
>      uint32_t cr3 = v->arch.hvm.guest_cr[3];
> @@ -2487,6 +2517,8 @@ static struct hvm_function_table __initdata_cf_clobber vmx_function_table = {
>      .load_cpu_ctxt        = vmx_load_vmcs_ctxt,
>      .get_interrupt_shadow = vmx_get_interrupt_shadow,
>      .set_interrupt_shadow = vmx_set_interrupt_shadow,
> +    .get_nonreg_state     = vmx_get_nonreg_state,
> +    .set_nonreg_state     = vmx_set_nonreg_state,
>      .guest_x86_mode       = vmx_guest_x86_mode,
>      .get_cpl              = _vmx_get_cpl,
>      .get_segment_register = vmx_get_segment_register,
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
> index 5b7ec0cf69..9dee0f87a3 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -84,6 +84,17 @@ enum hvm_intblk {
>  /* update_guest_cr() flags. */
>  #define HVM_UPDATE_GUEST_CR3_NOFLUSH 0x00000001
> 
> +struct hvm_vcpu_nonreg_state {
> +    union {
> +        struct {
> +            uint64_t activity_state;
> +            uint64_t interruptibility_info;
> +            uint64_t pending_dbg;
> +            uint64_t interrupt_status;
> +        } vmx;
> +    };
> +};
> +
>  /*
>   * The hardware virtual machine (HVM) interface abstracts away from the
>   * x86/x86_64 CPU virtualization assist specifics. Currently this interface
> @@ -122,6 +133,10 @@ struct hvm_function_table {
>      /* Examine specifics of the guest state. */
>      unsigned int (*get_interrupt_shadow)(struct vcpu *v);
>      void (*set_interrupt_shadow)(struct vcpu *v, unsigned int intr_shadow);
> +    void (*get_nonreg_state)(struct vcpu *v,
> +                             struct hvm_vcpu_nonreg_state *nrs);
> +    void (*set_nonreg_state)(struct vcpu *v,
> +                             struct hvm_vcpu_nonreg_state *nrs);
>      int (*guest_x86_mode)(struct vcpu *v);
>      unsigned int (*get_cpl)(struct vcpu *v);
>      void (*get_segment_register)(struct vcpu *v, enum x86_segment seg,
> @@ -744,6 +759,20 @@ void hvm_set_reg(struct vcpu *v, unsigned int reg, uint64_t val);
>          d_->arch.hvm.pi_ops.vcpu_block(v_);                     \
>  })
> 
> +static inline void hvm_get_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    if ( hvm_funcs.get_nonreg_state )
> +        alternative_vcall(hvm_funcs.get_nonreg_state, v, nrs);
> +}
> +
> +static inline void hvm_set_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    if ( hvm_funcs.set_nonreg_state )
> +        alternative_vcall(hvm_funcs.set_nonreg_state, v, nrs);
> +}
> +
>  #else  /* CONFIG_HVM */
> 
>  #define hvm_enabled false
> @@ -863,6 +892,17 @@ static inline void hvm_set_reg(struct vcpu *v, unsigned int reg, uint64_t val)
>      ASSERT_UNREACHABLE();
>  }
> 
> +static inline void hvm_get_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +static inline void hvm_set_nonreg_state(struct vcpu *v,
> +                                        struct hvm_vcpu_nonreg_state *nrs)
> +{
> +    ASSERT_UNREACHABLE();
> +}
> +
>  #define is_viridian_domain(d) ((void)(d), false)
>  #define is_viridian_vcpu(v) ((void)(v), false)
>  #define has_viridian_time_ref_count(d) ((void)(d), false)
> diff --git a/xen/arch/x86/mm/mem_sharing.c b/xen/arch/x86/mm/mem_sharing.c
> index 15e6a7ed81..857accee58 100644
> --- a/xen/arch/x86/mm/mem_sharing.c
> +++ b/xen/arch/x86/mm/mem_sharing.c
> @@ -1643,6 +1643,13 @@ static int bring_up_vcpus(struct domain *cd, struct domain *d)
>      return 0;
>  }
> 
> +static void copy_vcpu_nonreg_state(struct vcpu *d_vcpu, struct vcpu *cd_vcpu)
> +{
> +    struct hvm_vcpu_nonreg_state nrs = {};
> +    hvm_get_nonreg_state(d_vcpu, &nrs);
> +    hvm_set_nonreg_state(cd_vcpu, &nrs);
> +}
> +
>  static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
>  {
>      unsigned int i;
> @@ -1651,7 +1658,7 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
> 
>      for ( i = 0; i < cd->max_vcpus; i++ )
>      {
> -        const struct vcpu *d_vcpu = d->vcpu[i];
> +        struct vcpu *d_vcpu = d->vcpu[i];
>          struct vcpu *cd_vcpu = cd->vcpu[i];
>          mfn_t vcpu_info_mfn;
> 
> @@ -1694,6 +1701,8 @@ static int copy_vcpu_settings(struct domain *cd, const struct domain *d)
> 
>          hvm_vmtrace_reset(cd_vcpu);
> 
> +        copy_vcpu_nonreg_state(d_vcpu, cd_vcpu);
> +
>          /*
>           * TODO: to support VMs with PV interfaces copy additional
>           * settings here, such as PV timers.
> --
> 2.25.1


Reply via email to