In FRED mode, ERET is stricter than IRET about flags. Notably this means:
* The vm86 bit (bit 17) and IOPL (bits 12,13) must be clear.
* The sticky-1 reserved bit (bit 2) must be set, so dom0_construct() needs to
set X86_EFLAGS_MBS in order for a PV dom0 to start.
* All other reserved bits must be clear.
Xen has been overly lax with reserved bit handling. Adjust
arch_set_info_guest*() and hypercall_iret() which consume flags to clamp the
reserved bits for all guest types.
This is a minor ABI change, but is justified by the same argument as commit
9f892f84c279 ("x86/domctl: Stop using XLAT_cpu_user_regs()"): the reserved
bits would get clamped like this naturally by hardware when the vCPU is run.
The handling of vm86 is also different. Guests under 32bit Xen really could
use vm86 mode, but Long Mode disallows vm86 mode and IRET simply ignores the
bit. Xen's behaviour for a PV32 guest trying to use vm86 mode under a 64bit
Xen is to arrange to deliver #GP at the target of the IRET, rather than to
fail the IRET itself.
However there's no such filtering in arch_set_info_guest() itself, and it
can't arrange to queue a #GP at the target, so do the next best thing and fail
the hypercall. This is not expected to create an issue for PV guests, as the
result of such an arch_set_info_guest() previously would be to run supposedly
Real Mode code as Protected Mode code.
This allows PV guests to start when Xen is using FRED mode.
Signed-off-by: Andrew Cooper <[email protected]>
---
CC: Jan Beulich <[email protected]>
CC: Roger Pau Monné <[email protected]>
v4.1:
* Adjust VM handling.
* Rewrite commit message.
v3:
* Rewrite the commit message.
v2:
* New
It turns out that EFLAGS.VM is simply ignored by IRET in Long Mode (i.e.
clearing it in commit 0e47f92b0725 ("x86: force EFLAGS.IF on when exiting to
PV guests") wasn't actually necessary) but ERETU does care.
---
xen/arch/x86/domain.c | 24 ++++++++++++++++++++++--
xen/arch/x86/hvm/domain.c | 4 ++--
xen/arch/x86/include/asm/x86-defns.h | 7 +++++++
xen/arch/x86/pv/dom0_build.c | 2 +-
xen/arch/x86/pv/iret.c | 8 +++++---
5 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 868c26036dd9..4664264b2f5d 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1193,6 +1193,14 @@ int arch_set_info_guest(
if ( !__addr_ok(c.nat->ldt_base) )
return -EINVAL;
+
+ /*
+ * IRET in Long Mode discards EFLAGS.VM, but in FRED mode ERET
+ * cares that it is zero.
+ *
+ * Guests can't see FRED, so emulate IRET behaviour.
+ */
+ c.nat->user_regs.rflags &= ~X86_EFLAGS_VM;
}
#ifdef CONFIG_COMPAT
else
@@ -1205,6 +1213,18 @@ int arch_set_info_guest(
for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); i++ )
fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs);
+
+ /*
+ * Under 32bit Xen, PV guests could really use vm86 mode. Under
+ * 64bit Xen, vm86 mode can't be entered even by PV32 guests.
+ *
+ * For backwards compatibility, compat HYPERCALL_iret will arrange
+ * to deliver #GP at the target of the IRET rather than to fail
+ * the IRET itself, but we can't arrange for the same behaviour
+ * here. Reject the hypercall as the next best option.
+ */
+ if ( c.cmp->user_regs.eflags & X86_EFLAGS_VM )
+ return -EINVAL;
}
#endif
@@ -1244,7 +1264,7 @@ int arch_set_info_guest(
v->arch.user_regs.rax = c.nat->user_regs.rax;
v->arch.user_regs.rip = c.nat->user_regs.rip;
v->arch.user_regs.cs = c.nat->user_regs.cs;
- v->arch.user_regs.rflags = c.nat->user_regs.rflags;
+ v->arch.user_regs.rflags = (c.nat->user_regs.rflags &
X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.user_regs.rsp = c.nat->user_regs.rsp;
v->arch.user_regs.ss = c.nat->user_regs.ss;
v->arch.pv.es = c.nat->user_regs.es;
@@ -1268,7 +1288,7 @@ int arch_set_info_guest(
v->arch.user_regs.eax = c.cmp->user_regs.eax;
v->arch.user_regs.eip = c.cmp->user_regs.eip;
v->arch.user_regs.cs = c.cmp->user_regs.cs;
- v->arch.user_regs.eflags = c.cmp->user_regs.eflags;
+ v->arch.user_regs.eflags = (c.cmp->user_regs.eflags &
X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.user_regs.esp = c.cmp->user_regs.esp;
v->arch.user_regs.ss = c.cmp->user_regs.ss;
v->arch.pv.es = c.cmp->user_regs.es;
diff --git a/xen/arch/x86/hvm/domain.c b/xen/arch/x86/hvm/domain.c
index 155d61db13f8..a0e811ea47a0 100644
--- a/xen/arch/x86/hvm/domain.c
+++ b/xen/arch/x86/hvm/domain.c
@@ -194,7 +194,7 @@ int arch_set_info_hvm_guest(struct vcpu *v, const struct
vcpu_hvm_context *ctx)
uregs->rsi = regs->esi;
uregs->rdi = regs->edi;
uregs->rip = regs->eip;
- uregs->rflags = regs->eflags;
+ uregs->rflags = (regs->eflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.hvm.guest_cr[0] = regs->cr0;
v->arch.hvm.guest_cr[3] = regs->cr3;
@@ -245,7 +245,7 @@ int arch_set_info_hvm_guest(struct vcpu *v, const struct
vcpu_hvm_context *ctx)
uregs->rsi = regs->rsi;
uregs->rdi = regs->rdi;
uregs->rip = regs->rip;
- uregs->rflags = regs->rflags;
+ uregs->rflags = (regs->rflags & X86_EFLAGS_ALL) | X86_EFLAGS_MBS;
v->arch.hvm.guest_cr[0] = regs->cr0;
v->arch.hvm.guest_cr[3] = regs->cr3;
diff --git a/xen/arch/x86/include/asm/x86-defns.h
b/xen/arch/x86/include/asm/x86-defns.h
index 0a0ba83de786..edeb0b4ff95a 100644
--- a/xen/arch/x86/include/asm/x86-defns.h
+++ b/xen/arch/x86/include/asm/x86-defns.h
@@ -27,6 +27,13 @@
(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | \
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)
+#define X86_EFLAGS_ALL \
+ (X86_EFLAGS_ARITH_MASK | X86_EFLAGS_TF | X86_EFLAGS_IF | \
+ X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_IOPL | \
+ X86_EFLAGS_NT | X86_EFLAGS_RF | X86_EFLAGS_VM | \
+ X86_EFLAGS_AC | X86_EFLAGS_VIF | X86_EFLAGS_VIP | \
+ X86_EFLAGS_ID)
+
/*
* Intel CPU flags in CR0
*/
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 9a11a0a16b4e..075a3646c2a3 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -1024,7 +1024,7 @@ static int __init dom0_construct(const struct boot_domain
*bd)
regs->rip = parms.virt_entry;
regs->rsp = vstack_end;
regs->rsi = vstartinfo_start;
- regs->eflags = X86_EFLAGS_IF;
+ regs->eflags = X86_EFLAGS_IF | X86_EFLAGS_MBS;
/*
* We don't call arch_set_info_guest(), so some initialisation needs doing
diff --git a/xen/arch/x86/pv/iret.c b/xen/arch/x86/pv/iret.c
index d3a1fb2c685b..39ce316b8d91 100644
--- a/xen/arch/x86/pv/iret.c
+++ b/xen/arch/x86/pv/iret.c
@@ -80,8 +80,9 @@ long do_iret(void)
regs->rip = iret_saved.rip;
regs->cs = iret_saved.cs | 3; /* force guest privilege */
- regs->rflags = ((iret_saved.rflags & ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM))
- | X86_EFLAGS_IF);
+ regs->rflags = ((iret_saved.rflags & X86_EFLAGS_ALL &
+ ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) |
+ X86_EFLAGS_IF | X86_EFLAGS_MBS);
regs->rsp = iret_saved.rsp;
regs->ss = iret_saved.ss | 3; /* force guest privilege */
@@ -143,7 +144,8 @@ int compat_iret(void)
if ( VM_ASSIST(v->domain, architectural_iopl) )
v->arch.pv.iopl = eflags & X86_EFLAGS_IOPL;
- regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
+ regs->eflags = ((eflags & X86_EFLAGS_ALL & ~X86_EFLAGS_IOPL) |
+ X86_EFLAGS_IF | X86_EFLAGS_MBS);
if ( unlikely(eflags & X86_EFLAGS_VM) )
{
--
2.39.5