If max leaf is greater than 0xd but XSAVE is not available to the guest, then
the current XSAVE size should not be filled in. This is a latent bug for now,
as the guest max leaf is 0xd, but it will become problematic in the future.
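
Illustratively (this simply condenses the guest_cpuid() hunk below; p, v and
res are the names already in scope there), subleaf 0 becomes gated on the
guest's view of XSAVE:

    case 0:
        /* Only report a size if the guest can actually see XSAVE. */
        if ( p->basic.xsave )
            res->b = xstate_uncompressed_size(v->arch.xcr0);
        break;
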
The comment concerning XSS state is wrong. VT-x doesn't manage host/guest
MSR_XSS automatically, but there is provision for "host only" bits to be set,
so the implication (that the wrong XSS would be in context) is still accurate.
Introduce {xstate,hw}_compressed_size() helpers to mirror the uncompressed
ones.
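
In outline (condensed from the new xstate_compressed_size() below), the
compressed layout is the 512-byte legacy area plus the 64-byte XSAVE header,
followed by every enabled component in order, with components which hardware
reports as 64-byte aligned rounded up first:

    unsigned int size = XSTATE_AREA_MIN_SIZE;  /* legacy area + XSAVE header */

    xstates &= ~XSTATE_FP_SSE;                 /* FP/SSE live in the legacy area */
    for_each_set_bit ( i, &xstates, 63 )
    {
        if ( test_bit(i, &xstate_align) )      /* CPUID.0xD[i].ECX[1] */
            size = ROUNDUP(size, 64);

        size += xstate_sizes[i];
    }
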
This in turn highlights a bug in xstate_init(). Defaulting this_cpu(xss) to ~0
requires a forced write to clear it back out, which in turn means it is only a
safe default on systems with XSAVES.
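
For context, set_msr_xss() is a lazy-write wrapper of roughly this shape
(paraphrased from asm/xstate.h, not quoted exactly), so a stale ~0 only gets
cleaned up if something subsequently calls set_msr_xss(0), and caching ~0 is
only legitimate when XSAVES (and hence MSR_XSS) actually exists:

    static inline void set_msr_xss(u64 xss)
    {
        u64 *this_xss = &this_cpu(xss);

        /* Skip the expensive WRMSR when the cached value already matches. */
        if ( *this_xss != xss )
        {
            wrmsrl(MSR_XSS, xss);
            *this_xss = xss;
        }
    }
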
Signed-off-by: Andrew Cooper <[email protected]>
---
CC: Jan Beulich <[email protected]>
CC: Roger Pau Monné <[email protected]>
The more I think about it, the more convinced I am that cross-checking with
hardware is a bad move. It's horribly expensive and, for supervisor states in
particular, liable to interfere with functionality.
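
For reference, each debug-build cross-check (hw_compressed_size() below) costs
up to two XCR0 writes, up to two MSR_XSS writes and a CPUID, with the target
state temporarily loaded in place of the current one:

    set_xcr0(xstates & ~XSTATE_XSAVES_ONLY);    /* XSETBV */
    set_msr_xss(xstates & XSTATE_XSAVES_ONLY);  /* WRMSR */
    size = cpuid_count_ebx(XSTATE_CPUID, 1);    /* CPUID */
    set_xcr0(curr_xcr0);                        /* XSETBV to restore */
    set_msr_xss(curr_xss);                      /* WRMSR to restore */
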
v2:
* Cope with blind reads of CPUID.0xD[1] prior to %xcr0 having been set up.
---
xen/arch/x86/cpuid.c | 24 ++++--------
xen/arch/x86/include/asm/xstate.h | 1 +
xen/arch/x86/xstate.c | 64 ++++++++++++++++++++++++++++++-
3 files changed, 72 insertions(+), 17 deletions(-)
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 7a38e032146a..a822e80c7ea7 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -330,23 +330,15 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
case XSTATE_CPUID:
switch ( subleaf )
{
- case 1:
- if ( !p->xstate.xsavec && !p->xstate.xsaves )
- break;
-
- /*
- * TODO: Figure out what to do for XSS state. VT-x manages host
- * vs guest MSR_XSS automatically, so as soon as we start
- * supporting any XSS states, the wrong XSS will be in context.
- */
- BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0);
- fallthrough;
case 0:
- /*
- * Read CPUID[0xD,0/1].EBX from hardware. They vary with enabled
- * XSTATE, and appropriate XCR0|XSS are in context.
- */
- res->b = cpuid_count_ebx(leaf, subleaf);
+ if ( p->basic.xsave )
+ res->b = xstate_uncompressed_size(v->arch.xcr0);
+ break;
+
+ case 1:
+ if ( p->xstate.xsavec )
+ res->b = xstate_compressed_size(v->arch.xcr0 |
+ v->arch.msrs->xss.raw);
break;
}
break;
diff --git a/xen/arch/x86/include/asm/xstate.h b/xen/arch/x86/include/asm/xstate.h
index bfb66dd766b6..da1d89d2f416 100644
--- a/xen/arch/x86/include/asm/xstate.h
+++ b/xen/arch/x86/include/asm/xstate.h
@@ -109,6 +109,7 @@ void xstate_free_save_area(struct vcpu *v);
int xstate_alloc_save_area(struct vcpu *v);
void xstate_init(struct cpuinfo_x86 *c);
unsigned int xstate_uncompressed_size(uint64_t xcr0);
+unsigned int xstate_compressed_size(uint64_t xstates);
static inline uint64_t xgetbv(unsigned int index)
{
diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
index a94f4025fce5..b2cf3ae6acee 100644
--- a/xen/arch/x86/xstate.c
+++ b/xen/arch/x86/xstate.c
@@ -614,6 +614,65 @@ unsigned int xstate_uncompressed_size(uint64_t xcr0)
return size;
}
+static unsigned int hw_compressed_size(uint64_t xstates)
+{
+ uint64_t curr_xcr0 = get_xcr0(), curr_xss = get_msr_xss();
+ unsigned int size;
+ bool ok;
+
+ ok = set_xcr0(xstates & ~XSTATE_XSAVES_ONLY);
+ ASSERT(ok);
+ set_msr_xss(xstates & XSTATE_XSAVES_ONLY);
+
+ size = cpuid_count_ebx(XSTATE_CPUID, 1);
+
+ ok = set_xcr0(curr_xcr0);
+ ASSERT(ok);
+ set_msr_xss(curr_xss);
+
+ return size;
+}
+
+unsigned int xstate_compressed_size(uint64_t xstates)
+{
+ unsigned int i, size = XSTATE_AREA_MIN_SIZE;
+
+ if ( xstates == 0 ) /* TODO: clean up paths passing 0 in here. */
+ return 0;
+
+ if ( xstates <= (X86_XCR0_SSE | X86_XCR0_FP) )
+ return size;
+
+ /*
+ * For the compressed size, every component matters. Some are
+ * automatically rounded up to 64 first.
+ */
+ xstates &= ~XSTATE_FP_SSE;
+ for_each_set_bit ( i, &xstates, 63 )
+ {
+ if ( test_bit(i, &xstate_align) )
+ size = ROUNDUP(size, 64);
+
+ size += xstate_sizes[i];
+ }
+
+ /* In debug builds, cross-check our calculation with hardware. */
+ if ( IS_ENABLED(CONFIG_DEBUG) )
+ {
+ unsigned int hwsize;
+
+ xstates |= XSTATE_FP_SSE;
+ hwsize = hw_compressed_size(xstates);
+
+ if ( size != hwsize )
+ printk_once(XENLOG_ERR "%s(%#"PRIx64") size %#x != hwsize %#x\n",
+ __func__, xstates, size, hwsize);
+ size = hwsize;
+ }
+
+ return size;
+}
+
static bool valid_xcr0(uint64_t xcr0)
{
/* FP must be unconditionally set. */
@@ -681,7 +740,8 @@ void xstate_init(struct cpuinfo_x86 *c)
* write it.
*/
this_cpu(xcr0) = 0;
- this_cpu(xss) = ~0;
+ if ( cpu_has_xsaves )
+ this_cpu(xss) = ~0;
cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK;
@@ -694,6 +754,8 @@ void xstate_init(struct cpuinfo_x86 *c)
set_in_cr4(X86_CR4_OSXSAVE);
if ( !set_xcr0(feature_mask) )
BUG();
+ if ( cpu_has_xsaves )
+ set_msr_xss(0);
if ( bsp )
{
--
2.30.2