On Mon, Jan 10, 2022 at 04:36:13PM +0800, Tian, Kevin wrote: > > From: Zhong, Yang <yang.zh...@intel.com> > > Sent: Friday, January 7, 2022 5:32 PM > > > > Kernel mechanism for dynamically enabled XSAVE features > > there is no definition of "dynamically-enabled XSAVE features). >
Thanks! > > asks userspace VMM requesting guest permission if it wants > > to expose the features. Only with the permission, kernel > > can try to enable the features when detecting the intention > > from guest in runtime. > > > > Qemu should request the permission for guest only once > > before the first vCPU is created. KVM checks the guest > > permission when Qemu advertises the features, and the > > advertising operation fails w/o permission. > > what about below? > > "Kernel allocates 4K xstate buffer by default. For XSAVE features > which require large state component (e.g. AMX), Linux kernel > dynamically expands the xstate buffer only after the process has > acquired the necessary permissions. Those are called dynamically- > enabled XSAVE features (or dynamic xfeatures). > > There are separate permissions for native tasks and guests. > > Qemu should request the guest permissions for dynamic xfeatures > which will be exposed to the guest. This only needs to be done > once before the first vcpu is created." This is clearer. Will update this in new version, thanks! > > > > > Signed-off-by: Yang Zhong <yang.zh...@intel.com> > > Signed-off-by: Jing Liu <jing2....@intel.com> > > --- > > target/i386/cpu.h | 7 +++++++ > > hw/i386/x86.c | 28 ++++++++++++++++++++++++++++ > > 2 files changed, 35 insertions(+) > > > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > > index 768a8218be..79023fe723 100644 > > --- a/target/i386/cpu.h > > +++ b/target/i386/cpu.h > > @@ -549,6 +549,13 @@ typedef enum X86Seg { > > #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) > > #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) > > #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) > > +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) > > +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) > > +#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK \ > > + | XSTATE_XTILE_DATA_MASK) > > + > > +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 > > +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 > > > > /* CPUID feature words */ > > typedef enum FeatureWord { > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > > index b84840a1bb..0a204c375e 100644 > > --- a/hw/i386/x86.c > > +++ b/hw/i386/x86.c > > @@ -41,6 +41,8 @@ > > #include "sysemu/cpu-timers.h" > > #include "trace.h" > > > > +#include <sys/syscall.h> > > + > > #include "hw/i386/x86.h" > > #include "target/i386/cpu.h" > > #include "hw/i386/topology.h" > > @@ -117,6 +119,30 @@ out: > > object_unref(cpu); > > } > > > > +static void x86_xsave_req_perm(void) > > +{ > > + unsigned long bitmask; > > + > > + long rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, > > + XSTATE_XTILE_DATA_BIT); > > Should we do it based on the cpuid for the first vcpu? This permission is requested before vcpu init, so put it in x86_cpus_init(). If the host kernel does not include AMX changes, or the latest kernel(include AMX) install on previous generation x86 platform, this syscall() will directly return. I ever put this permission request in the vcpu create function, but it's hard to find a good location to handle this. As for cpuid, you mean I need check host cpuid info? to check if this host cpu can support AMX? thanks! Yang > > > + if (rc) { > > + /* > > + * The older kernel version(<5.15) can't support > > + * ARCH_REQ_XCOMP_GUEST_PERM and directly return. > > + */ > > + return; > > + } > > + > > + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); > > + if (rc) { > > + error_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); > > + } else if (!(bitmask & XFEATURE_XTILE_MASK)) { > > + error_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " > > + "and bitmask=0x%lx", bitmask); > > + exit(EXIT_FAILURE); > > + } > > +} > > + > > void x86_cpus_init(X86MachineState *x86ms, int default_cpu_version) > > { > > int i; > > @@ -124,6 +150,8 @@ void x86_cpus_init(X86MachineState *x86ms, int > > default_cpu_version) > > MachineState *ms = MACHINE(x86ms); > > MachineClass *mc = MACHINE_GET_CLASS(x86ms); > > > > + /* Request AMX pemission for guest */ > > + x86_xsave_req_perm(); > > x86_cpu_set_default_version(default_cpu_version); > > > > /*