On Wed, Mar 25, 2026 at 03:23:28PM +0900, Itaru Kitayama wrote:
> Hi Wei Lin,

Hi,

> On Wed, Mar 25, 2026 at 12:36:20AM +0000, Wei-Lin Chang wrote:
> > Introduce library functions for setting up guest stage-2 page tables,
> > then use that to give L2 an identity mapped stage-2 and enable it.
> > 
> > The translation and stage-2 page table built is simple, start level 0,
> > 4 levels, 4KB granules, normal cachable, 48-bit IA, 40-bit OA.
> > 
> > The nested page table code is adapted from lib/x86/vmx.c.
> > 
> > Signed-off-by: Wei-Lin Chang <[email protected]>
> > ---
> >  .../selftests/kvm/include/arm64/nested.h      |  7 ++
> >  .../selftests/kvm/include/arm64/processor.h   |  9 ++
> >  .../testing/selftests/kvm/lib/arm64/nested.c  | 97 ++++++++++++++++++-
> >  3 files changed, 111 insertions(+), 2 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/kvm/include/arm64/nested.h 
> > b/tools/testing/selftests/kvm/include/arm64/nested.h
> > index 739ff2ee0161..0be10a775e48 100644
> > --- a/tools/testing/selftests/kvm/include/arm64/nested.h
> > +++ b/tools/testing/selftests/kvm/include/arm64/nested.h
> > @@ -6,6 +6,13 @@
> >  #ifndef SELFTEST_KVM_NESTED_H
> >  #define SELFTEST_KVM_NESTED_H
> >  
> > +uint64_t get_l1_vtcr(void);
> 
> Using a type u64 is simpler? And I think you configure guest
> hypervisor's stage 2 translation table, I felt this gives us
> an impression somewhere the configuration IA and OA sizes etc 
> are stored.

Sure, u64 is okay.
In this version I basically just used hard-coded values wherever I needed
IA, OA, and other related values (e.g. page shift), which is not
good and, as Marc said, would not even work on some platforms. I'll make
it more modular in the next iteration.

> 
> > +
> > +void nested_map(struct kvm_vm *vm, vm_paddr_t guest_pgd,
> > +           uint64_t nested_paddr, uint64_t paddr, uint64_t size);
> > +void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t guest_pgd,
> > +                   uint32_t memslot);
> > +
> >  void prepare_l2_stack(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> >  void prepare_hyp_state(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
> >  void prepare_eret_destination(struct kvm_vm *vm, struct kvm_vcpu *vcpu, 
> > void *l2_pc);
> > diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h 
> > b/tools/testing/selftests/kvm/include/arm64/processor.h
> > index ac97a1c436fc..5de2e932d95a 100644
> > --- a/tools/testing/selftests/kvm/include/arm64/processor.h
> > +++ b/tools/testing/selftests/kvm/include/arm64/processor.h
> > @@ -104,6 +104,15 @@
> >  #define TCR_HA                     (UL(1) << 39)
> >  #define TCR_DS                     (UL(1) << 59)
> >  
> > +/* VTCR_EL2 specific flags */
> > +#define VTCR_EL2_T0SZ_BITS(x)      ((UL(64) - (x)) << VTCR_EL2_T0SZ_SHIFT)
> > +
> > +#define VTCR_EL2_SL0_LV0_4K        (UL(2) << VTCR_EL2_SL0_SHIFT)
> > +#define VTCR_EL2_SL0_LV1_4K        (UL(1) << VTCR_EL2_SL0_SHIFT)
> > +#define VTCR_EL2_SL0_LV2_4K        (UL(0) << VTCR_EL2_SL0_SHIFT)
> > +
> > +#define VTCR_EL2_PS_40_BITS        (UL(2) << VTCR_EL2_PS_SHIFT)
> > +
> >  /*
> >   * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* 
> > registers).
> >   */
> > diff --git a/tools/testing/selftests/kvm/lib/arm64/nested.c 
> > b/tools/testing/selftests/kvm/lib/arm64/nested.c
> > index 111d02f44cfe..910f8cd30f96 100644
> > --- a/tools/testing/selftests/kvm/lib/arm64/nested.c
> > +++ b/tools/testing/selftests/kvm/lib/arm64/nested.c
> > @@ -1,8 +1,11 @@
> >  // SPDX-License-Identifier: GPL-2.0
> >  /*
> > - * ARM64 Nested virtualization helpers
> > + * ARM64 Nested virtualization helpers, nested page table code adapted from
> > + * ../x86/vmx.c.
> >   */
> >  
> > +#include <linux/sizes.h>
> > +
> >  #include "kvm_util.h"
> >  #include "nested.h"
> >  #include "processor.h"
> > @@ -18,6 +21,87 @@ static void hvc_handler(struct ex_regs *regs)
> >     regs->pc = (u64)after_hvc;
> >  }
> >  
> > +uint64_t get_l1_vtcr(void)
> > +{
> > +   return VTCR_EL2_PS_40_BITS | VTCR_EL2_TG0_4K | VTCR_EL2_ORGN0_WBWA |
> > +          VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LV0_4K | 
> > VTCR_EL2_T0SZ_BITS(48);
> > +}
> > +
> > +static void __nested_pg_map(struct kvm_vm *vm, uint64_t guest_pgd,
> > +                uint64_t nested_paddr, uint64_t paddr, uint64_t flags)
> > +{
> > +   uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
> > +   uint64_t pg_attr;
> > +   uint64_t *ptep;
> > +
> > +   TEST_ASSERT((nested_paddr % vm->page_size) == 0,
> > +           "L2 IPA not on page boundary,\n"
> > +           "  nested_paddr: 0x%lx vm->page_size: 0x%x", nested_paddr, 
> > vm->page_size);
> > +   TEST_ASSERT((paddr % vm->page_size) == 0,
> > +           "Guest physical address not on page boundary,\n"
> > +           "  paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
> > +   TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
> > +           "Physical address beyond maximum supported,\n"
> > +           "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
> > +           paddr, vm->max_gfn, vm->page_size);
> > +
> > +   ptep = addr_gpa2hva(vm, guest_pgd) + ((nested_paddr >> 39) & 0x1ffu) * 
> > 8;
> > +   if (!*ptep)
> > +           *ptep = (vm_alloc_page_table(vm) & GENMASK(47, 12)) | 
> > PGD_TYPE_TABLE | PTE_VALID;
> 
> Same but given this is stage 2 translation tables, KVM_PTE_VALID?

I see your point, but KVM_PTE_VALID is only defined for KVM, not here in
kselftest userspace. However, since I will redo the page table generator,
I can add this — let's see.
Thanks for the suggestions!

Thanks,
Wei-Lin Chang

> 
> Thanks,
> Itaru.
> > +   ptep = addr_gpa2hva(vm, *ptep & GENMASK(47, 12)) + ((nested_paddr >> 
> > 30) & 0x1ffu) * 8;
> > +   if (!*ptep)
> > +           *ptep = (vm_alloc_page_table(vm) & GENMASK(47, 12)) | 
> > PUD_TYPE_TABLE | PTE_VALID;
> > +   ptep = addr_gpa2hva(vm, *ptep & GENMASK(47, 12)) + ((nested_paddr >> 
> > 21) & 0x1ffu) * 8;
> > +   if (!*ptep)
> > +           *ptep = (vm_alloc_page_table(vm) & GENMASK(47, 12)) | 
> > PMD_TYPE_TABLE | PTE_VALID;
> > +   ptep = addr_gpa2hva(vm, *ptep & GENMASK(47, 12)) + ((nested_paddr >> 
> > 12) & 0x1ffu) * 8;
> > +
> > +   pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
> > +   pg_attr |= PTE_SHARED;
> > +
> > +   *ptep = (paddr & GENMASK(47, 12)) | pg_attr;
> > +}
> > +
> > +void nested_map(struct kvm_vm *vm, vm_paddr_t guest_pgd,
> > +           uint64_t nested_paddr, uint64_t paddr, uint64_t size)
> > +{
> > +   size_t npages = size / SZ_4K;
> > +
> > +   TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
> > +   TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
> > +
> > +   while (npages--) {
> > +           __nested_pg_map(vm, guest_pgd, nested_paddr, paddr, MT_NORMAL);
> > +           nested_paddr += SZ_4K;
> > +           paddr += SZ_4K;
> > +   }
> > +}
> > +
> > +/*
> > + * Prepare an identity shadow page table that maps all the
> > + * physical pages in VM.
> > + */
> > +void nested_map_memslot(struct kvm_vm *vm, vm_paddr_t guest_pgd,
> > +                   uint32_t memslot)
> > +{
> > +   sparsebit_idx_t i, last;
> > +   struct userspace_mem_region *region =
> > +           memslot2region(vm, memslot);
> > +
> > +   i = (region->region.guest_phys_addr >> vm->page_shift) - 1;
> > +   last = i + (region->region.memory_size >> vm->page_shift);
> > +   for (;;) {
> > +           i = sparsebit_next_clear(region->unused_phy_pages, i);
> > +           if (i > last)
> > +                   break;
> > +
> > +           nested_map(vm, guest_pgd,
> > +                      (uint64_t)i << vm->page_shift,
> > +                      (uint64_t)i << vm->page_shift,
> > +                      1 << vm->page_shift);
> > +   }
> > +}
> > +
> >  void prepare_l2_stack(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> >  {
> >     size_t l2_stack_size;
> > @@ -32,7 +116,16 @@ void prepare_l2_stack(struct kvm_vm *vm, struct 
> > kvm_vcpu *vcpu)
> >  
> >  void prepare_hyp_state(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
> >  {
> > -   vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), HCR_EL2_RW);
> > +   vm_paddr_t guest_pgd;
> > +
> > +   guest_pgd = vm_phy_pages_alloc(vm, 1,
> > +                                  KVM_GUEST_PAGE_TABLE_MIN_PADDR,
> > +                                  vm->memslots[MEM_REGION_PT]);
> > +   nested_map_memslot(vm, guest_pgd, 0);
> > +
> > +   vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_HCR_EL2), HCR_EL2_RW | 
> > HCR_EL2_VM);
> > +   vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VTTBR_EL2), guest_pgd);
> > +   vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VTCR_EL2), get_l1_vtcr());
> >  }
> >  
> >  void prepare_eret_destination(struct kvm_vm *vm, struct kvm_vcpu *vcpu, 
> > void *l2_pc)
> > -- 
> > 2.43.0
> > 

Reply via email to