On Sun, Jun 05, 2022 at 09:25:34AM -0400, Dave Voutila wrote:
> tech@ friends:
>
> tl;dr: testers wanted for fixing Linux guest reboot. If you've got
> Linux guests that no longer reboot properly, please test! For other
> vmd users, please check for any regressions.
>
> Our port of SeaBIOS is configured to enable QEMU features to simplify
> its working with vmd(8). This generally works well.
>
> SeaBIOS provides a reboot routine specifically for QEMU environments.
> One of the reasons is to provide some extra logic for refreshing the
> copy of the BIOS in memory (as if reading from ROM) before attempting
> the reset (first via PCI and falling back to triple-faulting). The way
> SeaBIOS does this appears to be it assumes there's a "pristine copy"
> of the BIOS loaded by the host's emulator to just below the 4GB mark
> in physical memory. (See src/fw/shadow.c in the SeaBIOS source tree.)
>
> This hasn't been a problem until recent Linux kernel changes started
> calling into the BIOS as a way to reboot the guest. (I know at least
> the 5.15 kernel shipped with Alpine does this.)
>
> Since vmd/vmm doesn't create a mapping for that area just below 4GB,
> guests experience a page fault vm-exit and a resulting failure as we
> consider that address part of the MMIO hole and reserved.
>
> This change to vmd(8) loads a second copy of the BIOS, ending at the
> 4GB mark in guest memory. Consequently, vmm(4)'s MMIO memory hole is
> adjusted to end 2MB below 4GB to accomodate SeaBIOS and future
> firmware payloads that may be > 1MB in size. (I believe EDK-II UEFI is
> larger than 1MB...haven't looked in awhile.)
>
> Along the way, I adjusted the use of hardcoded values for 1 MB and 4
> GB to use a more human readable version via #defines.
>
> For testers:
>   0. apply patch
>   1. build, install updated kernel, boot new kernel
>   2. copy or symlink sys/arch/amd64/include/vmmvar.h to
>      /usr/include/amd64/
>   3. build and install vmd(8)
>   4. test!
>
> ~dv
>

Does qemu load 2 copies of the bios or just rely on A20 tricks to make the
bios appear at two addresses?

-ml

>
> diff refs/heads/master refs/heads/vmd-bios4g
> blob - fea4ab52e6db7eff12b913ecde30abf970da0b54
> blob + f06212b18f8ae19b5edc8fa8d64684d7163e35a8
> --- sys/arch/amd64/include/vmmvar.h
> +++ sys/arch/amd64/include/vmmvar.h
> @@ -35,7 +35,7 @@
>  #define VMM_MAX_NICS_PER_VM  4
>
>  #define VMM_PCI_MMIO_BAR_BASE        0xF0000000ULL
> -#define VMM_PCI_MMIO_BAR_END 0xFFFFFFFFULL
> +#define VMM_PCI_MMIO_BAR_END 0xFFDFFFFFULL           /* 2 MiB below 4 GiB */
>  #define VMM_PCI_MMIO_BAR_SIZE        0x00010000
>  #define VMM_PCI_IO_BAR_BASE  0x1000
>  #define VMM_PCI_IO_BAR_END   0xFFFF
> blob - d952ba4d8d0bff700fc09c066ffc284909150417
> blob + c36e17eb5ed4d1799f55fa1af5f7ca158923202e
> --- usr.sbin/vmd/vm.c
> +++ usr.sbin/vmd/vm.c
> @@ -65,6 +65,10 @@
>  #include "vmd.h"
>  #include "vmm.h"
>
> +#define _1_MB        (1UL * 1024 * 1024)
> +#define _2_MB        (2UL * 1024 * 1024)
> +#define _4_GB        (4UL * 1024 * 1024 * 1024)
> +
>  io_fn_t ioports_map[MAX_PORTS];
>
>  int run_vm(int, int[][VM_MAX_BASE_PER_DISK], int *,
> @@ -234,7 +238,7 @@ loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_s
>               return (-1);
>
>       /* The BIOS image must end at 1MB */
> -     if ((off = 1048576 - size) < 0)
> +     if ((off = _1_MB - size) < 0)
>               return (-1);
>
>       /* Read BIOS image into memory */
> @@ -243,6 +247,16 @@ loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_s
>               return (-1);
>       }
>
> +     if (gzseek(fp, 0, SEEK_SET) == -1)
> +             return (-1);
> +
> +     /* Read a second BIOS copy into memory ending at 4GB */
> +     off = _4_GB - size;
> +     if (mread(fp, off, size) != (size_t)size) {
> +             errno = EIO;
> +             return (-1);
> +     }
> +
>       log_debug("%s: loaded BIOS image", __func__);
>
>       return (0);
> @@ -872,6 +886,7 @@ void
>  create_memory_map(struct vm_create_params *vcp)
>  {
>       size_t len, mem_bytes;
> +     size_t above_1m = 0, above_4g = 0;
>
>       mem_bytes = vcp->vcp_memranges[0].vmr_size;
>       vcp->vcp_nmemranges = 0;
> @@ -893,29 +908,47 @@ create_memory_map(struct vm_create_params *vcp)
>        * we need to make sure that vmm(4) permits accesses
>        * to it. So allocate guest memory for it.
>        */
> -     len = 0x100000 - LOWMEM_KB * 1024;
> +     len = _1_MB - (LOWMEM_KB * 1024);
>       vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
>       vcp->vcp_memranges[1].vmr_size = len;
>       mem_bytes -= len;
>
> -     /* Make sure that we do not place physical memory into MMIO ranges. */
> -     if (mem_bytes > VMM_PCI_MMIO_BAR_BASE - 0x100000)
> -             len = VMM_PCI_MMIO_BAR_BASE - 0x100000;
> -     else
> -             len = mem_bytes;
> -
> -     /* Third memory region: 1MB - (1MB + len) */
> -     vcp->vcp_memranges[2].vmr_gpa = 0x100000;
> -     vcp->vcp_memranges[2].vmr_size = len;
> -     mem_bytes -= len;
> -
> -     if (mem_bytes > 0) {
> -             /* Fourth memory region for the remaining memory (if any) */
> -             vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
> -             vcp->vcp_memranges[3].vmr_size = mem_bytes;
> -             vcp->vcp_nmemranges = 4;
> -     } else
> +     /* If we have less than 2MB remaining, still create a 2nd BIOS area. */
> +     if (mem_bytes <= _2_MB) {
> +             vcp->vcp_memranges[2].vmr_gpa = VMM_PCI_MMIO_BAR_END;
> +             vcp->vcp_memranges[2].vmr_size = _2_MB;
>               vcp->vcp_nmemranges = 3;
> +             return;
> +     }
> +
> +     /*
> +      * Calculate the how to split any remaining memory across the 4GB
> +      * boundary while making sure we do not place physical memory into
> +      * MMIO ranges.
> +      */
> +     if (mem_bytes > VMM_PCI_MMIO_BAR_BASE - _1_MB) {
> +             above_1m = VMM_PCI_MMIO_BAR_BASE - _1_MB;
> +             above_4g = mem_bytes - above_1m;
> +     } else {
> +             above_1m = mem_bytes;
> +             above_4g = 0;
> +     }
> +
> +     /* Third memory region: area above 1MB to MMIO region */
> +     vcp->vcp_memranges[2].vmr_gpa = _1_MB;
> +     vcp->vcp_memranges[2].vmr_size = above_1m;
> +
> +     /* Fourth region: 2nd copy of BIOS above MMIO ending at 4GB */
> +     vcp->vcp_memranges[3].vmr_gpa = VMM_PCI_MMIO_BAR_END + 1;
> +     vcp->vcp_memranges[3].vmr_size = _2_MB;
> +
> +     /* Fifth region: any remainder above 4GB */
> +     if (above_4g > 0) {
> +             vcp->vcp_memranges[4].vmr_gpa = _4_GB;
> +             vcp->vcp_memranges[4].vmr_size = above_4g;
> +             vcp->vcp_nmemranges = 5;
> +     } else
> +             vcp->vcp_nmemranges = 4;
>  }
>
>  /*
> @@ -1015,17 +1048,19 @@ init_emulated_hw(struct vmop_create_params *vmc, int c
>      int child_disks[][VM_MAX_BASE_PER_DISK], int *child_taps)
>  {
>       struct vm_create_params *vcp = &vmc->vmc_params;
> -     int i;
> +     size_t i;
>       uint64_t memlo, memhi;
>
>       /* Calculate memory size for NVRAM registers */
>       memlo = memhi = 0;
> -     if (vcp->vcp_nmemranges > 2)
> -             memlo = vcp->vcp_memranges[2].vmr_size - 15 * 0x100000;
> +     for (i = 0; i < vcp->vcp_nmemranges; i++) {
> +             if (vcp->vcp_memranges[i].vmr_gpa == _1_MB &&
> +                 vcp->vcp_memranges[i].vmr_size > (15 * _1_MB))
> +                     memlo = vcp->vcp_memranges[i].vmr_size - (15 * _1_MB);
> +             else if (vcp->vcp_memranges[i].vmr_gpa == _4_GB)
> +                     memhi = vcp->vcp_memranges[i].vmr_size;
> +     }
>
> -     if (vcp->vcp_nmemranges > 3)
> -             memhi = vcp->vcp_memranges[3].vmr_size;
> -
>       /* Reset the IO port map */
>       memset(&ioports_map, 0, sizeof(io_fn_t) * MAX_PORTS);
>

Reply via email to