On Wed, Aug 03, 2022 at 07:02:35PM +0200, Jason A. Donenfeld wrote:
> The boot parameter header refers to setup_data at an absolute address,
> and each setup_data refers to the next setup_data at an absolute address
> too. Currently QEMU simply puts the setup_datas right after the kernel
> image, and since the kernel_image is loaded at prot_addr -- a fixed
> address knowable to QEMU a priori -- the setup_data absolute address
> winds up being just `prot_addr + a_fixed_offset_into_kernel_image`.
>
> This mostly works fine, so long as the kernel image really is loaded at
> prot_addr. However, OVMF doesn't load the kernel at prot_addr, and
> generally EFI doesn't give a good way of predicting where it's going to
> load the kernel. So when it loads it at some address != prot_addr, the
> absolute addresses in setup_data now point somewhere bogus, causing
> crashes when the EFI stub tries to follow the next link.
>
> Fix this by placing setup_data at some fixed place in memory, not as
> part of the kernel image, and then pointing the setup_data absolute
> address to that fixed place in memory. This way, even if OVMF or other
> chains relocate the kernel image, the boot parameter still points to the
> correct absolute address.
>
> === NOTE NOTE NOTE NOTE NOTE ===
> This commit is currently garbage! It fixes the boot test case, but it
> just picks the address 0x10000000. That's probably not a good idea. If
> somebody with some x86 architectural knowledge could let me know a
> better reserved place to put this, that'd be very appreciated.
>
> Fixes: 3cbeb52467 ("hw/i386: add device tree support")
> Reported-by: Xiaoyao Li <[email protected]>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Richard Henderson <[email protected]>
> Cc: Peter Maydell <[email protected]>
> Cc: Michael S. Tsirkin <[email protected]>
> Cc: Daniel P. Berrangé <[email protected]>
> Cc: Gerd Hoffmann <[email protected]>
> Cc: Ard Biesheuvel <[email protected]>
> Cc: [email protected]
> Signed-off-by: Jason A. Donenfeld <[email protected]>
> ---
> hw/i386/x86.c | 38 +++++++++++++++++++++-----------------
> 1 file changed, 21 insertions(+), 17 deletions(-)
>
> diff --git a/hw/i386/x86.c b/hw/i386/x86.c
> index 050eedc0c8..0b0083b345 100644
> --- a/hw/i386/x86.c
> +++ b/hw/i386/x86.c
> @@ -773,9 +773,9 @@ void x86_load_linux(X86MachineState *x86ms,
> bool linuxboot_dma_enabled =
> X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
> uint16_t protocol;
> int setup_size, kernel_size, cmdline_size;
> - int dtb_size, setup_data_offset;
> + int dtb_size, setup_data_item_len, setup_data_total_len = 0;
> uint32_t initrd_max;
> - uint8_t header[8192], *setup, *kernel;
> + uint8_t header[8192], *setup, *kernel, *setup_datas = NULL;
> hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0,
> first_setup_data = 0;
> FILE *f;
> char *vmode;
> @@ -1048,6 +1048,8 @@ void x86_load_linux(X86MachineState *x86ms,
> }
> fclose(f);
>
> +#define SETUP_DATA_PHYS_BASE 0x10000000
> +
> /* append dtb to kernel */
> if (dtb_filename) {
> if (protocol < 0x209) {
> @@ -1062,34 +1064,36 @@ void x86_load_linux(X86MachineState *x86ms,
> exit(1);
> }
>
> - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
> - kernel_size = setup_data_offset + sizeof(struct setup_data) +
> dtb_size;
> - kernel = g_realloc(kernel, kernel_size);
> -
> -
> - setup_data = (struct setup_data *)(kernel + setup_data_offset);
> + setup_data_item_len = sizeof(struct setup_data) + dtb_size;
> + setup_datas = g_realloc(setup_datas, setup_data_total_len +
> setup_data_item_len);
> + setup_data = (struct setup_data *)(setup_datas +
> setup_data_total_len);
> setup_data->next = cpu_to_le64(first_setup_data);
> - first_setup_data = prot_addr + setup_data_offset;
> + first_setup_data = SETUP_DATA_PHYS_BASE + setup_data_total_len;
> + setup_data_total_len += setup_data_item_len;
> setup_data->type = cpu_to_le32(SETUP_DTB);
> setup_data->len = cpu_to_le32(dtb_size);
> -
> load_image_size(dtb_filename, setup_data->data, dtb_size);
> }
>
> if (!legacy_no_rng_seed) {
> - setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
> - kernel_size = setup_data_offset + sizeof(struct setup_data) +
> RNG_SEED_LENGTH;
> - kernel = g_realloc(kernel, kernel_size);
> - setup_data = (struct setup_data *)(kernel + setup_data_offset);
> + setup_data_item_len = sizeof(struct setup_data) + SETUP_RNG_SEED;
> + setup_datas = g_realloc(setup_datas, setup_data_total_len +
> setup_data_item_len);
> + setup_data = (struct setup_data *)(setup_datas +
> setup_data_total_len);
> setup_data->next = cpu_to_le64(first_setup_data);
> - first_setup_data = prot_addr + setup_data_offset;
> + first_setup_data = SETUP_DATA_PHYS_BASE + setup_data_total_len;
> + setup_data_total_len += setup_data_item_len;
> setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
> setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
> qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
> }
>
> - /* Offset 0x250 is a pointer to the first setup_data link. */
> - stq_p(header + 0x250, first_setup_data);
> + if (first_setup_data) {
> + /* Offset 0x250 is a pointer to the first setup_data link. */
> + stq_p(header + 0x250, first_setup_data);
> + rom_add_blob("setup_data", setup_datas, setup_data_total_len,
> setup_data_total_len,
> + SETUP_DATA_PHYS_BASE, NULL, NULL, NULL, NULL,
> false);
> + }
> +
>
Allocating memory on x86 is a tricky business. Could we instead use
bios-linker-loader with COMMAND_WRITE_POINTER to get an address from
the firmware?
> /*
> * If we're starting an encrypted VM, it will be OVMF based, which uses
> the
> --
> 2.35.1