On 07/21/15 18:03, Marc Marí wrote:
> From: Gerd Hoffmann <[email protected]>
>
> First draft of a fw_cfg dma interface. Designed as add-on to the
> extisting fw_cfg interface, i.e. there is no select register. There
> are four 32bit registers: Target address (low and high bits), transfer
> length, control register.
>
> See docs/specs/fw_cfg.txt update for details.
>
> Possible improvements (can be done incremental):
>
> * Better error reporting. Right now we throw errors on invalid
> addresses only. We could also throw errors on invalid reads/writes
> instead of using fw_cfg_read (return zeros on error) behavior.
> * Use memcpy instead of calling fw_cfg_read in a loop.
>
> Signed-off-by: Gerd Hoffmann <[email protected]>
> ---
> docs/specs/fw_cfg.txt | 35 ++++++++++
> hw/arm/virt.c | 2 +-
> hw/nvram/fw_cfg.c | 159
> ++++++++++++++++++++++++++++++++++++++++++++--
> include/hw/nvram/fw_cfg.h | 5 +-
> 4 files changed, 194 insertions(+), 7 deletions(-)
>
> diff --git a/docs/specs/fw_cfg.txt b/docs/specs/fw_cfg.txt
> index 5bc7b96..64d9192 100644
> --- a/docs/specs/fw_cfg.txt
> +++ b/docs/specs/fw_cfg.txt
> @@ -132,6 +132,41 @@ Selector Reg. Range Usage
> In practice, the number of allowed firmware configuration items is given
> by the value of FW_CFG_MAX_ENTRY (see fw_cfg.h).
>
> += Guest-side DMA Interface =
> +
> +== Registers ==
> +
> +This does not replace the existing fw_cfg interface, it is an add-on.
> +Specifically there is no select register in the dma interface, guests
> +must use the select register of the classic fw_cfg interface instead.
> +
> +There are four 32bit registers: Target address (low and high bits),
> +transfer length, control register.
> +
> +== Protocol ==
> +
> +Probing for dma support being available: Write target address, read it
> +back, verify you got back the value you wrote.
In the kernel tree, in
"Documentation/devicetree/bindings/arm/fw-cfg.txt", we said
> The outermost protocol (involving the write / read sequences of the
> control and data registers) is expected to be versioned, and/or
> described by feature bits. The interface revision / feature bitmap can
> be retrieved with key 0x0001. The blob to be read from the data
> register has size 4, and it is to be interpreted as a uint32_t value
> in little endian byte order. The current value (corresponding to the
> above outer protocol) is zero.
So, I think the DMA interface should be advertised as a new bit in the
bitmap. (Bit #0 with value 1 should not be used, because that's already
used -- for nothing, actually -- on x86 targets. But bit #1 with value 2
should be okay for this.)
> +
> +Kick a transfer: Write select, target address and transfer length
> +registers (order doesn't matter). Then flip read bit in the control
> +register to '1'. Also make sure the error bit is cleared.
> +
> +Check result: Read control register:
> + error bit set -> something went wrong.
> + all bits cleared -> transfer finished successfully.
> + otherwise -> transfer still in progress (doesn't happen
> + today due to implementation not being async,
> + but may in the future).
> +
> +Target address goes up and transfer length goes down as the transfer
> +happens, so after a successfull
successful[]
> transfer the length register is zero
> +and the address register points right after the memory block written.
> +
> +If a partial transfer happened before an error occured the address and
> +length registers indicate how much data has been transfered
> +successfully.
> +
> = Host-side API =
>
> The following functions are available to the QEMU programmer for adding
> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 4846892..374660c 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -612,7 +612,7 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
> hwaddr size = vbi->memmap[VIRT_FW_CFG].size;
> char *nodename;
>
> - fw_cfg_init_mem_wide(base + 8, base, 8);
> + fw_cfg_init_mem_wide(base + 8, base, 8, 0, NULL);
>
> nodename = g_strdup_printf("/fw-cfg@%" PRIx64, base);
> qemu_fdt_add_subnode(vbi->fdt, nodename);
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index 88481b7..5bcd0e0 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -23,6 +23,7 @@
> */
> #include "hw/hw.h"
> #include "sysemu/sysemu.h"
> +#include "sysemu/dma.h"
> #include "hw/isa/isa.h"
> #include "hw/nvram/fw_cfg.h"
> #include "hw/sysbus.h"
> @@ -42,6 +43,18 @@
> #define FW_CFG_IO(obj) OBJECT_CHECK(FWCfgIoState, (obj), TYPE_FW_CFG_IO)
> #define FW_CFG_MEM(obj) OBJECT_CHECK(FWCfgMemState, (obj), TYPE_FW_CFG_MEM)
>
> +/* fw_cfg dma registers */
> +#define FW_CFG_DMA_ADDR_LO 0
> +#define FW_CFG_DMA_ADDR_HI 4
> +#define FW_CFG_DMA_LENGTH 8
> +#define FW_CFG_DMA_CONTROL 12
> +#define FW_CFG_DMA_SIZE 16
> +
> +/* FW_CFG_DMA_CONTROL bits */
> +#define FW_CFG_DMA_CTL_ERROR 0x01
> +#define FW_CFG_DMA_CTL_READ 0x02
> +#define FW_CFG_DMA_CTL_MASK 0x03
> +
> typedef struct FWCfgEntry {
> uint32_t len;
> uint8_t *data;
> @@ -59,6 +72,12 @@ struct FWCfgState {
> uint16_t cur_entry;
> uint32_t cur_offset;
> Notifier machine_ready;
> +
> + bool dma_enabled;
> + AddressSpace *dma_as;
> + dma_addr_t dma_addr;
> + uint32_t dma_len;
> + uint32_t dma_ctl;
> };
>
> struct FWCfgIoState {
> @@ -75,7 +94,10 @@ struct FWCfgMemState {
> FWCfgState parent_obj;
> /*< public >*/
>
> - MemoryRegion ctl_iomem, data_iomem;
> + MemoryRegion ctl_iomem, data_iomem, dma_iomem;
> +#if 0
> + hwaddr ctl_addr, data_addr, dma_addr;
> +#endif
What's the goal of this?
> uint32_t data_width;
> MemoryRegionOps wide_data_ops;
> };
> @@ -294,6 +316,88 @@ static void fw_cfg_data_mem_write(void *opaque, hwaddr
> addr,
> } while (i);
> }
>
> +static void fw_cfg_dma_transfer(FWCfgState *s)
> +{
> + dma_addr_t len;
> + uint8_t *ptr;
> + uint32_t i;
> +
> + if (s->dma_ctl & FW_CFG_DMA_CTL_ERROR) {
> + return;
> + }
> + if (!(s->dma_ctl & FW_CFG_DMA_CTL_READ)) {
> + return;
> + }
> +
> + while (s->dma_len > 0) {
> + len = s->dma_len;
> + ptr = dma_memory_map(s->dma_as, s->dma_addr, &len,
> + DMA_DIRECTION_FROM_DEVICE);
> + if (!ptr || !len) {
> + s->dma_ctl |= FW_CFG_DMA_CTL_ERROR;
> + return;
> + }
> +
> + for (i = 0; i < len; i++) {
> + ptr[i] = fw_cfg_read(s);
> + }
> +
> + s->dma_addr += i;
> + s->dma_len -= i;
> + dma_memory_unmap(s->dma_as, ptr, len,
> + DMA_DIRECTION_FROM_DEVICE, i);
> + }
> + s->dma_ctl = 0;
> +}
On Aarch64 KVM, is this going to show the same cache coherence problems
as VGA memory access?
... I definitely don't claim that I understand the rest of the patch,
but for now I don't have more questions. :)
Thanks!
Laszlo
> +
> +static uint64_t fw_cfg_dma_mem_read(void *opaque, hwaddr addr,
> + unsigned size)
> +{
> + FWCfgState *s = opaque;
> + uint64_t ret = 0;
> +
> + switch (addr) {
> + case FW_CFG_DMA_ADDR_LO:
> + ret = s->dma_addr & 0xffffffff;
> + break;
> + case FW_CFG_DMA_ADDR_HI:
> + ret = (s->dma_addr >> 32) & 0xffffffff;
> + break;
> + case FW_CFG_DMA_LENGTH:
> + ret = s->dma_len;
> + break;
> + case FW_CFG_DMA_CONTROL:
> + ret = s->dma_ctl;
> + break;
> + }
> + return ret;
> +}
> +
> +static void fw_cfg_dma_mem_write(void *opaque, hwaddr addr,
> + uint64_t value, unsigned size)
> +{
> + FWCfgState *s = opaque;
> +
> + switch (addr) {
> + case FW_CFG_DMA_ADDR_LO:
> + s->dma_addr &= ~((dma_addr_t)0xffffffff);
> + s->dma_addr |= value;
> + break;
> + case FW_CFG_DMA_ADDR_HI:
> + s->dma_addr &= ~(((dma_addr_t)0xffffffff) << 32);
> + s->dma_addr |= ((dma_addr_t)value) << 32;
> + break;
> + case FW_CFG_DMA_LENGTH:
> + s->dma_len = value;
> + break;
> + case FW_CFG_DMA_CONTROL:
> + value &= FW_CFG_DMA_CTL_MASK;
> + s->dma_ctl = value;
> + fw_cfg_dma_transfer(s);
> + break;
> + }
> +}
> +
> static bool fw_cfg_data_mem_valid(void *opaque, hwaddr addr,
> unsigned size, bool is_write)
> {
> @@ -361,6 +465,16 @@ static const MemoryRegionOps fw_cfg_comb_mem_ops = {
> .valid.accepts = fw_cfg_comb_valid,
> };
>
> +static const MemoryRegionOps fw_cfg_dma_mem_ops = {
> + .read = fw_cfg_dma_mem_read,
> + .write = fw_cfg_dma_mem_write,
> + .endianness = DEVICE_BIG_ENDIAN,
> + .valid = {
> + .min_access_size = 4,
> + .max_access_size = 4,
> + },
> +};
> +
> static void fw_cfg_reset(DeviceState *d)
> {
> FWCfgState *s = FW_CFG(d);
> @@ -401,6 +515,23 @@ static bool is_version_1(void *opaque, int version_id)
> return version_id == 1;
> }
>
> +static VMStateDescription vmstate_fw_cfg_dma = {
> + .name = "fw_cfg/dma",
> + .fields = (VMStateField[]) {
> + VMSTATE_UINT64(dma_addr, FWCfgState),
> + VMSTATE_UINT32(dma_len, FWCfgState),
> + VMSTATE_UINT32(dma_ctl, FWCfgState),
> + VMSTATE_END_OF_LIST()
> + },
> +};
> +
> +static bool fw_cfg_dma_enabled(void *opaque)
> +{
> + FWCfgState *s = opaque;
> +
> + return s->dma_enabled;
> +}
> +
> static const VMStateDescription vmstate_fw_cfg = {
> .name = "fw_cfg",
> .version_id = 2,
> @@ -410,6 +541,14 @@ static const VMStateDescription vmstate_fw_cfg = {
> VMSTATE_UINT16_HACK(cur_offset, FWCfgState, is_version_1),
> VMSTATE_UINT32_V(cur_offset, FWCfgState, 2),
> VMSTATE_END_OF_LIST()
> + },
> + .subsections = (VMStateSubsection[]) {
> + {
> + .vmsd = &vmstate_fw_cfg_dma,
> + .needed = fw_cfg_dma_enabled,
> + }, {
> + /* end of list */
> + }
> }
> };
>
> @@ -618,8 +757,9 @@ FWCfgState *fw_cfg_init_io(uint32_t iobase)
> return FW_CFG(dev);
> }
>
> -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr,
> - uint32_t data_width)
> +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
> + hwaddr data_addr, uint32_t data_width,
> + hwaddr dma_addr, AddressSpace *dma_as)
> {
> DeviceState *dev;
> SysBusDevice *sbd;
> @@ -633,13 +773,20 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
> hwaddr data_addr,
> sysbus_mmio_map(sbd, 0, ctl_addr);
> sysbus_mmio_map(sbd, 1, data_addr);
>
> + if (dma_addr && dma_as) {
> + FW_CFG(dev)->dma_as = dma_as;
> + FW_CFG(dev)->dma_enabled = true;
> + sysbus_mmio_map(sbd, 1, dma_addr);
> + }
> +
> return FW_CFG(dev);
> }
>
> FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr)
> {
> return fw_cfg_init_mem_wide(ctl_addr, data_addr,
> - fw_cfg_data_mem_ops.valid.max_access_size);
> + fw_cfg_data_mem_ops.valid.max_access_size,
> + 0, NULL);
> }
>
>
> @@ -725,6 +872,10 @@ static void fw_cfg_mem_realize(DeviceState *dev, Error
> **errp)
> memory_region_init_io(&s->data_iomem, OBJECT(s), data_ops, FW_CFG(s),
> "fwcfg.data", data_ops->valid.max_access_size);
> sysbus_init_mmio(sbd, &s->data_iomem);
> +
> + memory_region_init_io(&s->dma_iomem, OBJECT(s), &fw_cfg_dma_mem_ops,
> + FW_CFG(s), "fwcfg.dma", FW_CFG_DMA_SIZE);
> + sysbus_init_mmio(sbd, &s->dma_iomem);
> }
>
> static void fw_cfg_mem_class_init(ObjectClass *klass, void *data)
> diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
> index e60d3ca..2fe31ea 100644
> --- a/include/hw/nvram/fw_cfg.h
> +++ b/include/hw/nvram/fw_cfg.h
> @@ -79,8 +79,9 @@ void *fw_cfg_modify_file(FWCfgState *s, const char
> *filename, void *data,
> size_t len);
> FWCfgState *fw_cfg_init_io(uint32_t iobase);
> FWCfgState *fw_cfg_init_mem(hwaddr ctl_addr, hwaddr data_addr);
> -FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr, hwaddr data_addr,
> - uint32_t data_width);
> +FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
> + hwaddr data_addr, uint32_t data_width,
> + hwaddr dma_addr, AddressSpace *dma_as);
>
> FWCfgState *fw_cfg_find(void);
>
>