Alexander Bulekov reported a UAF bug related e1000e packets send. -->https://bugs.launchpad.net/qemu/+bug/1886362
This is because the guest trigger a e1000e packet send and set the data's address to e1000e's MMIO address. So when the e1000e do DMA it will write the MMIO again and trigger re-entrancy and finally causes this UAF. Paolo suggested to use a bottom half whenever MMIO is doing complicate things in here: -->https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg03342.html Reference here: 'The easiest solution is to delay processing of descriptors to a bottom half whenever MMIO is doing something complicated. This is also better for latency because it will free the vCPU thread more quickly and leave the work to the I/O thread.' This patch fixes this UAF. Signed-off-by: Li Qiang <[email protected]> --- Change since v1: Per Jason's review here: -- https://lists.nongnu.org/archive/html/qemu-devel/2020-07/msg05368.html 1. Cancel and schedule the tx bh when VM is stopped or resume 2. Add a tx_burst for e1000e configuration to throttle the bh execution 3. Add a tx_waiting to record whether the bh is pending or not Don't use BQL in the tx_bh handler as when tx_bh is executed, the BQL is acquired. hw/net/e1000e.c | 6 +++ hw/net/e1000e_core.c | 106 ++++++++++++++++++++++++++++++++++--------- hw/net/e1000e_core.h | 8 ++++ 3 files changed, 98 insertions(+), 22 deletions(-) diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index fda34518c9..24e35a78bf 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -77,10 +77,14 @@ typedef struct E1000EState { bool disable_vnet; + int32_t tx_burst; + E1000ECore core; } E1000EState; +#define TX_BURST 256 + #define E1000E_MMIO_IDX 0 #define E1000E_FLASH_IDX 1 #define E1000E_IO_IDX 2 @@ -263,6 +267,7 @@ static void e1000e_core_realize(E1000EState *s) { s->core.owner = &s->parent_obj; s->core.owner_nic = s->nic; + s->core.tx_burst = s->tx_burst; } static void @@ -665,6 +670,7 @@ static Property e1000e_properties[] = { e1000e_prop_subsys_ven, uint16_t), DEFINE_PROP_SIGNED("subsys", E1000EState, subsys, 0, e1000e_prop_subsys, uint16_t), + DEFINE_PROP_INT32("x-txburst", E1000EState, tx_burst, TX_BURST), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index bcd186cac5..0a38a50cca 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -909,19 +909,18 @@ e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx) rxr->i = &i[idx]; } -static void -e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) +static int32_t +e1000e_start_xmit(struct e1000e_tx *q) { + E1000ECore *core = q->core; dma_addr_t base; struct e1000_tx_desc desc; - bool ide = false; - const E1000E_RingInfo *txi = txr->i; - uint32_t cause = E1000_ICS_TXQE; + const E1000E_RingInfo *txi; + E1000E_TxRing txr; + int32_t num_packets = 0; - if (!(core->mac[TCTL] & E1000_TCTL_EN)) { - trace_e1000e_tx_disabled(); - return; - } + e1000e_tx_ring_init(core, &txr, q - &core->tx[0]); + txi = txr.i; while (!e1000e_ring_empty(core, txi)) { base = e1000e_ring_head_descr(core, txi); @@ -931,15 +930,17 @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) trace_e1000e_tx_descr((void *)(intptr_t)desc.buffer_addr, desc.lower.data, desc.upper.data); - e1000e_process_tx_desc(core, txr->tx, &desc, txi->idx); - cause |= e1000e_txdesc_writeback(core, base, &desc, &ide, txi->idx); + e1000e_process_tx_desc(core, txr.tx, &desc, txi->idx); + q->cause |= e1000e_txdesc_writeback(core, base, &desc, + &q->ide, txi->idx); e1000e_ring_advance(core, txi, 1); + if (++num_packets >= core->tx_burst) { + break; + } } - if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) { - e1000e_set_interrupt_cause(core, cause); - } + return num_packets; } static bool @@ -2423,32 +2424,41 @@ e1000e_set_dbal(E1000ECore *core, int index, uint32_t val) static void e1000e_set_tctl(E1000ECore *core, int index, uint32_t val) { - E1000E_TxRing txr; core->mac[index] = val; if (core->mac[TARC0] & E1000_TARC_ENABLE) { - e1000e_tx_ring_init(core, &txr, 0); - e1000e_start_xmit(core, &txr); + if (core->tx[0].tx_waiting) { + return; + } + core->tx[0].tx_waiting = 1; + if (!core->vm_running) { + return; + } + qemu_bh_schedule(core->tx[0].tx_bh); } if (core->mac[TARC1] & E1000_TARC_ENABLE) { - e1000e_tx_ring_init(core, &txr, 1); - e1000e_start_xmit(core, &txr); + if (core->tx[1].tx_waiting) { + return; + } + core->tx[1].tx_waiting = 1; + if (!core->vm_running) { + return; + } + qemu_bh_schedule(core->tx[1].tx_bh); } } static void e1000e_set_tdt(E1000ECore *core, int index, uint32_t val) { - E1000E_TxRing txr; int qidx = e1000e_mq_queue_idx(TDT, index); uint32_t tarc_reg = (qidx == 0) ? TARC0 : TARC1; core->mac[index] = val & 0xffff; if (core->mac[tarc_reg] & E1000_TARC_ENABLE) { - e1000e_tx_ring_init(core, &txr, qidx); - e1000e_start_xmit(core, &txr); + qemu_bh_schedule(core->tx[qidx].tx_bh); } } @@ -3315,10 +3325,56 @@ e1000e_vm_state_change(void *opaque, int running, RunState state) trace_e1000e_vm_state_running(); e1000e_intrmgr_resume(core); e1000e_autoneg_resume(core); + core->vm_running = 1; + + for (int i = 0; i < E1000E_NUM_QUEUES; i++) { + qemu_bh_schedule(core->tx[i].tx_bh); + } + } else { trace_e1000e_vm_state_stopped(); + + for (int i = 0; i < E1000E_NUM_QUEUES; i++) { + qemu_bh_cancel(core->tx[i].tx_bh); + } + e1000e_autoneg_pause(core); e1000e_intrmgr_pause(core); + core->vm_running = 0; + } +} + + +static void e1000e_core_tx_bh(void *opaque) +{ + struct e1000e_tx *q = opaque; + E1000ECore *core = q->core; + int32_t ret; + + if (!core->vm_running) { + assert(q->tx_waiting); + return; + } + + q->tx_waiting = 0; + + if (!(core->mac[TCTL] & E1000_TCTL_EN)) { + trace_e1000e_tx_disabled(); + return; + } + + q->cause = E1000_ICS_TXQE; + q->ide = false; + + ret = e1000e_start_xmit(q); + if (ret >= core->tx_burst) { + qemu_bh_schedule(q->tx_bh); + q->tx_waiting = 1; + return; + } + + if (!q->ide || !e1000e_intrmgr_delay_tx_causes(core, &q->cause)) { + e1000e_set_interrupt_cause(core, q->cause); } } @@ -3334,12 +3390,15 @@ e1000e_core_pci_realize(E1000ECore *core, e1000e_autoneg_timer, core); e1000e_intrmgr_pci_realize(core); + core->vm_running = runstate_is_running(); core->vmstate = qemu_add_vm_change_state_handler(e1000e_vm_state_change, core); for (i = 0; i < E1000E_NUM_QUEUES; i++) { net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS, core->has_vnet); + core->tx[i].core = core; + core->tx[i].tx_bh = qemu_bh_new(e1000e_core_tx_bh, &core->tx[i]); } net_rx_pkt_init(&core->rx_pkt, core->has_vnet); @@ -3367,6 +3426,9 @@ e1000e_core_pci_uninit(E1000ECore *core) for (i = 0; i < E1000E_NUM_QUEUES; i++) { net_tx_pkt_reset(core->tx[i].tx_pkt); net_tx_pkt_uninit(core->tx[i].tx_pkt); + qemu_bh_cancel(core->tx[i].tx_bh); + qemu_bh_delete(core->tx[i].tx_bh); + core->tx[i].tx_bh = NULL; } net_rx_pkt_uninit(core->rx_pkt); diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h index aee32f7e48..0c16dce3a6 100644 --- a/hw/net/e1000e_core.h +++ b/hw/net/e1000e_core.h @@ -77,10 +77,18 @@ struct E1000Core { unsigned char sum_needed; bool cptse; struct NetTxPkt *tx_pkt; + QEMUBH *tx_bh; + uint32_t tx_waiting; + uint32_t cause; + bool ide; + E1000ECore *core; } tx[E1000E_NUM_QUEUES]; struct NetRxPkt *rx_pkt; + int32_t tx_burst; + + bool vm_running; bool has_vnet; int max_queue_num; -- 2.17.1
