Packet-split descriptors are used by the Linux VF driver for MTU values from 2048.
---
 hw/net/igb_core.c | 236 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 231 insertions(+), 5 deletions(-)
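Note for reviewers (not applied by git-am): the new path is taken when a queue's
SRRCTL.DESCTYPE selects one of the two header-split encodings that
igb_rx_use_ps_descriptor() recognises, and the guest supplies both a header
buffer address (hdr_addr) and a packet buffer address (pkt_addr) in each
advanced read descriptor. The standalone sketch below only illustrates the
buffer layout chosen in the "split always" mode; every identifier in it is
local to this note (none of it comes from igb_core.c), and fragmentation
handling is omitted.

/* Illustrative sketch, not QEMU code: how a frame of total_len bytes with a
 * parsed protocol header of parsed_hdr_len bytes is distributed between the
 * header buffer (size hdr_buf_size) and the packet buffer. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct ps_layout {
    size_t buf0_bytes; /* bytes DMAed to the header buffer (hdr_addr) */
    size_t buf1_bytes; /* bytes DMAed to the packet buffer (pkt_addr) */
    bool sph;          /* write-back "split header" flag */
    bool hbo;          /* write-back "header buffer overflow" flag */
};

static struct ps_layout split_always_layout(size_t total_len,
                                            size_t parsed_hdr_len,
                                            size_t hdr_buf_size)
{
    struct ps_layout l = {0, 0, false, false};

    if (total_len <= hdr_buf_size) {
        /* Small frame: everything fits into the header buffer */
        l.buf0_bytes = total_len;
    } else if (parsed_hdr_len != 0 && parsed_hdr_len <= hdr_buf_size) {
        /* Normal split: headers to buffer 0, payload to buffer 1 */
        l.sph = true;
        l.buf0_bytes = parsed_hdr_len;
        l.buf1_bytes = total_len - parsed_hdr_len;
    } else {
        /* Unrecognised protocol or header larger than the header buffer:
         * split at the buffer boundary; HBO is reported only when a header
         * was parsed but did not fit. */
        l.hbo = (parsed_hdr_len > hdr_buf_size);
        l.sph = l.hbo;
        l.buf0_bytes = hdr_buf_size;
        l.buf1_bytes = total_len - hdr_buf_size;
    }
    return l;
}

int main(void)
{
    /* e.g. a 1514-byte TCP frame with 54 bytes of headers, 128-byte header buffer */
    struct ps_layout l = split_always_layout(1514, 54, 128);
    printf("buf0=%zu buf1=%zu sph=%d hbo=%d\n",
           l.buf0_bytes, l.buf1_bytes, l.sph, l.hbo);
    return 0;
}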
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 47c4a16e84..de54ddb5fe 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -282,6 +282,14 @@ igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r)
     return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
 }
 
+static inline bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000E_RingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+           desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static inline bool
 igb_rss_enabled(IGBCore *core)
 {
@@ -1245,6 +1253,14 @@ igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
     *buff_addr = le64_to_cpu(desc->read.pkt_addr);
 }
 
+static inline void
+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                hwaddr *buff_addr)
+{
+    buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+    buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
+}
+
 static inline void
 igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
                   hwaddr buff_addr[MAX_PS_BUFFERS],
@@ -1258,7 +1274,13 @@ igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
         return;
     }
 
-    /* modes other than advanced single buffer descriptor not supported */
+    /* advanced header split descriptor */
+    if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_read_adv_rx_split_buf_descr(core, &desc->adv, &buff_addr[0]);
+        return;
+    }
+
+    /* descriptor replication modes not supported */
     desc_type = igb_rx_queue_desctyp_get(core, r);
     if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
         trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
@@ -1410,6 +1432,19 @@ igb_write_lgcy_rx_descr(IGBCore *core,
     d->status = (uint8_t) le32_to_cpu(status_flags);
 }
 
+typedef struct IGB_SplitDescriptorData_st {
+    bool sph;
+    bool hbo;
+    size_t hdr_len;
+} IGB_SplitDescriptorData;
+
+static inline bool
+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000E_RingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static uint16_t
 igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
 {
@@ -1495,6 +1530,43 @@ igb_write_adv_rx_descr(IGBCore *core,
     d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
 }
 
+static inline void
+igb_write_adv_ps_split_rx_descr(IGBCore *core,
+                                union e1000_adv_rx_desc *d,
+                                struct NetRxPkt *pkt,
+                                const E1000E_RSSInfo *rss_info,
+                                const E1000E_RingInfo *r,
+                                uint16_t etqf,
+                                bool ts,
+                                IGB_SplitDescriptorData *ps_desc_data,
+                                uint16_t(*written)[MAX_PS_BUFFERS])
+{
+    size_t pkt_len;
+    size_t hdr_len = ps_desc_data->hdr_len;
+
+    bool split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (!split_always) {
+        if ((!ps_desc_data->sph && !ps_desc_data->hbo) ||
+            ( ps_desc_data->sph &&  ps_desc_data->hbo)) {
+            pkt_len = (*written)[0] + (*written)[1];
+        } else {
+            assert(!ps_desc_data->hbo);
+            pkt_len = (*written)[1];
+        }
+    } else {
+        pkt_len = (*written)[1];
+    }
+
+    igb_write_adv_rx_descr(core, d, pkt, rss_info, etqf, ts, pkt_len);
+
+    d->wb.lower.lo_dword.hdr_info = (hdr_len << RX_DESC_ADV_HDR_LEN_OFFSET) &
+                                    RX_DESC_ADV_HDR_LEN_MASK;
+    d->wb.lower.lo_dword.hdr_info |= ps_desc_data->sph ? RX_DESC_ADV_HDR_SPH
+                                                       : 0;
+    d->wb.upper.status_error |= ps_desc_data->hbo ?
+                                RX_DESC_ADV_ST_ERR_HBO_OFFSET : 0;
+}
+
 static inline void
 igb_write_rx_descr(IGBCore *core,
                    union e1000_rx_desc_union *desc,
@@ -1502,13 +1574,18 @@ igb_write_rx_descr(IGBCore *core,
                    const E1000E_RSSInfo *rss_info,
                    uint16_t etqf,
                    bool ts,
+                   IGB_SplitDescriptorData *ps_desc_data,
                    uint16_t(*written)[MAX_PS_BUFFERS],
                    const E1000E_RingInfo *r)
 {
     if (igb_rx_use_legacy_descriptor(core)) {
         igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
                                 (*written)[1]);
+    } else if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_write_adv_ps_split_rx_descr(core, &desc->adv, pkt, rss_info, r,
+                                        etqf, ts, ps_desc_data, written);
     } else {
-        igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, ts, (*written)[1]);
+        igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
+                               etqf, ts, (*written)[1]);
     }
 }
 
@@ -1569,6 +1646,78 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
 }
 
+static bool
+igb_do_ps(IGBCore *core,
+          const E1000E_RingInfo *r,
+          struct NetRxPkt *pkt,
+          size_t *hdr_len,
+          IGB_SplitDescriptorData *ps_desc_data)
+{
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    bool fragment;
+    bool split_always;
+    size_t bheader_size;
+    size_t total_pkt_len;
+
+    if (!igb_rx_use_ps_descriptor(core, r)) {
+        return false;
+    }
+
+    memset(ps_desc_data, 0, sizeof(IGB_SplitDescriptorData));
+
+    total_pkt_len = net_rx_pkt_get_total_len(pkt);
+    bheader_size = igb_get_queue_rx_header_buf_size(core, r);
+    split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (split_always && total_pkt_len <= bheader_size) {
+        *hdr_len = total_pkt_len;
+        ps_desc_data->hdr_len = total_pkt_len;
+        return true;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip4) {
+        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+    } else if (hasip6) {
+        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+    } else {
+        ps_desc_data->hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+        ps_desc_data->hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    /* no header splitting for SCTP */
+    if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP ||
+                      l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) {
+        *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+    } else {
+        *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+    }
+
+    ps_desc_data->sph = true;
+    ps_desc_data->hdr_len = *hdr_len;
+
+    if (*hdr_len > bheader_size) {
+        ps_desc_data->hbo = true;
+        goto header_not_handled;
+    }
+
+    return true;
+
+header_not_handled:
+    if (split_always) {
+        *hdr_len = bheader_size;
+        return true;
+    }
+
+    return false;
+}
+
 typedef struct igb_ba_state_st {
     uint16_t written[MAX_PS_BUFFERS];
     uint8_t cur_idx;
@@ -1584,6 +1733,7 @@ typedef struct {
     uint32_t rx_desc_header_buf_size;
     struct iovec *iov;
     size_t iov_ofs;
+    bool do_ps;
     bool is_first;
     igb_ba_state bastate;
     hwaddr ba[MAX_PS_BUFFERS];
@@ -1592,11 +1742,74 @@ typedef struct {
 static void
 igb_truncate_to_descriptor_size(igb_packet_tx_dma_state *pdma_st, size_t *size)
 {
-    if (*size > pdma_st->rx_desc_packet_buf_size) {
-        *size = pdma_st->rx_desc_packet_buf_size;
+    if (pdma_st->do_ps && pdma_st->is_first) {
+        if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) {
+            *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len;
+        }
+    } else {
+        if (*size > pdma_st->rx_desc_packet_buf_size) {
+            *size = pdma_st->rx_desc_packet_buf_size;
+        }
     }
 }
 
+static inline void
+igb_write_hdr_to_rx_buffers(IGBCore *core,
+                            PCIDevice *d,
+                            hwaddr (*ba)[MAX_PS_BUFFERS],
+                            igb_ba_state *bastate,
+                            uint32_t rx_desc_header_buf_size,
+                            const char *data,
+                            dma_addr_t data_len)
+{
+    assert(data_len <= rx_desc_header_buf_size - bastate->written[0]);
+    pci_dma_write(d, (*ba)[0] + bastate->written[0], data, data_len);
+    bastate->written[0] += data_len;
+    bastate->cur_idx = 1;
+}
+
+static void
+igb_write_packet_hdr_to_descr_addr(IGBCore *core,
+                                   struct NetRxPkt *pkt,
+                                   PCIDevice *d,
+                                   igb_packet_tx_dma_state *pdma_st,
+                                   size_t *copy_size)
+{
+    size_t iov_copy;
+    size_t ps_hdr_copied = 0;
+
+    if (!pdma_st->is_first) {
+        /* Leave buffer 0 of each descriptor except first */
+        /* empty */
+        igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba, &pdma_st->bastate,
+                                    pdma_st->rx_desc_header_buf_size,
+                                    NULL, 0);
+        return;
+    }
+
+    do {
+        iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied,
+                       pdma_st->iov->iov_len - pdma_st->iov_ofs);
+
+        igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba,
+                                    &pdma_st->bastate,
+                                    pdma_st->rx_desc_header_buf_size,
+                                    pdma_st->iov->iov_base,
+                                    iov_copy);
+
+        *copy_size -= iov_copy;
+        ps_hdr_copied += iov_copy;
+
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    } while (ps_hdr_copied < pdma_st->ps_hdr_len);
+
+    pdma_st->is_first = false;
+}
+
 static void
 igb_write_payload_to_rx_buffers(IGBCore *core,
                                 PCIDevice *d,
@@ -1695,7 +1908,14 @@ igb_write_to_rx_buffers(IGBCore *core,
     igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
     copy_size = pdma_st->size - pdma_st->desc_offset;
     igb_truncate_to_descriptor_size(pdma_st, &copy_size);
-    pdma_st->bastate.cur_idx = 1;
+
+    /* For PS mode copy the packet header first */
+    if (pdma_st->do_ps) {
+        igb_write_packet_hdr_to_descr_addr(core, pkt, d, pdma_st, &copy_size);
+    } else {
+        pdma_st->bastate.cur_idx = 1;
+    }
+
     igb_write_packet_payload_to_descr_addr(core, pkt, d, pdma_st, &copy_size);
 }
 
@@ -1710,6 +1930,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     union e1000_rx_desc_union desc;
     const E1000E_RingInfo *rxi;
     size_t rx_desc_len;
+    IGB_SplitDescriptorData ps_desc_data;
 
     igb_packet_tx_dma_state pdma_st = {0};
     pdma_st.is_first = true;
@@ -1728,6 +1949,10 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
         d = core->owner;
     }
 
+    pdma_st.do_ps = igb_do_ps(core, rxi, pkt,
+                              &pdma_st.ps_hdr_len,
+                              &ps_desc_data);
+
     do {
         memset(&pdma_st.bastate, 0, sizeof(igb_ba_state));
         bool is_last = false;
@@ -1752,6 +1977,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
                            is_last ? pkt : NULL,
                            rss_info,
                            etqf, ts,
+                           &ps_desc_data,
                            &pdma_st.bastate.written,
                            rxi);
         pci_dma_write(d, base, &desc, rx_desc_len);
-- 
2.25.1