We have observed high rate of NIC PF interrupts when VNF is using DPDK
APIs rte_eth_link_get_nowait() and rte_eth_link_get() functions, as
they are causing VF driver to send many MBOX ACK messages.
With these changes, the interrupt rates go down significantly. Here's
some testing results:
Without the patch:
$ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
/proc/interrupts
CPU0 CPU1 CPU2 CPU3 CPU4 CPU5
CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 CPU12
CPU13 CPU14 CPU15
34: 88 0 0 0 0 41
30 509 0 0 350 24 88 114
461 562 PCI-MSI 1572864-edge ens1f0-TxRx-0
35: 49 24 0 0 65 130
64 29 67 0 10 0 0 46
38 764 PCI-MSI 1572865-edge ens1f0-TxRx-1
36: 53 0 0 64 15 85
132 71 108 0 30 0 165
215 303 104 PCI-MSI 1572866-edge ens1f0-TxRx-2
37: 46 196 0 0 10 48
62 68 51 0 0 0 103 82
54 192 PCI-MSI 1572867-edge ens1f0-TxRx-3
38: 226 0 0 0 159 145
749 265 0 0 202 0 69229
166 450 0 PCI-MSI 1572868-edge ens1f0
52: 95 896 0 0 0 18
53 0 494 0 0 0 0 265
79 124 PCI-MSI 1574912-edge ens1f1-TxRx-0
53: 50 0 18 0 72 33
0 168 330 0 0 0 141 22
12 65 PCI-MSI 1574913-edge ens1f1-TxRx-1
54: 65 0 0 0 239 104
166 49 442 0 0 0 126
26 307 0 PCI-MSI 1574914-edge ens1f1-TxRx-2
55: 57 0 0 0 123 35
83 54 157 106 0 0 26 29
312 97 PCI-MSI 1574915-edge ens1f1-TxRx-3
56: 232 0 13910 0 16 21
0 54422 0 0 0 24 25 0
78 0 PCI-MSI 1574916-edge ens1f1
CPU0 CPU1 CPU2 CPU3 CPU4 CPU5
CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 CPU12
CPU13 CPU14 CPU15
34: 88 0 0 0 0 41
30 509 0 0 350 24 88 119
461 562 PCI-MSI 1572864-edge ens1f0-TxRx-0
35: 49 24 0 0 65 130
64 29 67 0 10 0 0 46
38 771 PCI-MSI 1572865-edge ens1f0-TxRx-1
36: 53 0 0 64 15 85
132 71 108 0 30 0 165
215 303 113 PCI-MSI 1572866-edge ens1f0-TxRx-2
37: 46 196 0 0 10 48
62 68 56 0 0 0 103 82
54 192 PCI-MSI 1572867-edge ens1f0-TxRx-3
38: 226 0 0 0 159 145
749 265 0 0 202 0 71281
166 450 0 PCI-MSI 1572868-edge ens1f0
52: 95 896 0 0 0 18
53 0 494 0 0 0 0 265
79 133 PCI-MSI 1574912-edge ens1f1-TxRx-0
53: 50 0 18 0 72 33
0 173 330 0 0 0 141 22
12 65 PCI-MSI 1574913-edge ens1f1-TxRx-1
54: 65 0 0 0 239 104
166 49 442 0 0 0 126
26 312 0 PCI-MSI 1574914-edge ens1f1-TxRx-2
55: 57 0 0 0 123 35
83 59 157 106 0 0 26 29
312 97 PCI-MSI 1574915-edge ens1f1-TxRx-3
56: 232 0 15910 0 16 21
0 54422 0 0 0 24 25 0
78 0 PCI-MSI 1574916-edge ens1f1
During the 10s interval, CPU2 jumped by 2000 interrupts, CPU12 by 2051
interrupts, for about 200 interrupts/second. That's on the order of
what we expect. I would have guessed 100/s but perhaps there are two
mailbox messages.
With the patch:
$ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
/proc/interrupts
CPU0 CPU1 CPU2 CPU3 CPU4 CPU5
CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 CPU12
CPU13 CPU14 CPU15
34: 88 0 0 0 0 25
19 177 0 0 350 24 88 100
362 559 PCI-MSI 1572864-edge ens1f0-TxRx-0
35: 49 19 0 0 65 130
64 29 67 0 10 0 0 46
38 543 PCI-MSI 1572865-edge ens1f0-TxRx-1
36: 53 0 0 64 15 53
85 71 108 0 24 0 85 215
292 31 PCI-MSI 1572866-edge ens1f0-TxRx-2
37: 46 196 0 0 10 43
57 39 19 0 0 0 78 69
49 149 PCI-MSI 1572867-edge ens1f0-TxRx-3
38: 226 0 0 0 159 145
749 247 0 0 202 0 58250
0 450 0 PCI-MSI 1572868-edge ens1f0
52: 95 896 0 0 0 18
53 0 189 0 0 0 0 265
79 25 PCI-MSI 1574912-edge ens1f1-TxRx-0
53: 50 0 18 0 72 33
0 90 330 0 0 0 136 5
12 0 PCI-MSI 1574913-edge ens1f1-TxRx-1
54: 65 0 0 0 10 104
166 49 442 0 0 0 126
26 226 0 PCI-MSI 1574914-edge ens1f1-TxRx-2
55: 57 0 0 0 61 35
83 30 157 101 0 0 26 15
312 0 PCI-MSI 1574915-edge ens1f1-TxRx-3
56: 232 0 2062 0 16 21
0 54422 0 0 0 24 25 0
78 0 PCI-MSI 1574916-edge ens1f1
CPU0 CPU1 CPU2 CPU3 CPU4 CPU5
CPU6 CPU7 CPU8 CPU9 CPU10 CPU11 CPU12
CPU13 CPU14 CPU15
34: 88 0 0 0 0 25
19 177 0 0 350 24 88 102
362 562 PCI-MSI 1572864-edge ens1f0-TxRx-0
35: 49 19 0 0 65 130
64 29 67 0 10 0 0 46
38 548 PCI-MSI 1572865-edge ens1f0-TxRx-1
36: 53 0 0 64 15 53
85 71 108 0 24 0 85 215
292 36 PCI-MSI 1572866-edge ens1f0-TxRx-2
37: 46 196 0 0 10 45
57 39 19 0 0 0 78 69
49 152 PCI-MSI 1572867-edge ens1f0-TxRx-3
38: 226 0 0 0 159 145
749 247 0 0 202 0 58259
0 450 0 PCI-MSI 1572868-edge ens1f0
52: 95 896 0 0 0 18
53 0 194 0 0 0 0 265
79 25 PCI-MSI 1574912-edge ens1f1-TxRx-0
53: 50 0 18 0 72 33
0 95 330 0 0 0 136 5
12 0 PCI-MSI 1574913-edge ens1f1-TxRx-1
54: 65 0 0 0 10 104
166 49 442 0 0 0 126
26 231 0 PCI-MSI 1574914-edge ens1f1-TxRx-2
55: 57 0 0 0 66 35
83 30 157 101 0 0 26 15
312 0 PCI-MSI 1574915-edge ens1f1-TxRx-3
56: 232 0 2071 0 16 21
0 54422 0 0 0 24 25 0
78 0 PCI-MSI 1574916-edge ens1f1
Note the interrupt rate has gone way down. During the 10s interval, we
only saw a handful of interrupts.
Note that this patch was originally provided by Intel directly to AT&T
and Vyatta, but unfortunately I am unable to find records of the exact
author.
We have been using this in production for more than a year.
Fixes: af75078fece3 ("first public release")
Cc: [email protected]
Signed-off-by: Luca Boccassi <[email protected]>
---
drivers/net/ixgbe/base/ixgbe_vf.c | 33 ++++++++++++++++---------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/drivers/net/ixgbe/base/ixgbe_vf.c
b/drivers/net/ixgbe/base/ixgbe_vf.c
index 5b25a6b4d4..16086670b1 100644
--- a/drivers/net/ixgbe/base/ixgbe_vf.c
+++ b/drivers/net/ixgbe/base/ixgbe_vf.c
@@ -586,7 +586,6 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
s32 ret_val = IXGBE_SUCCESS;
u32 links_reg;
u32 in_msg = 0;
- UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
/* If we were hit with a reset drop the link */
if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout)
@@ -643,23 +642,25 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
ixgbe_link_speed *speed,
*speed = IXGBE_LINK_SPEED_UNKNOWN;
}
- /* if the read failed it could just be a mailbox collision, best wait
- * until we are called again and don't report an error
- */
- if (mbx->ops.read(hw, &in_msg, 1, 0))
- goto out;
+ if (autoneg_wait_to_complete) {
+ /* if the read failed it could just be a mailbox collision,
best wait
+ * until we are called again and don't report an error
+ */
+ if (mbx->ops.read(hw, &in_msg, 1, 0))
+ goto out;
- if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
- /* msg is not CTS and is NACK we must have lost CTS status */
- if (in_msg & IXGBE_VT_MSGTYPE_NACK)
+ if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
+ /* msg is not CTS and is NACK we must have lost CTS
status */
+ if (in_msg & IXGBE_VT_MSGTYPE_NACK)
+ ret_val = -1;
+ goto out;
+ }
+
+ /* the pf is talking, if we timed out in the past we reinit */
+ if (!mbx->timeout) {
ret_val = -1;
- goto out;
- }
-
- /* the pf is talking, if we timed out in the past we reinit */
- if (!mbx->timeout) {
- ret_val = -1;
- goto out;
+ goto out;
+ }
}
/* if we passed all the tests above then the link is up and we no
--
2.18.0