On Tue, May 10, 2022 at 12:21:02AM +0200, Alexandr Nedvedicky wrote:
> Hello,
>
> On Mon, May 09, 2022 at 06:01:07PM +0300, Barbaros Bilek wrote:
> > Hello,
> >
> > I was using veb (veb+vlan+ixl) interfaces quite stable since 6.9.
> > My system ran as a firewall under OpenBSD 6.9 and 7.0 quite stable.
> > Also I've used 7.1 for a limited time and there were no crash.
> > After OpenBSD' NET_TASKQ upgrade to 4 it crashed after 5 days.
> > Here crash report and dmesg:
> >
> > ether_input(ffff80000520e000,fffffd8053616700) at ether_input+0x3ad
> > vport_if_enqueue(ffff80000520e000,fffffd8053616700) at vport_if_enqueue+0x19
> > veb_port_input(ffff8000051c3800,fffffd806064c200,ffffffffffff,ffff800002066600)
> > at veb_port_input+0x4d2
> > ether_input(ffff8000051c3800,fffffd806064c200) at ether_input+0x100
> > end trace frame: 0xffff800025575290, count: 0
> > ddb{1}> show panic
> >
> > *cpu1: kernel diagnostic assertion "curcpu()->ci_schedstate.spc_smrdepth ==
> > 0" f
> > ailed: file "/usr/src/sys/kern/subr_xxx.c", line 163
> >
> > ddb{1}> trace
> >
> > db_enter() at db_enter+0x10
> >
> > panic(ffffffff81f22e39) at panic+0xbf
> >
> > __assert(ffffffff81f96c9d,ffffffff81f85ebc,a3,ffffffff81fd252f) at
> > __assert+0x2
> >
> > 5
> >
>
> diff below attempts to fix this particular panic triggered by veb_span()
> function. This is fairly simple/straightforward change:
>
> we grab references to veb ports inside SMR_READ_ section.
>
> we keep those references in single linked list
>
> as soon as we leave SMR_READ_ section we process the list:
> dispatch packets
>
> drop references to port
>
> The change may uncover similar panics in other veb/bridge area.
>
> diff applies to current
>
> thanks for testing and reporting back.
Can we limit the number of span ports per bridge to a small number so that
the instead of a heap object for the SLIST a simple stack array of
MAX_SPAN_PORTS pointers could be used?
Who needs more than a handfull of spanports per veb?
> --------8<---------------8<---------------8<------------------8<--------
> diff --git a/sys/net/if_veb.c b/sys/net/if_veb.c
> index 2976cc200f1..a02dbac887f 100644
> --- a/sys/net/if_veb.c
> +++ b/sys/net/if_veb.c
> @@ -159,6 +159,11 @@ struct veb_softc {
> struct veb_ports sc_spans;
> };
>
> +struct veb_span_port {
> + SLIST_ENTRY(veb_span_port) sp_entry;
> + struct veb_port *sp_port;
> +};
> +
> #define DPRINTF(_sc, fmt...) do { \
> if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
> printf(fmt); \
> @@ -225,6 +230,7 @@ static struct if_clone veb_cloner =
> IF_CLONE_INITIALIZER("veb", veb_clone_create, veb_clone_destroy);
>
> static struct pool veb_rule_pool;
> +static struct pool span_port_pool;
>
> static int vport_clone_create(struct if_clone *, int);
> static int vport_clone_destroy(struct ifnet *);
> @@ -266,6 +272,11 @@ veb_clone_create(struct if_clone *ifc, int unit)
> 0, IPL_SOFTNET, 0, "vebrpl", NULL);
> }
>
> + if (span_port_pool.pr_size == 0) {
> + pool_init(&span_port_pool, sizeof(struct veb_span_port),
> + 0, IPL_SOFTNET, 0, "vebspl", NULL);
> + }
> +
> sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
> if (sc == NULL)
> return (ENOMEM);
> @@ -352,22 +363,38 @@ veb_span(struct veb_softc *sc, struct mbuf *m0)
> struct veb_port *p;
> struct ifnet *ifp0;
> struct mbuf *m;
> + struct veb_span_port *sp;
> + SLIST_HEAD(, veb_span_port) span_list;
>
> + SLIST_INIT(&span_list)
> smr_read_enter();
> SMR_TAILQ_FOREACH(p, &sc->sc_spans.l_list, p_entry) {
> ifp0 = p->p_ifp0;
> if (!ISSET(ifp0->if_flags, IFF_RUNNING))
> continue;
>
> - m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
> - if (m == NULL) {
> - /* XXX count error */
> - continue;
> - }
> + sp = pool_get(&span_port_pool, PR_NOWAIT);
> + if (sp == NULL)
> + continue; /* XXX count error */
>
> - if_enqueue(ifp0, m); /* XXX count error */
> + veb_eb_brport_take(p);
> + sp->sp_port = p;
> + SLIST_INSERT_HEAD(&span_list, sp, sp_entry);
> }
> smr_read_leave();
> +
> + while (!SLIST_EMPTY(&span_list)) {
> + sp = SLIST_FIRST(&span_list);
> + SLIST_REMOVE_HEAD(&span_list, sp_entry);
> +
> + m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
> + if (m != NULL)
> + if_enqueue(sp->sp_port->p_ifp0, m);
> + /* XXX count error */
> +
> + veb_eb_brport_rele(sp->sp_port);
> + pool_put(&span_port_pool, sp);
> + }
> }
>
> static int
>
--
:wq Claudio