On Thu, Nov 30, 2023 at 04:51:06PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Nov 30, 2023 at 08:44:19AM +0100, Alexandr Nedvedicky wrote:
> > Hello Johan,
> >
> > On Wed, Nov 29, 2023 at 11:24:59PM -0500, Johan Huldtgren wrote:
> > >
> > > so my machine paniced today, but the panic this time is completely
> > > different.
> > > I don't know if it's related to this issue, the patch, or a completely new
> > > issue, but I figured I'd start reporting it here. Unfortuntately when I
> > > tried
> > > to swap CPU to collect traces from the other ones the machine froze and I
> > > was
> > > forced to power cycle it. So I have the panic and initial trace but
> > > that's it.
> > >
> > > panic: ip_output no HDR
> > > Stopped at db_enter+0x14: popq %rbp
> > > TID PID UID PRFLAGS PFLAGS CPU COMMAND
> > > 74003 25022 0 0x10 0 2 afpd
> > > 355827 29745 107 0x1100002 0x4000000 3 vmd
> > > 451006 29745 107 0x1100002 0x4000000 4 vmd
> > > 131508 78367 107 0x1100002 0x4000000 5 vmd
> > > 112644 78367 107 0x1100002 0x4000000 1 vmd
> > > *133058 91446 0 0x14000 0x200 0 softnet0
> > > db_enter() at db_enter+0x14
> > > panic(ffffffff820c20df) at panic+0xc3
> > > ip_output(fffffd8076b76e00,0,fffffd9c9e59e708,0,0,fffffd9c9e59e690,e4a23bf8c0204936)
> > > at ip_output+0xa26
> > > udp_output(fffffd9c9e59e690,fffffd8076b76e00,fffffd8079d14b00,0) at
> > > udp_output+0x3be
> > > sosend(fffffd9c9e59f000,fffffd8079d14b00,0,fffffd8076b76e00,0,0) at
> > > sosend+0x37f
> > > pflow_output_process(ffff8000011a0800) at pflow_output_process+0x67
> > > taskq_thread(ffff800000035200) at taskq_thread+0x100
> > > end trace frame: 0x0, count: 8
> > > https://www.openbsd.org/ddb.html describes the minimum info required in
> > > bug
> > > reports. Insufficient info makes it difficult to find and fix bugs.
> > > ddb{0}>
> > >
> > > ddb{0}> show panic
> > > *cpu0: ip_output no HDR
> > >
> > > ddb{0}> trace
> > > db_enter() at db_enter+0x14
> > > panic(ffffffff820c20df) at panic+0xc3
> > > ip_output(fffffd8076b76e00,0,fffffd9c9e59e708,0,0,fffffd9c9e59e690,e4a23bf8c0204936)
> > > at ip_output+0xa26
> > > udp_output(fffffd9c9e59e690,fffffd8076b76e00,fffffd8079d14b00,0) at
> > > udp_output+0x3be
> > > sosend(fffffd9c9e59f000,fffffd8079d14b00,0,fffffd8076b76e00,0,0) at
> > > sosend+0x37f
> > > pflow_output_process(ffff8000011a0800) at pflow_output_process+0x67
> > > taskq_thread(ffff800000035200) at taskq_thread+0x100
> > > end trace frame: 0x0, count: -7
> > >
> >
> > This is a different issue to what we were seeing. The panic indicates
> > the ip_output() function deals with packet buffer which contains no
> > ip header. How it could happen that's the question...
> >
>
> I found the reason of that panic. The `sc_mbuf{,6}' cumulative mbuf(9)
> of pflow_softc structure has missing protection. So it was overwritten
> concurrently with pflow_sendout_*(). I will fix this later.
>
Here id the diff. I introduces `sc_mtx' mutex(9) to protect the most of
pflow_softc structure. The `send_nam', `sc_flowsrc' and `sc_flowdst' are
prtected by `sc_lock' rwlock(9). `sc_tmpl_ipfix' is immutable.
Also, the pflow_sendout_ipfix_tmpl() calls pflow_get_mbuf() with NULL
instead of `sc'. This fix also included to this diff.
Please note, this diff does not fix all the problems in the pflow(4).
The ifconfig create/destroy sequence could still break the kernel. I
have ok'ed diff to fix this but did not commit it yet for some reason.
Also the `pflowstats' data left unprotected. I will fix this with
separate diff.
Please test this diff and let us know the result. I will continue with
pflow(4) after committing this.
Index: sys/net/if_pflow.c
===================================================================
RCS file: /cvs/src/sys/net/if_pflow.c,v
retrieving revision 1.100
diff -u -p -r1.100 if_pflow.c
--- sys/net/if_pflow.c 9 Nov 2023 08:53:20 -0000 1.100
+++ sys/net/if_pflow.c 7 Dec 2023 14:58:18 -0000
@@ -29,6 +29,7 @@
#include <sys/kernel.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
+#include <sys/mutex.h>
#include <net/if.h>
#include <net/if_types.h>
@@ -147,6 +148,7 @@ pflow_clone_create(struct if_clone *ifc,
pflowif = malloc(sizeof(*pflowif), M_DEVBUF, M_WAITOK|M_ZERO);
rw_init(&pflowif->sc_lock, "pflowlk");
+ mtx_init(&pflowif->sc_mtx, IPL_MPFLOOR);
MGET(pflowif->send_nam, M_WAIT, MT_SONAME);
pflowif->sc_version = PFLOW_PROTO_DEFAULT;
@@ -347,6 +349,8 @@ pflow_set(struct pflow_softc *sc, struct
}
}
+ rw_assert_wrlock(&sc->sc_lock);
+
pflow_flush(sc);
if (pflowr->addrmask & PFLOW_MASK_DSTIP) {
@@ -461,6 +465,8 @@ pflow_set(struct pflow_softc *sc, struct
sc->so = NULL;
}
+ mtx_enter(&sc->sc_mtx);
+
/* error check is above */
if (pflowr->addrmask & PFLOW_MASK_VERSION)
sc->sc_version = pflowr->version;
@@ -479,6 +485,8 @@ pflow_set(struct pflow_softc *sc, struct
break;
}
+ mtx_leave(&sc->sc_mtx);
+
return (0);
}
@@ -504,10 +512,12 @@ pflowioctl(struct ifnet *ifp, u_long cmd
NET_LOCK();
if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
ifp->if_flags |= IFF_RUNNING;
+ mtx_enter(&sc->sc_mtx);
sc->sc_gcounter=pflowstats.pflow_flows;
/* send templates on startup */
if (sc->sc_version == PFLOW_PROTO_10)
pflow_sendout_ipfix_tmpl(sc);
+ mtx_leave(&sc->sc_mtx);
} else
ifp->if_flags &= ~IFF_RUNNING;
rw_exit_read(&sc->sc_lock);
@@ -519,19 +529,28 @@ pflowioctl(struct ifnet *ifp, u_long cmd
ifr->ifr_mtu = MCLBYTES;
if (ifr->ifr_mtu < ifp->if_mtu)
pflow_flush(sc);
+ mtx_enter(&sc->sc_mtx);
pflow_setmtu(sc, ifr->ifr_mtu);
+ mtx_leave(&sc->sc_mtx);
break;
case SIOCGETPFLOW:
bzero(&pflowr, sizeof(pflowr));
+ /* XXXSMP: enforce lock order */
+ NET_UNLOCK();
+ rw_enter_read(&sc->sc_lock);
+ NET_LOCK();
if (sc->sc_flowsrc != NULL)
memcpy(&pflowr.flowsrc, sc->sc_flowsrc,
sc->sc_flowsrc->sa_len);
if (sc->sc_flowdst != NULL)
memcpy(&pflowr.flowdst, sc->sc_flowdst,
sc->sc_flowdst->sa_len);
+ rw_exit_read(&sc->sc_lock);
+ mtx_enter(&sc->sc_mtx);
pflowr.version = sc->sc_version;
+ mtx_leave(&sc->sc_mtx);
if ((error = copyout(&pflowr, ifr->ifr_data,
sizeof(pflowr))))
@@ -557,9 +576,11 @@ pflowioctl(struct ifnet *ifp, u_long cmd
if ((ifp->if_flags & IFF_UP) && sc->so != NULL) {
ifp->if_flags |= IFF_RUNNING;
+ mtx_enter(&sc->sc_mtx);
sc->sc_gcounter=pflowstats.pflow_flows;
if (sc->sc_version == PFLOW_PROTO_10)
pflow_sendout_ipfix_tmpl(sc);
+ mtx_leave(&sc->sc_mtx);
} else
ifp->if_flags &= ~IFF_RUNNING;
rw_exit_write(&sc->sc_lock);
@@ -575,7 +596,6 @@ pflowioctl(struct ifnet *ifp, u_long cmd
int
pflow_calc_mtu(struct pflow_softc *sc, int mtu, int hdrsz)
{
-
sc->sc_maxcount4 = (mtu - hdrsz -
sizeof(struct udpiphdr)) / sizeof(struct pflow_ipfix_flow4);
sc->sc_maxcount6 = (mtu - hdrsz -
@@ -622,6 +642,8 @@ pflow_get_mbuf(struct pflow_softc *sc, u
struct pflow_header h;
struct mbuf *m;
+ MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
+
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
pflowstats.pflow_onomem++;
@@ -791,6 +813,7 @@ export_pflow(struct pf_state *st)
sk = st->key[st->direction == PF_IN ? PF_SK_WIRE : PF_SK_STACK];
SLIST_FOREACH(sc, &pflowif_list, sc_next) {
+ mtx_enter(&sc->sc_mtx);
switch (sc->sc_version) {
case PFLOW_PROTO_5:
if( sk->af == AF_INET )
@@ -803,6 +826,7 @@ export_pflow(struct pf_state *st)
default: /* NOTREACHED */
break;
}
+ mtx_leave(&sc->sc_mtx);
}
return (0);
@@ -864,6 +888,8 @@ copy_flow_to_m(struct pflow_flow *flow,
{
int ret = 0;
+ MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
+
if (sc->sc_mbuf == NULL) {
if ((sc->sc_mbuf = pflow_get_mbuf(sc, 0)) == NULL)
return (ENOBUFS);
@@ -888,6 +914,8 @@ copy_flow_ipfix_4_to_m(struct pflow_ipfi
{
int ret = 0;
+ MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
+
if (sc->sc_mbuf == NULL) {
if ((sc->sc_mbuf =
pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV4_ID)) == NULL) {
@@ -915,6 +943,8 @@ copy_flow_ipfix_6_to_m(struct pflow_ipfi
{
int ret = 0;
+ MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
+
if (sc->sc_mbuf6 == NULL) {
if ((sc->sc_mbuf6 =
pflow_get_mbuf(sc, PFLOW_IPFIX_TMPL_IPV6_ID)) == NULL) {
@@ -1011,6 +1041,7 @@ pflow_timeout(void *v)
{
struct pflow_softc *sc = v;
+ mtx_enter(&sc->sc_mtx);
switch (sc->sc_version) {
case PFLOW_PROTO_5:
pflow_sendout_v5(sc);
@@ -1021,6 +1052,7 @@ pflow_timeout(void *v)
default: /* NOTREACHED */
break;
}
+ mtx_leave(&sc->sc_mtx);
}
void
@@ -1028,7 +1060,9 @@ pflow_timeout6(void *v)
{
struct pflow_softc *sc = v;
+ mtx_enter(&sc->sc_mtx);
pflow_sendout_ipfix(sc, AF_INET6);
+ mtx_leave(&sc->sc_mtx);
}
void
@@ -1036,12 +1070,15 @@ pflow_timeout_tmpl(void *v)
{
struct pflow_softc *sc = v;
+ mtx_enter(&sc->sc_mtx);
pflow_sendout_ipfix_tmpl(sc);
+ mtx_leave(&sc->sc_mtx);
}
void
pflow_flush(struct pflow_softc *sc)
{
+ mtx_enter(&sc->sc_mtx);
switch (sc->sc_version) {
case PFLOW_PROTO_5:
pflow_sendout_v5(sc);
@@ -1053,6 +1090,7 @@ pflow_flush(struct pflow_softc *sc)
default: /* NOTREACHED */
break;
}
+ mtx_leave(&sc->sc_mtx);
}
int
@@ -1063,6 +1101,8 @@ pflow_sendout_v5(struct pflow_softc *sc)
struct ifnet *ifp = &sc->sc_if;
struct timespec tv;
+ MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
+
timeout_del(&sc->sc_tmo);
if (m == NULL)
@@ -1099,6 +1139,8 @@ pflow_sendout_ipfix(struct pflow_softc *
u_int32_t count;
int set_length;
+ MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
+
switch (af) {
case AF_INET:
m = sc->sc_mbuf;
@@ -1158,12 +1200,14 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
struct pflow_v10_header *h10;
struct ifnet *ifp = &sc->sc_if;
+ MUTEX_ASSERT_LOCKED(&sc->sc_mtx);
+
timeout_del(&sc->sc_tmo_tmpl);
if (!(ifp->if_flags & IFF_RUNNING)) {
return (0);
}
- m = pflow_get_mbuf(NULL, 0);
+ m = pflow_get_mbuf(sc, 0);
if (m == NULL)
return (0);
if (m_copyback(m, 0, sizeof(struct pflow_ipfix_tmpl),
@@ -1196,6 +1240,8 @@ pflow_sendout_ipfix_tmpl(struct pflow_so
int
pflow_sendout_mbuf(struct pflow_softc *sc, struct mbuf *m)
{
+ rw_assert_anylock(&sc->sc_lock);
+
counters_pkt(sc->sc_if.if_counters,
ifc_opackets, ifc_obytes, m->m_pkthdr.len);
Index: sys/net/if_pflow.h
===================================================================
RCS file: /cvs/src/sys/net/if_pflow.h,v
retrieving revision 1.19
diff -u -p -r1.19 if_pflow.h
--- sys/net/if_pflow.h 23 Nov 2022 15:12:27 -0000 1.19
+++ sys/net/if_pflow.h 7 Dec 2023 14:58:18 -0000
@@ -171,37 +171,42 @@ struct pflow_ipfix_flow6 {
/*
* Locks used to protect struct members and global data
+ * I immutable after creation
* N net lock
+ * m this pflow_softc' `sc_mtx'
* p this pflow_softc' `sc_lock'
*/
struct pflow_softc {
+ struct mutex sc_mtx;
struct rwlock sc_lock;
int sc_dying; /* [N] */
struct ifnet sc_if;
- unsigned int sc_count;
- unsigned int sc_count4;
- unsigned int sc_count6;
- unsigned int sc_maxcount;
- unsigned int sc_maxcount4;
- unsigned int sc_maxcount6;
- u_int64_t sc_gcounter;
- u_int32_t sc_sequence;
+ unsigned int sc_count; /* [m] */
+ unsigned int sc_count4; /* [m] */
+ unsigned int sc_count6; /* [m] */
+ unsigned int sc_maxcount; /* [m] */
+ unsigned int sc_maxcount4; /* [m] */
+ unsigned int sc_maxcount6; /* [m] */
+ u_int64_t sc_gcounter; /* [m] */
+ u_int32_t sc_sequence; /* [m] */
struct timeout sc_tmo;
struct timeout sc_tmo6;
struct timeout sc_tmo_tmpl;
struct mbuf_queue sc_outputqueue;
struct task sc_outputtask;
struct socket *so; /* [p] */
- struct mbuf *send_nam;
- struct sockaddr *sc_flowsrc;
- struct sockaddr *sc_flowdst;
- struct pflow_ipfix_tmpl sc_tmpl_ipfix;
- u_int8_t sc_version;
- struct mbuf *sc_mbuf; /* current cumulative mbuf */
- struct mbuf *sc_mbuf6; /* current cumulative mbuf */
+ struct mbuf *send_nam; /* [p] */
+ struct sockaddr *sc_flowsrc; /* [p] */
+ struct sockaddr *sc_flowdst; /* [p] */
+ struct pflow_ipfix_tmpl sc_tmpl_ipfix; /* [I] */
+ u_int8_t sc_version; /* [m] */
+ struct mbuf *sc_mbuf; /* [m] current cumulative
+ mbuf */
+ struct mbuf *sc_mbuf6; /* [m] current cumulative
+ mbuf */
SLIST_ENTRY(pflow_softc) sc_next;
};