On Wed, Jul 07, 2021 at 10:01:59PM +0200, Hrvoje Popovski wrote:
> On 7.7.2021. 19:38, Vitaliy Makkoveev wrote:
> > Hi,
> > 
> > It seems the first the first panic occured because ipsp_spd_lookup()
> > modifies tdbp->tdb_policy_head and simultaneous execution breaks it.
> > I guess at least mutex(9) should be used to protect `tdb_policy_head'.
> > 
> > The second panic occured because ipsp_acquire_sa() does
> > `ipsec_acquire_pool' initialization in runtime so parallel execution
> > breaks it. It's easy to fix.
> > 
> > Could you try the diff below? It moves `ipsec_acquire_pool'
> > initialization to pfkey_init() just after `ipsec_policy_pool'
> > initialization. This should fix the second panic.
> 
> 
> Hi,
> 
> with this diff i manage to get this panics ..
> 
> r620-1# uvm_fault(0xfffffd842f83e170, 0x147, 0, 2) -> e
> kernel: page fault trap, code=0
> Stopped at      tdb_free+0x9c:  movq    %rsi,0(%rdi)
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
> * 38448  89106     68        0x10          0    1K isakmpd
> tdb_free(ffff80000131e688) at tdb_free+0x9c
> pfkeyv2_send(fffffd83b2512d30,ffff8000012f2680,50) at pfkeyv2_send+0x1191
> pfkeyv2_output(fffffd80a4b37400,fffffd83b2512d30,0,0) at pfkeyv2_output+0x8a
> pfkeyv2_usrreq(fffffd83b2512d30,9,fffffd80a4b37400,0,0,ffff800023908d20)
> at pfkeyv2_usrreq+0x1b0
> sosend(fffffd83b2512d30,0,ffff800023959130,0,0,0) at sosend+0x3a9
> dofilewritev(ffff800023908d20,7,ffff800023959130,0,ffff800023959230) at
> dofilewritev+0x14d
> sys_writev(ffff800023908d20,ffff8000239591d0,ffff800023959230) at
> sys_writev+0xe2
> syscall(ffff8000239592a0) at syscall+0x3b9
> Xsyscall() at Xsyscall+0x128
> end of kernel
> end trace frame: 0x7f7ffffb27c0, count: 6
> https://www.openbsd.org/ddb.html describes the minimum info required in
> bug reports.  Insufficient info makes it difficult to find and fix bugs.
> ddb{1}>
> 
> 
> 
> r620-1# uvm_fault(0xffffffff82379d80, 0x147, 0, 2) -> e
> kernel: page fault trap, code=0
> Stopped at      ipsp_spd_lookup+0x9a4:  movq    %rax,0(%rcx)
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>  434446  37326      0     0x14000      0x200    4  softnet
>  405134  41722      0     0x14000      0x200    1  softnet
>  106389  88948      0     0x14000      0x200    3  softnet
> *262295  93659      0     0x14000      0x200    2  softnet
> ipsp_spd_lookup(fffffd80a44f6f00,2,14,ffff8000238651bc,2,0) at
> ipsp_spd_lookup+0x9a4
> ip_output_ipsec_lookup(fffffd80a44f6f00,14,ffff8000238651bc,0,0) at
> ip_output_ipsec_lookup+0x4d
> ip_output(fffffd80a44f6f00,0,ffff800023865348,1,0,0) at ip_output+0x42a
> ip_forward(fffffd80a44f6f00,ffff800000087048,fffffd83b3ea6d98,0) at
> ip_forward+0x26a
> ip_input_if(ffff800023865488,ffff800023865494,4,0,ffff800000087048) at
> ip_input_if+0x365
> ipv4_input(ffff800000087048,fffffd80a44f6f00) at ipv4_input+0x39
> if_input_process(ffff800000087048,ffff800023865508) at if_input_process+0x6f
> ifiq_process(ffff800000086c00) at ifiq_process+0x69
> taskq_thread(ffff800000030000) at taskq_thread+0x9f
> end trace frame: 0x0, count: 6
> https://www.openbsd.org/ddb.html describes the minimum info required in
> bug reports.  Insufficient info makes it difficult to find and fix bugs.
> 

Thanks. ipsp_spd_lookup() stopped panic in pool_get(9).

I guess the panics continue because simultaneous modifications of
'tdbp->tdb_policy_head' break it. Could you try the diff below? It
introduces `tdb_polhd_mtx' mutex(9) and uses it to protect
'tdbp->tdb_policy_head' modifications. I don't propose this diff for
commit but to check my suggestion.

Index: sys/net/pfkeyv2.c
===================================================================
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.216
diff -u -p -r1.216 pfkeyv2.c
--- sys/net/pfkeyv2.c   5 Jul 2021 12:01:20 -0000       1.216
+++ sys/net/pfkeyv2.c   7 Jul 2021 20:21:44 -0000
@@ -2000,9 +2000,12 @@ pfkeyv2_send(struct socket *so, void *me
                                (caddr_t)&ipo->ipo_mask, rnh,
                                ipo->ipo_nodes, 0)) == NULL) {
                                /* Remove from linked list of policies on TDB */
-                               if (ipo->ipo_tdb)
+                               if (ipo->ipo_tdb) {
+                                       mtx_enter(&tdb_polhd_mtx);
                                        
TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
                                            ipo, ipo_tdb_next);
+                                       mtx_leave(&tdb_polhd_mtx);
+                               }
 
                                if (ipo->ipo_ids)
                                        ipsp_ids_free(ipo->ipo_ids);
Index: sys/netinet/ip_ipsp.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.238
diff -u -p -r1.238 ip_ipsp.c
--- sys/netinet/ip_ipsp.c       10 Mar 2021 10:21:49 -0000      1.238
+++ sys/netinet/ip_ipsp.c       7 Jul 2021 20:21:44 -0000
@@ -92,6 +92,7 @@ u_int64_t ipsec_last_added = 0;
 int ipsec_ids_idle = 100;              /* keep free ids for 100s */
 
 struct pool tdb_pool;
+struct mutex   tdb_polhd_mtx=MUTEX_INITIALIZER(IPL_SOFTNET);
 
 /* Protected by the NET_LOCK(). */
 u_int32_t ipsec_ids_next_flow = 1;     /* may not be zero */
@@ -846,12 +847,14 @@ tdb_free(struct tdb *tdbp)
 #endif
 
        /* Cleanup SPD references. */
+       mtx_enter(&tdb_polhd_mtx);
        for (ipo = TAILQ_FIRST(&tdbp->tdb_policy_head); ipo;
            ipo = TAILQ_FIRST(&tdbp->tdb_policy_head))  {
                TAILQ_REMOVE(&tdbp->tdb_policy_head, ipo, ipo_tdb_next);
                ipo->ipo_tdb = NULL;
                ipo->ipo_last_searched = 0; /* Force a re-search. */
        }
+       mtx_leave(&tdb_polhd_mtx);
 
        if (tdbp->tdb_ids) {
                ipsp_ids_free(tdbp->tdb_ids);
Index: sys/netinet/ip_ipsp.h
===================================================================
RCS file: /cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.197
diff -u -p -r1.197 ip_ipsp.h
--- sys/netinet/ip_ipsp.h       4 May 2021 09:28:04 -0000       1.197
+++ sys/netinet/ip_ipsp.h       7 Jul 2021 20:21:44 -0000
@@ -480,6 +480,8 @@ struct xformsw {
            int, int);        /* output */
 };
 
+extern struct mutex tdb_polhd_mtx;
+
 extern int ipsec_in_use;
 extern u_int64_t ipsec_last_added;
 extern int encdebug;                   /* enable message reporting */
Index: sys/netinet/ip_spd.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_spd.c,v
retrieving revision 1.103
diff -u -p -r1.103 ip_spd.c
--- sys/netinet/ip_spd.c        4 May 2021 09:28:04 -0000       1.103
+++ sys/netinet/ip_spd.c        7 Jul 2021 20:21:44 -0000
@@ -370,8 +370,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
 
        /* Do we have a cached entry ? If so, check if it's still valid. */
        if ((ipo->ipo_tdb) && (ipo->ipo_tdb->tdb_flags & TDBF_INVALID)) {
+               mtx_enter(&tdb_polhd_mtx);
                TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
                    ipo_tdb_next);
+               mtx_leave(&tdb_polhd_mtx);
                ipo->ipo_tdb = NULL;
        }
 
@@ -419,8 +421,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
 
   nomatchout:
                        /* Cached TDB was not good. */
+                       mtx_enter(&tdb_polhd_mtx);
                        TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
                            ipo_tdb_next);
+                       mtx_leave(&tdb_polhd_mtx);
                        ipo->ipo_tdb = NULL;
                        ipo->ipo_last_searched = 0;
                }
@@ -447,8 +451,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
                                ids ? ids: ipo->ipo_ids,
                                &ipo->ipo_addr, &ipo->ipo_mask);
                        if (ipo->ipo_tdb) {
+                               mtx_enter(&tdb_polhd_mtx);
                                
TAILQ_INSERT_TAIL(&ipo->ipo_tdb->tdb_policy_head,
                                    ipo, ipo_tdb_next);
+                               mtx_leave(&tdb_polhd_mtx);
                                *error = 0;
                                return ipsp_spd_inp(m, af, hlen, error,
                                    direction, tdbp, inp, ipo);
@@ -521,12 +527,14 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
                                        goto nomatchin;
 
                        /* Add it to the cache. */
+                       mtx_enter(&tdb_polhd_mtx);
                        if (ipo->ipo_tdb)
                                TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head,
                                    ipo, ipo_tdb_next);
                        ipo->ipo_tdb = tdbp;
                        TAILQ_INSERT_TAIL(&tdbp->tdb_policy_head, ipo,
                            ipo_tdb_next);
+                       mtx_leave(&tdb_polhd_mtx);
                        *error = 0;
                        return ipsp_spd_inp(m, af, hlen, error, direction,
                            tdbp, inp, ipo);
@@ -550,8 +558,10 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
                                goto skipinputsearch;
 
                        /* Not applicable, unlink. */
+                       mtx_enter(&tdb_polhd_mtx);
                        TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
                            ipo_tdb_next);
+                       mtx_leave(&tdb_polhd_mtx);
                        ipo->ipo_last_searched = 0;
                        ipo->ipo_tdb = NULL;
                }
@@ -566,9 +576,12 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
                                dignore ? &ssrc : &ipo->ipo_dst,
                                ipo->ipo_sproto, ipo->ipo_ids,
                                &ipo->ipo_addr, &ipo->ipo_mask);
-                       if (ipo->ipo_tdb)
+                       if (ipo->ipo_tdb) {
+                               mtx_enter(&tdb_polhd_mtx);
                                
TAILQ_INSERT_TAIL(&ipo->ipo_tdb->tdb_policy_head,
                                    ipo, ipo_tdb_next);
+                               mtx_leave(&tdb_polhd_mtx);
+                       }
                }
   skipinputsearch:
 
@@ -638,9 +651,12 @@ ipsec_delete_policy(struct ipsec_policy 
            rn_delete(&ipo->ipo_addr, &ipo->ipo_mask, rnh, rn) == NULL)
                return (ESRCH);
 
-       if (ipo->ipo_tdb != NULL)
+       if (ipo->ipo_tdb != NULL) {
+               mtx_enter(&tdb_polhd_mtx);
                TAILQ_REMOVE(&ipo->ipo_tdb->tdb_policy_head, ipo,
                    ipo_tdb_next);
+               mtx_leave(&tdb_polhd_mtx);
+       }
 
        while ((ipa = TAILQ_FIRST(&ipo->ipo_acquires)) != NULL)
                ipsp_delete_acquire(ipa);

Reply via email to