On Fri, Jun 21, 2024 at 01:24:27PM +0200, Martin Pieuchot wrote:
> So I'm trying to see where the remaining sched_yield() are coming from
> ld(1):
>
> $ cd /sys/arch/arm64/compile/GENERIC.MP
> $ LD="egdb --args ld" make -j32
>
> Then I add a breakpoint on sched_yield & hit run.
>
> As soon as the first thread is stopped, I can see the trace as usual,
> however the process is now in a "stopped" state, impossible to kill or
> continue. Even ddb's kill command doesn't help.
So this is caused by multiple issues in ptrace, the single thread API
and the sleep machinery. The fundamental issue is that the ps_singlecnt
gets off and egdb hangs in single_thread_wait().
The issue at hand is that single_thread_check() for suspends while in deep
is just not right. It will decrement ps_singlecnt but if the proc was
already in SSLEEP then that decrement was already done in
single_thread_set().
Here is a possible solution to this problem by adding yet another proc
flag to track the time between sleep_setup and single_thread_check (via
sleep_signal_check). Using this information single_thread_set() can be
adjusted to no decrement ps_singlecnt in that case.
On top of this I had to fix ptrace to better respect gdb's request to
progress a single thread or all threads. Without that I just constantly
hit egdb asserts because of unexpected progress of unnrelated threads.
With this I can add the sched_yield break point and continue through the
full run. Does this break other stuff?
Most probably :) there is a lot of bits that are not quite right when it
comes to signals and multiple threads.
--
:wq Claudio
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
diff -u -p -r1.333 kern_sig.c
--- kern/kern_sig.c 22 Jul 2024 09:43:47 -0000 1.333
+++ kern/kern_sig.c 24 Jul 2024 14:14:06 -0000
@@ -851,7 +851,9 @@ trapsignal(struct proc *p, int signum, u
SCHED_UNLOCK();
signum = pr->ps_xsig;
- single_thread_clear(p, 0);
+ if ((p->p_flag & P_TRACESINGLE) == 0)
+ single_thread_clear(p, 0);
+ atomic_clearbits_int(&p->p_flag, P_TRACESINGLE);
/*
* If we are no longer being traced, or the parent
@@ -1370,7 +1372,9 @@ cursig(struct proc *p, struct sigctx *sc
atomic_clearbits_int(&pr->ps_siglist, mask);
}
- single_thread_clear(p, 0);
+ if ((p->p_flag & P_TRACESINGLE) == 0)
+ single_thread_clear(p, 0);
+ atomic_clearbits_int(&p->p_flag, P_TRACESINGLE);
/*
* If we are no longer being traced, or the parent
@@ -2077,6 +2081,9 @@ single_thread_check_locked(struct proc *
MUTEX_ASSERT_LOCKED(&pr->ps_mtx);
+ if (deep)
+ atomic_clearbits_int(&p->p_flag, P_SINGLESLEEP);
+
if (pr->ps_single == NULL || pr->ps_single == p)
return (0);
@@ -2177,16 +2184,23 @@ single_thread_set(struct proc *p, int fl
if (mode == SINGLE_EXIT) {
unsleep(q);
setrunnable(q);
- } else
+ } else {
--pr->ps_singlecnt;
+ }
break;
case SSLEEP:
/* if it's not interruptible, then just have to wait */
if (q->p_flag & P_SINTR) {
/* merely need to suspend? just stop it */
if (mode == SINGLE_SUSPEND) {
+ /*
+ * if between sleep_setup and
+ * sleep_signal_check don't count us
+ * out.
+ */
+ if ((q->p_flag & P_SINGLESLEEP) == 0)
+ --pr->ps_singlecnt;
q->p_stat = SSTOP;
- --pr->ps_singlecnt;
break;
}
/* need to unwind or exit, so wake it */
@@ -2263,6 +2277,8 @@ single_thread_clear(struct proc *p, int
*/
SCHED_LOCK();
if (q->p_stat == SSTOP && (q->p_flag & flag) == 0) {
+ if (flag == 0)
+ atomic_clearbits_int(&q->p_flag, P_SUSPSIG);
if (q->p_wchan == NULL)
setrunnable(q);
else {
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
diff -u -p -r1.206 kern_synch.c
--- kern/kern_synch.c 23 Jul 2024 08:38:02 -0000 1.206
+++ kern/kern_synch.c 24 Jul 2024 14:14:06 -0000
@@ -356,7 +356,7 @@ sleep_setup(const volatile void *ident,
atomic_setbits_int(&p->p_flag, P_WSLEEP);
TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
if (prio & PCATCH)
- atomic_setbits_int(&p->p_flag, P_SINTR);
+ atomic_setbits_int(&p->p_flag, P_SINTR | P_SINGLESLEEP);
p->p_stat = SSLEEP;
SCHED_UNLOCK();
@@ -399,15 +399,18 @@ sleep_finish(int timo, int do_sleep)
*/
if (p->p_wchan == NULL)
do_sleep = 0;
+ KASSERT((p->p_flag & P_SINGLESLEEP) == 0);
atomic_clearbits_int(&p->p_flag, P_WSLEEP);
+ /* If requested to stop always force a stop even if do_sleep == 0 */
+ if (p->p_stat == SSTOP)
+ do_sleep = 1;
if (do_sleep) {
KASSERT(p->p_stat == SSLEEP || p->p_stat == SSTOP);
p->p_ru.ru_nvcsw++;
mi_switch();
} else {
- KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP ||
- p->p_stat == SSTOP);
+ KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP);
unsleep(p);
p->p_stat = SONPROC;
}
@@ -420,10 +423,7 @@ sleep_finish(int timo, int do_sleep)
p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
SCHED_UNLOCK();
- /*
- * Even though this belongs to the signal handling part of sleep,
- * we need to clear it before the ktrace.
- */
+ /* Must clear this before hitting another sleep point. */
atomic_clearbits_int(&p->p_flag, P_SINTR);
if (timo != 0) {
Index: kern/sys_process.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_process.c,v
diff -u -p -r1.98 sys_process.c
--- kern/sys_process.c 3 Jun 2024 12:48:25 -0000 1.98
+++ kern/sys_process.c 24 Jul 2024 14:14:06 -0000
@@ -441,6 +441,8 @@ ptrace_ctrl(struct proc *p, int req, pid
if (pid < THREAD_PID_OFFSET && tr->ps_single)
t = tr->ps_single;
+ else
+ atomic_setbits_int(&t->p_flag, P_TRACESINGLE);
/* If the address parameter is not (int *)1, set the pc. */
if ((int *)addr != (int *)1)
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
diff -u -p -r1.365 proc.h
--- sys/proc.h 22 Jul 2024 09:43:47 -0000 1.365
+++ sys/proc.h 24 Jul 2024 14:14:06 -0000
@@ -430,9 +430,11 @@ struct proc {
#define P_SIGSUSPEND 0x00000008 /* Need to restore
before-suspend mask*/
#define P_CANTSLEEP 0x00000010 /* insomniac thread */
#define P_WSLEEP 0x00000020 /* Working on going to sleep. */
+#define P_SINGLESLEEP 0x00000040 /* Like P_WSLEEP for single
thread api */
#define P_SINTR 0x00000080 /* Sleep is interruptible. */
#define P_SYSTEM 0x00000200 /* No sigs, stats or swapping.
*/
#define P_TIMEOUT 0x00000400 /* Timing out during sleep. */
+#define P_TRACESINGLE 0x00001000 /* keep single threaded
ptraced. */
#define P_WEXIT 0x00002000 /* Working on exiting. */
#define P_OWEUPC 0x00008000 /* Owe proc an addupc() at next
ast. */
#define P_SUSPSINGLE 0x00080000 /* Need to stop for single
threading. */
@@ -443,9 +445,9 @@ struct proc {
#define P_BITS \
("\20" "\01INKTR" "\02PROFPEND" "\03ALRMPEND" "\04SIGSUSPEND" \
- "\05CANTSLEEP" "\06WSLEEP" "\010SINTR" "\012SYSTEM" "\013TIMEOUT" \
- "\016WEXIT" "\020OWEUPC" "\024SUSPSINGLE" "\030CONTINUED" "\033THREAD" \
- "\034SUSPSIG" "\037CPUPEG")
+ "\05CANTSLEEP" "\06WSLEEP" "\07SINGLESLEEP" "\010SINTR" "\012SYSTEM" \
+ "\013TIMEOUT" "\015TRACESINGLE" "\016WEXIT" "\020OWEUPC" "\024SUSPSINGLE"
\
+ "\030CONTINUED" "\033THREAD" "\034SUSPSIG" "\037CPUPEG")
#define THREAD_PID_OFFSET 100000