diff --git a/kern/kern_clock.c b/kern/kern_clock.c
index 843965b..f598afc 100644
--- a/kern/kern_clock.c
+++ b/kern/kern_clock.c
@@ -233,7 +233,7 @@ hardclock(struct clockframe *frame)
if (stathz == 0)
statclock(frame);
- if (--ci->ci_schedstate.spc_rrticks <= 0)
+ if (p && (--(p->p_rrticks) <= 0))
roundrobin(ci);
/*
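
The hunk above moves the round-robin countdown from the per-CPU spc_rrticks
counter into the thread itself (p_rrticks), so every thread burns down its own
timeslice budget. A minimal userspace sketch of that countdown follows; it is
not part of the patch, the names (fake_proc, fake_roundrobin) are invented, and
the reset inside fake_roundrobin() merely stands in for what generate_deadline()
does on an involuntary switch.

#include <stdio.h>

struct fake_proc {
	int	p_rrticks;	/* ticks left in the current timeslice */
	int	preemptions;	/* times roundrobin() would have fired */
};

static void
fake_roundrobin(struct fake_proc *p, int rrticks_init)
{
	p->preemptions++;
	p->p_rrticks = rrticks_init;	/* hand out a fresh timeslice */
}

int
main(void)
{
	struct fake_proc p = { .p_rrticks = 10 };
	int tick;

	/* 35 clock ticks against a 10-tick slice: three preemptions. */
	for (tick = 0; tick < 35; tick++)
		if (--p.p_rrticks <= 0)
			fake_roundrobin(&p, 10);
	printf("preemptions: %d\n", p.preemptions);
	return 0;
}
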
diff --git a/kern/kern_proc.c b/kern/kern_proc.c
index ad861c8..e0d5536 100644
--- a/kern/kern_proc.c
+++ b/kern/kern_proc.c
@@ -398,8 +398,6 @@ proc_printit(struct proc *p, const char *modif, int (*pr)(const char *, ...))
p->p_comm, p->p_pid, pst, p->p_flag, P_BITS);
(*pr)(" pri=%u, usrpri=%u, nice=%d\n",
p->p_priority, p->p_usrpri, p->p_p->ps_nice);
- (*pr)(" forw=%p, list=%p,%p\n",
- TAILQ_NEXT(p, p_runq), p->p_list.le_next, p->p_list.le_prev);
(*pr)(" process=%p user=%p, vmspace=%p\n",
p->p_p, p->p_addr, p->p_vmspace);
(*pr)(" estcpu=%u, cpticks=%d, pctcpu=%u.%u, swtime=%u\n",
diff --git a/kern/kern_sched.c b/kern/kern_sched.c
index 253226a..79eb28c 100644
--- a/kern/kern_sched.c
+++ b/kern/kern_sched.c
@@ -24,11 +24,22 @@
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/mutex.h>
+#include <sys/tree.h>
#include <uvm/uvm_extern.h>
#include <sys/malloc.h>
+static int
+sched_cmp_proc(struct proc *a, struct proc *b)
+{
+ if (a == b)
+ return 0;
+ if (timercmp(&(a->p_deadline), &(b->p_deadline), <))
+ return -1;
+ return 1;
+}
+
+RB_GENERATE_STATIC(prochead, proc, p_runq, sched_cmp_proc);
void sched_kthreads_create(void *);
@@ -79,10 +90,8 @@ void
sched_init_cpu(struct cpu_info *ci)
{
struct schedstate_percpu *spc = &ci->ci_schedstate;
- int i;
- for (i = 0; i < SCHED_NQS; i++)
- TAILQ_INIT(&spc->spc_qs[i]);
+ RB_INIT(&spc->spc_runq);
spc->spc_idleproc = NULL;
@@ -158,18 +167,19 @@ sched_idle(void *v)
cpuset_add(&sched_idle_cpus, ci);
cpu_idle_enter();
- while (spc->spc_whichqs == 0) {
+
+ while (curcpu_is_idle()) {
if (spc->spc_schedflags & SPCF_SHOULDHALT &&
- (spc->spc_schedflags & SPCF_HALTED) == 0) {
+ (spc->spc_schedflags & SPCF_HALTED) == 0) {
cpuset_del(&sched_idle_cpus, ci);
SCHED_LOCK(s);
- atomic_setbits_int(&spc->spc_schedflags,
- spc->spc_whichqs ? 0 : SPCF_HALTED);
+ atomic_setbits_int(&spc->spc_schedflags, SPCF_HALTED);
SCHED_UNLOCK(s);
wakeup(spc);
}
cpu_idle_cycle();
}
+
cpu_idle_leave();
cpuset_del(&sched_idle_cpus, ci);
}
@@ -222,14 +232,13 @@ void
setrunqueue(struct proc *p)
{
struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
SCHED_ASSERT_LOCKED();
spc = &p->p_cpu->ci_schedstate;
spc->spc_nrun++;
- TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
- spc->spc_whichqs |= (1 << queue);
+ KASSERT(!RB_FIND(prochead, &spc->spc_runq, p));
+ RB_INSERT(prochead, &spc->spc_runq, p);
cpuset_add(&sched_queued_cpus, p->p_cpu);
if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
@@ -240,38 +249,29 @@ void
remrunqueue(struct proc *p)
{
struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
SCHED_ASSERT_LOCKED();
spc = &p->p_cpu->ci_schedstate;
spc->spc_nrun--;
- TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
- if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
- spc->spc_whichqs &= ~(1 << queue);
- if (spc->spc_whichqs == 0)
- cpuset_del(&sched_queued_cpus, p->p_cpu);
- }
+ /* RB_REMOVE() must run even in non-DIAGNOSTIC kernels, so keep it out of KASSERT(). */
+ RB_REMOVE(prochead, &spc->spc_runq, p);
+ if (RB_EMPTY(&spc->spc_runq))
+ cpuset_del(&sched_queued_cpus, p->p_cpu);
}
struct proc *
sched_chooseproc(void)
{
struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
- struct proc *p;
- int queue;
+ struct proc *p, *p_tmp = NULL;
SCHED_ASSERT_LOCKED();
if (spc->spc_schedflags & SPCF_SHOULDHALT) {
- if (spc->spc_whichqs) {
- for (queue = 0; queue < SCHED_NQS; queue++) {
- TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
- remrunqueue(p);
- p->p_cpu = sched_choosecpu(p);
- setrunqueue(p);
- }
- }
+ RB_FOREACH_SAFE(p, prochead, &spc->spc_runq, p_tmp) {
+ remrunqueue(p);
+ p->p_cpu = sched_choosecpu(p);
+ setrunqueue(p);
}
p = spc->spc_idleproc;
KASSERT(p);
@@ -280,17 +280,14 @@ sched_chooseproc(void)
return (p);
}
-again:
- if (spc->spc_whichqs) {
- queue = ffs(spc->spc_whichqs) - 1;
- p = TAILQ_FIRST(&spc->spc_qs[queue]);
+ if (!RB_EMPTY(&spc->spc_runq)) {
+ p = RB_MIN(prochead, &spc->spc_runq);
remrunqueue(p);
sched_noidle++;
KASSERT(p->p_stat == SRUN);
} else if ((p = sched_steal_proc(curcpu())) == NULL) {
- p = spc->spc_idleproc;
- if (p == NULL) {
- int s;
+ while ((p = spc->spc_idleproc) == NULL) {
+ int s;
/*
* We get here if someone decides to switch during
* boot before forking kthreads, bleh.
@@ -302,8 +299,7 @@ again:
spl0();
delay(10);
SCHED_LOCK(s);
- goto again;
- }
+ }
KASSERT(p);
p->p_stat = SRUN;
}
@@ -441,15 +437,13 @@ sched_steal_proc(struct cpu_info *self)
while ((ci = cpuset_first(&set)) != NULL) {
struct proc *p;
- int queue;
int cost;
cpuset_del(&set, ci);
spc = &ci->ci_schedstate;
- queue = ffs(spc->spc_whichqs) - 1;
- TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
+ RB_FOREACH(p, prochead, &spc->spc_runq) {
if (p->p_flag & P_CPUPEG)
continue;
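
The kern_sched.c changes replace the 32 priority FIFOs with one red-black tree
per CPU keyed on p_deadline, so sched_chooseproc() just dequeues RB_MIN(). The
standalone userspace sketch below is not part of the patch; it assumes a BSD
<sys/tree.h> and uses invented names (fake_proc, fake_cmp, fake_runq) to show
the same timercmp()-based ordering selecting the earliest deadline.

#include <sys/tree.h>
#include <sys/time.h>
#include <stdio.h>

struct fake_proc {
	RB_ENTRY(fake_proc)	entry;
	struct timeval		deadline;
	const char		*name;
};

static int
fake_cmp(struct fake_proc *a, struct fake_proc *b)
{
	if (a == b)
		return 0;
	/* Earlier deadline sorts first, mirroring sched_cmp_proc(). */
	if (timercmp(&a->deadline, &b->deadline, <))
		return -1;
	return 1;
}

RB_HEAD(fake_runq, fake_proc);
RB_GENERATE_STATIC(fake_runq, fake_proc, entry, fake_cmp);

int
main(void)
{
	struct fake_runq rq = RB_INITIALIZER(&rq);
	struct fake_proc a = { .deadline = { 1, 500 }, .name = "late" };
	struct fake_proc b = { .deadline = { 0, 900 }, .name = "early" };

	RB_INSERT(fake_runq, &rq, &a);
	RB_INSERT(fake_runq, &rq, &b);
	/* RB_MIN() is what the new sched_chooseproc() picks. */
	printf("next: %s\n", RB_MIN(fake_runq, &rq)->name);
	return 0;
}

Because the comparator returns 1 for distinct entries with equal deadlines, a
newly inserted thread lands to the right of existing ones with the same
deadline, giving roughly FIFO behaviour among ties.
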
diff --git a/kern/kern_synch.c b/kern/kern_synch.c
index f1db615..43716b3 100644
--- a/kern/kern_synch.c
+++ b/kern/kern_synch.c
@@ -205,7 +205,7 @@ sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
p->p_wmesg = wmesg;
p->p_slptime = 0;
p->p_priority = prio & PRIMASK;
- TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
+ TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_slpq);
}
void
@@ -342,7 +342,7 @@ void
unsleep(struct proc *p)
{
if (p->p_wchan) {
- TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
+ TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_slpq);
p->p_wchan = NULL;
}
}
@@ -361,7 +361,7 @@ wakeup_n(const volatile void *ident, int n)
SCHED_LOCK(s);
qp = &slpque[LOOKUP(ident)];
for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
- pnext = TAILQ_NEXT(p, p_runq);
+ pnext = TAILQ_NEXT(p, p_slpq);
#ifdef DIAGNOSTIC
if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
panic("wakeup: p_stat is %d", (int)p->p_stat);
@@ -369,7 +369,7 @@ wakeup_n(const volatile void *ident, int n)
if (p->p_wchan == ident) {
--n;
p->p_wchan = 0;
- TAILQ_REMOVE(qp, p, p_runq);
+ TAILQ_REMOVE(qp, p, p_slpq);
if (p->p_stat == SSLEEP)
setrunnable(p);
}
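
The kern_synch.c hunks only rename the linkage member: the sleep queues stay
TAILQs, but they can no longer borrow p_runq because that field is now the
RB_ENTRY owned by the run-queue tree, so a dedicated p_slpq member is needed.
The tiny sketch below, not part of the patch and using invented names
(fake_thread, fake_slpque, fake_runq_tree), illustrates why a structure needs
one link member per container it can sit on.

#include <sys/queue.h>
#include <sys/tree.h>

struct fake_thread {
	TAILQ_ENTRY(fake_thread) ft_slpq;	/* sleep queue linkage */
	RB_ENTRY(fake_thread)	 ft_runq;	/* run queue linkage */
	int			 ft_id;
};

TAILQ_HEAD(fake_slpque, fake_thread);
RB_HEAD(fake_runq_tree, fake_thread);

int
main(void)
{
	struct fake_slpque sq = TAILQ_HEAD_INITIALIZER(sq);
	struct fake_thread t = { .ft_id = 1 };

	/* ft_slpq links the thread into the sleep queue while ft_runq
	 * stays free for the run-queue tree. */
	TAILQ_INSERT_TAIL(&sq, &t, ft_slpq);
	return TAILQ_FIRST(&sq)->ft_id == 1 ? 0 : 1;
}
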
diff --git a/kern/sched_bsd.c b/kern/sched_bsd.c
index 6c5eb63..172bb8f 100644
--- a/kern/sched_bsd.c
+++ b/kern/sched_bsd.c
@@ -89,8 +89,6 @@ roundrobin(struct cpu_info *ci)
{
struct schedstate_percpu *spc = &ci->ci_schedstate;
- spc->spc_rrticks = rrticks_init;
-
if (ci->ci_curproc != NULL) {
if (spc->spc_schedflags & SPCF_SEENRR) {
/*
@@ -252,8 +250,7 @@ schedcpu(void *arg)
resetpriority(p);
if (p->p_priority >= PUSER) {
if (p->p_stat == SRUN &&
- (p->p_priority / SCHED_PPQ) !=
- (p->p_usrpri / SCHED_PPQ)) {
+ p->p_priority != p->p_usrpri) {
remrunqueue(p);
p->p_priority = p->p_usrpri;
setrunqueue(p);
@@ -304,6 +301,7 @@ yield(void)
SCHED_LOCK(s);
p->p_priority = p->p_usrpri;
p->p_stat = SRUN;
+ generate_deadline(p, 1);
setrunqueue(p);
p->p_ru.ru_nvcsw++;
mi_switch();
@@ -332,6 +330,7 @@ preempt(struct proc *newp)
p->p_priority = p->p_usrpri;
p->p_stat = SRUN;
p->p_cpu = sched_choosecpu(p);
+ generate_deadline(p, 0);
setrunqueue(p);
p->p_ru.ru_nivcsw++;
mi_switch();
@@ -531,8 +530,7 @@ resetpriority(struct proc *p)
SCHED_ASSERT_LOCKED();
- newpriority = PUSER + p->p_estcpu +
- NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
+ newpriority = PUSER + p->p_estcpu + (p->p_p->ps_nice - NZERO);
newpriority = min(newpriority, MAXPRI);
p->p_usrpri = newpriority;
resched_proc(p, p->p_usrpri);
@@ -565,3 +563,34 @@ schedclock(struct proc *p)
p->p_priority = p->p_usrpri;
SCHED_UNLOCK(s);
}
+
+void
+generate_deadline(struct proc *p, char voluntary)
+{
+ /*
+ * For nice values between 0 and 39 inclusively, the offset lies between
+ * 32 and 1280 milliseconds for a machine with hz=100. That means that
+ * processes with nice value=0 (i.e. -20 in userland) will be executed
+ * 32 milliseconds in the future at the latest. Processes with very
+ * little priority will be executed 1.28 seconds in the future at the
+ * very latest. The shift is done to ensure that the lowest possible
+ * offset is larger than the timeslice, in order to make sure that the
+ * scheduler does not degenerate to round robin behaviour when more than
+ * just a few processes with high priority are started.
+ *
+ * If the process voluntarily yielded its CPU, we reward it by halving
+ * its deadline offset.
+ */
+ unsigned int offset_msec =
+ ((p->p_p->ps_nice + (p->p_priority >> 1) + 1) * rrticks_init) <<
+ (voluntary ? 2 : 3);
+ struct timeval offset = {
+ .tv_sec = offset_msec / 1000,
+ .tv_usec = (offset_msec % 1000) * 1000 /* convert msec remainder to usec */
+ };
+ struct timeval now;
+
+ microuptime(&now);
+ timeradd(&now, &offset, &p->p_deadline);
+ if (!voluntary)
+ p->p_rrticks = rrticks_init;
+}
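
To get a feel for generate_deadline(), the worked example below evaluates the
offset expression as written for a few inputs. It is not part of the patch and
assumes the usual rrticks_init = hz / 10 (i.e. 10 at hz=100) and an arbitrary
p_priority of 50; the figures are purely illustrative.

#include <stdio.h>

int
main(void)
{
	const unsigned int rrticks_init = 10;	/* assumed: hz / 10 at hz=100 */
	const unsigned int prio = 50;		/* assumed p_priority */
	unsigned int nice_val, voluntary;

	for (nice_val = 0; nice_val <= 39; nice_val += 39) {
		for (voluntary = 0; voluntary <= 1; voluntary++) {
			unsigned int off =
			    ((nice_val + (prio >> 1) + 1) * rrticks_init) <<
			    (voluntary ? 2 : 3);
			printf("nice=%2u voluntary=%u -> offset %u msec\n",
			    nice_val, voluntary, off);
		}
	}
	return 0;
}

A voluntary yield uses the smaller shift, so its deadline offset is half that
of an involuntary preemption at the same nice value and priority.
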
diff --git a/sys/proc.h b/sys/proc.h
index da04d28..69fc0e2 100644
--- a/sys/proc.h
+++ b/sys/proc.h
@@ -48,6 +48,7 @@
#include <sys/mutex.h> /* For struct mutex */
#include <sys/resource.h> /* For struct rusage */
#include <machine/atomic.h>
+#include <sys/tree.h>
#ifdef _KERNEL
#define __need_process
@@ -247,8 +248,9 @@ struct process {
#define PS_EXITING _P_EXITING
struct proc {
- TAILQ_ENTRY(proc) p_runq;
+ TAILQ_ENTRY(proc) p_slpq;
LIST_ENTRY(proc) p_list; /* List of all processes. */
+ RB_ENTRY(proc) p_runq;
struct process *p_p; /* The process of this thread. */
TAILQ_ENTRY(proc) p_thr_link;/* Threads in a process linkage. */
@@ -280,6 +282,8 @@ struct proc {
int p_sigwait; /* signal handled by sigwait() */
/* scheduling */
+ int p_rrticks; /* ticks left until roundrobin() */
+ struct timeval p_deadline; /* scheduling deadline for run queue ordering */
u_int p_estcpu; /* Time averaged value of p_cpticks. */
int p_cpticks; /* Ticks of cpu time. */
fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */
diff --git a/sys/sched.h b/sys/sched.h
index 1651205..fb01f21 100644
--- a/sys/sched.h
+++ b/sys/sched.h
@@ -70,6 +70,7 @@
#define _SYS_SCHED_H_
#include <sys/queue.h>
+#include <sys/tree.h>
/*
* Posix defines a <sched.h> which may want to include <sys/sched.h>
@@ -99,7 +100,6 @@ struct schedstate_percpu {
u_int spc_schedticks; /* ticks for schedclock() */
u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
u_char spc_curpriority; /* usrpri of curproc */
- int spc_rrticks; /* ticks until roundrobin() */
int spc_pscnt; /* prof/stat counter */
int spc_psdiv; /* prof/stat divisor */
struct proc *spc_idleproc; /* idle proc for this cpu */
@@ -107,8 +107,7 @@ struct schedstate_percpu {
u_int spc_nrun; /* procs on the run queues */
fixpt_t spc_ldavg; /* shortest load avg. for this cpu */
- TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
- volatile uint32_t spc_whichqs;
+ RB_HEAD(prochead, proc) spc_runq;
#ifdef notyet
struct proc *spc_reaper; /* dead proc reaper */
@@ -125,9 +124,7 @@ struct schedstate_percpu {
#define SPCF_SHOULDHALT 0x0004 /* CPU should be vacated */
#define SPCF_HALTED 0x0008 /* CPU has been halted */
-#define SCHED_PPQ (128 / SCHED_NQS) /* priorities per queue */
-#define NICE_WEIGHT 2 /* priorities per nice level */
-#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
+#define ESTCPULIM(e) min((e), PRIO_MAX)
extern int schedhz; /* ideally: 16 */
extern int rrticks_init; /* ticks per roundrobin() */
@@ -152,13 +149,14 @@ void cpu_idle_enter(void);
void cpu_idle_cycle(void);
void cpu_idle_leave(void);
void sched_peg_curproc(struct cpu_info *ci);
+void generate_deadline(struct proc *, char);
#ifdef MULTIPROCESSOR
void sched_start_secondary_cpus(void);
void sched_stop_secondary_cpus(void);
#endif
-#define curcpu_is_idle() (curcpu()->ci_schedstate.spc_whichqs == 0)
+#define curcpu_is_idle() (RB_EMPTY(&curcpu()->ci_schedstate.spc_runq))
void sched_init_runqueues(void);
void setrunqueue(struct proc *);
--
1.7.6