> Discussions.
> 
>   - /sbin/init init_main.c!start_init() map page? (deraadt@)
>     -> that is not the problem, the page should be mapped even there
>        by the sys_execve() call

Robert found the proper solution to this: move the find_timekeep bits in
_libc_preinit!

This helps with a lot of things:

  - removes the need for the find_timekeep() function
  - removes the nasty ELF exports
  - shrinks the diff

Good job, Robert!

What's left is the TSC discussion and the bikeshedding bits.

Paul


diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
index cd056c85719..2b25d49f32a 100644
--- lib/libc/asr/asr.c
+++ lib/libc/asr/asr.c
@@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
timeout)
        struct timespec pollstart, pollend, elapsed;
        int r;
 
-       if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
+       if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
                return -1;
 
        while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
-               if (clock_gettime(CLOCK_MONOTONIC, &pollend))
+               if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
                        return -1;
                timespecsub(&pollend, &pollstart, &elapsed);
                timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
@@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
                asr->a_rtime = 0;
        }
 
-       if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+       if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
                return;
 
        if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
index 82de8fa33b7..02fd3013cc1 100644
--- lib/libc/crypt/bcrypt.c
+++ lib/libc/crypt/bcrypt.c
@@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
        char buf[_PASSWORD_LEN];
        int duration;
 
-       clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
+       WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
        bcrypt_newhash("testpassword", r, buf, sizeof(buf));
-       clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
+       WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
 
        duration = after.tv_sec - before.tv_sec;
        duration *= 1000000;
diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
index 270f54aada5..70b70eb3ea0 100644
--- lib/libc/dlfcn/init.c
+++ lib/libc/dlfcn/init.c
@@ -30,6 +30,7 @@
 #include <link.h>
 #include <stdlib.h>            /* atexit */
 #include <string.h>
+#include <time.h>              /* timekeep */
 #include <unistd.h>
 
 #include "init.h"
@@ -45,8 +46,9 @@
 /* XXX should be in an include file shared with csu */
 char   ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void));
 
-/* provide definition for this */
+/* provide definition for these */
 int    _pagesize = 0;
+void   *_timekeep = NULL;
 
 /*
  * In dynamicly linked binaries environ and __progname are overriden by
@@ -105,6 +107,9 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb 
*cb)
                        phnum = aux->au_v;
                        break;
 #endif /* !PIC */
+               case AUX_openbsd_timekeep:
+                       _timekeep = (void *)aux->au_v;
+                       break;
                }
        }
 
diff --git lib/libc/gen/times.c lib/libc/gen/times.c
index 02e4dd44b5c..36841810d1b 100644
--- lib/libc/gen/times.c
+++ lib/libc/gen/times.c
@@ -52,7 +52,7 @@ times(struct tms *tp)
                return ((clock_t)-1);
        tp->tms_cutime = CONVTCK(ru.ru_utime);
        tp->tms_cstime = CONVTCK(ru.ru_stime);
-       if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+       if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
                return ((clock_t)-1);
        return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
 }
diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
index 520a5954025..845cbe80356 100644
--- lib/libc/gen/timespec_get.c
+++ lib/libc/gen/timespec_get.c
@@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
 {
        switch (base) {
        case TIME_UTC:
-               if (clock_gettime(CLOCK_REALTIME, ts) == -1)
+               if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
                        return 0;
                break;
        default:
diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
index 18c49f8fcb9..1137dbcd44f 100644
--- lib/libc/hidden/time.h
+++ lib/libc/hidden/time.h
@@ -24,12 +24,16 @@
 extern PROTO_NORMAL(tzname);
 #endif
 
+__BEGIN_HIDDEN_DECLS
+extern void    *_timekeep;
+__END_HIDDEN_DECLS
+
 PROTO_NORMAL(asctime);
 PROTO_NORMAL(asctime_r);
 PROTO_STD_DEPRECATED(clock);
 PROTO_DEPRECATED(clock_getcpuclockid);
 PROTO_NORMAL(clock_getres);
-PROTO_NORMAL(clock_gettime);
+PROTO_WRAP(clock_gettime);
 PROTO_NORMAL(clock_settime);
 PROTO_STD_DEPRECATED(ctime);
 PROTO_DEPRECATED(ctime_r);
diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
index 763e420bb88..9babb28470a 100644
--- lib/libc/net/res_random.c
+++ lib/libc/net/res_random.c
@@ -219,7 +219,7 @@ res_initid(void)
        if (ru_prf != NULL)
                arc4random_buf(ru_prf, sizeof(*ru_prf));
 
-       clock_gettime(CLOCK_MONOTONIC, &ts);
+       WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
        ru_reseed = ts.tv_sec + RU_OUT;
        ru_msb = ru_msb == 0x8000 ? 0 : 0x8000; 
 }
@@ -232,7 +232,7 @@ __res_randomid(void)
        u_int r;
        static void *randomid_mutex;
 
-       clock_gettime(CLOCK_MONOTONIC, &ts);
+       WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
        pid = getpid();
 
        _MUTEX_LOCK(&randomid_mutex);
diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
index 8e6ef515b0e..927b4bf2028 100644
--- lib/libc/rpc/clnt_tcp.c
+++ lib/libc/rpc/clnt_tcp.c
@@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
        pfd[0].events = POLLIN;
        TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
        delta = wait;
-       clock_gettime(CLOCK_MONOTONIC, &start);
+       WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
        for (;;) {
                r = ppoll(pfd, 1, &delta, NULL);
                save_errno = errno;
 
-               clock_gettime(CLOCK_MONOTONIC, &after);
+               WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
                timespecsub(&start, &after, &duration);
                timespecsub(&wait, &duration, &delta);
                if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
index 68d01674410..92e1d5c350d 100644
--- lib/libc/rpc/clnt_udp.c
+++ lib/libc/rpc/clnt_udp.c
@@ -265,7 +265,7 @@ send_again:
        reply_msg.acpted_rply.ar_results.where = resultsp;
        reply_msg.acpted_rply.ar_results.proc = xresults;
 
-       clock_gettime(CLOCK_MONOTONIC, &start);
+       WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
        for (;;) {
                switch (ppoll(pfd, 1, &wait, NULL)) {
                case 0:
@@ -283,7 +283,7 @@ send_again:
                        /* FALLTHROUGH */
                case -1:
                        if (errno == EINTR) {
-                               clock_gettime(CLOCK_MONOTONIC, &after);
+                               WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
                                timespecsub(&after, &start, &duration);
                                timespecadd(&time_waited, &duration, 
&time_waited);
                                if (timespeccmp(&time_waited, &timeout, <))
diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
index f9d7a70938f..6c99db84359 100644
--- lib/libc/rpc/svc_tcp.c
+++ lib/libc/rpc/svc_tcp.c
@@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
         * A timeout is fatal for the connection.
         */
        delta = wait_per_try;
-       clock_gettime(CLOCK_MONOTONIC, &start);
+       WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
        pfd[0].fd = sock;
        pfd[0].events = POLLIN;
        do {
@@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
                case -1:
                        if (errno != EINTR)
                                goto fatal_err;
-                       clock_gettime(CLOCK_MONOTONIC, &after);
+                       WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
                        timespecsub(&after, &start, &duration);
                        timespecsub(&wait_per_try, &duration, &delta);
                        if (delta.tv_sec < 0 || !timespecisset(&delta))
diff --git lib/libc/shlib_version lib/libc/shlib_version
index 06f98b01084..5fb0770494f 100644
--- lib/libc/shlib_version
+++ lib/libc/shlib_version
@@ -1,4 +1,4 @@
 major=96
-minor=0
+minor=1
 # note: If changes were made to include/thread_private.h or if system calls
 # were added/changed then librthread/shlib_version must also be updated.
diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
index 34769576ced..d0b5dd1bdcd 100644
--- lib/libc/sys/Makefile.inc
+++ lib/libc/sys/Makefile.inc
@@ -12,7 +12,8 @@ SRCS+=        Ovfork.S brk.S ${CERROR} \
 
 # glue to offer userland wrappers for some syscalls
 SRCS+= posix_madvise.c pthread_sigmask.c \
-       w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
+       w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
+       w_clock_gettime.c
 
 # glue for compat with old syscall interfaces.
 SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \
@@ -43,7 +44,7 @@ SRCS+=        ${CANCEL:%=w_%.c} w_pread.c w_preadv.c 
w_pwrite.c w_pwritev.c
 ASM=   __semctl.o __syscall.o __thrsigdivert.o \
        access.o acct.o adjfreq.o adjtime.o \
        bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
-       clock_getres.o clock_gettime.o clock_settime.o \
+       clock_getres.o clock_settime.o \
        dup.o dup2.o dup3.o \
        execve.o \
        faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
@@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
 SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
 DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
 
-HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
+HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} 
clock_gettime.o
 PHIDDEN=${HIDDEN:.o=.po}
 SHIDDEN=${HIDDEN:.o=.so}
 DHIDDEN=${HIDDEN:.o=.do}
diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
new file mode 100644
index 00000000000..7c2883c31fd
--- /dev/null
+++ lib/libc/sys/w_clock_gettime.c
@@ -0,0 +1,64 @@
+/*     $OpenBSD$ */
+/*
+ * Copyright (c) 2020 Paul Irofti <p...@irofti.net>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <time.h>
+#include <err.h>
+
+#include <sys/time.h>
+
+int
+WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
+{
+       struct __timekeep *timekeep;
+       unsigned int seq;
+
+       if (_timekeep == NULL)
+               return clock_gettime(clock_id, tp);
+       timekeep = _timekeep;
+
+       switch (clock_id) {
+       case CLOCK_REALTIME:
+               do {
+                       seq = timekeep->seq;
+                       *tp = timekeep->tp_realtime;
+               } while (seq == 0 || seq != timekeep->seq);
+               break;
+       case CLOCK_UPTIME:
+               do {
+                       seq = timekeep->seq;
+                       *tp = timekeep->tp_uptime;
+               } while (seq == 0 || seq != timekeep->seq);
+               break;
+       case CLOCK_MONOTONIC:
+               do {
+                       seq = timekeep->seq;
+                       *tp = timekeep->tp_monotonic;
+               } while (seq == 0 || seq != timekeep->seq);
+               break;
+       case CLOCK_BOOTTIME:
+               do {
+                       seq = timekeep->seq;
+                       *tp = timekeep->tp_boottime;
+               } while (seq == 0 || seq != timekeep->seq);
+               break;
+       default:
+               return clock_gettime(clock_id, tp);
+       }
+       return 0;
+}
+DEF_WRAP(clock_gettime);
diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
index 788890add89..df2239438d2 100644
--- lib/libc/thread/synch.h
+++ lib/libc/thread/synch.h
@@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, 
const struct timespec *
        if (abs == NULL)
                return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
 
-       if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
+       if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
                return (EINVAL);
 
        rel.tv_sec = abs->tv_sec - rel.tv_sec;
diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
index 9b5b8eb3acf..59bc923a6fb 100644
--- sys/kern/exec_elf.c
+++ sys/kern/exec_elf.c
@@ -124,7 +124,7 @@ extern char *syscallnames[];
 /*
  * How many entries are in the AuxInfo array we pass to the process?
  */
-#define ELF_AUX_ENTRIES        8
+#define ELF_AUX_ENTRIES        9
 
 /*
  * This is the OpenBSD ELF emul
@@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
                a->au_v = ap->arg_entry;
                a++;
 
+               a->au_id = AUX_openbsd_timekeep;
+               a->au_v = p->p_p->ps_timekeep;
+               a++;
+
                a->au_id = AUX_null;
                a->au_v = 0;
                a++;
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 20480c2fc28..15bf4db6fbd 100644
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -64,6 +64,11 @@
 #include <uvm/uvm_extern.h>
 #include <machine/tcb.h>
 
+#include <sys/time.h>
+
+struct uvm_object *timekeep_object;
+struct __timekeep* timekeep;
+
 void   unveil_destroy(struct process *ps);
 
 const struct kmem_va_mode kv_exec = {
@@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
  */
 int exec_sigcode_map(struct process *, struct emul *);
 
+/*
+ * Map the shared timekeep page.
+ */
+int exec_timekeep_map(struct process *);
+
 /*
  * If non-zero, stackgap_random specifies the upper limit of the random gap 
size
  * added to the fixed stack position. Must be n^2.
@@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
        /* map the process's signal trampoline code */
        if (exec_sigcode_map(pr, pack.ep_emul))
                goto free_pack_abort;
+       /* map the process's timekeep page */
+       if (exec_timekeep_map(pr))
+               goto free_pack_abort;
 
 #ifdef __HAVE_EXEC_MD_MAP
        /* perform md specific mappings that process might need */
@@ -863,3 +876,43 @@ exec_sigcode_map(struct process *pr, struct emul *e)
 
        return (0);
 }
+
+int
+exec_timekeep_map(struct process *pr)
+{
+       size_t timekeep_sz = sizeof(struct __timekeep);
+
+       /*
+        * Similar to the sigcode object, except that there is a single timekeep
+        * object, and not one per emulation.
+        */
+       if (timekeep_object == NULL) {
+               vaddr_t va;
+
+               timekeep_object = uao_create(timekeep_sz, 0);
+               uao_reference(timekeep_object);
+
+               if (uvm_map(kernel_map, &va, round_page(timekeep_sz), 
timekeep_object,
+                   0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | 
PROT_WRITE,
+                   MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
+                       uao_detach(timekeep_object);
+                       return (ENOMEM);
+               }
+
+               timekeep = (struct __timekeep *)va;
+               timekeep->major = 0;
+               timekeep->minor = 0;
+
+               timekeep->seq = 0;
+       }
+
+       uao_reference(timekeep_object);
+       if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, 
round_page(timekeep_sz),
+           timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
+           MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
+               uao_detach(timekeep_object);
+               return (ENOMEM);
+       }
+
+       return (0);
+}
diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
index 4b9eedf50b9..9c67cb738de 100644
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -35,6 +35,7 @@
 #include <sys/queue.h>
 #include <sys/malloc.h>
 #include <dev/rndvar.h>
+#include <sys/time.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
@@ -480,6 +481,29 @@ tc_setclock(const struct timespec *ts)
 #endif
 }
 
+void
+tc_clock_gettime(void)
+{
+       if (timekeep == NULL)
+               return;
+
+       atomic_inc_int(&timekeep->seq);
+
+       /* CLOCK_REALTIME */
+       nanotime(&timekeep->tp_realtime);
+
+       /* CLOCK_UPTIME */
+       nanoruntime(&timekeep->tp_uptime);
+
+       /* CLOCK_MONOTONIC */
+       nanouptime(&timekeep->tp_monotonic);
+
+       /* CLOCK_BOOTTIME */
+       timekeep->tp_boottime = timekeep->tp_monotonic;
+
+       return;
+}
+
 /*
  * Initialize the next struct timehands in the ring and make
  * it the active timehands.  Along the way we might switch to a different
@@ -632,6 +656,8 @@ tc_windup(struct bintime *new_boottime, struct bintime 
*new_offset,
        time_uptime = th->th_offset.sec;
        membar_producer();
        timehands = th;
+
+       tc_clock_gettime();
 }
 
 /* Report or change the active timecounter hardware. */
diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
index a40e0510273..f55b75f1e84 100644
--- sys/sys/exec_elf.h
+++ sys/sys/exec_elf.h
@@ -691,7 +691,8 @@ enum AuxID {
        AUX_sun_uid = 2000,             /* euid */
        AUX_sun_ruid = 2001,            /* ruid */
        AUX_sun_gid = 2002,             /* egid */
-       AUX_sun_rgid = 2003             /* rgid */
+       AUX_sun_rgid = 2003,            /* rgid */
+       AUX_openbsd_timekeep = 2004,    /* userland clock_gettime */
 };
 
 struct elf_args {
diff --git sys/sys/proc.h sys/sys/proc.h
index 357c0c0d52c..93a79a220db 100644
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -248,6 +248,8 @@ struct process {
        u_int   ps_rtableid;            /* Process routing table/domain. */
        char    ps_nice;                /* Process "nice" value. */
 
+       vaddr_t ps_timekeep;            /* User pointer to timekeep */
+
        struct uprof {                  /* profile arguments */
                caddr_t pr_base;        /* buffer base */
                size_t  pr_size;        /* buffer size */
diff --git sys/sys/time.h sys/sys/time.h
index e758a64ce07..be762be15e4 100644
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -163,6 +163,17 @@ struct clockinfo {
 };
 #endif /* __BSD_VISIBLE */
 
+struct __timekeep {
+       uint8_t major;          /* version major number */
+       uint8_t minor;          /* version minor number */
+
+       volatile unsigned int seq;      /* synchronization */
+       struct timespec tp_realtime;    /* CLOCK_REALTIME */
+       struct timespec tp_uptime;      /* CLOCK_UPTIME */
+       struct timespec tp_monotonic;   /* CLOCK_MONOTONIC */
+       struct timespec tp_boottime;    /* CLOCK_BOOTTIME */
+};
+
 #if defined(_KERNEL) || defined(_STANDALONE)
 #include <sys/_time.h>
 
@@ -396,6 +407,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts)
        return ts->tv_sec * 1000000000ULL + ts->tv_nsec;
 }
 
+extern struct uvm_object *timekeep_object;
+extern struct __timekeep *timekeep;
 #else /* !_KERNEL */
 #include <time.h>
 

Reply via email to