> Discussions. > > - /sbin/init init_main.c!start_init(): map the page? (deraadt@) > -> That is not the problem; the page should be mapped even there > by the sys_execve() call.
Robert found the proper solution to this: move the find_timekeep bits into _libc_preinit! This helps with a lot of things: - removes the need for the find_timekeep() function - removes the nasty ELF exports - shrinks the diff Good job, Robert! What's left is the TSC discussion and the bikeshedding bits. Paul diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c index cd056c85719..2b25d49f32a 100644 --- lib/libc/asr/asr.c +++ lib/libc/asr/asr.c @@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout) struct timespec pollstart, pollend, elapsed; int r; - if (clock_gettime(CLOCK_MONOTONIC, &pollstart)) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart)) return -1; while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) { - if (clock_gettime(CLOCK_MONOTONIC, &pollend)) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend)) return -1; timespecsub(&pollend, &pollstart, &elapsed); timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000; @@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr) asr->a_rtime = 0; } - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1) return; if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0) diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c index 82de8fa33b7..02fd3013cc1 100644 --- lib/libc/crypt/bcrypt.c +++ lib/libc/crypt/bcrypt.c @@ -248,9 +248,9 @@ _bcrypt_autorounds(void) char buf[_PASSWORD_LEN]; int duration; - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before); + WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before); bcrypt_newhash("testpassword", r, buf, sizeof(buf)); - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after); + WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after); duration = after.tv_sec - before.tv_sec; duration *= 1000000; diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c index 270f54aada5..70b70eb3ea0 100644 --- lib/libc/dlfcn/init.c +++ lib/libc/dlfcn/init.c @@ -30,6 +30,7 @@ #include <link.h> #include <stdlib.h> /* 
atexit */ #include <string.h> +#include <time.h> /* timekeep */ #include <unistd.h> #include "init.h" @@ -45,8 +46,9 @@ /* XXX should be in an include file shared with csu */ char ***_csu_finish(char **_argv, char **_envp, void (*_cleanup)(void)); -/* provide definition for this */ +/* provide definition for these */ int _pagesize = 0; +void *_timekeep = NULL; /* * In dynamicly linked binaries environ and __progname are overriden by @@ -105,6 +107,9 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb) phnum = aux->au_v; break; #endif /* !PIC */ + case AUX_openbsd_timekeep: + _timekeep = (void *)aux->au_v; + break; } } diff --git lib/libc/gen/times.c lib/libc/gen/times.c index 02e4dd44b5c..36841810d1b 100644 --- lib/libc/gen/times.c +++ lib/libc/gen/times.c @@ -52,7 +52,7 @@ times(struct tms *tp) return ((clock_t)-1); tp->tms_cutime = CONVTCK(ru.ru_utime); tp->tms_cstime = CONVTCK(ru.ru_stime); - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1) return ((clock_t)-1); return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK)); } diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c index 520a5954025..845cbe80356 100644 --- lib/libc/gen/timespec_get.c +++ lib/libc/gen/timespec_get.c @@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base) { switch (base) { case TIME_UTC: - if (clock_gettime(CLOCK_REALTIME, ts) == -1) + if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1) return 0; break; default: diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h index 18c49f8fcb9..1137dbcd44f 100644 --- lib/libc/hidden/time.h +++ lib/libc/hidden/time.h @@ -24,12 +24,16 @@ extern PROTO_NORMAL(tzname); #endif +__BEGIN_HIDDEN_DECLS +extern void *_timekeep; +__END_HIDDEN_DECLS + PROTO_NORMAL(asctime); PROTO_NORMAL(asctime_r); PROTO_STD_DEPRECATED(clock); PROTO_DEPRECATED(clock_getcpuclockid); PROTO_NORMAL(clock_getres); -PROTO_NORMAL(clock_gettime); +PROTO_WRAP(clock_gettime); 
PROTO_NORMAL(clock_settime); PROTO_STD_DEPRECATED(ctime); PROTO_DEPRECATED(ctime_r); diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c index 763e420bb88..9babb28470a 100644 --- lib/libc/net/res_random.c +++ lib/libc/net/res_random.c @@ -219,7 +219,7 @@ res_initid(void) if (ru_prf != NULL) arc4random_buf(ru_prf, sizeof(*ru_prf)); - clock_gettime(CLOCK_MONOTONIC, &ts); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts); ru_reseed = ts.tv_sec + RU_OUT; ru_msb = ru_msb == 0x8000 ? 0 : 0x8000; } @@ -232,7 +232,7 @@ __res_randomid(void) u_int r; static void *randomid_mutex; - clock_gettime(CLOCK_MONOTONIC, &ts); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts); pid = getpid(); _MUTEX_LOCK(&randomid_mutex); diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c index 8e6ef515b0e..927b4bf2028 100644 --- lib/libc/rpc/clnt_tcp.c +++ lib/libc/rpc/clnt_tcp.c @@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len) pfd[0].events = POLLIN; TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait); delta = wait; - clock_gettime(CLOCK_MONOTONIC, &start); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start); for (;;) { r = ppoll(pfd, 1, &delta, NULL); save_errno = errno; - clock_gettime(CLOCK_MONOTONIC, &after); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after); timespecsub(&start, &after, &duration); timespecsub(&wait, &duration, &delta); if (delta.tv_sec < 0 || !timespecisset(&delta)) diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c index 68d01674410..92e1d5c350d 100644 --- lib/libc/rpc/clnt_udp.c +++ lib/libc/rpc/clnt_udp.c @@ -265,7 +265,7 @@ send_again: reply_msg.acpted_rply.ar_results.where = resultsp; reply_msg.acpted_rply.ar_results.proc = xresults; - clock_gettime(CLOCK_MONOTONIC, &start); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start); for (;;) { switch (ppoll(pfd, 1, &wait, NULL)) { case 0: @@ -283,7 +283,7 @@ send_again: /* FALLTHROUGH */ case -1: if (errno == EINTR) { - clock_gettime(CLOCK_MONOTONIC, &after); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after); 
timespecsub(&after, &start, &duration); timespecadd(&time_waited, &duration, &time_waited); if (timespeccmp(&time_waited, &timeout, <)) diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c index f9d7a70938f..6c99db84359 100644 --- lib/libc/rpc/svc_tcp.c +++ lib/libc/rpc/svc_tcp.c @@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len) * A timeout is fatal for the connection. */ delta = wait_per_try; - clock_gettime(CLOCK_MONOTONIC, &start); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start); pfd[0].fd = sock; pfd[0].events = POLLIN; do { @@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len) case -1: if (errno != EINTR) goto fatal_err; - clock_gettime(CLOCK_MONOTONIC, &after); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after); timespecsub(&after, &start, &duration); timespecsub(&wait_per_try, &duration, &delta); if (delta.tv_sec < 0 || !timespecisset(&delta)) diff --git lib/libc/shlib_version lib/libc/shlib_version index 06f98b01084..5fb0770494f 100644 --- lib/libc/shlib_version +++ lib/libc/shlib_version @@ -1,4 +1,4 @@ major=96 -minor=0 +minor=1 # note: If changes were made to include/thread_private.h or if system calls # were added/changed then librthread/shlib_version must also be updated. diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc index 34769576ced..d0b5dd1bdcd 100644 --- lib/libc/sys/Makefile.inc +++ lib/libc/sys/Makefile.inc @@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \ # glue to offer userland wrappers for some syscalls SRCS+= posix_madvise.c pthread_sigmask.c \ - w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c + w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \ + w_clock_gettime.c # glue for compat with old syscall interfaces. 
SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \ @@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c ASM= __semctl.o __syscall.o __thrsigdivert.o \ access.o acct.o adjfreq.o adjtime.o \ bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \ - clock_getres.o clock_gettime.o clock_settime.o \ + clock_getres.o clock_settime.o \ dup.o dup2.o dup3.o \ execve.o \ faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \ @@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po} SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so} DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do} -HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} +HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} clock_gettime.o PHIDDEN=${HIDDEN:.o=.po} SHIDDEN=${HIDDEN:.o=.so} DHIDDEN=${HIDDEN:.o=.do} diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c new file mode 100644 index 00000000000..7c2883c31fd --- /dev/null +++ lib/libc/sys/w_clock_gettime.c @@ -0,0 +1,64 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2020 Paul Irofti <p...@irofti.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <stdlib.h> +#include <time.h> +#include <err.h> + +#include <sys/time.h> + +int +WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp) +{ + struct __timekeep *timekeep; + unsigned int seq; + + if (_timekeep == NULL) + return clock_gettime(clock_id, tp); + timekeep = _timekeep; + + switch (clock_id) { + case CLOCK_REALTIME: + do { + seq = timekeep->seq; + *tp = timekeep->tp_realtime; + } while (seq == 0 || seq != timekeep->seq); + break; + case CLOCK_UPTIME: + do { + seq = timekeep->seq; + *tp = timekeep->tp_uptime; + } while (seq == 0 || seq != timekeep->seq); + break; + case CLOCK_MONOTONIC: + do { + seq = timekeep->seq; + *tp = timekeep->tp_monotonic; + } while (seq == 0 || seq != timekeep->seq); + break; + case CLOCK_BOOTTIME: + do { + seq = timekeep->seq; + *tp = timekeep->tp_boottime; + } while (seq == 0 || seq != timekeep->seq); + break; + default: + return clock_gettime(clock_id, tp); + } + return 0; +} +DEF_WRAP(clock_gettime); diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h index 788890add89..df2239438d2 100644 --- lib/libc/thread/synch.h +++ lib/libc/thread/synch.h @@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec * if (abs == NULL) return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL); - if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel)) + if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel)) return (EINVAL); rel.tv_sec = abs->tv_sec - rel.tv_sec; diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c index 9b5b8eb3acf..59bc923a6fb 100644 --- sys/kern/exec_elf.c +++ sys/kern/exec_elf.c @@ -124,7 +124,7 @@ extern char *syscallnames[]; /* * How many entries are in the AuxInfo array we pass to the process? 
*/ -#define ELF_AUX_ENTRIES 8 +#define ELF_AUX_ENTRIES 9 /* * This is the OpenBSD ELF emul @@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp) a->au_v = ap->arg_entry; a++; + a->au_id = AUX_openbsd_timekeep; + a->au_v = p->p_p->ps_timekeep; + a++; + a->au_id = AUX_null; a->au_v = 0; a++; diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c index 20480c2fc28..15bf4db6fbd 100644 --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -64,6 +64,11 @@ #include <uvm/uvm_extern.h> #include <machine/tcb.h> +#include <sys/time.h> + +struct uvm_object *timekeep_object; +struct __timekeep* timekeep; + void unveil_destroy(struct process *ps); const struct kmem_va_mode kv_exec = { @@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = { */ int exec_sigcode_map(struct process *, struct emul *); +/* + * Map the shared timekeep page. + */ +int exec_timekeep_map(struct process *); + /* * If non-zero, stackgap_random specifies the upper limit of the random gap size * added to the fixed stack position. Must be n^2. @@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval) /* map the process's signal trampoline code */ if (exec_sigcode_map(pr, pack.ep_emul)) goto free_pack_abort; + /* map the process's timekeep page */ + if (exec_timekeep_map(pr)) + goto free_pack_abort; #ifdef __HAVE_EXEC_MD_MAP /* perform md specific mappings that process might need */ @@ -863,3 +876,43 @@ exec_sigcode_map(struct process *pr, struct emul *e) return (0); } + +int +exec_timekeep_map(struct process *pr) +{ + size_t timekeep_sz = sizeof(struct __timekeep); + + /* + * Similar to the sigcode object, except that there is a single timekeep + * object, and not one per emulation. 
+ */ + if (timekeep_object == NULL) { + vaddr_t va; + + timekeep_object = uao_create(timekeep_sz, 0); + uao_reference(timekeep_object); + + if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object, + 0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, + MAP_INHERIT_SHARE, MADV_RANDOM, 0))) { + uao_detach(timekeep_object); + return (ENOMEM); + } + + timekeep = (struct __timekeep *)va; + timekeep->major = 0; + timekeep->minor = 0; + + timekeep->seq = 0; + } + + uao_reference(timekeep_object); + if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz), + timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ, + MAP_INHERIT_COPY, MADV_RANDOM, 0))) { + uao_detach(timekeep_object); + return (ENOMEM); + } + + return (0); +} diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c index 4b9eedf50b9..9c67cb738de 100644 --- sys/kern/kern_tc.c +++ sys/kern/kern_tc.c @@ -35,6 +35,7 @@ #include <sys/queue.h> #include <sys/malloc.h> #include <dev/rndvar.h> +#include <sys/time.h> /* * A large step happens on boot. This constant detects such steps. @@ -480,6 +481,29 @@ tc_setclock(const struct timespec *ts) #endif } +void +tc_clock_gettime(void) +{ + if (timekeep == NULL) + return; + + atomic_inc_int(&timekeep->seq); + + /* CLOCK_REALTIME */ + nanotime(&timekeep->tp_realtime); + + /* CLOCK_UPTIME */ + nanoruntime(&timekeep->tp_uptime); + + /* CLOCK_MONOTONIC */ + nanouptime(&timekeep->tp_monotonic); + + /* CLOCK_BOOTTIME */ + timekeep->tp_boottime = timekeep->tp_monotonic; + + return; +} + /* * Initialize the next struct timehands in the ring and make * it the active timehands. Along the way we might switch to a different @@ -632,6 +656,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset, time_uptime = th->th_offset.sec; membar_producer(); timehands = th; + + tc_clock_gettime(); } /* Report or change the active timecounter hardware. 
*/ diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h index a40e0510273..f55b75f1e84 100644 --- sys/sys/exec_elf.h +++ sys/sys/exec_elf.h @@ -691,7 +691,8 @@ enum AuxID { AUX_sun_uid = 2000, /* euid */ AUX_sun_ruid = 2001, /* ruid */ AUX_sun_gid = 2002, /* egid */ - AUX_sun_rgid = 2003 /* rgid */ + AUX_sun_rgid = 2003, /* rgid */ + AUX_openbsd_timekeep = 2004, /* userland clock_gettime */ }; struct elf_args { diff --git sys/sys/proc.h sys/sys/proc.h index 357c0c0d52c..93a79a220db 100644 --- sys/sys/proc.h +++ sys/sys/proc.h @@ -248,6 +248,8 @@ struct process { u_int ps_rtableid; /* Process routing table/domain. */ char ps_nice; /* Process "nice" value. */ + vaddr_t ps_timekeep; /* User pointer to timekeep */ + struct uprof { /* profile arguments */ caddr_t pr_base; /* buffer base */ size_t pr_size; /* buffer size */ diff --git sys/sys/time.h sys/sys/time.h index e758a64ce07..be762be15e4 100644 --- sys/sys/time.h +++ sys/sys/time.h @@ -163,6 +163,17 @@ struct clockinfo { }; #endif /* __BSD_VISIBLE */ +struct __timekeep { + uint8_t major; /* version major number */ + uint8_t minor; /* version minor number */ + + volatile unsigned int seq; /* synchronization */ + struct timespec tp_realtime; /* CLOCK_REALTIME */ + struct timespec tp_uptime; /* CLOCK_UPTIME */ + struct timespec tp_monotonic; /* CLOCK_MONOTONIC */ + struct timespec tp_boottime; /* CLOCK_BOOTTIME */ +}; + #if defined(_KERNEL) || defined(_STANDALONE) #include <sys/_time.h> @@ -396,6 +407,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts) return ts->tv_sec * 1000000000ULL + ts->tv_nsec; } +extern struct uvm_object *timekeep_object; +extern struct __timekeep *timekeep; #else /* !_KERNEL */ #include <time.h>