Here is an updated diff that addresses the following points mentioned by kettenis@:
- The syscall fallback has been implemented since the first version.
- The low-resolution clock argument was, I think, shown not to be a problem.
- TSC and HPET alternatives were discussed; if we decide to add them, I think that should be done in a separate diff.
- I think this version does proper wrapping (at least according to the README); of course, Philip's input would be greatly appreciated!
- I will export the ELF bits once the diff is in committable shape.
- Proper auxv number instead of 2004: I see that NetBSD has taken 2004 for AT_SUN_LDELF; should I take 2015, which seems to be the next free one?

Hopefully this version also fixes the init bug solene@ was seeing.

Paul

diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c index cd056c85719..2b25d49f32a 100644 --- lib/libc/asr/asr.c +++ lib/libc/asr/asr.c @@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int timeout) struct timespec pollstart, pollend, elapsed; int r; - if (clock_gettime(CLOCK_MONOTONIC, &pollstart)) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart)) return -1; while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) { - if (clock_gettime(CLOCK_MONOTONIC, &pollend)) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend)) return -1; timespecsub(&pollend, &pollstart, &elapsed); timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000; @@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr) asr->a_rtime = 0; } - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1) return; if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0) diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c index 82de8fa33b7..63edde9072e 100644 --- lib/libc/crypt/bcrypt.c +++ lib/libc/crypt/bcrypt.c @@ -31,6 +31,7 @@ * */ +#include <sys/time.h> #include <sys/types.h> #include <blf.h> #include <ctype.h> @@ -248,9 +249,9 @@ _bcrypt_autorounds(void) char buf[_PASSWORD_LEN]; int duration; - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before); + 
WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before); bcrypt_newhash("testpassword", r, buf, sizeof(buf)); - clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after); + WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after); duration = after.tv_sec - before.tv_sec; duration *= 1000000; diff --git lib/libc/dlfcn/dlfcn_stubs.c lib/libc/dlfcn/dlfcn_stubs.c index 78d728f66cb..7b75ec4582a 100644 --- lib/libc/dlfcn/dlfcn_stubs.c +++ lib/libc/dlfcn/dlfcn_stubs.c @@ -80,10 +80,14 @@ dlerror(void) return "Wrong dl symbols!\n"; } +extern void *elf_aux_timekeep; +extern int find_timekeep(void); + int dl_iterate_phdr(int (*callback)(struct dl_phdr_info *, size_t, void *), void *data) { + find_timekeep(); if (_dl_cb != NULL && _dl_cb->dl_iterate_phdr != NULL) return _dl_cb->dl_iterate_phdr(callback, data); #ifndef PIC diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c index 270f54aada5..0238bb50b0b 100644 --- lib/libc/dlfcn/init.c +++ lib/libc/dlfcn/init.c @@ -69,6 +69,9 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak)); /* provide definitions for these */ const dl_cb *_dl_cb __relro = NULL; +extern void *elf_aux_timekeep; +extern int find_timekeep(void); + void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden; void _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb) @@ -126,6 +129,7 @@ _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb) if (cb == NULL) setup_static_tib(phdr, phnum); #endif /* !PIC */ + find_timekeep(); } /* ARM just had to be different... 
*/ diff --git lib/libc/gen/times.c lib/libc/gen/times.c index 02e4dd44b5c..36841810d1b 100644 --- lib/libc/gen/times.c +++ lib/libc/gen/times.c @@ -52,7 +52,7 @@ times(struct tms *tp) return ((clock_t)-1); tp->tms_cutime = CONVTCK(ru.ru_utime); tp->tms_cstime = CONVTCK(ru.ru_stime); - if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) + if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1) return ((clock_t)-1); return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK)); } diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c index 520a5954025..b2bdcd15a4d 100644 --- lib/libc/gen/timespec_get.c +++ lib/libc/gen/timespec_get.c @@ -30,6 +30,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include <sys/time.h> #include <time.h> int @@ -37,7 +38,7 @@ timespec_get(struct timespec *ts, int base) { switch (base) { case TIME_UTC: - if (clock_gettime(CLOCK_REALTIME, ts) == -1) + if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1) return 0; break; default: diff --git lib/libc/hidden/sys/time.h lib/libc/hidden/sys/time.h index ed112320fa2..7f59daa0107 100644 --- lib/libc/hidden/sys/time.h +++ lib/libc/hidden/sys/time.h @@ -22,6 +22,7 @@ PROTO_NORMAL(adjfreq); PROTO_NORMAL(adjtime); +PROTO_WRAP(clock_gettime); PROTO_NORMAL(futimes); PROTO_NORMAL(getitimer); PROTO_NORMAL(gettimeofday); diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c index 763e420bb88..9babb28470a 100644 --- lib/libc/net/res_random.c +++ lib/libc/net/res_random.c @@ -219,7 +219,7 @@ res_initid(void) if (ru_prf != NULL) arc4random_buf(ru_prf, sizeof(*ru_prf)); - clock_gettime(CLOCK_MONOTONIC, &ts); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts); ru_reseed = ts.tv_sec + RU_OUT; ru_msb = ru_msb == 0x8000 ? 
0 : 0x8000; } @@ -232,7 +232,7 @@ __res_randomid(void) u_int r; static void *randomid_mutex; - clock_gettime(CLOCK_MONOTONIC, &ts); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts); pid = getpid(); _MUTEX_LOCK(&randomid_mutex); diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c index 8e6ef515b0e..927b4bf2028 100644 --- lib/libc/rpc/clnt_tcp.c +++ lib/libc/rpc/clnt_tcp.c @@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len) pfd[0].events = POLLIN; TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait); delta = wait; - clock_gettime(CLOCK_MONOTONIC, &start); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &start); for (;;) { r = ppoll(pfd, 1, &delta, NULL); save_errno = errno; - clock_gettime(CLOCK_MONOTONIC, &after); + WRAP(clock_gettime)(CLOCK_MONOTONIC, &after); timespecsub(&start, &after, &duration); timespecsub(&wait, &duration, &delta); if (delta.tv_sec < 0 || !timespecisset(&delta)) diff --git lib/libc/shlib_version lib/libc/shlib_version index 06f98b01084..5fb0770494f 100644 --- lib/libc/shlib_version +++ lib/libc/shlib_version @@ -1,4 +1,4 @@ major=96 -minor=0 +minor=1 # note: If changes were made to include/thread_private.h or if system calls # were added/changed then librthread/shlib_version must also be updated. diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc index 34769576ced..d0b5dd1bdcd 100644 --- lib/libc/sys/Makefile.inc +++ lib/libc/sys/Makefile.inc @@ -12,7 +12,8 @@ SRCS+= Ovfork.S brk.S ${CERROR} \ # glue to offer userland wrappers for some syscalls SRCS+= posix_madvise.c pthread_sigmask.c \ - w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c + w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \ + w_clock_gettime.c # glue for compat with old syscall interfaces. 
SRCS+= ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c truncate.c \ @@ -43,7 +44,7 @@ SRCS+= ${CANCEL:%=w_%.c} w_pread.c w_preadv.c w_pwrite.c w_pwritev.c ASM= __semctl.o __syscall.o __thrsigdivert.o \ access.o acct.o adjfreq.o adjtime.o \ bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \ - clock_getres.o clock_gettime.o clock_settime.o \ + clock_getres.o clock_settime.o \ dup.o dup2.o dup3.o \ execve.o \ faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \ @@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po} SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so} DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do} -HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} +HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} clock_gettime.o PHIDDEN=${HIDDEN:.o=.po} SHIDDEN=${HIDDEN:.o=.so} DHIDDEN=${HIDDEN:.o=.do} diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c new file mode 100644 index 00000000000..04850fbda32 --- /dev/null +++ lib/libc/sys/w_clock_gettime.c @@ -0,0 +1,109 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2020 Paul Irofti <p...@irofti.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include <stdlib.h> +#include <time.h> +#include <err.h> + +#include <sys/timekeep.h> + +void *elf_aux_timekeep; + + +/* + * Needed exec_elf implementation. + * To be exposed by the kernel later if needed. + */ + +#include <sys/exec_elf.h> + +typedef struct { + uint32_t au_id; /* 32-bit id */ + uint64_t au_v; /* 64-bit value */ +} AuxInfo; + +enum AuxID { + AUX_null = 0, + AUX_ignore = 1, + AUX_execfd = 2, + AUX_phdr = 3, /* &phdr[0] */ + AUX_phent = 4, /* sizeof(phdr[0]) */ + AUX_phnum = 5, /* # phdr entries */ + AUX_pagesz = 6, /* PAGESIZE */ + AUX_base = 7, /* ld.so base addr */ + AUX_flags = 8, /* processor flags */ + AUX_entry = 9, /* a.out entry */ + AUX_sun_uid = 2000, /* euid */ + AUX_sun_ruid = 2001, /* ruid */ + AUX_sun_gid = 2002, /* egid */ + AUX_sun_rgid = 2003, /* rgid */ + AUX_openbsd_timekeep = 2004, /* userland clock_gettime */ +}; + + +/* + * Helper functions. + */ + +int +find_timekeep(void) +{ + Elf_Addr *stackp; + AuxInfo *auxv; + + stackp = (Elf_Addr *)environ; + while (*stackp++) ; /* pass environment */ + + /* look-up timekeep auxv */ + for (auxv = (AuxInfo *)stackp; auxv->au_id != AUX_null; auxv++) + if (auxv->au_id == AUX_openbsd_timekeep) { + elf_aux_timekeep = (void *)auxv->au_v; + return 0; + } + + warnx("%s", "Could not find auxv!"); + return -1; +} + +int +WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp) +{ + struct timekeep *timekeep; + + if (elf_aux_timekeep == NULL && find_timekeep()) + return clock_gettime(clock_id, tp); + timekeep = elf_aux_timekeep; + + switch (clock_id) { + case CLOCK_REALTIME: + *tp = timekeep->tp_realtime; + break; + case CLOCK_UPTIME: + *tp = timekeep->tp_uptime; + break; + case CLOCK_MONOTONIC: + *tp = timekeep->tp_monotonic; + break; + case CLOCK_BOOTTIME: + *tp = timekeep->tp_boottime; + break; + default: + return clock_gettime(clock_id, tp); + } + return 0; +} +DEF_WRAP(clock_gettime); diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c index 9b5b8eb3acf..59bc923a6fb 100644 --- 
sys/kern/exec_elf.c +++ sys/kern/exec_elf.c @@ -124,7 +124,7 @@ extern char *syscallnames[]; /* * How many entries are in the AuxInfo array we pass to the process? */ -#define ELF_AUX_ENTRIES 8 +#define ELF_AUX_ENTRIES 9 /* * This is the OpenBSD ELF emul @@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp) a->au_v = ap->arg_entry; a++; + a->au_id = AUX_openbsd_timekeep; + a->au_v = p->p_p->ps_timekeep; + a++; + a->au_id = AUX_null; a->au_v = 0; a++; diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c index 20480c2fc28..2496458fde1 100644 --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -64,6 +64,11 @@ #include <uvm/uvm_extern.h> #include <machine/tcb.h> +#include <sys/timekeep.h> + +struct uvm_object *timekeep_object; +struct timekeep* timekeep; + void unveil_destroy(struct process *ps); const struct kmem_va_mode kv_exec = { @@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = { */ int exec_sigcode_map(struct process *, struct emul *); +/* + * Map the shared timekeep page. + */ +int exec_timekeep_map(struct process *); + /* * If non-zero, stackgap_random specifies the upper limit of the random gap size * added to the fixed stack position. Must be n^2. @@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval) /* map the process's signal trampoline code */ if (exec_sigcode_map(pr, pack.ep_emul)) goto free_pack_abort; + /* map the process's timekeep page */ + if (exec_timekeep_map(pr)) + goto free_pack_abort; #ifdef __HAVE_EXEC_MD_MAP /* perform md specific mappings that process might need */ @@ -863,3 +876,38 @@ exec_sigcode_map(struct process *pr, struct emul *e) return (0); } + +int exec_timekeep_map(struct process *pr) +{ + size_t timekeep_sz = sizeof(struct timekeep); + + /* + * Similar to the sigcode object, except that there is a single timekeep + * object, and not one per emulation. 
+ */ + if (timekeep_object == NULL) { + vaddr_t va; + + timekeep_object = uao_create(timekeep_sz, 0); + uao_reference(timekeep_object); + + if (uvm_map(kernel_map, &va, round_page(timekeep_sz), timekeep_object, + 0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE, + MAP_INHERIT_SHARE, MADV_RANDOM, 0))) { + uao_detach(timekeep_object); + return (ENOMEM); + } + + timekeep = (struct timekeep *)va; + } + + uao_reference(timekeep_object); + if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, round_page(timekeep_sz), + timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ, + MAP_INHERIT_COPY, MADV_RANDOM, 0))) { + uao_detach(timekeep_object); + return (ENOMEM); + } + + return (0); +} diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c index bcf8f689625..007f1116c4f 100644 --- sys/kern/kern_tc.c +++ sys/kern/kern_tc.c @@ -35,6 +35,7 @@ #include <sys/queue.h> #include <sys/malloc.h> #include <dev/rndvar.h> +#include <sys/timekeep.h> /* * A large step happens on boot. This constant detects such steps. @@ -209,6 +210,31 @@ microuptime(struct timeval *tvp) BINTIME_TO_TIMEVAL(&bt, tvp); } +void +tc_clock_gettime(void) +{ + struct bintime bt; + + if (timekeep == NULL) + return; + + /* CLOCK_REALTIME */ + nanotime(&timekeep->tp_realtime); + + /* CLOCK_UPTIME */ + binuptime(&bt); + bintimesub(&bt, &naptime, &bt); + BINTIME_TO_TIMESPEC(&bt, &timekeep->tp_uptime); + + /* CLOCK_MONOTONIC */ + nanouptime(&timekeep->tp_monotonic); + + /* CLOCK_BOOTTIME */ + timekeep->tp_boottime = timekeep->tp_monotonic; + + return; +} + void bintime(struct bintime *bt) { @@ -613,6 +639,8 @@ tc_windup(struct bintime *new_boottime, struct bintime *new_offset, time_uptime = th->th_offset.sec; membar_producer(); timehands = th; + + tc_clock_gettime(); } /* Report or change the active timecounter hardware. 
*/ diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h index a40e0510273..f55b75f1e84 100644 --- sys/sys/exec_elf.h +++ sys/sys/exec_elf.h @@ -691,7 +691,8 @@ enum AuxID { AUX_sun_uid = 2000, /* euid */ AUX_sun_ruid = 2001, /* ruid */ AUX_sun_gid = 2002, /* egid */ - AUX_sun_rgid = 2003 /* rgid */ + AUX_sun_rgid = 2003, /* rgid */ + AUX_openbsd_timekeep = 2004, /* userland clock_gettime */ }; struct elf_args { diff --git sys/sys/proc.h sys/sys/proc.h index 357c0c0d52c..93a79a220db 100644 --- sys/sys/proc.h +++ sys/sys/proc.h @@ -248,6 +248,8 @@ struct process { u_int ps_rtableid; /* Process routing table/domain. */ char ps_nice; /* Process "nice" value. */ + vaddr_t ps_timekeep; /* User pointer to timekeep */ + struct uprof { /* profile arguments */ caddr_t pr_base; /* buffer base */ size_t pr_size; /* buffer size */ diff --git sys/sys/timekeep.h sys/sys/timekeep.h new file mode 100644 index 00000000000..bad25185bc4 --- /dev/null +++ sys/sys/timekeep.h @@ -0,0 +1,37 @@ +/* $OpenBSD$ */ +/* + * Copyright (c) 2020 Paul Irofti <p...@irofti.net> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _SYS_TIMEKEEP_H_ +#define _SYS_TIMEKEEP_H_ + +#include <sys/time.h> + +struct timekeep { + struct timespec tp_realtime; + struct timespec tp_uptime; + struct timespec tp_monotonic; + struct timespec tp_boottime; +}; + +#if defined(_KERNEL) +#include <uvm/uvm_extern.h> + +extern struct uvm_object *timekeep_object; +extern struct timekeep *timekeep; +#endif + +#endif /* _SYS_TIMEKEEP_H_ */