Let's try to finish work by stopping all syncs and falling back to what we have in cache after 7/8 of the timeout (timeout - 1/2 repo_timeout). This way we still have 1/8 of the time to finish the calculation and produce output.
Tested this diff by setting the deadline to fire after 60sec. -- :wq Claudio Index: main.c =================================================================== RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v retrieving revision 1.217 diff -u -p -r1.217 main.c --- main.c 2 Sep 2022 19:14:04 -0000 1.217 +++ main.c 2 Sep 2022 19:58:19 -0000 @@ -66,6 +66,7 @@ int noop; int filemode; int rrdpon = 1; int repo_timeout; +time_t deadline; struct skiplist skiplist = LIST_HEAD_INITIALIZER(skiplist); @@ -1044,6 +1045,9 @@ main(int argc, char *argv[]) */ alarm(timeout); signal(SIGALRM, suicide); + + /* give up a bit before the hard timeout and try to finish up */ + deadline = getmonotime() + timeout - repo_timeout / 2; } if (pledge("stdio rpath wpath cpath fattr sendfd unveil", NULL) == -1) Index: repo.c =================================================================== RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v retrieving revision 1.38 diff -u -p -r1.38 repo.c --- repo.c 2 Sep 2022 19:10:37 -0000 1.38 +++ repo.c 2 Sep 2022 19:15:41 -0000 @@ -41,6 +41,8 @@ extern struct stats stats; extern int noop; extern int rrdpon; extern int repo_timeout; +extern time_t deadline; +int nofetch; enum repo_state { REPO_LOADING = 0, @@ -288,7 +290,7 @@ repo_done(const void *vp, int ok) if (vp == rp->rsync) entityq_flush(&rp->queue, rp); if (vp == rp->rrdp) { - if (!ok) { + if (!ok && !nofetch) { /* try to fall back to rsync */ rp->rrdp = NULL; rp->rsync = rsync_get(rp->repouri, @@ -937,8 +939,8 @@ rrdp_finish(unsigned int id, int ok) stats.rrdp_repos++; rr->state = REPO_DONE; } else { - warnx("%s: load from network failed, fallback to rsync", - rr->notifyuri); + warnx("%s: load from network failed, fallback to %s", + rr->notifyuri, nofetch ? 
"cache" : "rsync"); stats.rrdp_fails++; rr->state = REPO_FAILED; /* clear the RRDP repo since it failed */ @@ -1044,7 +1046,6 @@ repo_lookup(int talid, const char *uri, { struct repo *rp; char *repouri; - int nofetch = 0; if ((repouri = rsync_base_uri(uri)) == NULL) errx(1, "bad caRepository URI: %s", uri); @@ -1223,8 +1224,26 @@ repo_check_timeout(int timeout) { struct repo *rp; time_t now; + int diff; now = getmonotime(); + + /* check against our runtime deadline first */ + if (deadline != 0) { + if (deadline <= now) { + warnx("deadline reached, giving up on repository sync"); + nofetch = 1; + /* clear deadline since nofetch is set */ + deadline = 0; + /* increase now enough so that all pending repos fail */ + now += repo_timeout; + } else { + diff = deadline - now; + diff *= 1000; + if (timeout == INFTIM || diff < timeout) + timeout = diff; + } + } /* Look up in repository table. (Lookup should actually fail here) */ SLIST_FOREACH(rp, &repos, entry) { if (repo_state(rp) == REPO_LOADING) { @@ -1233,7 +1252,7 @@ repo_check_timeout(int timeout) rp->repouri); repo_abort(rp); } else { - int diff = rp->alarm - now; + diff = rp->alarm - now; diff *= 1000; if (timeout == INFTIM || diff < timeout) timeout = diff;