Claudio Jeker(cje...@diehard.n-r-g.com) on 2022.09.02 22:02:33 +0200: > Let's try to finish work by stopping all syncs and falling back to what we > have in cache after 7/8 of the timeout (timeout - 1/2 repo_timeout). > This way we still have 1/8 of the time to finish the calculation and produce > output. > > Tested this diff by setting the deadline to fire after 60 seconds.
ok > -- > :wq Claudio > > Index: main.c > =================================================================== > RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v > retrieving revision 1.217 > diff -u -p -r1.217 main.c > --- main.c 2 Sep 2022 19:14:04 -0000 1.217 > +++ main.c 2 Sep 2022 19:58:19 -0000 > @@ -66,6 +66,7 @@ int noop; > int filemode; > int rrdpon = 1; > int repo_timeout; > +time_t deadline; > > struct skiplist skiplist = LIST_HEAD_INITIALIZER(skiplist); > > @@ -1044,6 +1045,9 @@ main(int argc, char *argv[]) > */ > alarm(timeout); > signal(SIGALRM, suicide); > + > + /* give up a bit before the hard timeout and try to finish up */ > + deadline = getmonotime() + timeout - repo_timeout / 2; > } > > if (pledge("stdio rpath wpath cpath fattr sendfd unveil", NULL) == -1) > Index: repo.c > =================================================================== > RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v > retrieving revision 1.38 > diff -u -p -r1.38 repo.c > --- repo.c 2 Sep 2022 19:10:37 -0000 1.38 > +++ repo.c 2 Sep 2022 19:15:41 -0000 > @@ -41,6 +41,8 @@ extern struct stats stats; > extern int noop; > extern int rrdpon; > extern int repo_timeout; > +extern time_t deadline; > +int nofetch; > > enum repo_state { > REPO_LOADING = 0, > @@ -288,7 +290,7 @@ repo_done(const void *vp, int ok) > if (vp == rp->rsync) > entityq_flush(&rp->queue, rp); > if (vp == rp->rrdp) { > - if (!ok) { > + if (!ok && !nofetch) { > /* try to fall back to rsync */ > rp->rrdp = NULL; > rp->rsync = rsync_get(rp->repouri, > @@ -937,8 +939,8 @@ rrdp_finish(unsigned int id, int ok) > stats.rrdp_repos++; > rr->state = REPO_DONE; > } else { > - warnx("%s: load from network failed, fallback to rsync", > - rr->notifyuri); > + warnx("%s: load from network failed, fallback to %s", > + rr->notifyuri, nofetch ? 
"cache" : "rsync"); > stats.rrdp_fails++; > rr->state = REPO_FAILED; > /* clear the RRDP repo since it failed */ > @@ -1044,7 +1046,6 @@ repo_lookup(int talid, const char *uri, > { > struct repo *rp; > char *repouri; > - int nofetch = 0; > > if ((repouri = rsync_base_uri(uri)) == NULL) > errx(1, "bad caRepository URI: %s", uri); > @@ -1223,8 +1224,26 @@ repo_check_timeout(int timeout) > { > struct repo *rp; > time_t now; > + int diff; > > now = getmonotime(); > + > + /* check against our runtime deadline first */ > + if (deadline != 0) { > + if (deadline <= now) { > + warnx("deadline reached, giving up on repository sync"); > + nofetch = 1; > + /* clear deadline since nofetch is set */ > + deadline = 0; > + /* increase now enough so that all pending repos fail */ > + now += repo_timeout; > + } else { > + diff = deadline - now; > + diff *= 1000; > + if (timeout == INFTIM || diff < timeout) > + timeout = diff; > + } > + } > /* Look up in repository table. (Lookup should actually fail here) */ > SLIST_FOREACH(rp, &repos, entry) { > if (repo_state(rp) == REPO_LOADING) { > @@ -1233,7 +1252,7 @@ repo_check_timeout(int timeout) > rp->repouri); > repo_abort(rp); > } else { > - int diff = rp->alarm - now; > + diff = rp->alarm - now; > diff *= 1000; > if (timeout == INFTIM || diff < timeout) > timeout = diff; >