Let's try to finish the work by stopping all syncs and falling back to what
we have in the cache after 7/8 of the timeout (timeout - 1/2 repo_timeout).
This way we still have 1/8 of the time left to finish the calculation and
produce output.

Tested this diff by setting the deadline to fire after 60 seconds.
-- 
:wq Claudio

Index: main.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.217
diff -u -p -r1.217 main.c
--- main.c      2 Sep 2022 19:14:04 -0000       1.217
+++ main.c      2 Sep 2022 19:58:19 -0000
@@ -66,6 +66,7 @@ int   noop;
 int    filemode;
 int    rrdpon = 1;
 int    repo_timeout;
+time_t deadline;
 
 struct skiplist skiplist = LIST_HEAD_INITIALIZER(skiplist);
 
@@ -1044,6 +1045,9 @@ main(int argc, char *argv[])
                 */
                alarm(timeout);
                signal(SIGALRM, suicide);
+
+               /* give up a bit before the hard timeout and try to finish up */
+               deadline = getmonotime() + timeout - repo_timeout / 2;
        }
 
        if (pledge("stdio rpath wpath cpath fattr sendfd unveil", NULL) == -1)
Index: repo.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v
retrieving revision 1.38
diff -u -p -r1.38 repo.c
--- repo.c      2 Sep 2022 19:10:37 -0000       1.38
+++ repo.c      2 Sep 2022 19:15:41 -0000
@@ -41,6 +41,8 @@ extern struct stats   stats;
 extern int             noop;
 extern int             rrdpon;
 extern int             repo_timeout;
+extern time_t          deadline;
+int                    nofetch;
 
 enum repo_state {
        REPO_LOADING = 0,
@@ -288,7 +290,7 @@ repo_done(const void *vp, int ok)
                if (vp == rp->rsync)
                        entityq_flush(&rp->queue, rp);
                if (vp == rp->rrdp) {
-                       if (!ok) {
+                       if (!ok && !nofetch) {
                                /* try to fall back to rsync */
                                rp->rrdp = NULL;
                                rp->rsync = rsync_get(rp->repouri,
@@ -937,8 +939,8 @@ rrdp_finish(unsigned int id, int ok)
                stats.rrdp_repos++;
                rr->state = REPO_DONE;
        } else {
-               warnx("%s: load from network failed, fallback to rsync",
-                   rr->notifyuri);
+               warnx("%s: load from network failed, fallback to %s",
+                   rr->notifyuri, nofetch ? "cache" : "rsync");
                stats.rrdp_fails++;
                rr->state = REPO_FAILED;
                /* clear the RRDP repo since it failed */
@@ -1044,7 +1046,6 @@ repo_lookup(int talid, const char *uri, 
 {
        struct repo     *rp;
        char            *repouri;
-       int              nofetch = 0;
 
        if ((repouri = rsync_base_uri(uri)) == NULL)
                errx(1, "bad caRepository URI: %s", uri);
@@ -1223,8 +1224,26 @@ repo_check_timeout(int timeout)
 {
        struct repo     *rp;
        time_t           now;
+       int              diff;
 
        now = getmonotime();
+
+       /* check against our runtime deadline first */
+       if (deadline != 0) {
+               if (deadline <= now) {
+                       warnx("deadline reached, giving up on repository sync");
+                       nofetch = 1;
+                       /* clear deadline since nofetch is set */
+                       deadline = 0;
+                       /* increase now enough so that all pending repos fail */
+                       now += repo_timeout;
+               } else {
+                       diff = deadline - now;
+                       diff *= 1000;
+                       if (timeout == INFTIM || diff < timeout)
+                               timeout = diff;
+               }
+       }
        /* Look up in repository table. (Lookup should actually fail here) */
        SLIST_FOREACH(rp, &repos, entry) {
                if (repo_state(rp) == REPO_LOADING) {
@@ -1233,7 +1252,7 @@ repo_check_timeout(int timeout)
                                    rp->repouri);
                                repo_abort(rp);
                        } else {
-                               int diff = rp->alarm - now;
+                               diff = rp->alarm - now;
                                diff *= 1000;
                                if (timeout == INFTIM || diff < timeout)
                                        timeout = diff;

Reply via email to