On Mon, Feb 08, 2021 at 05:15:40PM +0100, Claudio Jeker wrote:
> Split the repository code into two parts:
>
> - fetch of the trust anchors (the certs referenced by TAL files)
> - fetch of the MFT files of a repository
>
> While the two things kind of look similar there are some differences.
>
> - TA files are loaded via rsync or https URI (only one file needs to be
> loaded)
> - MFT files need everything inside the repository to be loaded since they
> reference other files (.roa, .cer, .crl). These repositories are
> synced once with rsync and many MFTs may be part of a repo. Also these
> repositories can be synced via rsync or RRDP.
>
> To simplify these diverse options it is time to split the code up.
> Introduce a ta_lookup() along with repo_lookup(). Refactor the repo_lookup
> code into subfunctions repo_alloc() and repo_fetch() (both are also used
> by ta_lookup()). Use the caRepository URI to figure out the base URI.
> Simplify rsync_uri_parse() into rsync_base_uri() which clips off excess
> directories from the URI (else thousands of individual rsync calls would
> be made against the RIR's CA repos).
>
> The big change is that the layout of the cache directory is changed.
> The cache will now have two base directories:
> - ta/ (for all trust anchors)
> - rsync/ (for all other repositories)
>
My plan at the moment is that rpki-client will split the cache directory
into three parts: ta/, rsync/, and rrdp/. This is done to ensure that data
does not get mixed up. Once this is in, my next step is to support
https:// links in TAL files and fetch the trust anchor via https instead
of rsync. Later RRDP will follow.
--
:wq Claudio
Index: extern.h
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.42
diff -u -p -r1.42 extern.h
--- extern.h 8 Feb 2021 09:22:53 -0000 1.42
+++ extern.h 8 Feb 2021 13:44:22 -0000
@@ -392,9 +392,7 @@ void proc_parser(int) __attribute__((n
/* Rsync-specific. */
-int rsync_uri_parse(const char **, size_t *,
- const char **, size_t *, const char **, size_t *,
- enum rtype *, const char *);
+char *rsync_base_uri(const char *);
void proc_rsync(char *, char *, int) __attribute__((noreturn));
/* Logging (though really used for OpenSSL errors). */
Index: main.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.98
diff -u -p -r1.98 main.c
--- main.c 5 Feb 2021 12:26:52 -0000 1.98
+++ main.c 8 Feb 2021 13:50:20 -0000
@@ -78,11 +78,12 @@
* An rsync repository.
*/
struct repo {
- char *repo; /* repository rsync URI */
- char *local; /* local path name */
- char *notify; /* RRDB notify URI if available */
- size_t id; /* identifier (array index) */
- int loaded; /* whether loaded or not */
+ char *repouri; /* CA repository base URI */
+ char *local; /* local path name */
+ char *uris[2]; /* URIs to fetch from */
+ size_t id; /* identifier (array index) */
+ int uriidx; /* which URI is fetched */
+ int loaded; /* whether loaded or not */
};
size_t entity_queue;
@@ -284,33 +285,12 @@ entityq_add(struct entityq *q, char *fil
}
/*
- * Look up a repository, queueing it for discovery if not found.
+ * Allocate a new repository by extending the repotable.
*/
-static const struct repo *
-repo_lookup(const char *uri)
+static struct repo *
+repo_alloc(void)
{
- const char *host, *mod;
- size_t hostsz, modsz, i;
- char *local;
- struct repo *rp;
- struct ibuf *b;
-
- if (!rsync_uri_parse(&host, &hostsz,
- &mod, &modsz, NULL, NULL, NULL, uri))
- errx(1, "%s: malformed", uri);
-
- if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host,
- (int)modsz, mod) == -1)
- err(1, "asprintf");
-
- /* Look up in repository table. */
-
- for (i = 0; i < rt.reposz; i++) {
- if (strcmp(rt.repos[i].local, local))
- continue;
- free(local);
- return &rt.repos[i];
- }
+ struct repo *rp;
rt.repos = reallocarray(rt.repos,
rt.reposz + 1, sizeof(struct repo));
@@ -320,28 +300,99 @@ repo_lookup(const char *uri)
rp = &rt.repos[rt.reposz++];
memset(rp, 0, sizeof(struct repo));
rp->id = rt.reposz - 1;
- rp->local = local;
- if ((rp->repo = strndup(uri, mod + modsz - uri)) == NULL)
- err(1, "strdup");
+ return rp;
+}
- if (!noop) {
- if (asprintf(&local, "%s", rp->local) == -1)
- err(1, "asprintf");
- logx("%s: pulling from network", local);
- if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
- err(1, NULL);
- io_simple_buffer(b, &rp->id, sizeof(rp->id));
- io_str_buffer(b, local);
- io_str_buffer(b, rp->repo);
- ibuf_close(&rsyncq, b);
- free(local);
- } else {
+static void
+repo_fetch(struct repo *rp)
+{
+ struct ibuf *b;
+
+ if (noop) {
rp->loaded = 1;
logx("%s: using cache", rp->local);
stats.repos++;
/* there is nothing in the queue so no need to flush */
+ return;
+ }
+
+ logx("%s: pulling from network", rp->local);
+ if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
+ err(1, NULL);
+ io_simple_buffer(b, &rp->id, sizeof(rp->id));
+ io_str_buffer(b, rp->local);
+ io_str_buffer(b, rp->uris[0]);
+ ibuf_close(&rsyncq, b);
+}
+
+/*
+ * Look up a trust anchor, queueing it for download if not found.
+ */
+static const struct repo *
+ta_lookup(const struct tal *tal)
+{
+ struct repo *rp;
+ char *local;
+ size_t i, j;
+
+ if (asprintf(&local, "ta/%s", tal->descr) == -1)
+ err(1, "asprinf");
+
+ /* Look up in repository table. (Lookup should actually fail here) */
+ for (i = 0; i < rt.reposz; i++) {
+ if (rt.repos[i].repouri != NULL ||
+ strcmp(rt.repos[i].local, local))
+ continue;
+ free(local);
+ return &rt.repos[i];
}
+
+ rp = repo_alloc();
+ rp->local = local;
+ for (i = 0, j = 0; i < tal->urisz && j < 2; i++) {
+ if (strncasecmp(tal->uri[i], "rsync://", 8) != 0)
+ continue; /* ignore non rsync URI for now */
+ rp->uris[j++] = tal->uri[i];
+ }
+ if (j == 0)
+ errx(1, "TAL file has no rsync:// URI");
+
+ repo_fetch(rp);
+ return rp;
+}
+
+/*
+ * Look up a repository, queueing it for discovery if not found.
+ */
+static const struct repo *
+repo_lookup(const char *uri)
+{
+ char *local, *repo;
+ struct repo *rp;
+ size_t i;
+
+ if ((repo = rsync_base_uri(uri)) == NULL)
+ return NULL;
+
+ /* Look up in repository table. */
+ for (i = 0; i < rt.reposz; i++) {
+ if (rt.repos[i].repouri == NULL ||
+ strcmp(rt.repos[i].repouri, repo))
+ continue;
+ free(repo);
+ return &rt.repos[i];
+ }
+
+ rp = repo_alloc();
+ rp->repouri = repo;
+ local = strchr(repo, ':') + strlen("://");
+ if (asprintf(&rp->local, "rsync/%s", local) == -1)
+ err(1, "asprintf");
+ if ((rp->uris[0] = strdup(repo)) == NULL)
+ err(1, "strdup");
+
+ repo_fetch(rp);
return rp;
}
@@ -353,7 +404,10 @@ repo_filename(const struct repo *repo, c
{
char *nfile;
- uri += strlen(repo->repo) + 1;
+ if (strstr(uri, repo->repouri) != uri)
+ errx(1, "%s: URI outside of repository", uri);
+ uri += strlen(repo->repouri) + 1; /* skip base and '/' */
+
if (asprintf(&nfile, "%s/%s", repo->local, uri) == -1)
err(1, "asprintf");
return nfile;
@@ -484,22 +538,17 @@ queue_add_from_tal(struct entityq *q, co
{
char *nfile;
const struct repo *repo;
- const char *uri = NULL;
- size_t i;
+ const char *uri;
assert(tal->urisz);
- for (i = 0; i < tal->urisz; i++) {
- uri = tal->uri[i];
- if (strncasecmp(uri, "rsync://", 8) == 0)
- break;
- }
- if (uri == NULL)
- errx(1, "TAL file has no rsync:// URI");
-
/* Look up the repository. */
- repo = repo_lookup(uri);
- nfile = repo_filename(repo, uri);
+ repo = ta_lookup(tal);
+
+ uri = strrchr(repo->uris[0], '/');
+ assert(uri);
+ if (asprintf(&nfile, "%s/%s", repo->local, uri + 1) == -1)
+ err(1, "asprintf");
entityq_add(q, nfile, RTYPE_CER, repo, tal->pkey,
tal->pkeysz, tal->descr);
@@ -515,6 +564,9 @@ queue_add_from_cert(struct entityq *q, c
char *nfile;
repo = repo_lookup(cert->mft);
+ if (repo == NULL) /* bad repository URI */
+ return;
+
nfile = repo_filename(repo, cert->mft);
entityq_add(q, nfile, RTYPE_MFT, repo, NULL, 0, NULL);
@@ -1081,8 +1133,10 @@ main(int argc, char *argv[])
/* Memory cleanup. */
for (i = 0; i < rt.reposz; i++) {
+ free(rt.repos[i].repouri);
free(rt.repos[i].local);
- free(rt.repos[i].repo);
+ free(rt.repos[i].uris[0]);
+ free(rt.repos[i].uris[1]);
}
free(rt.repos);
Index: rsync.c
===================================================================
RCS file: /cvs/src/usr.sbin/rpki-client/rsync.c,v
retrieving revision 1.16
diff -u -p -r1.16 rsync.c
--- rsync.c 3 Feb 2021 09:29:22 -0000 1.16
+++ rsync.c 8 Feb 2021 13:43:44 -0000
@@ -45,110 +45,50 @@ struct rsyncproc {
};
/*
- * Conforms to RFC 5781.
- * Note that "Source" is broken down into the module, path, and also
- * file type relevant to RPKI.
- * Any of the pointers (except "uri") may be NULL.
- * Returns zero on failure, non-zero on success.
+ * Return the base of a rsync URI (rsync://hostname/module). The
+ * caRepository URIs provided by the RIR CAs point deeper than they should
+ * which would result in many rsync calls for almost every subdirectory.
+ * This is inefficient so instead crop the URI to a common base.
+ * The returned string needs to be freed by the caller.
*/
-int
-rsync_uri_parse(const char **hostp, size_t *hostsz,
- const char **modulep, size_t *modulesz,
- const char **pathp, size_t *pathsz,
- enum rtype *rtypep, const char *uri)
+char *
+rsync_base_uri(const char *uri)
{
- const char *host, *module, *path;
- size_t sz;
-
- /* Initialise all output values to NULL or 0. */
-
- if (hostsz != NULL)
- *hostsz = 0;
- if (modulesz != NULL)
- *modulesz = 0;
- if (pathsz != NULL)
- *pathsz = 0;
- if (hostp != NULL)
- *hostp = 0;
- if (modulep != NULL)
- *modulep = 0;
- if (pathp != NULL)
- *pathp = 0;
- if (rtypep != NULL)
- *rtypep = RTYPE_EOF;
+ const char *host, *module, *rest;
/* Case-insensitive rsync URI. */
-
if (strncasecmp(uri, "rsync://", 8)) {
warnx("%s: not using rsync schema", uri);
- return 0;
+ return NULL;
}
/* Parse the non-zero-length hostname. */
-
host = uri + 8;
if ((module = strchr(host, '/')) == NULL) {
warnx("%s: missing rsync module", uri);
- return 0;
+ return NULL;
} else if (module == host) {
warnx("%s: zero-length rsync host", uri);
- return 0;
+ return NULL;
}
- if (hostp != NULL)
- *hostp = host;
- if (hostsz != NULL)
- *hostsz = module - host;
-
/* The non-zero-length module follows the hostname. */
-
- if (module[1] == '\0') {
+ module++;
+ if (*module == '\0') {
warnx("%s: zero-length rsync module", uri);
- return 0;
+ return NULL;
}
- module++;
-
/* The path component is optional. */
-
- if ((path = strchr(module, '/')) == NULL) {
- assert(*module != '\0');
- if (modulep != NULL)
- *modulep = module;
- if (modulesz != NULL)
- *modulesz = strlen(module);
- return 1;
- } else if (path == module) {
+ if ((rest = strchr(module, '/')) == NULL) {
+ return strdup(uri);
+ } else if (rest == module) {
warnx("%s: zero-length module", uri);
- return 0;
- }
-
- if (modulep != NULL)
- *modulep = module;
- if (modulesz != NULL)
- *modulesz = path - module;
-
- path++;
- sz = strlen(path);
-
- if (pathp != NULL)
- *pathp = path;
- if (pathsz != NULL)
- *pathsz = sz;
-
- if (rtypep != NULL && sz > 4) {
- if (strcasecmp(path + sz - 4, ".roa") == 0)
- *rtypep = RTYPE_ROA;
- else if (strcasecmp(path + sz - 4, ".mft") == 0)
- *rtypep = RTYPE_MFT;
- else if (strcasecmp(path + sz - 4, ".cer") == 0)
- *rtypep = RTYPE_CER;
- else if (strcasecmp(path + sz - 4, ".crl") == 0)
- *rtypep = RTYPE_CRL;
+ return NULL;
}
- return 1;
+ return strndup(uri, rest - uri);
}
static void