commit: f757e3d5aff8d4e0a49dd2ed9ef049641c7f1651
Author: Fabian Groffen <grobian <AT> gentoo <DOT> org>
AuthorDate: Wed Aug 27 18:42:48 2025 +0000
Commit: Fabian Groffen <grobian <AT> gentoo <DOT> org>
CommitDate: Thu Aug 28 07:27:31 2025 +0000
URL: https://gitweb.gentoo.org/proj/portage-utils.git/commit/?id=f757e3d5
q: re-order files in gtree cache
Separate cache from ebuilds, and when dealing with ebuilds, don't repeat
metadata.xml, Manifest and files/ for every ebuild. The latter only saves
a little (approx. 1MiB), but it is cleaner from the extraction point of
view: we can now more or less restore this part of the original tree.
Store cache entries first, per PF, such that a process only interested
in the cache (which is the case most of the time) can stop reading after
them. Similarly, the sorting of entries allows a reader to determine
early that a requested atom is not available in the gtree archive.
Signed-off-by: Fabian Groffen <grobian <AT> gentoo.org>
q.c | 205 ++++++++++++++++++++++++++++++++++++++++----------------------------
1 file changed, 120 insertions(+), 85 deletions(-)
diff --git a/q.c b/q.c
index 76f607d..5335b07 100644
--- a/q.c
+++ b/q.c
@@ -113,8 +113,10 @@ struct q_cache_ctx {
char *cbuf;
size_t cbufsiz;
size_t cbuflen;
+ char last_cat[_Q_PATH_MAX];
+ char last_pkg[_Q_PATH_MAX];
};
-static int q_build_cache_pkg_process_dir(struct q_cache_ctx *ctx,
+static int q_build_gtree_pkg_process_dir(struct q_cache_ctx *ctx,
char *path,
char *pbuf,
size_t pbufsiz,
@@ -149,7 +151,7 @@ static int q_build_cache_pkg_process_dir(struct q_cache_ctx
*ctx,
continue;
}
if (S_ISDIR(st.st_mode)) {
- q_build_cache_pkg_process_dir(ctx, path,
+ q_build_gtree_pkg_process_dir(ctx, path,
pbuf + len, pbufsiz - len, fd);
continue;
}
@@ -169,35 +171,28 @@ static int q_build_cache_pkg_process_dir(struct
q_cache_ctx *ctx,
close(fd);
scandir_free(flist, fcnt);
+
+ return 0;
}
-int q_build_cache_pkg(tree_pkg_ctx *pkg, void *priv)
+int q_build_gtree_cache_pkg(tree_pkg_ctx *pkg, void *priv)
{
struct q_cache_ctx *ctx = priv;
struct archive *a = ctx->archive;
struct archive_entry *entry;
- struct stat st;
depend_atom *atom = tree_get_atom(pkg, false);
char buf[_Q_PATH_MAX];
- char *p;
- size_t siz;
- size_t len;
char *qc;
size_t qclen;
/* construct the common prefix */
- len = snprintf(buf, sizeof(buf), "ebuilds/%s/%s/",
- atom->CATEGORY, atom->PF);
- p = buf + len;
- siz = sizeof(buf) - len;
+ snprintf(buf, sizeof(buf), "caches/%s/%s", atom->CATEGORY, atom->PF);
- /* - ebuilds/CAT/PF
- * + cache keys from md5-cache except _md5_, _eclasses_ and
- * repository (the latter is stored at the top level)
- * in addition to this the required eclass names are
- * stored in a new key called eclasses
- * all of this is stored as key-value file, because
- * storing it as individual keys takes much more storage
- * for no particular benefit */
+ /* keys from md5-cache except _md5_, _eclasses_ and repository (the
+ * latter is stored at the top level)
+ * in addition to this the required eclass names are stored in a new
+ * key called eclasses for easy retrieval/extraction purposes
+ * all of this is stored as key-value file, because storing it as
+ * individual keys takes much more storage for no particular benefit */
/* start over, reusing previous buf allocation */
ctx->cbuflen = 0;
@@ -278,7 +273,6 @@ int q_build_cache_pkg(tree_pkg_ctx *pkg, void *priv)
#undef q_cache_add_cache_entry_val
entry = archive_entry_new();
- snprintf(p, siz, "cache");
archive_entry_set_pathname(entry, buf);
archive_entry_set_size(entry, ctx->cbuflen);
archive_entry_set_mtime(entry, ctx->buildtime, 0);
@@ -288,14 +282,36 @@ int q_build_cache_pkg(tree_pkg_ctx *pkg, void *priv)
archive_write_data(a, ctx->cbuf, ctx->cbuflen);
archive_entry_free(entry);
+ return 0;
+}
+int q_build_gtree_ebuilds_pkg(tree_pkg_ctx *pkg, void *priv)
+{
+ struct q_cache_ctx *ctx = priv;
+ struct archive *a = ctx->archive;
+ struct archive_entry *entry;
+ struct stat st;
+ depend_atom *atom = tree_get_atom(pkg, false);
+ char buf[_Q_PATH_MAX];
+ char *p;
+ size_t siz;
+ size_t len;
+ char *qc;
+
+ /* construct the common prefix */
+ len = snprintf(buf, sizeof(buf), "ebuilds/%s/%s/",
+ atom->CATEGORY, atom->PN);
+ p = buf + len;
+ siz = sizeof(buf) - len;
+
/* + <PF>.ebuild (the file from the tree)
* + Manifest (the file from the tree, to verify distfiles)
* + files/ (the directory from the tree) */
if (pkg->cat_ctx->ctx->treetype == TREE_EBUILD) {
- char pth[_Q_PATH_MAX];
+ char pth[_Q_PATH_MAX * 2];
size_t flen;
int dfd;
int ffd;
+ bool newpkg = true;
/* we could technically pull the ebuild from the VDB, or maybe
* from the binpkg, but for what use? only an ebuild tree is
@@ -307,6 +323,80 @@ int q_build_cache_pkg(tree_pkg_ctx *pkg, void *priv)
dfd = open(pth, O_RDONLY);
if (dfd < 0)
return 1; /* how? */
+
+ if (strcmp(ctx->last_cat, atom->CATEGORY) != 0)
+ snprintf(ctx->last_cat, sizeof(ctx->last_cat),
+ "%s", atom->CATEGORY);
+ else if (strcmp(ctx->last_pkg, atom->PN) != 0)
+ snprintf(ctx->last_pkg, sizeof(ctx->last_pkg),
+ "%s", atom->PN);
+ else
+ newpkg = false;
+
+ if (newpkg) {
+ ffd = openat(dfd, "metadata.xml", O_RDONLY);
+ if (ffd >= 0) {
+ if (fstat(ffd, &st) == 0) {
+ entry = archive_entry_new();
+ snprintf(p, siz, "metadata.xml");
+ archive_entry_set_pathname(entry, buf);
+ archive_entry_set_size(entry,
st.st_size);
+ archive_entry_set_mtime(entry,
ctx->buildtime, 0);
+ archive_entry_set_filetype(entry,
AE_IFREG);
+ archive_entry_set_perm(entry, 0644);
+ archive_write_header(a, entry);
+ while ((flen = read(ffd, pth,
sizeof(pth))) > 0)
+ archive_write_data(a, pth,
flen);
+ archive_entry_free(entry);
+ }
+ close(ffd);
+ }
+ /* for Manifest file we perform a "grep" here on the
only
+ * relevant entries: DIST, this reduces the overall size
+ * of the tree considerably */
+ if (eat_file_at(dfd, "Manifest", &ctx->cbuf,
&ctx->cbufsiz)) {
+ bool start = true;
+ bool write = false;
+ char *wp;
+ for (qc = ctx->cbuf, wp = ctx->cbuf; *qc !=
'\0'; qc++) {
+ if (start && strncmp(qc, "DIST ", 5) ==
0)
+ write = true;
+ start = false;
+ if (write)
+ *wp++ = *qc;
+ if (*qc == '\r' || *qc == '\n') {
+ start = true;
+ write = false;
+ }
+ }
+ ctx->cbuflen = wp - ctx->cbuf;
+
+ if (ctx->cbuflen > 0) {
+ entry = archive_entry_new();
+ snprintf(p, siz, "Manifest");
+ archive_entry_set_pathname(entry, buf);
+ archive_entry_set_size(entry,
ctx->cbuflen);
+ archive_entry_set_mtime(entry,
ctx->buildtime, 0);
+ archive_entry_set_filetype(entry,
AE_IFREG);
+ archive_entry_set_perm(entry, 0644);
+ archive_write_header(a, entry);
+ archive_write_data(a, ctx->cbuf,
ctx->cbuflen);
+ archive_entry_free(entry);
+ }
+ }
+ /* process files, unfortunately this can be any number
of
+ * directories deep (remember eblitz?) so we'll have to
recurse
+ * for this one */
+ flen = snprintf(p, siz, "files");
+ ffd = openat(dfd, "files", O_RDONLY);
+ if (ffd >= 0) {
+ q_build_gtree_pkg_process_dir(ctx,
+
buf, p + flen, siz - flen,
+
ffd);
+ close(ffd);
+ }
+ }
+
snprintf(pth, sizeof(pth), "%s.ebuild", atom->PF);
ffd = openat(dfd, pth, O_RDONLY);
if (ffd >= 0) {
@@ -325,65 +415,7 @@ int q_build_cache_pkg(tree_pkg_ctx *pkg, void *priv)
}
close(ffd);
}
- ffd = openat(dfd, "metadata.xml", O_RDONLY);
- if (ffd >= 0) {
- if (fstat(ffd, &st) == 0) {
- entry = archive_entry_new();
- snprintf(p, siz, "metadata.xml");
- archive_entry_set_pathname(entry, buf);
- archive_entry_set_size(entry, st.st_size);
- archive_entry_set_mtime(entry, ctx->buildtime,
0);
- archive_entry_set_filetype(entry, AE_IFREG);
- archive_entry_set_perm(entry, 0644);
- archive_write_header(a, entry);
- while ((flen = read(ffd, pth, sizeof(pth))) > 0)
- archive_write_data(a, pth, flen);
- archive_entry_free(entry);
- }
- close(ffd);
- }
- /* for Manifest file we perform a "grep" here on the only
- * relevant entries: DIST, this reduces the overall size
- * of the tree considerably */
- if (eat_file_at(dfd, "Manifest", &ctx->cbuf, &ctx->cbufsiz)) {
- bool start = true;
- bool write = false;
- char *wp;
- for (qc = ctx->cbuf, wp = ctx->cbuf; *qc != '\0'; qc++)
{
- if (start && strncmp(qc, "DIST ", 5) == 0)
- write = true;
- start = false;
- if (write)
- *wp++ = *qc;
- if (*qc == '\r' || *qc == '\n') {
- start = true;
- write = false;
- }
- }
- ctx->cbuflen = wp - ctx->cbuf;
- if (ctx->cbuflen > 0) {
- entry = archive_entry_new();
- snprintf(p, siz, "Manifest");
- archive_entry_set_pathname(entry, buf);
- archive_entry_set_size(entry, ctx->cbuflen);
- archive_entry_set_mtime(entry, ctx->buildtime,
0);
- archive_entry_set_filetype(entry, AE_IFREG);
- archive_entry_set_perm(entry, 0644);
- archive_write_header(a, entry);
- archive_write_data(a, ctx->cbuf, ctx->cbuflen);
- archive_entry_free(entry);
- }
- }
- /* process files, unfortunately this can be any number of
- * directories deep (remember eblitz?) so we'll have to recurse
- * for this one */
- flen = snprintf(p, siz, "files");
- ffd = openat(dfd, "files", O_RDONLY);
- if (ffd >= 0) {
- q_build_cache_pkg_process_dir(ctx, buf, p + flen, siz -
flen, ffd);
- close(ffd);
- }
close(dfd);
}
@@ -714,9 +746,9 @@ int q_main(int argc, char **argv)
* - gtree-1 (mandatory, first file ident)
* - repo.tar{compr}
* - repository
- * - ebuilds/CAT/PF
- * + cache (extracted info from the ebuild)
- * + PF.ebuild (the file from the tree)
+ * - caches/CAT/PF (extracted info from the ebuild)
+ * - ebuilds/CAT/PN
+ * + PF.ebuild (the file from the tree) (repeated for each PF)
* + metadata.xml (the file from the tree)
* + Manifest (the file from the tree, to verify distfiles)
* + files/ (the directory from the tree)
@@ -839,14 +871,17 @@ int q_main(int argc, char **argv)
archive_entry_free(entry);
}
- /* add ebuilds */
- tree_foreach_pkg(t, q_build_cache_pkg, &qcctx, true,
NULL);
+ /* add cache and ebuilds */
+ tree_foreach_pkg(t, q_build_gtree_cache_pkg, &qcctx,
true, NULL);
+ qcctx.last_cat[0] = '\0';
+ qcctx.last_pkg[0] = '\0';
+ tree_foreach_pkg(t, q_build_gtree_ebuilds_pkg, &qcctx,
true, NULL);
/* add eclasses */
len = snprintf(buf, sizeof(buf), "eclasses");
dfd = openat(t->tree_fd, "eclass", O_RDONLY);
if (dfd >= 0) {
- q_build_cache_pkg_process_dir(&qcctx, buf,
+ q_build_gtree_pkg_process_dir(&qcctx, buf,
buf + len,
sizeof(buf) - len,
dfd);