commit:     5f99849a931bf1b876608f747ec28e86e8807d21
Author:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
AuthorDate: Fri Jan  2 19:24:33 2026 +0000
Commit:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
CommitDate: Fri Jan  2 19:52:21 2026 +0000
URL:        https://gitweb.gentoo.org/proj/portage-utils.git/commit/?id=5f99849a

libq/tree: build cache more generically and reliably

- build cache for Packages and gtree trees in order to sort it
- build cache for binpkgs tree due to mix and match causing order blibs

Signed-off-by: Fabian Groffen <grobian <AT> gentoo.org>

 libq/tree.c | 336 +++++++++++++++++++++++++++++++++++-------------------------
 libq/tree.h |   6 +-
 2 files changed, 201 insertions(+), 141 deletions(-)

diff --git a/libq/tree.c b/libq/tree.c
index 6f5492a..f6772bc 100644
--- a/libq/tree.c
+++ b/libq/tree.c
@@ -255,7 +255,10 @@ tree_close(tree_ctx *ctx)
                ctx->cache.categories = NULL;  /* must happen before close_cat 
*/
 
                array_for_each(t, n, cat) {
+                       int i;
                        /* ensure we cleanup all pkgs */
+                       for (i = 0; i < cat->pkg_cnt; i++)
+                               cat->pkg_ctxs[i]->cached = false;
                        cat->pkg_cur = 0;
                        tree_close_cat(cat);
                }
@@ -454,7 +457,8 @@ void
 tree_close_cat(tree_cat_ctx *cat_ctx)
 {
        if (cat_ctx->ctx->cache.categories != NULL &&
-                       contains_set(cat_ctx->name, 
cat_ctx->ctx->cache.categories))
+               (cat_ctx->ctx->cache.all_categories ||
+                contains_set(cat_ctx->name, cat_ctx->ctx->cache.categories)))
                return;
 
        /* cleanup unreturned pkgs when sorted (or cache in use) */
@@ -1372,8 +1376,8 @@ tree_pkg_read(tree_pkg_ctx *pkg_ctx)
                                        size_t  flen;
 
                                        if (hash_multiple_file_fd(pkg_ctx->fd,
-                                                                               
          srcmd5, NULL, NULL, NULL,
-                                                                               
          NULL, &flen, HASH_MD5) == 0)
+                                                                               
          srcmd5, NULL, NULL, NULL,
+                                                                               
          NULL, &flen, HASH_MD5) == 0)
                                                pkg_ctx->fd = -1;
 
                                        mdmd5 = tree_pkg_meta_get(spkg, _md5_);
@@ -1659,6 +1663,9 @@ tree_close_metadata(tree_metadata_xml *meta_ctx)
 void
 tree_close_pkg(tree_pkg_ctx *pkg_ctx)
 {
+       if (pkg_ctx->cached)
+               return;
+
        if (pkg_ctx->fd >= 0)
                close(pkg_ctx->fd);
        if (pkg_ctx->atom != NULL)
@@ -1675,22 +1682,85 @@ tree_close_pkg(tree_pkg_ctx *pkg_ctx)
        free(pkg_ctx);
 }
 
+static int
+tree_foreach_cache_populate_cb
+(
+       tree_pkg_ctx *ctx,
+       void         *priv
+)
+{
+       tree_cat_ctx  *cat_ctx;
+       tree_pkg_ctx  *pkg;
+       set           *cache   = priv;
+       tree_ctx      *tctx    = ctx->cat_ctx->ctx;
+       depend_atom   *atom    = tree_get_atom(ctx, true);
+       tree_pkg_meta *meta    = tree_pkg_read(ctx);
+
+       cat_ctx = get_set(atom->CATEGORY, cache);
+       if (cat_ctx == NULL) {
+               cat_ctx = tree_open_cat(tctx, ".");
+               if (cache != NULL)  /* for static code analysers */
+                       add_set_value(atom->CATEGORY, cat_ctx, NULL, cache);
+               /* get a pointer from the set */
+               cat_ctx->name = contains_set(atom->CATEGORY, cache);
+       }
+
+       pkg = xcalloc(1, sizeof(*pkg));
+
+       /* intuitively this would feel like it could use a set, but since
+        * we're going to sort it, we need it as array anyway, so this is
+        * better, especially given that the tree should not be able to
+        * produce duplicates */
+       cat_ctx->pkg_cnt++;
+       if (cat_ctx->pkg_cnt > cat_ctx->pkg_siz) {
+               cat_ctx->pkg_siz  = ((cat_ctx->pkg_cnt / 16) + 1) * 16;
+               cat_ctx->pkg_ctxs =
+                       xrealloc(cat_ctx->pkg_ctxs,
+                                        sizeof(*cat_ctx->pkg_ctxs) * 
cat_ctx->pkg_siz);
+       }
+       cat_ctx->pkg_ctxs[cat_ctx->pkg_cnt - 1] = pkg;
+       pkg->cached  = true;
+       pkg->cat_ctx = cat_ctx;
+       pkg->atom    = atom_clone(atom);
+       pkg->name    = xstrdup(pkg->atom->PF);
+       pkg->repo    = tctx->repo != NULL ? xstrdup(tctx->repo) : NULL;
+       if (meta != NULL) {
+               pkg->fd = -2;  /* don't try to read, we fill it in here */
+               if (tctx->treetype == TREE_PACKAGES ||
+                       tctx->treetype == TREE_METADATA_GTREE)
+               {
+                       /* need to copy, source is based on temp space in 
foreach */
+                       pkg->meta = tree_clone_meta(meta);
+               } else if (tctx->treetype == TREE_BINPKGS) {
+                       /* BINPKG case, this one is read/allocated separately 
from
+                        * xpak archive, so can just take it over */
+                       pkg->meta = meta;
+                       ctx->meta = NULL;  /* avoid double free */
+               }
+               pkg->binpkg_isgpkg = ctx->binpkg_isgpkg;
+       } else {
+               pkg->meta = NULL;
+       }
+
+       return 0;
+}
+
 static int
 tree_foreach_packages(tree_ctx *ctx, tree_pkg_cb callback, void *priv)
 {
-       char *p;
-       char *q;
-       char *c;
-       char pkgname[_Q_PATH_MAX];
-       size_t len;
-       int ret = 0;
-       const depend_atom *query = ctx->query_atom;
+       char              *p;
+       char              *q;
+       char              *c;
+       char               pkgname[_Q_PATH_MAX];
+       size_t             len;
+       int                ret      = 0;
+       const depend_atom *query    = ctx->query_atom;
 
        /* reused for every entry */
-       tree_cat_ctx *cat = NULL;
-       tree_pkg_ctx pkg;
-       tree_pkg_meta meta;
-       depend_atom *atom = NULL;
+       tree_cat_ctx      *cat      = NULL;
+       tree_pkg_ctx       pkg;
+       tree_pkg_meta      meta;
+       depend_atom       *atom     = NULL;
 
        /* re-read the contents, this is necessary to make it possible to
         * call this function multiple times */
@@ -1887,7 +1957,6 @@ tree_foreach_packages(tree_ctx *ctx, tree_pkg_cb 
callback, void *priv)
 
        /* ensure we don't free a garbage pointer */
        ctx->repo = NULL;
-       ctx->do_sort = false;
        ctx->cache.store[0] = '\0';
 
        return ret;
@@ -2103,9 +2172,6 @@ tree_foreach_gtree(tree_ctx *ctx, tree_pkg_cb callback, 
void *priv)
                }
        }
 
-       /* ensure we don't free double */
-       ctx->repo = NULL;
-       ctx->do_sort = false;
        if (ctx->cache.storesize > 0)
                ctx->cache.store[0] = '\0';
        free(cat);
@@ -2115,52 +2181,133 @@ tree_foreach_gtree(tree_ctx *ctx, tree_pkg_cb 
callback, void *priv)
 #endif
 
 int
-tree_foreach_pkg(tree_ctx *ctx, tree_pkg_cb callback, void *priv,
-               bool sort, const depend_atom *query)
+tree_foreach_pkg
+(
+       tree_ctx          *ctx,
+       tree_pkg_cb        callback,
+       void              *priv,
+       bool               sort,
+       const depend_atom *query
+)
 {
        tree_cat_ctx *cat_ctx;
        tree_pkg_ctx *pkg_ctx;
-       int ret;
+       int           ret;
+       tree_pkg_cb  *origcb   = callback;
+       void         *origpriv = priv;
+       bool          postsort = false;
+       bool          traverse = true;
 
        if (ctx == NULL)
                return EXIT_FAILURE;
 
-       ctx->do_sort = sort;
-       ctx->query_atom = query;
+       if (ctx->cache.all_categories) {
+               /* always exploit the cache if it exists (next_cat doesn't
+                * consider it if sorting isn't requested) */
+               sort = true;
+       } else {
+               /* perform sorting post retrieval by caching first:
+                * - binpkgs can be a combination of files and directories,
+                *   sorting must happen on the total result to be correct
+                * - packages is an external file that we need to read serially
+                *   and we can only sort on top of a cache
+                * - gtree should be fine, but since we can't assume it is
+                *   (others may start producing them), it's like packages
+                *   above, something we read serially not necessarily in the
+                *   correct order */
+               if (sort &&
+                       (ctx->treetype == TREE_BINPKGS ||
+                        ctx->treetype == TREE_PACKAGES ||
+                        ctx->treetype == TREE_METADATA_GTREE))
+               {
+                       callback = tree_foreach_cache_populate_cb;
+                       priv     = (void *)create_set();
+                       postsort = true;
+               }
 
-       /* handle Packages (binpkgs index) file separately */
-       if (ctx->treetype == TREE_PACKAGES)
-               return tree_foreach_packages(ctx, callback, priv);
+               /* handle Packages (binpkgs index) file separately */
+               if (ctx->treetype == TREE_PACKAGES) {
+                       traverse = false;
+                       ret      = tree_foreach_packages(ctx, callback, priv);
+               }
 
 #ifdef ENABLE_GTREE
-       /* similar a gtree cache can be read sequentially in one go */
-       if (ctx->treetype == TREE_METADATA_GTREE)
-               return tree_foreach_gtree(ctx, callback, priv);
-       if (ctx->treetype == TREE_EBUILD &&
-               ctx->subtree != NULL &&
-               ctx->subtree->treetype == TREE_METADATA_GTREE)
-               return tree_foreach_gtree(ctx->subtree, callback, priv);
+               /* similar a gtree cache can be read sequentially in one go */
+               if (ctx->treetype == TREE_METADATA_GTREE) {
+                       traverse = false;
+                       ret      = tree_foreach_gtree(ctx, callback, priv);
+               }
+               if (ctx->treetype == TREE_EBUILD &&
+                       ctx->subtree != NULL &&
+                       ctx->subtree->treetype == TREE_METADATA_GTREE)
+               {
+                       traverse = false;
+                       ret      = tree_foreach_gtree(ctx->subtree, callback, 
priv);
+               }
 #endif
+       }
 
        ret = 0;
-       while ((cat_ctx = tree_next_cat(ctx))) {
-               while ((pkg_ctx = tree_next_pkg(cat_ctx))) {
-                       ret |= callback(pkg_ctx, priv);
-                       tree_close_pkg(pkg_ctx);
+       if (traverse)
+       {
+               ctx->do_sort    = postsort ? false : sort;
+               ctx->query_atom = query;
+
+               while ((cat_ctx = tree_next_cat(ctx))) {
+                       if (callback != NULL) {
+                               while ((pkg_ctx = tree_next_pkg(cat_ctx))) {
+                                       ret |= callback(pkg_ctx, priv);
+                                       tree_close_pkg(pkg_ctx);
+                               }
+                       }
+                       tree_close_cat(cat_ctx);
+               }
+
+               /* allow foreach to be called again on the same open tree */
+               if (ctx->do_sort &&
+                       ctx->cat_de != NULL)
+               {
+                       scandir_free(ctx->cat_de, ctx->cat_cnt);
+               } else if (ctx->dir != NULL) {
+                       rewinddir(ctx->dir);
                }
-               tree_close_cat(cat_ctx);
        }
 
-       /* allow foreach to be called again on the same open tree */
-       if (ctx->do_sort)
-               scandir_free(ctx->cat_de, ctx->cat_cnt);
-       else
-               rewinddir(ctx->dir);
-       ctx->cat_de = NULL;
+       /* reset states */
+       ctx->cat_de  = NULL;
        ctx->cat_cur = 0;
        ctx->cat_cnt = 0;
+       ctx->dir     = NULL;
        ctx->do_sort = false;
 
+       if (postsort) {
+               DECLARE_ARRAY(cats);
+               size_t n;
+
+               /* should never happen, but perhaps a tree implementation
+                * populated something, then don't leak it */
+               if (ctx->cache.categories != NULL)
+                       free_set(ctx->cache.categories);
+               ctx->cache.categories     = priv;
+               ctx->cache.all_categories = true;
+
+               /* loop through all categories, and sort the pkgs */
+               values_set(ctx->cache.categories, cats);
+               array_for_each(cats, n, cat_ctx) {
+                       if (cat_ctx->pkg_cnt > 1) {
+                               qsort(cat_ctx->pkg_ctxs, cat_ctx->pkg_cnt,
+                                         sizeof(*cat_ctx->pkg_ctxs), 
tree_pkg_compar);
+                       }
+               }
+               xarrayfree_int(cats);
+
+               /* do the final run this call was supposed to be for using the
+                * (sorted) cache, the callback can be empty for tree_match_atom
+                * when it wants to build a cache first */
+               if (origcb != NULL)
+                       ret = tree_foreach_pkg_fast(ctx, origcb, origpriv, 
ctx->query_atom);
+       }
+
        return ret;
 }
 
@@ -2274,57 +2421,6 @@ tree_get_atoms(tree_ctx *ctx, bool fullcpv, set *satoms)
        return state.cpf;
 }
 
-static int
-tree_match_atom_cache_populate_cb(tree_pkg_ctx *ctx, void *priv)
-{
-       tree_cat_ctx  *cat_ctx;
-       tree_pkg_ctx  *pkg;
-       set           *cache   = priv;
-       tree_ctx      *tctx    = ctx->cat_ctx->ctx;
-       depend_atom   *atom    = tree_get_atom(ctx, true);
-       tree_pkg_meta *meta    = tree_pkg_read(ctx);
-
-       (void)priv;
-
-       cat_ctx = get_set(atom->CATEGORY, cache);
-       if (cat_ctx == NULL) {
-               cat_ctx = tree_open_cat(tctx, ".");
-               if (cache != NULL)  /* for static code analysers */
-                       add_set_value(atom->CATEGORY, cat_ctx, NULL, cache);
-               /* get a pointer from the set */
-               cat_ctx->name = contains_set(atom->CATEGORY, cache);
-       }
-
-       pkg = xcalloc(1, sizeof(*pkg));
-
-       /* FIXME: this really could use a set */
-       cat_ctx->pkg_cnt++;
-       cat_ctx->pkg_ctxs = xrealloc(cat_ctx->pkg_ctxs,
-                       sizeof(*cat_ctx->pkg_ctxs) * cat_ctx->pkg_cnt);
-       cat_ctx->pkg_ctxs[cat_ctx->pkg_cnt - 1] = pkg;
-       pkg->cat_ctx = cat_ctx;
-       pkg->atom = atom_clone(atom);
-       pkg->name = xstrdup(pkg->atom->PF);
-       pkg->repo = tctx->repo != NULL ? xstrdup(tctx->repo) : NULL;
-       if (meta != NULL) {
-               pkg->fd = -2;  /* don't try to read, we fill it in here */
-               if (tctx->treetype == TREE_PACKAGES) {
-                       /* need to copy, source is based on temp space in 
foreach */
-                       pkg->meta = tree_clone_meta(meta);
-               } else {
-                       /* BINPKG case, this one is read/allocated separately 
from
-                        * xpak archive, so can just take it over */
-                       pkg->meta = meta;
-                       ctx->meta = NULL;  /* avoid double free */
-               }
-               pkg->binpkg_isgpkg = ctx->binpkg_isgpkg;
-       } else {
-               pkg->meta = NULL;
-       }
-
-       return 0;
-}
-
 static tree_match_ctx *
 tree_match_search_cat_int(
                tree_cat_ctx      *cat_ctx,
@@ -2377,7 +2473,7 @@ tree_match_search_cat_int(
                                                 (cat_ctx->ctx->treetype == 
TREE_BINPKGS ||
                                                  cat_ctx->ctx->treetype == 
TREE_PACKAGES) ?
                                                 (pkg_ctx->binpkg_isgpkg   ? 
".gpkg.tar" : ".xpak")  :
-                                                                               
           "");
+                                                                               
                                                   "");
                        } else {
                                snprintf(n->path, sizeof(n->path), "%s/%s/%s%s",
                                                 (char *)cat_ctx->ctx->path,
@@ -2386,7 +2482,7 @@ tree_match_search_cat_int(
                                                 (cat_ctx->ctx->treetype == 
TREE_BINPKGS ||
                                                  cat_ctx->ctx->treetype == 
TREE_PACKAGES) ?
                                                 (pkg_ctx->binpkg_isgpkg   ? 
".gpkg.tar" : ".tbz2")  :
-                                                                               
           "");
+                                                                               
                                                   "");
                        }
                        if (flags & TREE_MATCH_METADATA)
                                n->meta = tree_pkg_read(pkg_ctx);
@@ -2411,53 +2507,15 @@ tree_match_atom(tree_ctx *ctx, const depend_atom 
*query, int flags)
        tree_cat_ctx *cat_ctx;
        tree_match_ctx *ret = NULL;
 
-       ctx->do_sort = true;     /* sort uses buffer, which cache relies on */
-       ctx->query_atom = NULL;  /* ensure the cache contains ALL pkgs */
-
-       if ((ctx->treetype == TREE_PACKAGES ||
-                ctx->treetype == TREE_BINPKGS) &&
-               ctx->cache.categories == NULL)
-       {
-               set *cache;
-               DECLARE_ARRAY(cats);
-               size_t n;
-
-               /* Reading these formats requires us to traverse the tree for
-                * each operation, so we better cache the entire thing in one
-                * go.  Packages in addition may not store the atoms sorted
-                * (it's a file with content after all) and since we rely on
-                * this for latest and first match, it is important that we sort
-                * the contents so we're sure */
-
-               cache = ctx->cache.categories;
-               if (ctx->cache.categories != NULL)
-                       ctx->cache.categories = NULL;
-               else
-                       cache = create_set();
-
-               tree_foreach_pkg(ctx,
-                               tree_match_atom_cache_populate_cb, cache, true, 
NULL);
-
-               ctx->do_sort = true;  /* turn it back on after tree_foreach_pkg 
*/
-               ctx->cache.all_categories = true;
-               ctx->cache.categories = cache;
-
-               /* loop through all categories, and sort the pkgs */
-               values_set(cache, cats);
-               array_for_each(cats, n, cat_ctx) {
-                       if (cat_ctx->pkg_cnt > 1) {
-                               qsort(cat_ctx->pkg_ctxs, cat_ctx->pkg_cnt,
-                                               sizeof(*cat_ctx->pkg_ctxs), 
tree_pkg_compar);
-                       }
-               }
-               xarrayfree_int(cats);
-       }
+       ctx->query_atom = NULL;  /* if caching, ensure it contains ALL pkgs */
 
        /* activate cache for future lookups, tree_match_atom relies on
         * cache behaviour from tree, which means all categories and
         * packages remain in memory until tree_close is being called */
        if (ctx->cache.categories == NULL)
-               ctx->cache.categories = create_set();
+               tree_foreach_pkg_sorted(ctx, NULL, NULL, NULL);  /* force cache 
*/
+
+       ctx->do_sort = true;     /* often forces/enables cache usage */
 
        if (query->CATEGORY == NULL) {
                tree_match_ctx *tret;

diff --git a/libq/tree.h b/libq/tree.h
index 32dc6f5..1c321f5 100644
--- a/libq/tree.h
+++ b/libq/tree.h
@@ -65,6 +65,7 @@ struct tree_cat_ctx {
        tree_pkg_ctx **pkg_ctxs;
        size_t pkg_cnt;
        size_t pkg_cur;
+       size_t pkg_siz;
 };
 
 /* Package context */
@@ -75,8 +76,9 @@ struct tree_pkg_ctx {
        size_t slot_len;
        size_t repo_len;
        int fd;
-       int binpkg_isgpkg:1;
-       int binpkg_ismulti:1;  /* for path reconstruction */
+       bool cached:1;
+       bool binpkg_isgpkg:1;
+       bool binpkg_ismulti:1;  /* for path reconstruction */
        tree_cat_ctx *cat_ctx;
        depend_atom *atom;
        tree_pkg_meta *meta;

Reply via email to