Hi Frank, On Fri, Feb 21, 2025 at 3:07 PM Frank Ch. Eigler <f...@redhat.com> wrote: > > > commit 082c0a94eed6706753e8019ce348be095deb72f9 (HEAD -> main) > Author: Frank Ch. Eigler <f...@redhat.com> > Date: Fri Feb 21 14:33:49 2025 -0500 > > PR31862: debuginfod: client to cache x-debuginfod-* headers > > This feature allows the extra http headers from debuginfod to be saved > into the client cache, and also thus replayed to clients. This way > they can perform IMA verification again, if they like, or a federating > caching intermediate debuginfod server can replay the headers it > received previously from upstream to future downstream. The headers > are placed adjacent to the payload files .cache/debuginfod/BUILDID/PAYLOAD > as .cache/debuginfod/BUILDID/hdr-PAYLOAD. They are aged the same > atime-based way. > > Testing via an extension of a previous small test, and hand-running both > client & server code under valgrind. No memcheck errors reported.
On my machine memcheck reports leaks due to target_cachehdr_path missing a free. Leaks are only reported during run-debuginfod-ima-verification.sh and run-srcfiles-self.sh. This is surprising since I would have assumed that many more debuginfod tests end up leaking memory for this variable. > > Signed-off-by: Frank Ch. Eigler <f...@redhat.com> > > diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c > index d89beae93ea1..3ea234abb1a8 100644 > --- a/debuginfod/debuginfod-client.c > +++ b/debuginfod/debuginfod-client.c > @@ -569,7 +569,7 @@ debuginfod_clean_cache(debuginfod_client *c, > return -errno; > > regex_t re; > - const char * pattern = > ".*/(metadata.*|[a-f0-9]+(/debuginfo|/executable|/source.*|))$"; /* include > dirs */ > + const char * pattern = > ".*/(metadata.*|[a-f0-9]+(/hdr.*|/debuginfo|/executable|/source.*|))$"; /* > include dirs */ > /* NB: also matches .../section/ subdirs, so extracted section files also > get cleaned. */ > if (regcomp (&re, pattern, REG_EXTENDED | REG_NOSUB) != 0) > return -ENOMEM; > @@ -1777,7 +1777,9 @@ debuginfod_query_server_by_buildid (debuginfod_client > *c, > char *cache_miss_path = NULL; > char *target_cache_dir = NULL; > char *target_cache_path = NULL; > + char *target_cachehdr_path = NULL; > char *target_cache_tmppath = NULL; > + char *target_cachehdr_tmppath = NULL; > char suffix[NAME_MAX]; > char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1]; > int vfd = c->verbose_fd; > @@ -1912,6 +1914,8 @@ debuginfod_query_server_by_buildid (debuginfod_client > *c, > target_cache_path: $HOME/.cache/0123abcd/debuginfo > target_cache_path: $HOME/.cache/0123abcd/executable-.debug_info > target_cache_path: $HOME/.cache/0123abcd/source-HASH-#PATH#TO#SOURCE > + target_cachehdr_path: $HOME/.cache/0123abcd/hdr-debuginfo > + target_cachehdr_path: $HOME/.cache/0123abcd/hdr-executable... > */ > > cache_path = make_cache_path(); > @@ -1923,11 +1927,15 @@ debuginfod_query_server_by_buildid (debuginfod_client > *c, > xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes); > (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would > be caught later too > > - if (suffix[0] != '\0') /* section, source queries */ > + if (suffix[0] != '\0') { /* section, source queries */ > xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, > suffix); > - else > + xalloc_str (target_cachehdr_path, "%s/hdr-%s-%s", target_cache_dir, > type, suffix); > + } else { > xalloc_str (target_cache_path, "%s/%s", target_cache_dir, type); > + xalloc_str (target_cachehdr_path, "%s/hdr-%s", target_cache_dir, type); There is some stray white space at the end of this line and elsewhere. Also these two xalloc_str are the source of the target_cachehdr_path leak I mentioned above. > + } > xalloc_str (target_cache_tmppath, "%s.XXXXXX", target_cache_path); > + xalloc_str (target_cachehdr_tmppath, "%s.XXXXXX", target_cachehdr_path); > > /* XXX combine these */ > xalloc_str (interval_path, "%s/%s", cache_path, > cache_clean_interval_filename); > @@ -1978,6 +1986,50 @@ debuginfod_query_server_by_buildid (debuginfod_client > *c, > /* Success!!!! */ > update_atime(fd); > rc = fd; > + > + /* Attempt to transcribe saved headers. */ > + int fdh = open (target_cachehdr_path, O_RDONLY); > + if (fdh >= 0) > + { > + if (fstat (fdh, &st) == 0) > + if (st.st_size > 0) > + { > + c->winning_headers = malloc(st.st_size); > + if (NULL != c->winning_headers) { > + I think '{' was meant to be on this empty line. > + // Read all teh bytes, thanks posix. > + // In case of any error, unlink the file to force a > redownload later. > + off_t hdr_read = 0; > + while (hdr_read < st.st_size) > + { > + ssize_t bytes_read = read (fdh, c->winning_headers > + hdr_read, > + st.st_size - hdr_read); > + if (bytes_read < 0) > + { > + if (errno == EINTR) > + continue; It doesn't have to be done in this patch but we could add a read_retry function to elfutils lib/system.h to handle reading all bytes. There are some read calls in debuginfod.cxx where read_retry could also be used. > + free (c->winning_headers); > + c->winning_headers = NULL; > + (void) unlink (target_cachehdr_path); > + break; > + } > + if (bytes_read == 0) { > + free (c->winning_headers); > + c->winning_headers = NULL; > + (void) unlink (target_cachehdr_path); > + break; > + } > + hdr_read += bytes_read; > + } > + if (vfd >= 0) > + dprintf (vfd, "found %s (bytes=%ld)\n", > target_cachehdr_path, hdr_read); > + } > + } > + > + update_atime (fdh); > + close (fdh); > + } > + > goto out; > } > else > @@ -2004,12 +2056,12 @@ debuginfod_query_server_by_buildid (debuginfod_client > *c, > /* TOCTOU non-problem: if another task races, puts a working > download or an empty file in its place, unlinking here just > means WE will try to download again as uncached. */ > - unlink(target_cache_path); > + (void) unlink(target_cache_path); > } > } > else if (errno == EACCES) > /* Ensure old 000-permission files are not lingering in the cache. */ > - unlink(target_cache_path); > + (void) unlink(target_cache_path); > > if (section != NULL) > { > @@ -2409,6 +2461,18 @@ debuginfod_query_server_by_buildid (debuginfod_client > *c, > /* Perhaps we need not give up right away; could retry or something > ... */ > } > > + /* write out the headers, best effort basis */ > + if (c->winning_headers) { > + int fdh = mkstemp (target_cachehdr_tmppath); > + if (fdh >= 0) { > + int bytes = write (fdh, c->winning_headers, > strlen(c->winning_headers)+1); // include \0 It's probably safer to use write_retry from lib/system.h here. > + (void) close (fdh); > + (void) rename (target_cachehdr_tmppath, target_cachehdr_path); > + if (vfd >= 0) > + dprintf (vfd, "saved %d bytes of headers to %s\n", bytes, > target_cachehdr_path); > + } > + } > + > /* remove all handles from multi */ > for (int i = 0; i < num_urls; i++) > { > @@ -2490,7 +2554,9 @@ debuginfod_query_server_by_buildid (debuginfod_client > *c, > if (rc < 0 && target_cache_tmppath != NULL) > (void)unlink (target_cache_tmppath); > free (target_cache_tmppath); > - > + if (rc < 0 && target_cachehdr_tmppath != NULL) > + (void)unlink (target_cachehdr_tmppath); > + free (target_cachehdr_tmppath); > > return rc; > } > diff --git a/tests/run-debuginfod-artifact-running.sh > b/tests/run-debuginfod-artifact-running.sh > index 8b9aed37fa12..df42957bfdb9 100755 > --- a/tests/run-debuginfod-artifact-running.sh > +++ b/tests/run-debuginfod-artifact-running.sh > @@ -116,4 +116,11 @@ cmp $filename ${PWD}/prog2.c > kill $PID1 > wait $PID1 > PID1=0 > + > +# PR31862: after killing debuginfod, check that the cached version still > exists and holds headers! > +(DEBUGINFOD_VERBOSE=1; export DEBUGINFOD_VERBOSE > + testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v debuginfo > $BUILDID2 2>&1) | tee vlog3 > +tempfiles vlog3 > +grep -i x-debuginfod vlog3 > + > exit 0 > Aaron