PATCH: PR27701: debuginfod clients with long-lived curl handles

2021-04-23 Thread Frank Ch. Eigler via Elfutils-devel
Hi -

Author: Frank Ch. Eigler 
Date:   Fri Apr 23 13:04:26 2021 -0400

PR27701: debuginfod client: encourage reused debuginfod_client objects

Client objects now carry long-lived curl handles for outgoing
connections.  This makes it more efficient for multiple sequential
queries, because the TCP connections and/or TLS state info are kept
around awhile, avoiding O(100ms) setup latencies.  debuginfod is
adjusted to take advantage of this for federation.  Other clients
should gradually do this too, perhaps including elfutils itself (in
the libdwfl->debuginfod_client hooks).

A large gdb session with 117 debuginfo downloads was observed to run
twice as fast (45s vs. 1m30s wall-clock time), just in nuking this
extra setup latency.  This was tested via a debuginfod intermediary:
it should be even faster once gdb reuses its own debuginfod_client.

Signed-off-by: Frank Ch. Eigler 

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index ed2f77cfaece..ad9dbc0a8a74 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,19 @@
+2021-04-23  Frank Ch. Eigler 
+
+   PR27701
+   * debuginfod-client.c (struct debuginfod_client): Add long-lived
+   CURL easy and multi handles.
+   (debuginfo_begin,debuginfod_end): ctor/dtor for these.
+   (debuginfod_query_server): Rework to reuse easy & multi handles.
+   (*_envvar): Just use the DEBUGINFOD_*_ENV_VAR directly instead.
+
+   * debuginfod.cxx (dc_pool): New pile of reusable debuginfod_client
+   objects for upstream federation connections.
+   (debuginfod_pool_{begin,end,groom}): New functions.
+   (handle_buildid): Use them.
+   (handler_cb): Fix keep-alive given libmicrohttpd convention of multiple
+   callbacks.
+
 2021-04-15  Frank Ch. Eigler 
 
* debuginfod.cxx (groom): Only update database stats once.
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index d5e7bbdfbe1b..7fdc6c9f30ec 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -119,6 +119,11 @@ struct debuginfod_client
   /* File descriptor to output any verbose messages if > 0.  */
   int verbose_fd;
 
+  /* Count DEBUGINFOD_URLS elements and corresponding curl handles. */
+  int num_urls;
+  CURL **server_handles;
+  CURLM *server_mhandle;
+  
   /* Can contain all other context, like cache_path, server_urls,
  timeout or other info gotten from environment variables, the
  handle data, etc. So those don't have to be reparsed and
@@ -140,15 +145,12 @@ static const time_t cache_default_max_unused_age_s = 
604800; /* 1 week */
The default parent directory is $HOME, or '/' if $HOME doesn't exist.  */
 static const char *cache_default_name = ".debuginfod_client_cache";
 static const char *cache_xdg_name = "debuginfod_client";
-static const char *cache_path_envvar = DEBUGINFOD_CACHE_PATH_ENV_VAR;
 
 /* URLs of debuginfods, separated by url_delim. */
-static const char *server_urls_envvar = DEBUGINFOD_URLS_ENV_VAR;
 static const char *url_delim =  " ";
 static const char url_delim_char = ' ';
 
 /* Timeout for debuginfods, in seconds (to get at least 100K). */
-static const char *server_timeout_envvar = DEBUGINFOD_TIMEOUT_ENV_VAR;
 static const long default_timeout = 90;
 
 
@@ -523,7 +525,13 @@ debuginfod_query_server (debuginfod_client *c,
 
   /* Is there any server we can query?  If not, don't do any work,
  just return with ENOSYS.  Don't even access the cache.  */
-  urls_envvar = getenv(server_urls_envvar);
+  if (c->num_urls == 0)
+{
+  rc = -ENOSYS;
+  goto out;
+}
+  
+  urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR);
   if (vfd >= 0)
 dprintf (vfd, "server urls \"%s\"\n",
 urls_envvar != NULL ? urls_envvar : "");
@@ -611,7 +619,7 @@ debuginfod_query_server (debuginfod_client *c,
 
   /* Determine location of the cache. The path specified by the debuginfod
  cache environment variable takes priority.  */
-  char *cache_var = getenv(cache_path_envvar);
+  char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
   if (cache_var != NULL && strlen (cache_var) > 0)
 xalloc_str (cache_path, "%s", cache_var);
   else
@@ -691,7 +699,7 @@ debuginfod_query_server (debuginfod_client *c,
 }
 
   long timeout = default_timeout;
-  const char* timeout_envvar = getenv(server_timeout_envvar);
+  const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR);
   if (timeout_envvar != NULL)
 timeout = atoi (timeout_envvar);
 
@@ -725,24 +733,13 @@ debuginfod_query_server (debuginfod_client *c,
   goto out0;
 }
 
-  /* Count number of URLs.  */
-  int num_urls = 0;
-  for (int i = 0; server_urls[i] != '\0'; i++)
-if (server_urls[i] != url_delim_char
-&& (i == 0 || server_urls[i - 1] == url_delim_char))
-  num_urls++;
-
-  CURLM *curlm = curl_multi_init();
-  if (curlm == NULL)
-{
-  rc = -ENETUNREACH;
-  goto out0;
-}
-
+ 

[PATCH] libdw: handle DW_FORM_indirect when reading attributes

2021-04-23 Thread Omar Sandoval
From: Omar Sandoval 

Whenever we encounter an attribute with DW_FORM_indirect, we need to
read its true form from the DIE data. Then, we can continue normally.
This adds support to the most obvious places: __libdw_find_attr() and
dwarf_getattrs(). There may be more places that need to be updated.

I encountered this when inspecting a file that was processed by our BOLT
tool: https://github.com/facebookincubator/BOLT. This also adds a couple
of test cases using a file generated by that tool.

Signed-off-by: Omar Sandoval 
---
 libdw/ChangeLog   |   5 +
 libdw/dwarf_child.c   |  13 +
 libdw/dwarf_getattrs.c|  13 +
 tests/ChangeLog   |  10 +
 tests/Makefile.am |   6 +-
 tests/run-low_high_pc-dw-form-indirect.sh |  23 +
 tests/run-readelf-dw-form-indirect.sh | 678 ++
 tests/testfile-dw-form-indirect.bz2   | Bin 0 -> 7007 bytes
 8 files changed, 746 insertions(+), 2 deletions(-)
 create mode 100755 tests/run-low_high_pc-dw-form-indirect.sh
 create mode 100755 tests/run-readelf-dw-form-indirect.sh
 create mode 100755 tests/testfile-dw-form-indirect.bz2

diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index f01bee39..e3e467ee 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,3 +1,8 @@
+2021-04-23  Omar Sandoval  
+
+   * dwarf_child.c (__libdw_find_attr): Handle DW_FORM_indirect.
+   * dwarf_getattrs.c (dwarf_getattrs): Handle DW_FORM_indirect.
+
 2021-02-12  Mark Wielaard  
 
* dwarf_getlocation.c (attr_ok): For DWARF version 4 or higher
diff --git a/libdw/dwarf_child.c b/libdw/dwarf_child.c
index 2e39d834..c8c8bb61 100644
--- a/libdw/dwarf_child.c
+++ b/libdw/dwarf_child.c
@@ -53,6 +53,8 @@ __libdw_find_attr (Dwarf_Die *die, unsigned int search_name,
   return NULL;
 }
 
+  const unsigned char *endp = die->cu->endp;
+
   /* Search the name attribute.  Attribute has been checked when
  Dwarf_Abbrev was created, we can read unchecked.  */
   const unsigned char *attrp = abbrevp->attrp;
@@ -69,6 +71,17 @@ __libdw_find_attr (Dwarf_Die *die, unsigned int search_name,
   if (attr_name == 0 && attr_form == 0)
break;
 
+  if (attr_form == DW_FORM_indirect)
+   {
+ get_uleb128 (attr_form, readp, endp);
+ if (attr_form == DW_FORM_indirect ||
+ attr_form == DW_FORM_implicit_const)
+   {
+ __libdw_seterrno (DWARF_E_INVALID_DWARF);
+ return NULL;
+   }
+   }
+
   /* Is this the name attribute?  */
   if (attr_name == search_name && search_name != INVALID)
{
diff --git a/libdw/dwarf_getattrs.c b/libdw/dwarf_getattrs.c
index 4ac16b1a..770c32c9 100644
--- a/libdw/dwarf_getattrs.c
+++ b/libdw/dwarf_getattrs.c
@@ -55,6 +55,8 @@ dwarf_getattrs (Dwarf_Die *die, int (*callback) 
(Dwarf_Attribute *, void *),
   return -1l;
 }
 
+  const unsigned char *endp = die->cu->endp;
+
   /* This is where the attributes start.  */
   const unsigned char *attrp = abbrevp->attrp;
   const unsigned char *const offset_attrp = abbrevp->attrp + offset;
@@ -78,6 +80,17 @@ dwarf_getattrs (Dwarf_Die *die, int (*callback) 
(Dwarf_Attribute *, void *),
   offset of an attribute.  */
 return 1l;
 
+  if (attr.form == DW_FORM_indirect)
+   {
+ get_uleb128 (attr.form, die_addr, endp);
+ if (attr.form == DW_FORM_indirect ||
+ attr.form == DW_FORM_implicit_const)
+   {
+ __libdw_seterrno (DWARF_E_INVALID_DWARF);
+ return -1l;
+   }
+   }
+
   /* If we are not to OFFSET_ATTRP yet, we just have to skip
 the values of the intervening attributes.  */
   if (remembered_attrp >= offset_attrp)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index ea44d20c..cb22dfbb 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,13 @@
+2021-04-23  Omar Sandoval  
+
+   * run-low_high_pc-dw-form-indirect.sh: New file.
+   * run-readelf-dw-form-indirect.sh: New file.
+   * testfile-dw-form-indirect.bz2: New file.
+   * Makefile.am (TESTS): Add run-low_high_pc-dw-form-indirect.sh and
+   run-readelf-dw-form-indirect.sh.
+   (EXTRA_DIST): Add run-low_high_pc-dw-form-indirect.sh,
+   run-readelf-dw-form-indirect.sh, and testfile-dw-form-indirect.bz2.
+
 2021-03-30  Frank Ch. Eigler  
 
* run-debuginfod-find.sh: Add thread comm checks.
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 852269a3..dc7517d9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -187,7 +187,8 @@ TESTS = run-arextract.sh run-arsymtest.sh run-ar.sh newfile 
test-nlist \
run-getphdrnum.sh run-test-includes.sh \
leb128 read_unaligned \
msg_tst system-elf-libelf-test \
-   $(asm_TESTS) run-disasm-bpf.sh
+   $(asm_TESTS) run-disasm-bpf.sh run-low_high_pc-dw-form-indirect.sh \
+   run-readelf-dw-form-indirect.sh
 
 if !BIARCH
 e