Re: [Bug debuginfod/27983] ignore duplicate urls

2021-07-26 Thread Noah Sanci via Elfutils-devel
Hello,

The realloc returning NULL issue has been resolved and the patch
successfully rebased onto master. Please find these improvements
attached.


Noah
From a1b4c2b87f3890c7bf2339b3a69e056d487f74d3 Mon Sep 17 00:00:00 2001
From: Noah Sanci 
Date: Fri, 9 Jul 2021 14:53:10 -0400
Subject: [PATCH] debuginfod: PR27983 - ignore duplicate urls

Gazing at server logs, one sees a minority of clients who appear to have
duplicate query traffic coming in: the same URL, milliseconds apart.
Chances are the user accidentally doubled her $DEBUGINFOD_URLS somehow,
and the client library is dutifully asking the servers TWICE. Bug #27863
reduces the pain on the servers' CPU, but dupe network traffic is still
being paid.  We should reject sending outright duplicate concurrent
traffic.
The urls are now simply removed upon finding a duplicate after url
construction.

https://sourceware.org/bugzilla/show_bug.cgi?id=27983

Signed-off-by: Noah Sanci 
---
 debuginfod/ChangeLog   |  9 
 debuginfod/debuginfod-client.c | 97 --
 tests/ChangeLog|  6 +++
 tests/run-debuginfod-find.sh   | 11 
 4 files changed, 94 insertions(+), 29 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 7709c24d..81eb56f9 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -34,6 +34,15 @@
 	(parse_opt): Handle 'r' option by setting regex_groom to true.
 	(groom): Introduce and use reg_include and reg_exclude.
 
+2021-07-09  Noah Sanci  
+
+	PR27983
+	* debuginfod-client.c (debuginfod_query_server): As full-length
+	urls are generated with standardized formats, ignore duplicates.
+	Created out1 and changed out2 error gotos. Updated url creation print
+	statements.
+	(globals): Removed url_delim_char, as it was no longer used.
+
 2021-06-18  Mark Wielaard  
 
 	* debuginfod-client.c (debuginfod_begin): Don't use client if
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index 26ba1891..9d049d33 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -152,7 +152,6 @@ static const char *cache_xdg_name = "debuginfod_client";
 
 /* URLs of debuginfods, separated by url_delim. */
 static const char *url_delim =  " ";
-static const char url_delim_char = ' ';
 
 /* Timeout for debuginfods, in seconds (to get at least 100K). */
 static const long default_timeout = 90;
@@ -765,13 +764,60 @@ debuginfod_query_server (debuginfod_client *c,
   goto out0;
 }
 
+  /* Initialize the memory to zero */
+  char *strtok_saveptr;
+  char **server_url_list = NULL;
+  char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
   /* Count number of URLs.  */
   int num_urls = 0;
-  for (int i = 0; server_urls[i] != '\0'; i++)
-if (server_urls[i] != url_delim_char
-&& (i == 0 || server_urls[i - 1] == url_delim_char))
-  num_urls++;
-  
+
+  while (server_url != NULL)
+{
+  /* PR 27983: If the url is already set to be used use, skip it */
+  char *slashbuildid;
+  if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
+slashbuildid = "buildid";
+  else
+slashbuildid = "/buildid";
+
+  char *tmp_url;
+  if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1)
+{
+  rc = -ENOMEM;
+  goto out1;
+}
+  int url_index;
+  for (url_index = 0; url_index < num_urls; ++url_index)
+{
+  if(strcmp(tmp_url, server_url_list[url_index]) == 0)
+{
+  url_index = -1;
+  break;
+}
+}
+  if (url_index == -1)
+{
+  if (vfd >= 0)
+dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
+  free(tmp_url);
+}
+  else
+{
+  num_urls++;
+  char ** realloc_ptr;
+  realloc_ptr = reallocarray(server_url_list, num_urls,
+ sizeof(char*));
+  if (realloc_ptr == NULL)
+{
+  rc = -ENOMEM;
+  goto out1;
+}
+  server_url_list = realloc_ptr;
+  server_url_list[num_urls-1] = tmp_url;
+}
+  server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
+}
+
   int retry_limit = default_retry_limit;
   const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR);
   if (retry_limit_envvar != NULL)
@@ -804,12 +850,11 @@ debuginfod_query_server (debuginfod_client *c,
   data[i].errbuf[0] = '\0';
 }
 
-  char *strtok_saveptr;
-  char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
-
   /* Initialize each handle.  */
-  for (int i = 0; i < num_urls && server_url != NULL; i++)
+  for (int i = 0; i < num_urls; i++)
 {
+  if ((server_url = server_url_list[i]) == NULL)
+break;
   if (vfd >= 0)
 	dprintf (vfd, "init server %d %s\n", i, server_url);
 
@@ -819,18 +864,10 @@ debuginfod_query_server (debuginfod_client

[Bug debuginfod/27982] added DEBUGINFOD_MAXSIZE and DEBUGINFOD_MAXTIME

2021-07-26 Thread Noah Sanci via Elfutils-devel
Hello,

Please find the attached patch for pr 27982. DEBUGINFOD_MAXSIZE and
MAXTIME were added in this patch to allow users to use more
constraints when downloading debuginfo.

Noah
From 93523eb4c8f3f3ca16f3a673369689aec62324cd Mon Sep 17 00:00:00 2001
From: Noah Sanci 
Date: Mon, 26 Jul 2021 13:29:11 -0400
Subject: [PATCH] debuginfod: PR27982 - added DEBUGINFOD_MAXSIZE and
 DEBUGINFOD_MAXTIME

DEBUGINFOD_TIMEOUT is a good way to catch servers that are too slow to
*start* transmitting a file.  But we have no way of limiting total
download time or space.  A user might prefer to have his debugger fetch
only quick & small files, and make do without the bigger ones.  Some
transitive dependencies of e.g. gnome programs are huge: 3GB of LLVM
debuginfo, 1GB of webkitgtk, etc. etc.
DEBUGINFOD_MAXSIZE and DEBUGINFOD_MAXTIME were added to dictate the
max download size and time of a debuginfod client. DEBUGINFOD_MAXSIZE
is handled server-side and is sent using the http header:
X-DEBUGINFOD-MAXSIZE.

https://sourceware.org/bugzilla/show_bug.cgi?id=27982

Signed-off-by: Noah Sanci 
---
 debuginfod/ChangeLog   | 14 +++
 debuginfod/debuginfod-client.c | 71 ++
 debuginfod/debuginfod.cxx  | 13 +++
 debuginfod/debuginfod.h.in |  2 +
 doc/ChangeLog  |  6 +++
 doc/debuginfod-find.1  | 16 +++-
 tests/ChangeLog|  7 
 tests/run-debuginfod-find.sh   | 29 ++
 8 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 7709c24d..bc9b4ceb 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,17 @@
+2021-07-26  Noah Sanci  
+
+	PR27982
+	* debuginfod-client.c (globals): added default_maxsize and
+	default_maxtime.
+	(debuginfod_query_server): Added DEBUGINFOD_MAXSIZE and
+	DEBUGINFOD_MAXTIME envvar processing. DEBUGINFOD_MAXSIZE, if
+	set, sends an additional http header to the server named
+	X-DEBUGINFOD-MAXSIZE.
+	* debuginfod.cxx (handler_cb): If the requested file exceeds
+	maxsize return code 406, signaling the file was too large.
+	* debuginfod.h.in: Added DEBUGINFOD_MAXSIZE_ENV_VAR and
+	DEBUGINFOD_MAXTIME_ENV_VAR.
+
 2021-07-16  Noah Sanci  
 
 	PR28034
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index 26ba1891..cc28d2a3 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -157,6 +157,10 @@ static const char url_delim_char = ' ';
 /* Timeout for debuginfods, in seconds (to get at least 100K). */
 static const long default_timeout = 90;
 
+/* Max size for debuginfods in bytes (B). Default is 0 (infinite). */
+static const long default_maxsize = 0;
+/* Max time to query servers for. Default is MAX_LONG. */
+static const long default_maxtime = LONG_MAX;
 /* Default retry count for download error. */
 static const long default_retry_limit = 2;
 
@@ -556,6 +560,39 @@ debuginfod_query_server (debuginfod_client *c,
   free (c->url);
   c->url = NULL;
 
+  /* PR 27982: Add max size if DEBUGINFOD_MAXSIZE is set. */
+  long maxsize = default_maxsize;
+  const char *maxsize_envvar;
+  maxsize_envvar = getenv(DEBUGINFOD_MAXSIZE_ENV_VAR);
+  if (maxsize_envvar != NULL)
+maxsize = atol (maxsize_envvar);
+
+  /* PR 27982: Add max time if DEBUGINFOD_MAXTIME is set. */
+  long maxtime = default_maxtime;
+  const char *maxtime_envvar;
+  maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR);
+  if (maxtime_envvar != NULL)
+maxtime = atol (maxtime_envvar);
+  if (vfd >= 0)
+dprintf(vfd, "using max time %lds\n", maxtime);
+
+  /* Default maxsize is valid*/
+  if (maxsize > default_maxsize)
+{
+  if (vfd)
+dprintf (vfd, "using max size %ldB\n", maxsize);
+  char *size_header = NULL;
+  rc = asprintf (&size_header, "X-DEBUGINFOD-MAXSIZE: %ld", maxsize);
+  if (rc < 0)
+{
+  rc = -ENOMEM;
+  goto out;
+}
+  rc = debuginfod_add_http_header(c, size_header);
+  free(size_header);
+  if (rc < 0)
+goto out;
+}
   add_default_headers(c);
 
   /* Copy lowercase hex representation of build_id into buf.  */
@@ -893,8 +930,28 @@ debuginfod_query_server (debuginfod_client *c,
   long loops = 0;
   int committed_to = -1;
   bool verbose_reported = false;
+  struct timespec start_time, cur_time;
+  if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1)
+{
+  rc = errno;
+  goto out1;
+}
+  double delta = 0.0;
   do
 {
+  /* Check to see how long querying is taking. */
+  if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1)
+{
+  rc = errno;
+  goto out1;
+}
+  delta = (double)(cur_time.tv_nsec - start_time.tv_nsec) / 10;
+  if (delta > (double) maxtime)
+{
+  dprintf(vfd, "Timeout with max time=%lds and transfer time=%fs\n", maxtime, delta );
+  rc = -ETIME;
+  goto out1;
+}
   /* Wait 1 second, 

[Bug debuginfod/25607] debuginfod-client: paranoid federation mode

2021-07-26 Thread nsanci at redhat dot com via Elfutils-devel
https://sourceware.org/bugzilla/show_bug.cgi?id=25607

Noah Sanci  changed:

   What|Removed |Added

   Assignee|nsanci at redhat dot com   |unassigned at 
sourceware dot org

-- 
You are receiving this mail because:
You are on the CC list for the bug.