Re: [Bug debuginfod/27983] ignore duplicate urls
Hello, The realloc returning NULL issue has been resolved and the patch successfully rebased onto master. Please find these improvements attached. Noah From a1b4c2b87f3890c7bf2339b3a69e056d487f74d3 Mon Sep 17 00:00:00 2001 From: Noah Sanci Date: Fri, 9 Jul 2021 14:53:10 -0400 Subject: [PATCH] debuginfod: PR27983 - ignore duplicate urls Gazing at server logs, one sees a minority of clients who appear to have duplicate query traffic coming in: the same URL, milliseconds apart. Chances are the user accidentally doubled her $DEBUGINFOD_URLS somehow, and the client library is dutifully asking the servers TWICE. Bug #27863 reduces the pain on the servers' CPU, but dupe network traffic is still being paid. We should reject sending outright duplicate concurrent traffic. The urls are now simply removed upon finding a duplicate after url construction. https://sourceware.org/bugzilla/show_bug.cgi?id=27983 Signed-off-by: Noah Sanci --- debuginfod/ChangeLog | 9 debuginfod/debuginfod-client.c | 97 -- tests/ChangeLog| 6 +++ tests/run-debuginfod-find.sh | 11 4 files changed, 94 insertions(+), 29 deletions(-) diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 7709c24d..81eb56f9 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -34,6 +34,15 @@ (parse_opt): Handle 'r' option by setting regex_groom to true. (groom): Introduce and use reg_include and reg_exclude. +2021-07-09 Noah Sanci + + PR27983 + * debuginfod-client.c (debuginfod_query_server): As full-length + urls are generated with standardized formats, ignore duplicates. + Created out1 and changed out2 error gotos. Updated url creation print + statements. + (globals): Removed url_delim_char, as it was no longer used. + 2021-06-18 Mark Wielaard * debuginfod-client.c (debuginfod_begin): Don't use client if diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index 26ba1891..9d049d33 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -152,7 +152,6 @@ static const char *cache_xdg_name = "debuginfod_client"; /* URLs of debuginfods, separated by url_delim. */ static const char *url_delim = " "; -static const char url_delim_char = ' '; /* Timeout for debuginfods, in seconds (to get at least 100K). */ static const long default_timeout = 90; @@ -765,13 +764,60 @@ debuginfod_query_server (debuginfod_client *c, goto out0; } + /* Initialize the memory to zero */ + char *strtok_saveptr; + char **server_url_list = NULL; + char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); /* Count number of URLs. */ int num_urls = 0; - for (int i = 0; server_urls[i] != '\0'; i++) -if (server_urls[i] != url_delim_char -&& (i == 0 || server_urls[i - 1] == url_delim_char)) - num_urls++; - + + while (server_url != NULL) +{ + /* PR 27983: If the url is already set to be used use, skip it */ + char *slashbuildid; + if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') +slashbuildid = "buildid"; + else +slashbuildid = "/buildid"; + + char *tmp_url; + if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1) +{ + rc = -ENOMEM; + goto out1; +} + int url_index; + for (url_index = 0; url_index < num_urls; ++url_index) +{ + if(strcmp(tmp_url, server_url_list[url_index]) == 0) +{ + url_index = -1; + break; +} +} + if (url_index == -1) +{ + if (vfd >= 0) +dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); + free(tmp_url); +} + else +{ + num_urls++; + char ** realloc_ptr; + realloc_ptr = reallocarray(server_url_list, num_urls, + sizeof(char*)); + if (realloc_ptr == NULL) +{ + rc = -ENOMEM; + goto out1; +} + server_url_list = realloc_ptr; + server_url_list[num_urls-1] = tmp_url; +} + server_url = strtok_r(NULL, url_delim, &strtok_saveptr); +} + int retry_limit = default_retry_limit; const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR); if (retry_limit_envvar != NULL) @@ -804,12 +850,11 @@ debuginfod_query_server (debuginfod_client *c, data[i].errbuf[0] = '\0'; } - char *strtok_saveptr; - char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); - /* Initialize each handle. */ - for (int i = 0; i < num_urls && server_url != NULL; i++) + for (int i = 0; i < num_urls; i++) { + if ((server_url = server_url_list[i]) == NULL) +break; if (vfd >= 0) dprintf (vfd, "init server %d %s\n", i, server_url); @@ -819,18 +864,10 @@ debuginfod_query_server (debuginfod_client
[Bug debuginfod/27982] added DEBUGINFOD_MAXSIZE and DEBUGINFOD_MAXTIME
Hello, Please find the attached patch for pr 27982. DEBUGINFOD_MAXSIZE and MAXTIME were added in this patch to allow users to use more constraints when downloading debuginfo. Noah From 93523eb4c8f3f3ca16f3a673369689aec62324cd Mon Sep 17 00:00:00 2001 From: Noah Sanci Date: Mon, 26 Jul 2021 13:29:11 -0400 Subject: [PATCH] debuginfod: PR27982 - added DEBUGINFOD_MAXSIZE and DEBUGINFOD_MAXTIME DEBUGINFOD_TIMEOUT is a good way to catch servers that are too slow to *start* transmitting a file. But we have no way of limiting total download time or space. A user might prefer to have his debugger fetch only quick & small files, and make do without the bigger ones. Some transitive dependencies of e.g. gnome programs are huge: 3GB of LLVM debuginfo, 1GB of webkitgtk, etc. etc. DEBUGINFOD_MAXSIZE and DEBUGINFOD_MAXTIME were added to dictate the max download size and time of a debuginfod client. DEBUGINFOD_MAXSIZE is handled server-side and is sent using the http header: X-DEBUGINFOD-MAXSIZE. https://sourceware.org/bugzilla/show_bug.cgi?id=27982 Signed-off-by: Noah Sanci --- debuginfod/ChangeLog | 14 +++ debuginfod/debuginfod-client.c | 71 ++ debuginfod/debuginfod.cxx | 13 +++ debuginfod/debuginfod.h.in | 2 + doc/ChangeLog | 6 +++ doc/debuginfod-find.1 | 16 +++- tests/ChangeLog| 7 tests/run-debuginfod-find.sh | 29 ++ 8 files changed, 157 insertions(+), 1 deletion(-) diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 7709c24d..bc9b4ceb 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,17 @@ +2021-07-26 Noah Sanci + + PR27982 + * debuginfod-client.c (globals): added default_maxsize and + default_maxtime. + (debuginfod_query_server): Added DEBUGINFOD_MAXSIZE and + DEBUGINFOD_MAXTIME envvar processing. DEBUGINFOD_MAXSIZE, if + set, sends an additional http header to the server named + X-DEBUGINFOD-MAXSIZE. + * debuginfod.cxx (handler_cb): If the requested file exceeds + maxsize return code 406, signaling the file was too large. + * debuginfod.h.in: Added DEBUGINFOD_MAXSIZE_ENV_VAR and + DEBUGINFOD_MAXTIME_ENV_VAR. + 2021-07-16 Noah Sanci PR28034 diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index 26ba1891..cc28d2a3 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -157,6 +157,10 @@ static const char url_delim_char = ' '; /* Timeout for debuginfods, in seconds (to get at least 100K). */ static const long default_timeout = 90; +/* Max size for debuginfods in bytes (B). Default is 0 (infinite). */ +static const long default_maxsize = 0; +/* Max time to query servers for. Default is MAX_LONG. */ +static const long default_maxtime = LONG_MAX; /* Default retry count for download error. */ static const long default_retry_limit = 2; @@ -556,6 +560,39 @@ debuginfod_query_server (debuginfod_client *c, free (c->url); c->url = NULL; + /* PR 27982: Add max size if DEBUGINFOD_MAXSIZE is set. */ + long maxsize = default_maxsize; + const char *maxsize_envvar; + maxsize_envvar = getenv(DEBUGINFOD_MAXSIZE_ENV_VAR); + if (maxsize_envvar != NULL) +maxsize = atol (maxsize_envvar); + + /* PR 27982: Add max time if DEBUGINFOD_MAXTIME is set. */ + long maxtime = default_maxtime; + const char *maxtime_envvar; + maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR); + if (maxtime_envvar != NULL) +maxtime = atol (maxtime_envvar); + if (vfd >= 0) +dprintf(vfd, "using max time %lds\n", maxtime); + + /* Default maxsize is valid*/ + if (maxsize > default_maxsize) +{ + if (vfd) +dprintf (vfd, "using max size %ldB\n", maxsize); + char *size_header = NULL; + rc = asprintf (&size_header, "X-DEBUGINFOD-MAXSIZE: %ld", maxsize); + if (rc < 0) +{ + rc = -ENOMEM; + goto out; +} + rc = debuginfod_add_http_header(c, size_header); + free(size_header); + if (rc < 0) +goto out; +} add_default_headers(c); /* Copy lowercase hex representation of build_id into buf. */ @@ -893,8 +930,28 @@ debuginfod_query_server (debuginfod_client *c, long loops = 0; int committed_to = -1; bool verbose_reported = false; + struct timespec start_time, cur_time; + if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1) +{ + rc = errno; + goto out1; +} + double delta = 0.0; do { + /* Check to see how long querying is taking. */ + if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1) +{ + rc = errno; + goto out1; +} + delta = (double)(cur_time.tv_nsec - start_time.tv_nsec) / 10; + if (delta > (double) maxtime) +{ + dprintf(vfd, "Timeout with max time=%lds and transfer time=%fs\n", maxtime, delta ); + rc = -ETIME; + goto out1; +} /* Wait 1 second,
[Bug debuginfod/25607] debuginfod-client: paranoid federation mode
https://sourceware.org/bugzilla/show_bug.cgi?id=25607 Noah Sanci changed: What|Removed |Added Assignee|nsanci at redhat dot com |unassigned at sourceware dot org -- You are receiving this mail because: You are on the CC list for the bug.