Re: [PATCH] readelf.c: Avoid repeating calls to gettext _() in hotpath

Henning Meyer Wed, 02 Jul 2025 12:41:46 -0700

Btw, I would really like to add colored output to readelf, but the waygettext is used currently, translating raw printf format strings insteadof actual words makes that impossible without breaking all translations.

From a security perspective, _("%zu byte block") allows a translator toreorder word order for other languages, but it also allows to translate%zu to %s or %n for crashes.


On 02.07.25 15:27, Mark Wielaard wrote:

Hi Aaron,

On Sun, 2025-06-29 at 23:51 -0400, Aaron Merey wrote:

Calls to the gettext _() macro in hotpaths cause many runtime lookups
of the same translation strings.  This patch introduces macro __()
which caches the result of _() at each call site where __() is used.

When a cached translation string is used as the format string for
fprintf, the compiler might raise a -Wformat-nonliteral warning.

To avoid this, the -Wformat-nonliteral warning is suppressed using _Pragma
around affected fprintf calls, using IGNORE_FMT_NONLITERAL_BEGIN and
_END wrappers.

To avoid suppressing -Wformat-nonliteral across every printf-style
function using _(), __() and IGNORE_FMT_NONLITERAL_* are mostly used
within hotpath loops where caching makes a signficant difference.
Runtime improvements of up to 13% faster have been observed with this patch
applied.

I can see how these changes can increase runtime (for threaded runs I
assume). But I really don't like them very much. It adds a lot of
macros/code around a few invocations of _() that probably should just
be removed.

@@ -4942,7 +4960,9 @@ print_block (size_t n, const void *block, FILE *out)
      fputs (_("empty block\n"), out);

Side question, is that extra \n deliberate? fputs already adds an \n.

    else
      {
-      fprintf (out, _("%zu byte block:"), n);
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __("%zu byte block:"), n);
+      IGNORE_FMT_NONLITERAL_END
        const unsigned char *data = block;
        do
        fprintf (out, " %02x", *data++);

So for these why not simply have translated strings in a static var at
the top of print_block:

static char *empty_block_str = _("empty block");
static char *byte_block_str = _("byte block");

And then in the loop do:

   fputs (empty_block_str, out);
[... else ...]
   fprintf (out, "%zu %s:", n, byte_block_str);

@@ -5866,11 +5886,13 @@ print_debug_abbrev_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),
          unsigned int tag = dwarf_getabbrevtag (&abbrev);
          int has_children = dwarf_abbrevhaschildren (&abbrev);

+ IGNORE_FMT_NONLITERAL_BEGIN

          fprintf (out, _(" [%5u] offset: %" PRId64
                           ", children: %s, tag: %s\n"),
                   code, (int64_t) offset,
                   has_children ? yes_str : no_str,
                   dwarf_tag_name (tag));
+         IGNORE_FMT_NONLITERAL_END

Why is this necessary here? You aren't using __().
But if you do want this for the "offset", "children" and "tag" strings
I would handle them just like yes_str and no_str are handled here.

@@ -6210,7 +6232,10 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),
        const unsigned char *hdrstart = readp;
        size_t start_offset = hdrstart - (const unsigned char *) data->d_buf;

- fprintf (out, _("\nTable at offset %zu:\n"), start_offset);

+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __("\nTable at offset %zu:\n"), start_offset);
+      IGNORE_FMT_NONLITERAL_END
+
        if (readp + 4 > readendp)
        {
        invalid_data:

Again, if possibly just add a static char *table_at_offset_str =
_("table_at_offset");

@@ -6230,8 +6255,11 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),
        }

const unsigned char *nexthdr = readp + length;

-      fprintf (out, _("\n Length:        %6" PRIu64 "\n"),
+
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __("\n Length:        %6" PRIu64 "\n"),
               (uint64_t) length);
+      IGNORE_FMT_NONLITERAL_END

if (unlikely (length > (size_t) (readendp - readp)))

        goto invalid_data;

"Length" can probably also be translated once and then reused
everywhere.

@@ -6242,8 +6270,12 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),
        if (readp + 2 > readendp)
        goto invalid_data;
        uint_fast16_t version = read_2ubyte_unaligned_inc (dbg, readp);
-      fprintf (out, _(" DWARF version: %6" PRIuFAST16 "\n"),
+
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" DWARF version: %6" PRIuFAST16 "\n"),
               version);
+      IGNORE_FMT_NONLITERAL_END
+
        if (version != 2)
        {
          error (0, 0, _("unsupported aranges version"));

Same for "version". I wouldn't try to translate "DWARF".

@@ -6257,14 +6289,21 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),
        offset = read_8ubyte_unaligned_inc (dbg, readp);
        else
        offset = read_4ubyte_unaligned_inc (dbg, readp);
-      fprintf (out, _(" CU offset:     %6" PRIx64 "\n"),
+
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" CU offset:     %6" PRIx64 "\n"),
               (uint64_t) offset);
+      IGNORE_FMT_NONLITERAL_END

if (readp + 1 > readendp)

Just translate "offset" once, keep CU (don't think it is sensible to
translate that).

        goto invalid_data;
        unsigned int address_size = *readp++;
-      fprintf (out, _(" Address size:  %6" PRIu64 "\n"),
+
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" Address size:  %6" PRIu64 "\n"),
               (uint64_t) address_size);
+      IGNORE_FMT_NONLITERAL_END
+
        if (address_size != 4 && address_size != 8)
        {
          error (0, 0, _("unsupported address size"));
@@ -6273,9 +6312,13 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
__attribute__ ((unused)),

if (readp + 1 > readendp)

        goto invalid_data;
+
        unsigned int segment_size = *readp++;
-      fprintf (out, _(" Segment size:  %6" PRIu64 "\n\n"),
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" Segment size:  %6" PRIu64 "\n\n"),
               (uint64_t) segment_size);
+      IGNORE_FMT_NONLITERAL_END
+
        if (segment_size != 0 && segment_size != 4 && segment_size != 8)
        {
          error (0, 0, _("unsupported segment size"));

Separate translations of "Address", "size" and "Segment"?
Or maybe just "Address size" and "Segment size"?

@@ -6392,8 +6435,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
        }

ptrdiff_t offset = readp - (unsigned char *) data->d_buf;

-      fprintf (out, _("Table at Offset 0x%" PRIx64 ":\n\n"),
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __("Table at Offset 0x%" PRIx64 ":\n\n"),
               (uint64_t) offset);
+      IGNORE_FMT_NONLITERAL_END

uint64_t unit_length = read_4ubyte_unaligned_inc (dbg, readp);

        unsigned int offset_size = 4;

You already do this above for "Table at offset", but here "Offset" is
capitalized. Maybe use the same capitalization in both places?

@@ -6405,7 +6450,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
          unit_length = read_8ubyte_unaligned_inc (dbg, readp);
          offset_size = 8;
        }
-      fprintf (out, _(" Length:         %8" PRIu64 "\n"), unit_length);
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" Length:         %8" PRIu64 "\n"), unit_length);
+      IGNORE_FMT_NONLITERAL_END

/* We need at least 2-bytes + 1-byte + 1-byte + 4-bytes = 8

         bytes to complete the header.  And this unit cannot go beyond

Just translate "Length" separately?

@@ -6418,7 +6465,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
        const unsigned char *nexthdr = readp + unit_length;

uint16_t version = read_2ubyte_unaligned_inc (dbg, readp);

-      fprintf (out, _(" DWARF version:  %8" PRIu16 "\n"), version);
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" DWARF version:  %8" PRIu16 "\n"), version);
+      IGNORE_FMT_NONLITERAL_END

if (version != 5)

Same, translation once of "version" keep DWARF as is.

@@ -6427,8 +6476,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
        }

uint8_t address_size = *readp++;

-      fprintf (out, _(" Address size:   %8" PRIu64 "\n"),
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" Address size:   %8" PRIu64 "\n"),
               (uint64_t) address_size);
+      IGNORE_FMT_NONLITERAL_END

if (address_size != 4 && address_size != 8)

"Address size" again, see above.

@@ -6447,8 +6498,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
          }

uint32_t offset_entry_count = read_4ubyte_unaligned_inc (dbg, readp);

-      fprintf (out, _(" Offset entries: %8" PRIu64 "\n"),
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __(" Offset entries: %8" PRIu64 "\n"),
               (uint64_t) offset_entry_count);
+      IGNORE_FMT_NONLITERAL_END

/* We need the CU that uses this unit to get the initial base address. */

        Dwarf_Addr cu_base = 0;


static char *offset_entries_str" = _("Offset entries"); ?

@@ -6463,10 +6516,14 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
          if (dwarf_cu_die (cu, &cudie,
                            NULL, NULL, NULL, NULL,
                            NULL, NULL) == NULL)
-           fprintf (out, _(" Unknown CU base: "));
+           fputs (__(" Unknown CU base: "), out);
          else
-           fprintf (out, _(" CU [%6" PRIx64 "] base: "),
-                    dwarf_dieoffset (&cudie));
+           {
+             IGNORE_FMT_NONLITERAL_BEGIN
+             fprintf (out, __(" CU [%6" PRIx64 "] base: "),
+                      dwarf_dieoffset (&cudie));
+             IGNORE_FMT_NONLITERAL_END
+           }
          print_dwarf_addr (dwflmod, address_size, cu_base, cu_base, out);
          fprintf (out, "\n");
        }

Just translate "base" don't translate CU?

@@ -8556,7 +8613,8 @@ print_debug_units (Dwfl_Module *dwflmod,
          dieoffset = dwarf_dieoffset (dwarf_offdie_types (dbg, cu->start
                                                           + subdie_off,
                                                           &typedie));
-         fprintf (out, _(" Type unit at offset %" PRIu64 ":\n"
+         IGNORE_FMT_NONLITERAL_BEGIN;
+         fprintf (out, __(" Type unit at offset %" PRIu64 ":\n"
                            " Version: %" PRIu16
                            ", Abbreviation section offset: %" PRIu64
                            ", Address size: %" PRIu8
@@ -8565,26 +8623,36 @@ print_debug_units (Dwfl_Module *dwflmod,
                            ", Type offset: %#" PRIx64 " [%" PRIx64 "]\n"),
                   (uint64_t) offset, version, abbroffset, addrsize, offsize,
                   unit_id, (uint64_t) subdie_off, dieoffset);
+         IGNORE_FMT_NONLITERAL_END;
        }

I would split these up. Most words seem repeated "unit, "version",
"section", "size", "Type", "Address".

        else
        {
-         fprintf (out, _(" Compilation unit at offset %" PRIu64 ":\n"
+         IGNORE_FMT_NONLITERAL_BEGIN;
+         fprintf (out, __(" Compilation unit at offset %" PRIu64 ":\n"
                            " Version: %" PRIu16
                            ", Abbreviation section offset: %" PRIu64
                            ", Address size: %" PRIu8
                            ", Offset size: %" PRIu8 "\n"),
                   (uint64_t) offset, version, abbroffset, addrsize, offsize);
+         IGNORE_FMT_NONLITERAL_END;

if (version >= 5 || (unit_type != DW_UT_compile

                               && unit_type != DW_UT_partial))
            {
-             fprintf (out, _(" Unit type: %s (%" PRIu8 ")"),
+
+             IGNORE_FMT_NONLITERAL_BEGIN;
+             fprintf (out, __(" Unit type: %s (%" PRIu8 ")"),
                                dwarf_unit_name (unit_type), unit_type);
+             IGNORE_FMT_NONLITERAL_END;
              if (unit_type == DW_UT_type
                  || unit_type == DW_UT_skeleton
                  || unit_type == DW_UT_split_compile
                  || unit_type == DW_UT_split_type)
-               fprintf (out, ", Unit id: 0x%.16" PRIx64 "", unit_id);
+               {
+                 IGNORE_FMT_NONLITERAL_BEGIN;
+                 fprintf (out, __(", Unit id: 0x%.16" PRIx64 ""), unit_id);
+                 IGNORE_FMT_NONLITERAL_END;
+               }
              if (unit_type == DW_UT_type
                  || unit_type == DW_UT_split_type)
                {

Likewise.

@@ -8593,8 +8661,10 @@ print_debug_units (Dwfl_Module *dwflmod,
                  dwarf_cu_info (cu, NULL, NULL, NULL, &typedie,
                                 NULL, NULL, NULL);
                  dieoffset = dwarf_dieoffset (&typedie);
-                 fprintf (out, ", Unit DIE off: %#" PRIx64 " [%" PRIx64 "]",
+                 IGNORE_FMT_NONLITERAL_BEGIN;
+                 fprintf (out, __(", Unit DIE off: %#" PRIx64 " [%" PRIx64 
"]"),
                           subdie_off, dieoffset);
+                 IGNORE_FMT_NONLITERAL_END;
                }
              fprintf (out, "\n");
            }

"Unit" and "off" (maybe reuse "offset"?) can be translated separately,
keep "DIE" as is?

@@ -9179,7 +9249,9 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl *ebl, 
GElf_Ehdr *ehdr,
      {
        size_t start_offset = linep - (const unsigned char *) data->d_buf;

- fprintf (out, _("\nTable at offset %zu:\n"), start_offset);

+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __("\nTable at offset %zu:\n"), start_offset);
+      IGNORE_FMT_NONLITERAL_END

if (unlikely (linep + 4 > lineendp))

        goto invalid_data;

Another "Table at offset".

@@ -9270,7 +9342,8 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl *ebl, 
GElf_Ehdr *ehdr,
        uint_fast8_t opcode_base = *linep++;

/* Print what we got so far. */

-      fprintf (out, _("\n"
+      IGNORE_FMT_NONLITERAL_BEGIN
+      fprintf (out, __("\n"
                        " Length:                         %" PRIu64 "\n"
                        " DWARF version:                  %" PRIuFAST16 "\n"
                        " Prologue length:                %" PRIu64 "\n"
@@ -9289,6 +9362,7 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl *ebl, 
GElf_Ehdr *ehdr,
               minimum_instr_len, max_ops_per_instr,
               default_is_stmt, line_base,
               line_range, opcode_base);
+      IGNORE_FMT_NONLITERAL_END

if (version < 2 || version > 5)

Only "Prologue" hasn't been seen before?

@@ -9344,13 +9418,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
*ebl, GElf_Ehdr *ehdr,
Dwarf_Off str_offsets_base = str_offsets_base_off (dbg, NULL);- fputs (_("\nDirectory table:\n"), out);
+      fputs (__("\nDirectory table:\n"), out);
        if (version > 4)
        {
          struct encpair { uint16_t desc; uint16_t form; };
          struct encpair enc[256];
- fprintf (out, _(" ["));
+         fprintf (out, "      [");

I do like this. Nothing to translate.

          if ((size_t) (lineendp - linep) < 1)
            goto invalid_data;
          unsigned char directory_entry_format_count = *linep++;
@@ -9421,13 +9495,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
*ebl, GElf_Ehdr *ehdr,
        if (unlikely (linep >= lineendp))
        goto invalid_unit;

- fputs (_("\nFile name table:\n"), out);

+      fputs (__("\nFile name table:\n"), out);
        if (version > 4)
        {
          struct encpair { uint16_t desc; uint16_t form; };
          struct encpair enc[256];

- fprintf (out, _(" ["));

+         fprintf (out, "      [");
          if ((size_t) (lineendp - linep) < 1)
            goto invalid_data;
          unsigned char file_name_format_count = *linep++;

Likewise.

@@ -9528,11 +9602,11 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
*ebl, GElf_Ehdr *ehdr,

if (linep == lineendp)

        {
-         fputs (_("\nNo line number statements.\n"), out);
+         fputs (__("\nNo line number statements.\n"), out);
          continue;
        }

- fputs (_("\nLine number statements:\n"), out);

+      fputs (__("\nLine number statements:\n"), out);
        Dwarf_Word address = 0;
        unsigned int op_index = 0;
        size_t line = 1;
@@ -9581,15 +9655,25 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
*ebl, GElf_Ehdr *ehdr,
              line += line_increment;
              advance_pc ((opcode - opcode_base) / line_range);

- fprintf (out, _(" special opcode %u: address+%u = "),

+             IGNORE_FMT_NONLITERAL_BEGIN
+             fprintf (out, __(" special opcode %u: address+%u = "),
                       opcode, op_addr_advance);
+             IGNORE_FMT_NONLITERAL_END
              print_dwarf_addr (dwflmod, 0, address, address, out);
              if (op_index > 0)
-               fprintf (out, _(", op_index = %u, line%+d = %zu\n"),
-                        op_index, line_increment, line);
+               {
+                 IGNORE_FMT_NONLITERAL_BEGIN
+                 fprintf (out, __(", op_index = %u, line%+d = %zu\n"),
+                          op_index, line_increment, line);
+                 IGNORE_FMT_NONLITERAL_END
+               }
              else
-               fprintf (out, _(", line%+d = %zu\n"),
-                        line_increment, line);
+               {
+                 IGNORE_FMT_NONLITERAL_BEGIN
+                 fprintf (out, __(", line%+d = %zu\n"),
+                          line_increment, line);
+                 IGNORE_FMT_NONLITERAL_END
+               }
            }

"line" can probably be translated, but "op_index" is how it is called
in the spec.


Similar comments for the rest of the patch.
I really don't like the macros and the __ very much.
If at all possible I would just do this with explicit
static char *frob_str = _("frob"); strings.

Cheers,

Mark

Re: [PATCH] readelf.c: Avoid repeating calls to gettext _() in hotpath

Reply via email to