Re: [PATCH] readelf.c: Avoid repeating calls to gettext _() in hotpath

Mark Wielaard Wed, 02 Jul 2025 07:57:10 -0700

Hi Aaron,

On Sun, 2025-06-29 at 23:51 -0400, Aaron Merey wrote:
> Calls to the gettext _() macro in hotpaths cause many runtime lookups
> of the same translation strings.  This patch introduces macro __()
> which caches the result of _() at each call site where __() is used.
> 
> When a cached translation string is used as the format string for
> fprintf, the compiler might raise a -Wformat-nonliteral warning.
> 
> To avoid this, the -Wformat-nonliteral warning is suppressed using _Pragma
> around affected fprintf calls, using IGNORE_FMT_NONLITERAL_BEGIN and
> _END wrappers.
> 
> To avoid suppressing -Wformat-nonliteral across every printf-style
> function using _(), __() and IGNORE_FMT_NONLITERAL_* are mostly used
> within hotpath loops where caching makes a significant difference.
> Runtime improvements of up to 13% faster have been observed with this patch
> applied.


I can see how these changes can improve runtime (for threaded runs I
assume). But I really don't like them very much. It adds a lot of
macros/code around a few invocations of _() that probably should just
be removed.

> @@ -4942,7 +4960,9 @@ print_block (size_t n, const void *block, FILE *out)
>      fputs (_("empty block\n"), out);

Side question, is that extra \n deliberate? fputs already adds an \n.

>    else
>      {
> -      fprintf (out, _("%zu byte block:"), n);
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __("%zu byte block:"), n);
> +      IGNORE_FMT_NONLITERAL_END
>        const unsigned char *data = block;
>        do
>       fprintf (out, " %02x", *data++);

So for these why not simply have translated strings in a static var at
the top of print_block:

static char *empty_block_str = _("empty block");
static char *byte_block_str = _("byte block");

And then in the loop do:

  fputs (empty_block_str, out);
[... else ...]
  fprintf (out, "%zu %s:", n, byte_block_str);

> @@ -5866,11 +5886,13 @@ print_debug_abbrev_section (Dwfl_Module *dwflmod 
> __attribute__ ((unused)),
>         unsigned int tag = dwarf_getabbrevtag (&abbrev);
>         int has_children = dwarf_abbrevhaschildren (&abbrev);
>  
> +       IGNORE_FMT_NONLITERAL_BEGIN
>         fprintf (out, _(" [%5u] offset: %" PRId64
>                          ", children: %s, tag: %s\n"),
>                  code, (int64_t) offset,
>                  has_children ? yes_str : no_str,
>                  dwarf_tag_name (tag));
> +       IGNORE_FMT_NONLITERAL_END

Why is this necessary here? You aren't using __().
But if you do want this for the "offset", "children" and "tag" strings
I would handle them just like yes_str and no_str are handled here.

> @@ -6210,7 +6232,10 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
> __attribute__ ((unused)),
>        const unsigned char *hdrstart = readp;
>        size_t start_offset = hdrstart - (const unsigned char *) data->d_buf;
>  
> -      fprintf (out, _("\nTable at offset %zu:\n"), start_offset);
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __("\nTable at offset %zu:\n"), start_offset);
> +      IGNORE_FMT_NONLITERAL_END
> +
>        if (readp + 4 > readendp)
>       {
>       invalid_data:

Again, if possible just add a static char *table_at_offset_str =
_("table_at_offset");

> @@ -6230,8 +6255,11 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
> __attribute__ ((unused)),
>       }
>  
>        const unsigned char *nexthdr = readp + length;
> -      fprintf (out, _("\n Length:        %6" PRIu64 "\n"),
> +
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __("\n Length:        %6" PRIu64 "\n"),
>              (uint64_t) length);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        if (unlikely (length > (size_t) (readendp - readp)))
>       goto invalid_data;

"Length" can probably also be translated once and then reused
everywhere.

> @@ -6242,8 +6270,12 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
> __attribute__ ((unused)),
>        if (readp + 2 > readendp)
>       goto invalid_data;
>        uint_fast16_t version = read_2ubyte_unaligned_inc (dbg, readp);
> -      fprintf (out, _(" DWARF version: %6" PRIuFAST16 "\n"),
> +
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" DWARF version: %6" PRIuFAST16 "\n"),
>              version);
> +      IGNORE_FMT_NONLITERAL_END
> +
>        if (version != 2)
>       {
>         error (0, 0, _("unsupported aranges version"));

Same for "version". I wouldn't try to translate "DWARF".

> @@ -6257,14 +6289,21 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
> __attribute__ ((unused)),
>       offset = read_8ubyte_unaligned_inc (dbg, readp);
>        else
>       offset = read_4ubyte_unaligned_inc (dbg, readp);
> -      fprintf (out, _(" CU offset:     %6" PRIx64 "\n"),
> +
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" CU offset:     %6" PRIx64 "\n"),
>              (uint64_t) offset);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        if (readp + 1 > readendp)

Just translate "offset" once, keep CU (don't think it is sensible to
translate that).

>       goto invalid_data;
>        unsigned int address_size = *readp++;
> -      fprintf (out, _(" Address size:  %6" PRIu64 "\n"),
> +
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" Address size:  %6" PRIu64 "\n"),
>              (uint64_t) address_size);
> +      IGNORE_FMT_NONLITERAL_END
> +
>        if (address_size != 4 && address_size != 8)
>       {
>         error (0, 0, _("unsupported address size"));
> @@ -6273,9 +6312,13 @@ print_debug_aranges_section (Dwfl_Module *dwflmod 
> __attribute__ ((unused)),
>  
>        if (readp + 1 > readendp)
>       goto invalid_data;
> +
>        unsigned int segment_size = *readp++;
> -      fprintf (out, _(" Segment size:  %6" PRIu64 "\n\n"),
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" Segment size:  %6" PRIu64 "\n\n"),
>              (uint64_t) segment_size);
> +      IGNORE_FMT_NONLITERAL_END
> +
>        if (segment_size != 0 && segment_size != 4 && segment_size != 8)
>       {
>         error (0, 0, _("unsupported segment size"));

Separate translations of "Address", "size" and "Segment"?
Or maybe just "Address size" and "Segment size"?

> @@ -6392,8 +6435,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
>       }
>  
>        ptrdiff_t offset = readp - (unsigned char *) data->d_buf;
> -      fprintf (out, _("Table at Offset 0x%" PRIx64 ":\n\n"),
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __("Table at Offset 0x%" PRIx64 ":\n\n"),
>              (uint64_t) offset);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        uint64_t unit_length = read_4ubyte_unaligned_inc (dbg, readp);
>        unsigned int offset_size = 4;

You already do this above for "Table at offset", but here "Offset" is
capitalized. Maybe use the same capitalization in both places?

> @@ -6405,7 +6450,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
>         unit_length = read_8ubyte_unaligned_inc (dbg, readp);
>         offset_size = 8;
>       }
> -      fprintf (out, _(" Length:         %8" PRIu64 "\n"), unit_length);
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" Length:         %8" PRIu64 "\n"), unit_length);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        /* We need at least 2-bytes + 1-byte + 1-byte + 4-bytes = 8
>        bytes to complete the header.  And this unit cannot go beyond

Just translate "Length" separately?

> @@ -6418,7 +6465,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
>        const unsigned char *nexthdr = readp + unit_length;
>  
>        uint16_t version = read_2ubyte_unaligned_inc (dbg, readp);
> -      fprintf (out, _(" DWARF version:  %8" PRIu16 "\n"), version);
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" DWARF version:  %8" PRIu16 "\n"), version);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        if (version != 5)
>       {

Same, translate "version" once, keep DWARF as is.

> @@ -6427,8 +6476,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
>       }
>  
>        uint8_t address_size = *readp++;
> -      fprintf (out, _(" Address size:   %8" PRIu64 "\n"),
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" Address size:   %8" PRIu64 "\n"),
>              (uint64_t) address_size);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        if (address_size != 4 && address_size != 8)
>       {

"Address size" again, see above.

> @@ -6447,8 +6498,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
>          }
>  
>        uint32_t offset_entry_count = read_4ubyte_unaligned_inc (dbg, readp);
> -      fprintf (out, _(" Offset entries: %8" PRIu64 "\n"),
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __(" Offset entries: %8" PRIu64 "\n"),
>              (uint64_t) offset_entry_count);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        /* We need the CU that uses this unit to get the initial base address. 
> */
>        Dwarf_Addr cu_base = 0;


static char *offset_entries_str = _("Offset entries"); ?

> @@ -6463,10 +6516,14 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
>         if (dwarf_cu_die (cu, &cudie,
>                           NULL, NULL, NULL, NULL,
>                           NULL, NULL) == NULL)
> -         fprintf (out, _(" Unknown CU base: "));
> +         fputs (__(" Unknown CU base: "), out);
>         else
> -         fprintf (out, _(" CU [%6" PRIx64 "] base: "),
> -                  dwarf_dieoffset (&cudie));
> +         {
> +           IGNORE_FMT_NONLITERAL_BEGIN
> +           fprintf (out, __(" CU [%6" PRIx64 "] base: "),
> +                    dwarf_dieoffset (&cudie));
> +           IGNORE_FMT_NONLITERAL_END
> +         }
>         print_dwarf_addr (dwflmod, address_size, cu_base, cu_base, out);
>         fprintf (out, "\n");
>       }

Just translate "base", don't translate CU?

> @@ -8556,7 +8613,8 @@ print_debug_units (Dwfl_Module *dwflmod,
>         dieoffset = dwarf_dieoffset (dwarf_offdie_types (dbg, cu->start
>                                                          + subdie_off,
>                                                          &typedie));
> -       fprintf (out, _(" Type unit at offset %" PRIu64 ":\n"
> +       IGNORE_FMT_NONLITERAL_BEGIN;
> +       fprintf (out, __(" Type unit at offset %" PRIu64 ":\n"
>                           " Version: %" PRIu16
>                           ", Abbreviation section offset: %" PRIu64
>                           ", Address size: %" PRIu8
> @@ -8565,26 +8623,36 @@ print_debug_units (Dwfl_Module *dwflmod,
>                           ", Type offset: %#" PRIx64 " [%" PRIx64 "]\n"),
>                  (uint64_t) offset, version, abbroffset, addrsize, offsize,
>                  unit_id, (uint64_t) subdie_off, dieoffset);
> +       IGNORE_FMT_NONLITERAL_END;
>       }

I would split these up. Most words seem repeated: "unit", "version",
"section", "size", "Type", "Address".

>        else
>       {
> -       fprintf (out, _(" Compilation unit at offset %" PRIu64 ":\n"
> +       IGNORE_FMT_NONLITERAL_BEGIN;
> +       fprintf (out, __(" Compilation unit at offset %" PRIu64 ":\n"
>                           " Version: %" PRIu16
>                           ", Abbreviation section offset: %" PRIu64
>                           ", Address size: %" PRIu8
>                           ", Offset size: %" PRIu8 "\n"),
>                  (uint64_t) offset, version, abbroffset, addrsize, offsize);
> +       IGNORE_FMT_NONLITERAL_END;
>  
>         if (version >= 5 || (unit_type != DW_UT_compile
>                              && unit_type != DW_UT_partial))
>           {
> -           fprintf (out, _(" Unit type: %s (%" PRIu8 ")"),
> +
> +           IGNORE_FMT_NONLITERAL_BEGIN;
> +           fprintf (out, __(" Unit type: %s (%" PRIu8 ")"),
>                               dwarf_unit_name (unit_type), unit_type);
> +           IGNORE_FMT_NONLITERAL_END;
>             if (unit_type == DW_UT_type
>                 || unit_type == DW_UT_skeleton
>                 || unit_type == DW_UT_split_compile
>                 || unit_type == DW_UT_split_type)
> -             fprintf (out, ", Unit id: 0x%.16" PRIx64 "", unit_id);
> +             {
> +               IGNORE_FMT_NONLITERAL_BEGIN;
> +               fprintf (out, __(", Unit id: 0x%.16" PRIx64 ""), unit_id);
> +               IGNORE_FMT_NONLITERAL_END;
> +             }
>             if (unit_type == DW_UT_type
>                 || unit_type == DW_UT_split_type)
>               {

Likewise.

> @@ -8593,8 +8661,10 @@ print_debug_units (Dwfl_Module *dwflmod,
>                 dwarf_cu_info (cu, NULL, NULL, NULL, &typedie,
>                                NULL, NULL, NULL);
>                 dieoffset = dwarf_dieoffset (&typedie);
> -               fprintf (out, ", Unit DIE off: %#" PRIx64 " [%" PRIx64 "]",
> +               IGNORE_FMT_NONLITERAL_BEGIN;
> +               fprintf (out, __(", Unit DIE off: %#" PRIx64 " [%" PRIx64 
> "]"),
>                          subdie_off, dieoffset);
> +               IGNORE_FMT_NONLITERAL_END;
>               }
>             fprintf (out, "\n");
>           }

"Unit" and "off" (maybe reuse "offset"?) can be translated separately,
keep "DIE" as is?

> @@ -9179,7 +9249,9 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
> *ebl, GElf_Ehdr *ehdr,
>      {
>        size_t start_offset = linep - (const unsigned char *) data->d_buf;
>  
> -      fprintf (out, _("\nTable at offset %zu:\n"), start_offset);
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __("\nTable at offset %zu:\n"), start_offset);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        if (unlikely (linep + 4 > lineendp))
>       goto invalid_data;

Another "Table at offset".

> @@ -9270,7 +9342,8 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
> *ebl, GElf_Ehdr *ehdr,
>        uint_fast8_t opcode_base = *linep++;
>  
>        /* Print what we got so far.  */
> -      fprintf (out, _("\n"
> +      IGNORE_FMT_NONLITERAL_BEGIN
> +      fprintf (out, __("\n"
>                       " Length:                         %" PRIu64 "\n"
>                       " DWARF version:                  %" PRIuFAST16 "\n"
>                       " Prologue length:                %" PRIu64 "\n"
> @@ -9289,6 +9362,7 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
> *ebl, GElf_Ehdr *ehdr,
>              minimum_instr_len, max_ops_per_instr,
>              default_is_stmt, line_base,
>              line_range, opcode_base);
> +      IGNORE_FMT_NONLITERAL_END
>  
>        if (version < 2 || version > 5)
>       {

Only "Prologue" hasn't been seen before?

> @@ -9344,13 +9418,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
> *ebl, GElf_Ehdr *ehdr,
>  
>        Dwarf_Off str_offsets_base = str_offsets_base_off (dbg, NULL);
>  
> -      fputs (_("\nDirectory table:\n"), out);
> +      fputs (__("\nDirectory table:\n"), out);
>        if (version > 4)
>       {
>         struct encpair { uint16_t desc; uint16_t form; };
>         struct encpair enc[256];
>  
> -       fprintf (out, _("      ["));
> +       fprintf (out, "      [");

I do like this. Nothing to translate.

>         if ((size_t) (lineendp - linep) < 1)
>           goto invalid_data;
>         unsigned char directory_entry_format_count = *linep++;
> @@ -9421,13 +9495,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
> *ebl, GElf_Ehdr *ehdr,
>        if (unlikely (linep >= lineendp))
>       goto invalid_unit;
>  
> -      fputs (_("\nFile name table:\n"), out);
> +      fputs (__("\nFile name table:\n"), out);
>        if (version > 4)
>       {
>         struct encpair { uint16_t desc; uint16_t form; };
>         struct encpair enc[256];
>  
> -       fprintf (out, _("      ["));
> +       fprintf (out, "      [");
>         if ((size_t) (lineendp - linep) < 1)
>           goto invalid_data;
>         unsigned char file_name_format_count = *linep++;

Likewise.

> @@ -9528,11 +9602,11 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
> *ebl, GElf_Ehdr *ehdr,
>  
>        if (linep == lineendp)
>       {
> -       fputs (_("\nNo line number statements.\n"), out);
> +       fputs (__("\nNo line number statements.\n"), out);
>         continue;
>       }
>  
> -      fputs (_("\nLine number statements:\n"), out);
> +      fputs (__("\nLine number statements:\n"), out);
>        Dwarf_Word address = 0;
>        unsigned int op_index = 0;
>        size_t line = 1;
> @@ -9581,15 +9655,25 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl 
> *ebl, GElf_Ehdr *ehdr,
>             line += line_increment;
>             advance_pc ((opcode - opcode_base) / line_range);
>  
> -           fprintf (out, _(" special opcode %u: address+%u = "),
> +           IGNORE_FMT_NONLITERAL_BEGIN
> +           fprintf (out, __(" special opcode %u: address+%u = "),
>                      opcode, op_addr_advance);
> +           IGNORE_FMT_NONLITERAL_END
>             print_dwarf_addr (dwflmod, 0, address, address, out);
>             if (op_index > 0)
> -             fprintf (out, _(", op_index = %u, line%+d = %zu\n"),
> -                      op_index, line_increment, line);
> +             {
> +               IGNORE_FMT_NONLITERAL_BEGIN
> +               fprintf (out, __(", op_index = %u, line%+d = %zu\n"),
> +                        op_index, line_increment, line);
> +               IGNORE_FMT_NONLITERAL_END
> +             }
>             else
> -             fprintf (out, _(", line%+d = %zu\n"),
> -                      line_increment, line);
> +             {
> +               IGNORE_FMT_NONLITERAL_BEGIN
> +               fprintf (out, __(", line%+d = %zu\n"),
> +                        line_increment, line);
> +               IGNORE_FMT_NONLITERAL_END
> +             }
>           }

"line" can probably be translated, but "op_index" is what it is called
in the spec.


Similar comments for the rest of the patch.
I really don't like the macros and the __ very much.
If at all possible I would just do this with explicit
static char *frob_str = _("frob"); strings.

Cheers,

Mark

Re: [PATCH] readelf.c: Avoid repeating calls to gettext _() in hotpath

Reply via email to