Hi Aaron,
On Sun, 2025-06-29 at 23:51 -0400, Aaron Merey wrote:
> Calls to the gettext _() macro in hotpaths cause many runtime lookups
> of the same translation strings. This patch introduces macro __()
> which caches the result of _() at each call site where __() is used.
>
> When a cached translation string is used as the format string for
> fprintf, the compiler might raise a -Wformat-nonliteral warning.
>
> To avoid this, the -Wformat-nonliteral warning is suppressed using _Pragma
> around affected fprintf calls, using IGNORE_FMT_NONLITERAL_BEGIN and
> _END wrappers.
>
> To avoid suppressing -Wformat-nonliteral across every printf-style
> function using _(), __() and IGNORE_FMT_NONLITERAL_* are mostly used
> within hotpath loops where caching makes a signficant difference.
> Runtime improvements of up to 13% faster have been observed with this patch
> applied.
I can see how these changes can increase runtime (for threaded runs I
assume). But I really don't like them very much. It adds a lot of
macros/code around a few invocations of _() that probably should just
be removed.
> @@ -4942,7 +4960,9 @@ print_block (size_t n, const void *block, FILE *out)
> fputs (_("empty block\n"), out);
Side question, is that extra \n deliberate? fputs already adds an \n.
> else
> {
> - fprintf (out, _("%zu byte block:"), n);
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __("%zu byte block:"), n);
> + IGNORE_FMT_NONLITERAL_END
> const unsigned char *data = block;
> do
> fprintf (out, " %02x", *data++);
So for these why not simply have translated strings in a static var at
the top of print_block:
static char *empty_block_str = _("empty block");
static char *byte_block_str = _("byte block");
And then in the loop do:
fputs (empty_block_str, out);
[... else ...]
fprintf (out, "%zu %s:", n, byte_block_str);
> @@ -5866,11 +5886,13 @@ print_debug_abbrev_section (Dwfl_Module *dwflmod
> __attribute__ ((unused)),
> unsigned int tag = dwarf_getabbrevtag (&abbrev);
> int has_children = dwarf_abbrevhaschildren (&abbrev);
>
> + IGNORE_FMT_NONLITERAL_BEGIN
> fprintf (out, _(" [%5u] offset: %" PRId64
> ", children: %s, tag: %s\n"),
> code, (int64_t) offset,
> has_children ? yes_str : no_str,
> dwarf_tag_name (tag));
> + IGNORE_FMT_NONLITERAL_END
Why is this necessary here? You aren't using __().
But if you do want this for the "offset", "children" and "tag" strings
I would handle them just like yes_str and no_str are handled here.
> @@ -6210,7 +6232,10 @@ print_debug_aranges_section (Dwfl_Module *dwflmod
> __attribute__ ((unused)),
> const unsigned char *hdrstart = readp;
> size_t start_offset = hdrstart - (const unsigned char *) data->d_buf;
>
> - fprintf (out, _("\nTable at offset %zu:\n"), start_offset);
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __("\nTable at offset %zu:\n"), start_offset);
> + IGNORE_FMT_NONLITERAL_END
> +
> if (readp + 4 > readendp)
> {
> invalid_data:
Again, if possibly just add a static char *table_at_offset_str =
_("table_at_offset");
> @@ -6230,8 +6255,11 @@ print_debug_aranges_section (Dwfl_Module *dwflmod
> __attribute__ ((unused)),
> }
>
> const unsigned char *nexthdr = readp + length;
> - fprintf (out, _("\n Length: %6" PRIu64 "\n"),
> +
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __("\n Length: %6" PRIu64 "\n"),
> (uint64_t) length);
> + IGNORE_FMT_NONLITERAL_END
>
> if (unlikely (length > (size_t) (readendp - readp)))
> goto invalid_data;
"Length" can probably also be translated once and then reused
everywhere.
> @@ -6242,8 +6270,12 @@ print_debug_aranges_section (Dwfl_Module *dwflmod
> __attribute__ ((unused)),
> if (readp + 2 > readendp)
> goto invalid_data;
> uint_fast16_t version = read_2ubyte_unaligned_inc (dbg, readp);
> - fprintf (out, _(" DWARF version: %6" PRIuFAST16 "\n"),
> +
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" DWARF version: %6" PRIuFAST16 "\n"),
> version);
> + IGNORE_FMT_NONLITERAL_END
> +
> if (version != 2)
> {
> error (0, 0, _("unsupported aranges version"));
Same for "version". I wouldn't try to translate "DWARF".
> @@ -6257,14 +6289,21 @@ print_debug_aranges_section (Dwfl_Module *dwflmod
> __attribute__ ((unused)),
> offset = read_8ubyte_unaligned_inc (dbg, readp);
> else
> offset = read_4ubyte_unaligned_inc (dbg, readp);
> - fprintf (out, _(" CU offset: %6" PRIx64 "\n"),
> +
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" CU offset: %6" PRIx64 "\n"),
> (uint64_t) offset);
> + IGNORE_FMT_NONLITERAL_END
>
> if (readp + 1 > readendp)
Just translate "offset" once, keep CU (don't think it is sensible to
translate that).
> goto invalid_data;
> unsigned int address_size = *readp++;
> - fprintf (out, _(" Address size: %6" PRIu64 "\n"),
> +
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" Address size: %6" PRIu64 "\n"),
> (uint64_t) address_size);
> + IGNORE_FMT_NONLITERAL_END
> +
> if (address_size != 4 && address_size != 8)
> {
> error (0, 0, _("unsupported address size"));
> @@ -6273,9 +6312,13 @@ print_debug_aranges_section (Dwfl_Module *dwflmod
> __attribute__ ((unused)),
>
> if (readp + 1 > readendp)
> goto invalid_data;
> +
> unsigned int segment_size = *readp++;
> - fprintf (out, _(" Segment size: %6" PRIu64 "\n\n"),
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" Segment size: %6" PRIu64 "\n\n"),
> (uint64_t) segment_size);
> + IGNORE_FMT_NONLITERAL_END
> +
> if (segment_size != 0 && segment_size != 4 && segment_size != 8)
> {
> error (0, 0, _("unsupported segment size"));
Separate translations of "Address", "size" and "Segment"?
Or maybe just "Address size" and "Segment size"?
> @@ -6392,8 +6435,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
> }
>
> ptrdiff_t offset = readp - (unsigned char *) data->d_buf;
> - fprintf (out, _("Table at Offset 0x%" PRIx64 ":\n\n"),
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __("Table at Offset 0x%" PRIx64 ":\n\n"),
> (uint64_t) offset);
> + IGNORE_FMT_NONLITERAL_END
>
> uint64_t unit_length = read_4ubyte_unaligned_inc (dbg, readp);
> unsigned int offset_size = 4;
You already do this above for "Table at offset", but here "Offset" is
capitalized. Maybe use the same capitalization in both places?
> @@ -6405,7 +6450,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
> unit_length = read_8ubyte_unaligned_inc (dbg, readp);
> offset_size = 8;
> }
> - fprintf (out, _(" Length: %8" PRIu64 "\n"), unit_length);
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" Length: %8" PRIu64 "\n"), unit_length);
> + IGNORE_FMT_NONLITERAL_END
>
> /* We need at least 2-bytes + 1-byte + 1-byte + 4-bytes = 8
> bytes to complete the header. And this unit cannot go beyond
Just translate "Length" separately?
> @@ -6418,7 +6465,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
> const unsigned char *nexthdr = readp + unit_length;
>
> uint16_t version = read_2ubyte_unaligned_inc (dbg, readp);
> - fprintf (out, _(" DWARF version: %8" PRIu16 "\n"), version);
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" DWARF version: %8" PRIu16 "\n"), version);
> + IGNORE_FMT_NONLITERAL_END
>
> if (version != 5)
> {
Same, translation once of "version" keep DWARF as is.
> @@ -6427,8 +6476,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
> }
>
> uint8_t address_size = *readp++;
> - fprintf (out, _(" Address size: %8" PRIu64 "\n"),
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" Address size: %8" PRIu64 "\n"),
> (uint64_t) address_size);
> + IGNORE_FMT_NONLITERAL_END
>
> if (address_size != 4 && address_size != 8)
> {
"Address size" again, see above.
> @@ -6447,8 +6498,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
> }
>
> uint32_t offset_entry_count = read_4ubyte_unaligned_inc (dbg, readp);
> - fprintf (out, _(" Offset entries: %8" PRIu64 "\n"),
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" Offset entries: %8" PRIu64 "\n"),
> (uint64_t) offset_entry_count);
> + IGNORE_FMT_NONLITERAL_END
>
> /* We need the CU that uses this unit to get the initial base address.
> */
> Dwarf_Addr cu_base = 0;
static char *offset_entries_str" = _("Offset entries"); ?
> @@ -6463,10 +6516,14 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod,
> if (dwarf_cu_die (cu, &cudie,
> NULL, NULL, NULL, NULL,
> NULL, NULL) == NULL)
> - fprintf (out, _(" Unknown CU base: "));
> + fputs (__(" Unknown CU base: "), out);
> else
> - fprintf (out, _(" CU [%6" PRIx64 "] base: "),
> - dwarf_dieoffset (&cudie));
> + {
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" CU [%6" PRIx64 "] base: "),
> + dwarf_dieoffset (&cudie));
> + IGNORE_FMT_NONLITERAL_END
> + }
> print_dwarf_addr (dwflmod, address_size, cu_base, cu_base, out);
> fprintf (out, "\n");
> }
Just translate "base" don't translate CU?
> @@ -8556,7 +8613,8 @@ print_debug_units (Dwfl_Module *dwflmod,
> dieoffset = dwarf_dieoffset (dwarf_offdie_types (dbg, cu->start
> + subdie_off,
> &typedie));
> - fprintf (out, _(" Type unit at offset %" PRIu64 ":\n"
> + IGNORE_FMT_NONLITERAL_BEGIN;
> + fprintf (out, __(" Type unit at offset %" PRIu64 ":\n"
> " Version: %" PRIu16
> ", Abbreviation section offset: %" PRIu64
> ", Address size: %" PRIu8
> @@ -8565,26 +8623,36 @@ print_debug_units (Dwfl_Module *dwflmod,
> ", Type offset: %#" PRIx64 " [%" PRIx64 "]\n"),
> (uint64_t) offset, version, abbroffset, addrsize, offsize,
> unit_id, (uint64_t) subdie_off, dieoffset);
> + IGNORE_FMT_NONLITERAL_END;
> }
I would split these up. Most words seem repeated "unit, "version",
"section", "size", "Type", "Address".
> else
> {
> - fprintf (out, _(" Compilation unit at offset %" PRIu64 ":\n"
> + IGNORE_FMT_NONLITERAL_BEGIN;
> + fprintf (out, __(" Compilation unit at offset %" PRIu64 ":\n"
> " Version: %" PRIu16
> ", Abbreviation section offset: %" PRIu64
> ", Address size: %" PRIu8
> ", Offset size: %" PRIu8 "\n"),
> (uint64_t) offset, version, abbroffset, addrsize, offsize);
> + IGNORE_FMT_NONLITERAL_END;
>
> if (version >= 5 || (unit_type != DW_UT_compile
> && unit_type != DW_UT_partial))
> {
> - fprintf (out, _(" Unit type: %s (%" PRIu8 ")"),
> +
> + IGNORE_FMT_NONLITERAL_BEGIN;
> + fprintf (out, __(" Unit type: %s (%" PRIu8 ")"),
> dwarf_unit_name (unit_type), unit_type);
> + IGNORE_FMT_NONLITERAL_END;
> if (unit_type == DW_UT_type
> || unit_type == DW_UT_skeleton
> || unit_type == DW_UT_split_compile
> || unit_type == DW_UT_split_type)
> - fprintf (out, ", Unit id: 0x%.16" PRIx64 "", unit_id);
> + {
> + IGNORE_FMT_NONLITERAL_BEGIN;
> + fprintf (out, __(", Unit id: 0x%.16" PRIx64 ""), unit_id);
> + IGNORE_FMT_NONLITERAL_END;
> + }
> if (unit_type == DW_UT_type
> || unit_type == DW_UT_split_type)
> {
Likewise.
> @@ -8593,8 +8661,10 @@ print_debug_units (Dwfl_Module *dwflmod,
> dwarf_cu_info (cu, NULL, NULL, NULL, &typedie,
> NULL, NULL, NULL);
> dieoffset = dwarf_dieoffset (&typedie);
> - fprintf (out, ", Unit DIE off: %#" PRIx64 " [%" PRIx64 "]",
> + IGNORE_FMT_NONLITERAL_BEGIN;
> + fprintf (out, __(", Unit DIE off: %#" PRIx64 " [%" PRIx64
> "]"),
> subdie_off, dieoffset);
> + IGNORE_FMT_NONLITERAL_END;
> }
> fprintf (out, "\n");
> }
"Unit" and "off" (maybe reuse "offset"?) can be translated separately,
keep "DIE" as is?
> @@ -9179,7 +9249,9 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl
> *ebl, GElf_Ehdr *ehdr,
> {
> size_t start_offset = linep - (const unsigned char *) data->d_buf;
>
> - fprintf (out, _("\nTable at offset %zu:\n"), start_offset);
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __("\nTable at offset %zu:\n"), start_offset);
> + IGNORE_FMT_NONLITERAL_END
>
> if (unlikely (linep + 4 > lineendp))
> goto invalid_data;
Another "Table at offset".
> @@ -9270,7 +9342,8 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl
> *ebl, GElf_Ehdr *ehdr,
> uint_fast8_t opcode_base = *linep++;
>
> /* Print what we got so far. */
> - fprintf (out, _("\n"
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __("\n"
> " Length: %" PRIu64 "\n"
> " DWARF version: %" PRIuFAST16 "\n"
> " Prologue length: %" PRIu64 "\n"
> @@ -9289,6 +9362,7 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl
> *ebl, GElf_Ehdr *ehdr,
> minimum_instr_len, max_ops_per_instr,
> default_is_stmt, line_base,
> line_range, opcode_base);
> + IGNORE_FMT_NONLITERAL_END
>
> if (version < 2 || version > 5)
> {
Only "Prologue" hasn't been seen before?
> @@ -9344,13 +9418,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl
> *ebl, GElf_Ehdr *ehdr,
>
> Dwarf_Off str_offsets_base = str_offsets_base_off (dbg, NULL);
>
> - fputs (_("\nDirectory table:\n"), out);
> + fputs (__("\nDirectory table:\n"), out);
> if (version > 4)
> {
> struct encpair { uint16_t desc; uint16_t form; };
> struct encpair enc[256];
>
> - fprintf (out, _(" ["));
> + fprintf (out, " [");
I do like this. Nothing to translate.
> if ((size_t) (lineendp - linep) < 1)
> goto invalid_data;
> unsigned char directory_entry_format_count = *linep++;
> @@ -9421,13 +9495,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl
> *ebl, GElf_Ehdr *ehdr,
> if (unlikely (linep >= lineendp))
> goto invalid_unit;
>
> - fputs (_("\nFile name table:\n"), out);
> + fputs (__("\nFile name table:\n"), out);
> if (version > 4)
> {
> struct encpair { uint16_t desc; uint16_t form; };
> struct encpair enc[256];
>
> - fprintf (out, _(" ["));
> + fprintf (out, " [");
> if ((size_t) (lineendp - linep) < 1)
> goto invalid_data;
> unsigned char file_name_format_count = *linep++;
Likewise.
> @@ -9528,11 +9602,11 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl
> *ebl, GElf_Ehdr *ehdr,
>
> if (linep == lineendp)
> {
> - fputs (_("\nNo line number statements.\n"), out);
> + fputs (__("\nNo line number statements.\n"), out);
> continue;
> }
>
> - fputs (_("\nLine number statements:\n"), out);
> + fputs (__("\nLine number statements:\n"), out);
> Dwarf_Word address = 0;
> unsigned int op_index = 0;
> size_t line = 1;
> @@ -9581,15 +9655,25 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl
> *ebl, GElf_Ehdr *ehdr,
> line += line_increment;
> advance_pc ((opcode - opcode_base) / line_range);
>
> - fprintf (out, _(" special opcode %u: address+%u = "),
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(" special opcode %u: address+%u = "),
> opcode, op_addr_advance);
> + IGNORE_FMT_NONLITERAL_END
> print_dwarf_addr (dwflmod, 0, address, address, out);
> if (op_index > 0)
> - fprintf (out, _(", op_index = %u, line%+d = %zu\n"),
> - op_index, line_increment, line);
> + {
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(", op_index = %u, line%+d = %zu\n"),
> + op_index, line_increment, line);
> + IGNORE_FMT_NONLITERAL_END
> + }
> else
> - fprintf (out, _(", line%+d = %zu\n"),
> - line_increment, line);
> + {
> + IGNORE_FMT_NONLITERAL_BEGIN
> + fprintf (out, __(", line%+d = %zu\n"),
> + line_increment, line);
> + IGNORE_FMT_NONLITERAL_END
> + }
> }
"line" can probably be translated, but "op_index" is how it is called
in the spec.
Similar comments for the rest of the patch.
I really don't like the macros and the __ very much.
If at all possible I would just do this with explicit
static char *frob_str = _("frob"); strings.
Cheers,
Mark