Hi Aaron, On Sun, 2025-06-29 at 23:51 -0400, Aaron Merey wrote: > Calls to the gettext _() macro in hotpaths cause many runtime lookups > of the same translation strings. This patch introduces macro __() > which caches the result of _() at each call site where __() is used. > > When a cached translation string is used as the format string for > fprintf, the compiler might raise a -Wformat-nonliteral warning. > > To avoid this, the -Wformat-nonliteral warning is suppressed using _Pragma > around affected fprintf calls, using IGNORE_FMT_NONLITERAL_BEGIN and > _END wrappers. > > To avoid suppressing -Wformat-nonliteral across every printf-style > function using _(), __() and IGNORE_FMT_NONLITERAL_* are mostly used > within hotpath loops where caching makes a signficant difference. > Runtime improvements of up to 13% faster have been observed with this patch > applied.
I can see how these changes can increase runtime (for threaded runs I assume). But I really don't like them very much. It adds a lot of macros/code around a few invocations of _() that probably should just be removed. > @@ -4942,7 +4960,9 @@ print_block (size_t n, const void *block, FILE *out) > fputs (_("empty block\n"), out); Side question, is that extra \n deliberate? fputs already adds an \n. > else > { > - fprintf (out, _("%zu byte block:"), n); > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __("%zu byte block:"), n); > + IGNORE_FMT_NONLITERAL_END > const unsigned char *data = block; > do > fprintf (out, " %02x", *data++); So for these why not simply have translated strings in a static var at the top of print_block: static char *empty_block_str = _("empty block"); static char *byte_block_str = _("byte block"); And then in the loop do: fputs (empty_block_str, out); [... else ...] fprintf (out, "%zu %s:", n, byte_block_str); > @@ -5866,11 +5886,13 @@ print_debug_abbrev_section (Dwfl_Module *dwflmod > __attribute__ ((unused)), > unsigned int tag = dwarf_getabbrevtag (&abbrev); > int has_children = dwarf_abbrevhaschildren (&abbrev); > > + IGNORE_FMT_NONLITERAL_BEGIN > fprintf (out, _(" [%5u] offset: %" PRId64 > ", children: %s, tag: %s\n"), > code, (int64_t) offset, > has_children ? yes_str : no_str, > dwarf_tag_name (tag)); > + IGNORE_FMT_NONLITERAL_END Why is this necessary here? You aren't using __(). But if you do want this for the "offset", "children" and "tag" strings I would handle them just like yes_str and no_str are handled here. > @@ -6210,7 +6232,10 @@ print_debug_aranges_section (Dwfl_Module *dwflmod > __attribute__ ((unused)), > const unsigned char *hdrstart = readp; > size_t start_offset = hdrstart - (const unsigned char *) data->d_buf; > > - fprintf (out, _("\nTable at offset %zu:\n"), start_offset); > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __("\nTable at offset %zu:\n"), start_offset); > + IGNORE_FMT_NONLITERAL_END > + > if (readp + 4 > readendp) > { > invalid_data: Again, if possibly just add a static char *table_at_offset_str = _("table_at_offset"); > @@ -6230,8 +6255,11 @@ print_debug_aranges_section (Dwfl_Module *dwflmod > __attribute__ ((unused)), > } > > const unsigned char *nexthdr = readp + length; > - fprintf (out, _("\n Length: %6" PRIu64 "\n"), > + > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __("\n Length: %6" PRIu64 "\n"), > (uint64_t) length); > + IGNORE_FMT_NONLITERAL_END > > if (unlikely (length > (size_t) (readendp - readp))) > goto invalid_data; "Length" can probably also be translated once and then reused everywhere. > @@ -6242,8 +6270,12 @@ print_debug_aranges_section (Dwfl_Module *dwflmod > __attribute__ ((unused)), > if (readp + 2 > readendp) > goto invalid_data; > uint_fast16_t version = read_2ubyte_unaligned_inc (dbg, readp); > - fprintf (out, _(" DWARF version: %6" PRIuFAST16 "\n"), > + > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" DWARF version: %6" PRIuFAST16 "\n"), > version); > + IGNORE_FMT_NONLITERAL_END > + > if (version != 2) > { > error (0, 0, _("unsupported aranges version")); Same for "version". I wouldn't try to translate "DWARF". > @@ -6257,14 +6289,21 @@ print_debug_aranges_section (Dwfl_Module *dwflmod > __attribute__ ((unused)), > offset = read_8ubyte_unaligned_inc (dbg, readp); > else > offset = read_4ubyte_unaligned_inc (dbg, readp); > - fprintf (out, _(" CU offset: %6" PRIx64 "\n"), > + > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" CU offset: %6" PRIx64 "\n"), > (uint64_t) offset); > + IGNORE_FMT_NONLITERAL_END > > if (readp + 1 > readendp) Just translate "offset" once, keep CU (don't think it is sensible to translate that). > goto invalid_data; > unsigned int address_size = *readp++; > - fprintf (out, _(" Address size: %6" PRIu64 "\n"), > + > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" Address size: %6" PRIu64 "\n"), > (uint64_t) address_size); > + IGNORE_FMT_NONLITERAL_END > + > if (address_size != 4 && address_size != 8) > { > error (0, 0, _("unsupported address size")); > @@ -6273,9 +6312,13 @@ print_debug_aranges_section (Dwfl_Module *dwflmod > __attribute__ ((unused)), > > if (readp + 1 > readendp) > goto invalid_data; > + > unsigned int segment_size = *readp++; > - fprintf (out, _(" Segment size: %6" PRIu64 "\n\n"), > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" Segment size: %6" PRIu64 "\n\n"), > (uint64_t) segment_size); > + IGNORE_FMT_NONLITERAL_END > + > if (segment_size != 0 && segment_size != 4 && segment_size != 8) > { > error (0, 0, _("unsupported segment size")); Separate translations of "Address", "size" and "Segment"? Or maybe just "Address size" and "Segment size"? > @@ -6392,8 +6435,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod, > } > > ptrdiff_t offset = readp - (unsigned char *) data->d_buf; > - fprintf (out, _("Table at Offset 0x%" PRIx64 ":\n\n"), > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __("Table at Offset 0x%" PRIx64 ":\n\n"), > (uint64_t) offset); > + IGNORE_FMT_NONLITERAL_END > > uint64_t unit_length = read_4ubyte_unaligned_inc (dbg, readp); > unsigned int offset_size = 4; You already do this above for "Table at offset", but here "Offset" is capitalized. Maybe use the same capitalization in both places? > @@ -6405,7 +6450,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod, > unit_length = read_8ubyte_unaligned_inc (dbg, readp); > offset_size = 8; > } > - fprintf (out, _(" Length: %8" PRIu64 "\n"), unit_length); > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" Length: %8" PRIu64 "\n"), unit_length); > + IGNORE_FMT_NONLITERAL_END > > /* We need at least 2-bytes + 1-byte + 1-byte + 4-bytes = 8 > bytes to complete the header. And this unit cannot go beyond Just translate "Length" separately? > @@ -6418,7 +6465,9 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod, > const unsigned char *nexthdr = readp + unit_length; > > uint16_t version = read_2ubyte_unaligned_inc (dbg, readp); > - fprintf (out, _(" DWARF version: %8" PRIu16 "\n"), version); > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" DWARF version: %8" PRIu16 "\n"), version); > + IGNORE_FMT_NONLITERAL_END > > if (version != 5) > { Same, translation once of "version" keep DWARF as is. > @@ -6427,8 +6476,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod, > } > > uint8_t address_size = *readp++; > - fprintf (out, _(" Address size: %8" PRIu64 "\n"), > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" Address size: %8" PRIu64 "\n"), > (uint64_t) address_size); > + IGNORE_FMT_NONLITERAL_END > > if (address_size != 4 && address_size != 8) > { "Address size" again, see above. > @@ -6447,8 +6498,10 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod, > } > > uint32_t offset_entry_count = read_4ubyte_unaligned_inc (dbg, readp); > - fprintf (out, _(" Offset entries: %8" PRIu64 "\n"), > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" Offset entries: %8" PRIu64 "\n"), > (uint64_t) offset_entry_count); > + IGNORE_FMT_NONLITERAL_END > > /* We need the CU that uses this unit to get the initial base address. > */ > Dwarf_Addr cu_base = 0; static char *offset_entries_str" = _("Offset entries"); ? > @@ -6463,10 +6516,14 @@ print_debug_rnglists_section (Dwfl_Module *dwflmod, > if (dwarf_cu_die (cu, &cudie, > NULL, NULL, NULL, NULL, > NULL, NULL) == NULL) > - fprintf (out, _(" Unknown CU base: ")); > + fputs (__(" Unknown CU base: "), out); > else > - fprintf (out, _(" CU [%6" PRIx64 "] base: "), > - dwarf_dieoffset (&cudie)); > + { > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" CU [%6" PRIx64 "] base: "), > + dwarf_dieoffset (&cudie)); > + IGNORE_FMT_NONLITERAL_END > + } > print_dwarf_addr (dwflmod, address_size, cu_base, cu_base, out); > fprintf (out, "\n"); > } Just translate "base" don't translate CU? > @@ -8556,7 +8613,8 @@ print_debug_units (Dwfl_Module *dwflmod, > dieoffset = dwarf_dieoffset (dwarf_offdie_types (dbg, cu->start > + subdie_off, > &typedie)); > - fprintf (out, _(" Type unit at offset %" PRIu64 ":\n" > + IGNORE_FMT_NONLITERAL_BEGIN; > + fprintf (out, __(" Type unit at offset %" PRIu64 ":\n" > " Version: %" PRIu16 > ", Abbreviation section offset: %" PRIu64 > ", Address size: %" PRIu8 > @@ -8565,26 +8623,36 @@ print_debug_units (Dwfl_Module *dwflmod, > ", Type offset: %#" PRIx64 " [%" PRIx64 "]\n"), > (uint64_t) offset, version, abbroffset, addrsize, offsize, > unit_id, (uint64_t) subdie_off, dieoffset); > + IGNORE_FMT_NONLITERAL_END; > } I would split these up. Most words seem repeated "unit, "version", "section", "size", "Type", "Address". > else > { > - fprintf (out, _(" Compilation unit at offset %" PRIu64 ":\n" > + IGNORE_FMT_NONLITERAL_BEGIN; > + fprintf (out, __(" Compilation unit at offset %" PRIu64 ":\n" > " Version: %" PRIu16 > ", Abbreviation section offset: %" PRIu64 > ", Address size: %" PRIu8 > ", Offset size: %" PRIu8 "\n"), > (uint64_t) offset, version, abbroffset, addrsize, offsize); > + IGNORE_FMT_NONLITERAL_END; > > if (version >= 5 || (unit_type != DW_UT_compile > && unit_type != DW_UT_partial)) > { > - fprintf (out, _(" Unit type: %s (%" PRIu8 ")"), > + > + IGNORE_FMT_NONLITERAL_BEGIN; > + fprintf (out, __(" Unit type: %s (%" PRIu8 ")"), > dwarf_unit_name (unit_type), unit_type); > + IGNORE_FMT_NONLITERAL_END; > if (unit_type == DW_UT_type > || unit_type == DW_UT_skeleton > || unit_type == DW_UT_split_compile > || unit_type == DW_UT_split_type) > - fprintf (out, ", Unit id: 0x%.16" PRIx64 "", unit_id); > + { > + IGNORE_FMT_NONLITERAL_BEGIN; > + fprintf (out, __(", Unit id: 0x%.16" PRIx64 ""), unit_id); > + IGNORE_FMT_NONLITERAL_END; > + } > if (unit_type == DW_UT_type > || unit_type == DW_UT_split_type) > { Likewise. > @@ -8593,8 +8661,10 @@ print_debug_units (Dwfl_Module *dwflmod, > dwarf_cu_info (cu, NULL, NULL, NULL, &typedie, > NULL, NULL, NULL); > dieoffset = dwarf_dieoffset (&typedie); > - fprintf (out, ", Unit DIE off: %#" PRIx64 " [%" PRIx64 "]", > + IGNORE_FMT_NONLITERAL_BEGIN; > + fprintf (out, __(", Unit DIE off: %#" PRIx64 " [%" PRIx64 > "]"), > subdie_off, dieoffset); > + IGNORE_FMT_NONLITERAL_END; > } > fprintf (out, "\n"); > } "Unit" and "off" (maybe reuse "offset"?) can be translated separately, keep "DIE" as is? > @@ -9179,7 +9249,9 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl > *ebl, GElf_Ehdr *ehdr, > { > size_t start_offset = linep - (const unsigned char *) data->d_buf; > > - fprintf (out, _("\nTable at offset %zu:\n"), start_offset); > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __("\nTable at offset %zu:\n"), start_offset); > + IGNORE_FMT_NONLITERAL_END > > if (unlikely (linep + 4 > lineendp)) > goto invalid_data; Another "Table at offset". > @@ -9270,7 +9342,8 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl > *ebl, GElf_Ehdr *ehdr, > uint_fast8_t opcode_base = *linep++; > > /* Print what we got so far. */ > - fprintf (out, _("\n" > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __("\n" > " Length: %" PRIu64 "\n" > " DWARF version: %" PRIuFAST16 "\n" > " Prologue length: %" PRIu64 "\n" > @@ -9289,6 +9362,7 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl > *ebl, GElf_Ehdr *ehdr, > minimum_instr_len, max_ops_per_instr, > default_is_stmt, line_base, > line_range, opcode_base); > + IGNORE_FMT_NONLITERAL_END > > if (version < 2 || version > 5) > { Only "Prologue" hasn't been seen before? > @@ -9344,13 +9418,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl > *ebl, GElf_Ehdr *ehdr, > > Dwarf_Off str_offsets_base = str_offsets_base_off (dbg, NULL); > > - fputs (_("\nDirectory table:\n"), out); > + fputs (__("\nDirectory table:\n"), out); > if (version > 4) > { > struct encpair { uint16_t desc; uint16_t form; }; > struct encpair enc[256]; > > - fprintf (out, _(" [")); > + fprintf (out, " ["); I do like this. Nothing to translate. > if ((size_t) (lineendp - linep) < 1) > goto invalid_data; > unsigned char directory_entry_format_count = *linep++; > @@ -9421,13 +9495,13 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl > *ebl, GElf_Ehdr *ehdr, > if (unlikely (linep >= lineendp)) > goto invalid_unit; > > - fputs (_("\nFile name table:\n"), out); > + fputs (__("\nFile name table:\n"), out); > if (version > 4) > { > struct encpair { uint16_t desc; uint16_t form; }; > struct encpair enc[256]; > > - fprintf (out, _(" [")); > + fprintf (out, " ["); > if ((size_t) (lineendp - linep) < 1) > goto invalid_data; > unsigned char file_name_format_count = *linep++; Likewise. > @@ -9528,11 +9602,11 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl > *ebl, GElf_Ehdr *ehdr, > > if (linep == lineendp) > { > - fputs (_("\nNo line number statements.\n"), out); > + fputs (__("\nNo line number statements.\n"), out); > continue; > } > > - fputs (_("\nLine number statements:\n"), out); > + fputs (__("\nLine number statements:\n"), out); > Dwarf_Word address = 0; > unsigned int op_index = 0; > size_t line = 1; > @@ -9581,15 +9655,25 @@ print_debug_line_section (Dwfl_Module *dwflmod, Ebl > *ebl, GElf_Ehdr *ehdr, > line += line_increment; > advance_pc ((opcode - opcode_base) / line_range); > > - fprintf (out, _(" special opcode %u: address+%u = "), > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(" special opcode %u: address+%u = "), > opcode, op_addr_advance); > + IGNORE_FMT_NONLITERAL_END > print_dwarf_addr (dwflmod, 0, address, address, out); > if (op_index > 0) > - fprintf (out, _(", op_index = %u, line%+d = %zu\n"), > - op_index, line_increment, line); > + { > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(", op_index = %u, line%+d = %zu\n"), > + op_index, line_increment, line); > + IGNORE_FMT_NONLITERAL_END > + } > else > - fprintf (out, _(", line%+d = %zu\n"), > - line_increment, line); > + { > + IGNORE_FMT_NONLITERAL_BEGIN > + fprintf (out, __(", line%+d = %zu\n"), > + line_increment, line); > + IGNORE_FMT_NONLITERAL_END > + } > } "line" can probably be translated, but "op_index" is how it is called in the spec. Similar comments for the rest of the patch. I really don't like the macros and the __ very much. If at all possible I would just do this with explicit static char *frob_str = _("frob"); strings. Cheers, Mark