Hi Olivier,
I think I like your idea a lot, it should be highly useful for Fortran and GO as well. Would somthing something like this (untested patch) work for you on top of my patch series. It makes use of the zero-termination properties of STRING_CSTs, that the other patch ensures. I have two C test cases for this: $ cat y3.c const char str[3] = "abc"; $ gcc -fmerge-all-constants -S y3.c $ cat y3.s .file "y3.c" .text .globl str .section .rodata.str1.1,"aMS",@progbits,1 .type str, @object .size str, 3 str: .string "abc" .ident "GCC: (GNU) 9.0.0 20180730 (experimental)" .section .note.GNU-stack,"",@progbits $ cat y4.c const char str[4] = "abc"; $ gcc -fmerge-all-constants -S y4.c $ cat y4.s $ cat y4.s .file "y4.c" .text .globl str .section .rodata.str1.1,"aMS",@progbits,1 .type str, @object .size str, 4 str: .string "abc" .ident "GCC: (GNU) 9.0.0 20180730 (experimental)" .section .note.GNU-stack,"",@progbits The .size directive uses the DECL_SIZE_UNIT, and is still 3. I don't know of that needs to change or not. But what is important that we have .string "abc" and not .ascii "abc". What do you think? Thanks Bernd.
Index: gcc/varasm.c =================================================================== --- gcc/varasm.c (revision 263072) +++ gcc/varasm.c (working copy) @@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre static void output_constant_def_contents (rtx); static void output_addressed_constants (tree); static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, - unsigned int, bool); + unsigned int, bool, + bool = false); static void globalize_decl (tree); static bool decl_readonly_section_1 (enum section_category); #ifdef BSS_SECTION_ASM_OP @@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS unit = GET_MODE_SIZE (mode); /* Check for embedded NUL characters. */ - for (i = 0; i < len; i += unit) + for (i = 0; i < len - unit; i += unit) { for (j = 0; j < unit; j++) if (str[i + j] != '\0') @@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char static void assemble_variable_contents (tree decl, const char *name, - bool dont_output_data) + bool dont_output_data, bool merge_strings = false) { /* Do any machine/system dependent processing of the object. */ #ifdef ASM_DECLARE_OBJECT_NAME @@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char output_constant (DECL_INITIAL (decl), tree_to_uhwi (DECL_SIZE_UNIT (decl)), get_variable_align (decl), - false); + false, merge_strings); else /* Leave space for it. */ assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl))); @@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_variable_contents (decl, name, dont_output_data); + assemble_variable_contents (decl, name, dont_output_data, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { unsigned HOST_WIDE_INT int size @@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool, oc_outer_state *); +/* Find out if a string P of length LEN and character size UNIT + has double nul termination. + The string is already known to have a nul termination at + offset LEN. Find out if there is already a NUL termination + at offset LEN - UNIT. */ + +static bool +redundant_nul_term_p (const char *p, unsigned len, unsigned unit) +{ + gcc_assert(len >= unit); + return !memcmp (p + len, p + len - unit, unit); +} + /* Output assembler code for constant EXP, with no label. This includes the pseudo-op such as ".int" or ".byte", and a newline. Assumes output_addressed_constants has been done on EXP already. @@ -4812,7 +4828,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT, static unsigned HOST_WIDE_INT output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align, - bool reverse) + bool reverse, bool merge_strings /* = false */) { enum tree_code code; unsigned HOST_WIDE_INT thissize; @@ -4940,8 +4956,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT case CONSTRUCTOR: return output_constructor (exp, size, align, reverse, NULL); case STRING_CST: - thissize - = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size); + thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp); + if (thissize > size && merge_strings + && !redundant_nul_term_p (TREE_STRING_POINTER (exp), + size, thissize - size)) + ; + else + thissize = MIN (thissize, size); assemble_string (TREE_STRING_POINTER (exp), thissize); break; case VECTOR_CST: