Hi, this is an update of the patch, which has just two Ada test cases added for string merging which are based on Olivier's suggested test case.
Note that except the "Check STRING_CSTs in varasm.c" patch series, there is now another patch needed to work around issues with 71625: [PATCH] Call braced_list_to_string after array size is fixed https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01015.html Thanks Bernd. On 08/07/18 14:55, Bernd Edlinger wrote: > Hi, > > I realized that the previous version did not handle > zero terminated Ada constants correctly as in > > $ cat hello.adb > procedure Hello is > procedure puts (S : String) with Import, Convention => C; > X : String(1..8) := "abcdefg" & Ascii.Nul; > begin > puts ("1234" & Ascii.Nul ); > puts (X); > end; > > accidentally strings were doubly nul-terminated, because I forgot to > handle merge string sections in assemble_constant_contents, and > because get_constant_size increased the string literal by one. > > Fixed, and added a positive test case for the merging non-zero terminated > strings in C. > > > Boot-strapped and regtested on x86_64-pc-linux-gnu. > Is it OK for trunk (after pre-condition patches)? > > > Thanks > Bernd.
gcc: 2018-08-04 Bernd Edlinger <bernd.edlin...@hotmail.de> * varasm.c (output_constant): Add new parameter merge_strings. Make strings properly zero terminated in merge string sections. (mergeable_string_section): Don't fail if the last char is non-zero. (assemble_variable_contents): Handle merge string sections. (assemble_variable): Likewise. (assemble_constant_contents): Likewise. (output_constant_def_contents): Likewise. (get_constant_size): Remove special handling of STRING_CSTs. (redundant_nul_term_p): New helper function. testsuite: 2018-08-04 Bernd Edlinger <bernd.edlin...@hotmail.de> * gnat.dg/string_merge1.adb: New test. * gnat.dg/string_merge2.adb: New test. * gcc.dg/merge-all-constants-1.c: Adjust test. * gcc.dg/merge-all-constants-2.c: New test. Index: gcc/varasm.c =================================================================== --- gcc/varasm.c (revision 263072) +++ gcc/varasm.c (working copy) @@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre static void output_constant_def_contents (rtx); static void output_addressed_constants (tree); static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, - unsigned int, bool); + unsigned int, bool, + bool = false); static void globalize_decl (tree); static bool decl_readonly_section_1 (enum section_category); #ifdef BSS_SECTION_ASM_OP @@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS unit = GET_MODE_SIZE (mode); /* Check for embedded NUL characters. */ - for (i = 0; i < len; i += unit) + for (i = 0; i < len - unit; i += unit) { for (j = 0; j < unit; j++) if (str[i + j] != '\0') @@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char static void assemble_variable_contents (tree decl, const char *name, - bool dont_output_data) + bool dont_output_data, bool merge_strings = false) { /* Do any machine/system dependent processing of the object. */ #ifdef ASM_DECLARE_OBJECT_NAME @@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char output_constant (DECL_INITIAL (decl), tree_to_uhwi (DECL_SIZE_UNIT (decl)), get_variable_align (decl), - false); + false, merge_strings); else /* Leave space for it. */ assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl))); @@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_variable_contents (decl, name, dont_output_data); + assemble_variable_contents (decl, name, dont_output_data, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { unsigned HOST_WIDE_INT int size @@ -3300,12 +3303,7 @@ get_constant_section (tree exp, unsigned int align static HOST_WIDE_INT get_constant_size (tree exp) { - HOST_WIDE_INT size; - - size = int_size_in_bytes (TREE_TYPE (exp)); - if (TREE_CODE (exp) == STRING_CST) - size = MAX (TREE_STRING_LENGTH (exp), size); - return size; + return int_size_in_bytes (TREE_TYPE (exp)); } /* Subroutine of output_constant_def: @@ -3468,7 +3466,8 @@ maybe_output_constant_def_contents (struct constan constant's alignment in bits. */ static void -assemble_constant_contents (tree exp, const char *label, unsigned int align) +assemble_constant_contents (tree exp, const char *label, unsigned int align, + bool merge_strings = false) { HOST_WIDE_INT size; @@ -3478,7 +3477,7 @@ static void targetm.asm_out.declare_constant_name (asm_out_file, label, exp, size); /* Output the value of EXP. */ - output_constant (exp, size, align, false); + output_constant (exp, size, align, false, merge_strings); targetm.asm_out.decl_end (); } @@ -3519,10 +3518,13 @@ output_constant_def_contents (rtx symbol) || (VAR_P (decl) && DECL_IN_CONSTANT_POOL (decl)) ? DECL_ALIGN (decl) : symtab_node::get (decl)->definition_alignment ()); - switch_to_section (get_constant_section (exp, align)); + section *sect = get_constant_section (exp, align); + switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_constant_contents (exp, XSTR (symbol, 0), align); + assemble_constant_contents (exp, XSTR (symbol, 0), align, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { HOST_WIDE_INT size = get_constant_size (exp); @@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool, oc_outer_state *); +/* Find out if a string P of length LEN and character size UNIT + has double nul termination. + The string is already known to have a nul termination at + offset LEN. Find out if there is already a NUL termination + at offset LEN - UNIT. */ + +static bool +redundant_nul_term_p (const char *p, unsigned len, unsigned unit) +{ + gcc_assert(len >= unit); + return !memcmp (p + len, p + len - unit, unit); +} + /* Output assembler code for constant EXP, with no label. This includes the pseudo-op such as ".int" or ".byte", and a newline. Assumes output_addressed_constants has been done on EXP already. @@ -4812,7 +4850,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT, static unsigned HOST_WIDE_INT output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align, - bool reverse) + bool reverse, bool merge_strings /* = false */) { enum tree_code code; unsigned HOST_WIDE_INT thissize; @@ -4940,8 +4978,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT case CONSTRUCTOR: return output_constructor (exp, size, align, reverse, NULL); case STRING_CST: - thissize - = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size); + thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp); + if (thissize > size && merge_strings + && !redundant_nul_term_p (TREE_STRING_POINTER (exp), + size, thissize - size)) + ; + else + thissize = MIN (thissize, size); gcc_checking_assert (check_string_literal (exp, size)); assemble_string (TREE_STRING_POINTER (exp), thissize); break; Index: gcc/testsuite/gnat.dg/string_merge1.adb =================================================================== --- gcc/testsuite/gnat.dg/string_merge1.adb (revision 0) +++ gcc/testsuite/gnat.dg/string_merge1.adb (working copy) @@ -0,0 +1,19 @@ +-- { dg-do compile } +-- { dg-options "-O1 -fmerge-all-constants" } + +procedure String_Merge1 is + procedure Process (X : String); + pragma Import (Ada, Process); +begin + Process ("ABCD"); +end; + +-- We expect something like: + +-- .section .rodata.str1.1,"aMS",@progbits,1 +-- .LC1: +-- .string "ABCD" + +-- { dg-final { scan-assembler-times "\\.rodata\\.str" 1 } } +-- { dg-final { scan-assembler-times "\\.string" 1 } } +-- { dg-final { scan-assembler-times "\"ABCD\"" 1 } } Index: gcc/testsuite/gnat.dg/string_merge2.adb =================================================================== --- gcc/testsuite/gnat.dg/string_merge2.adb (revision 0) +++ gcc/testsuite/gnat.dg/string_merge2.adb (working copy) @@ -0,0 +1,19 @@ +-- { dg-do compile } +-- { dg-options "-O1 -fmerge-all-constants" } + +procedure String_Merge2 is + procedure Process (X : String); + pragma Import (Ada, Process); +begin + Process ("ABCD" & Ascii.NUL); +end; + +-- We expect something like: + +-- .section .rodata.str1.1,"aMS",@progbits,1 +-- .LC1: +-- .string "ABCD" + +-- { dg-final { scan-assembler-times "\\.rodata\\.str" 1 } } +-- { dg-final { scan-assembler-times "\\.string" 1 } } +-- { dg-final { scan-assembler-times "\"ABCD\"" 1 } } Index: gcc/testsuite/gcc.dg/merge-all-constants-1.c =================================================================== --- gcc/testsuite/gcc.dg/merge-all-constants-1.c (revision 263072) +++ gcc/testsuite/gcc.dg/merge-all-constants-1.c (working copy) @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-w -O2 -fmerge-all-constants" } */ -const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str1[36] = "\000123456789abcdefghijklmnopqrstuvwxyz"; const char str2[38] = "0123456789abcdefghijklmnopqrstuvwxyz"; -const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str3[10] = "\000123456789abcdefghijklmnopqrstuvwxyz"; -/* { dg-final { scan-assembler-not "\.rodata\.str" } } */ +/* { dg-final { scan-assembler-not "\\.rodata\\.str" } } */ Index: gcc/testsuite/gcc.dg/merge-all-constants-2.c =================================================================== --- gcc/testsuite/gcc.dg/merge-all-constants-2.c (revision 0) +++ gcc/testsuite/gcc.dg/merge-all-constants-2.c (working copy) @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-w -O2 -fmerge-all-constants" } */ + +const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str2[37] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz"; + +/* { dg-final { scan-assembler-not "\\.rodata\[\n\r\]" } } */