Hi, this is an update of the string-merge section; it is based on the V2-STRING_CST semantic patch series, which was finally installed yesterday. It merges single-byte string constants with or without a terminating NUL. The patch has the same Ada and C test cases that were already in the V1 patch.
Thus no prerequisite patches are necessary at this time. Bootstrapped and reg-tested on x86_64-pc-linux-gnu. Is it OK for trunk? Thanks, Bernd.
gcc: 2018-08-27 Bernd Edlinger <bernd.edlin...@hotmail.de> * varasm.c (output_constant): Add new parameter merge_strings. Make strings properly zero terminated in merge string sections. (mergeable_string_section): Don't fail if the last char is non-zero. (assemble_variable_contents): Handle merge string sections. (assemble_variable): Likewise. (assemble_constant_contents): Likewise. (output_constant_def_contents): Likewise. testsuite: 2018-08-27 Bernd Edlinger <bernd.edlin...@hotmail.de> * gnat.dg/string_merge1.adb: New test. * gnat.dg/string_merge2.adb: New test. * gcc.dg/merge-all-constants-1.c: Adjust test. * gcc.dg/merge-all-constants-2.c: New test. diff -Npur gcc/varasm.c gcc/varasm.c --- gcc/varasm.c 2018-08-26 15:02:43.157905415 +0200 +++ gcc/varasm.c 2018-08-26 17:57:26.488494866 +0200 @@ -111,7 +111,8 @@ static int compare_constant (const tree, static void output_constant_def_contents (rtx); static void output_addressed_constants (tree); static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT, - unsigned int, bool); + unsigned int, bool, + bool = false); static void globalize_decl (tree); static bool decl_readonly_section_1 (enum section_category); #ifdef BSS_SECTION_ASM_OP @@ -804,8 +805,8 @@ mergeable_string_section (tree decl ATTR && TREE_CODE (decl) == STRING_CST && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE && align <= 256 - && (len = int_size_in_bytes (TREE_TYPE (decl))) > 0 - && TREE_STRING_LENGTH (decl) >= len) + && (len = int_size_in_bytes (TREE_TYPE (decl))) >= 0 + && TREE_STRING_LENGTH (decl) == len) { scalar_int_mode mode; unsigned int modesize; @@ -835,7 +836,7 @@ mergeable_string_section (tree decl ATTR if (j == unit) break; } - if (i == len - unit) + if (i == len - unit || (unit == 1 && i == len)) { sprintf (name, "%s.str%d.%d", prefix, modesize / 8, (int) (align / 8)); @@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, c static void assemble_variable_contents (tree decl, const char *name, - bool 
dont_output_data) + bool dont_output_data, bool merge_strings = false) { /* Do any machine/system dependent processing of the object. */ #ifdef ASM_DECLARE_OBJECT_NAME @@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, c output_constant (DECL_INITIAL (decl), tree_to_uhwi (DECL_SIZE_UNIT (decl)), get_variable_align (decl), - false); + false, merge_strings); else /* Leave space for it. */ assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl))); @@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_le switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_variable_contents (decl, name, dont_output_data); + assemble_variable_contents (decl, name, dont_output_data, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { unsigned HOST_WIDE_INT int size @@ -3471,7 +3474,8 @@ maybe_output_constant_def_contents (stru constant's alignment in bits. */ static void -assemble_constant_contents (tree exp, const char *label, unsigned int align) +assemble_constant_contents (tree exp, const char *label, unsigned int align, + bool merge_strings = false) { HOST_WIDE_INT size; @@ -3481,7 +3485,7 @@ assemble_constant_contents (tree exp, co targetm.asm_out.declare_constant_name (asm_out_file, label, exp, size); /* Output the value of EXP. */ - output_constant (exp, size, align, false); + output_constant (exp, size, align, false, merge_strings); targetm.asm_out.decl_end (); } @@ -3522,10 +3526,13 @@ output_constant_def_contents (rtx symbol || (VAR_P (decl) && DECL_IN_CONSTANT_POOL (decl)) ? 
DECL_ALIGN (decl) : symtab_node::get (decl)->definition_alignment ()); - switch_to_section (get_constant_section (exp, align)); + section *sect = get_constant_section (exp, align); + switch_to_section (sect); if (align > BITS_PER_UNIT) ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT)); - assemble_constant_contents (exp, XSTR (symbol, 0), align); + assemble_constant_contents (exp, XSTR (symbol, 0), align, + sect->common.flags & SECTION_MERGE + && sect->common.flags & SECTION_STRINGS); if (asan_protected) { HOST_WIDE_INT size = get_constant_size (exp); @@ -4838,7 +4845,7 @@ output_constructor (tree, unsigned HOST_ static unsigned HOST_WIDE_INT output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align, - bool reverse) + bool reverse, bool merge_strings /* = false */) { enum tree_code code; unsigned HOST_WIDE_INT thissize; @@ -4966,8 +4973,11 @@ output_constant (tree exp, unsigned HOST case CONSTRUCTOR: return output_constructor (exp, size, align, reverse, NULL); case STRING_CST: - thissize - = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size); + thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp); + if (merge_strings + && (thissize == 0 + || TREE_STRING_POINTER (exp) [thissize - 1] != '\0')) + thissize++; gcc_checking_assert (check_string_literal (exp, size)); assemble_string (TREE_STRING_POINTER (exp), thissize); break; diff -Npur gcc/testsuite/gcc.dg/merge-all-constants-1.c gcc/testsuite/gcc.dg/merge-all-constants-1.c --- gcc/testsuite/gcc.dg/merge-all-constants-1.c 2018-08-16 17:28:11.000000000 +0200 +++ gcc/testsuite/gcc.dg/merge-all-constants-1.c 2018-08-26 16:31:12.650271931 +0200 @@ -1,8 +1,8 @@ /* { dg-do compile } */ /* { dg-options "-w -O2 -fmerge-all-constants" } */ -const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str1[36] = "\000123456789abcdefghijklmnopqrstuvwxyz"; const char str2[38] = "0123456789abcdefghijklmnopqrstuvwxyz"; -const char str3[10] = 
"0123456789abcdefghijklmnopqrstuvwxyz"; +const char str3[10] = "\000123456789abcdefghijklmnopqrstuvwxyz"; -/* { dg-final { scan-assembler-not "\.rodata\.str" } } */ +/* { dg-final { scan-assembler-not "\\.rodata\\.str" } } */ diff -Npur gcc/testsuite/gcc.dg/merge-all-constants-2.c gcc/testsuite/gcc.dg/merge-all-constants-2.c --- gcc/testsuite/gcc.dg/merge-all-constants-2.c 1970-01-01 01:00:00.000000000 +0100 +++ gcc/testsuite/gcc.dg/merge-all-constants-2.c 2018-08-26 16:31:12.650271931 +0200 @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-w -O2 -fmerge-all-constants" } */ + +const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str2[37] = "0123456789abcdefghijklmnopqrstuvwxyz"; +const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz"; + +/* { dg-final { scan-assembler-not "\\.rodata\[\n\r\]" } } */ diff -Npur gcc/testsuite/gnat.dg/string_merge1.adb gcc/testsuite/gnat.dg/string_merge1.adb --- gcc/testsuite/gnat.dg/string_merge1.adb 1970-01-01 01:00:00.000000000 +0100 +++ gcc/testsuite/gnat.dg/string_merge1.adb 2018-08-26 16:31:12.650271931 +0200 @@ -0,0 +1,19 @@ +-- { dg-do compile } +-- { dg-options "-O1 -fmerge-all-constants" } + +procedure String_Merge1 is + procedure Process (X : String); + pragma Import (Ada, Process); +begin + Process ("ABCD"); +end; + +-- We expect something like: + +-- .section .rodata.str1.1,"aMS",@progbits,1 +-- .LC1: +-- .string "ABCD" + +-- { dg-final { scan-assembler-times "\\.rodata\\.str" 1 } } +-- { dg-final { scan-assembler-times "\\.string" 1 } } +-- { dg-final { scan-assembler-times "\"ABCD\"" 1 } } diff -Npur gcc/testsuite/gnat.dg/string_merge2.adb gcc/testsuite/gnat.dg/string_merge2.adb --- gcc/testsuite/gnat.dg/string_merge2.adb 1970-01-01 01:00:00.000000000 +0100 +++ gcc/testsuite/gnat.dg/string_merge2.adb 2018-08-26 16:31:12.650271931 +0200 @@ -0,0 +1,19 @@ +-- { dg-do compile } +-- { dg-options "-O1 -fmerge-all-constants" } + +procedure String_Merge2 is + procedure Process (X : String); + 
pragma Import (Ada, Process); +begin + Process ("ABCD" & Ascii.NUL); +end; + +-- We expect something like: + +-- .section .rodata.str1.1,"aMS",@progbits,1 +-- .LC1: +-- .string "ABCD" + +-- { dg-final { scan-assembler-times "\\.rodata\\.str" 1 } } +-- { dg-final { scan-assembler-times "\\.string" 1 } } +-- { dg-final { scan-assembler-times "\"ABCD\"" 1 } }