Hi,

this is an update of the patch, which has just two Ada test cases
added for string merging which are based on Olivier's suggested test case.

Note that except the "Check STRING_CSTs in varasm.c" patch series,
there is now another patch needed to work around issues with 71625:

[PATCH] Call braced_list_to_string after array size is fixed
https://gcc.gnu.org/ml/gcc-patches/2018-08/msg01015.html


Thanks
Bernd.


On 08/07/18 14:55, Bernd Edlinger wrote:
> Hi,
> 
> I realized that the previous version did not handle
> zero terminated Ada constants correctly as in
> 
> $ cat hello.adb
> procedure Hello is
>     procedure puts  (S : String) with Import, Convention => C;
>     X : String(1..8) := "abcdefg" & Ascii.Nul;
> begin
>    puts ("1234" & Ascii.Nul );
>    puts (X);
> end;
> 
> accidentally strings were doubly nul-terminated, because I forgot to
> handle merge string sections in assemble_constant_contents, and
> because get_constant_size increased the string literal by one.
> 
> Fixed, and added a positive test case for the merging non-zero terminated
> strings in C.
> 
> 
> Boot-strapped and regtested on x86_64-pc-linux-gnu.
> Is it OK for trunk (after pre-condition patches)?
> 
> 
> Thanks
> Bernd.
gcc:
2018-08-04  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* varasm.c (output_constant): Add new parameter merge_strings.
	Make strings properly zero terminated in merge string sections.
	(mergeable_string_section): Don't fail if the last char is non-zero.
	(assemble_variable_contents): Handle merge string sections.
	(assemble_variable): Likewise.
	(assemble_constant_contents): Likewise.
	(output_constant_def_contents): Likewise.
	(get_constant_size): Remove special handling of STRING_CSTs.
	(redundant_nul_term_p): New helper function.

testsuite:
2018-08-04  Bernd Edlinger  <bernd.edlin...@hotmail.de>

	* gnat.dg/string_merge1.adb: New test.
	* gnat.dg/string_merge2.adb: New test.
	* gcc.dg/merge-all-constants-1.c: Adjust test.
	* gcc.dg/merge-all-constants-2.c: New test.

Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c	(revision 263072)
+++ gcc/varasm.c	(working copy)
@@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre
 static void output_constant_def_contents (rtx);
 static void output_addressed_constants (tree);
 static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT,
-					       unsigned int, bool);
+					       unsigned int, bool,
+					       bool = false);
 static void globalize_decl (tree);
 static bool decl_readonly_section_1 (enum section_category);
 #ifdef BSS_SECTION_ASM_OP
@@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS
 	  unit = GET_MODE_SIZE (mode);
 
 	  /* Check for embedded NUL characters.  */
-	  for (i = 0; i < len; i += unit)
+	  for (i = 0; i < len - unit; i += unit)
 	    {
 	      for (j = 0; j < unit; j++)
 		if (str[i + j] != '\0')
@@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char
 
 static void
 assemble_variable_contents (tree decl, const char *name,
-			    bool dont_output_data)
+			    bool dont_output_data, bool merge_strings = false)
 {
   /* Do any machine/system dependent processing of the object.  */
 #ifdef ASM_DECLARE_OBJECT_NAME
@@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char
 	output_constant (DECL_INITIAL (decl),
 			 tree_to_uhwi (DECL_SIZE_UNIT (decl)),
 			 get_variable_align (decl),
-			 false);
+			 false, merge_strings);
       else
 	/* Leave space for it.  */
 	assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl)));
@@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB
 	switch_to_section (sect);
       if (align > BITS_PER_UNIT)
 	ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
-      assemble_variable_contents (decl, name, dont_output_data);
+      assemble_variable_contents (decl, name, dont_output_data,
+				  sect->common.flags & SECTION_MERGE
+				  && sect->common.flags & SECTION_STRINGS);
       if (asan_protected)
 	{
 	  unsigned HOST_WIDE_INT int size
@@ -3300,12 +3303,7 @@ get_constant_section (tree exp, unsigned int align
 static HOST_WIDE_INT
 get_constant_size (tree exp)
 {
-  HOST_WIDE_INT size;
-
-  size = int_size_in_bytes (TREE_TYPE (exp));
-  if (TREE_CODE (exp) == STRING_CST)
-    size = MAX (TREE_STRING_LENGTH (exp), size);
-  return size;
+  return int_size_in_bytes (TREE_TYPE (exp));
 }
 
 /* Subroutine of output_constant_def:
@@ -3468,7 +3466,8 @@ maybe_output_constant_def_contents (struct constan
    constant's alignment in bits.  */
 
 static void
-assemble_constant_contents (tree exp, const char *label, unsigned int align)
+assemble_constant_contents (tree exp, const char *label, unsigned int align,
+			    bool merge_strings = false)
 {
   HOST_WIDE_INT size;
 
@@ -3478,7 +3477,7 @@ static void
   targetm.asm_out.declare_constant_name (asm_out_file, label, exp, size);
 
   /* Output the value of EXP.  */
-  output_constant (exp, size, align, false);
+  output_constant (exp, size, align, false, merge_strings);
 
   targetm.asm_out.decl_end ();
 }
@@ -3519,10 +3518,13 @@ output_constant_def_contents (rtx symbol)
 		   || (VAR_P (decl) && DECL_IN_CONSTANT_POOL (decl))
 		   ? DECL_ALIGN (decl)
 		   : symtab_node::get (decl)->definition_alignment ());
-      switch_to_section (get_constant_section (exp, align));
+      section *sect = get_constant_section (exp, align);
+      switch_to_section (sect);
       if (align > BITS_PER_UNIT)
 	ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
-      assemble_constant_contents (exp, XSTR (symbol, 0), align);
+      assemble_constant_contents (exp, XSTR (symbol, 0), align,
+				  sect->common.flags & SECTION_MERGE
+				  && sect->common.flags & SECTION_STRINGS);
       if (asan_protected)
 	{
 	  HOST_WIDE_INT size = get_constant_size (exp);
@@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT
 output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool,
 		    oc_outer_state *);
 
+/* Find out if a string P of length LEN and character size UNIT
+   has double nul termination.
+   The string is already known to have a nul termination at
+   offset LEN.  Find out if there is already a NUL termination
+   at offset LEN - UNIT.  */
+
+static bool
+redundant_nul_term_p (const char *p, unsigned len, unsigned unit)
+{
+  gcc_assert(len >= unit);
+  return !memcmp (p + len, p + len - unit, unit);
+}
+
 /* Output assembler code for constant EXP, with no label.
    This includes the pseudo-op such as ".int" or ".byte", and a newline.
    Assumes output_addressed_constants has been done on EXP already.
@@ -4812,7 +4850,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT,
 
 static unsigned HOST_WIDE_INT
 output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align,
-		 bool reverse)
+		 bool reverse, bool merge_strings /* = false */)
 {
   enum tree_code code;
   unsigned HOST_WIDE_INT thissize;
@@ -4940,8 +4978,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT
 	case CONSTRUCTOR:
 	  return output_constructor (exp, size, align, reverse, NULL);
 	case STRING_CST:
-	  thissize
-	    = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size);
+	  thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp);
+	  if (thissize > size && merge_strings
+	      && !redundant_nul_term_p (TREE_STRING_POINTER (exp),
+					size, thissize - size))
+	    ;
+	  else
+	    thissize = MIN (thissize, size);
 	  gcc_checking_assert (check_string_literal (exp, size));
 	  assemble_string (TREE_STRING_POINTER (exp), thissize);
 	  break;
Index: gcc/testsuite/gnat.dg/string_merge1.adb
===================================================================
--- gcc/testsuite/gnat.dg/string_merge1.adb	(revision 0)
+++ gcc/testsuite/gnat.dg/string_merge1.adb	(working copy)
@@ -0,0 +1,19 @@
+-- { dg-do compile }
+-- { dg-options "-O1 -fmerge-all-constants" }
+
+procedure String_Merge1 is
+   procedure Process (X : String);
+   pragma Import (Ada, Process);
+begin
+   Process ("ABCD");
+end;
+
+-- We expect something like:
+
+-- .section  .rodata.str1.1,"aMS",@progbits,1
+-- .LC1:
+--     .string "ABCD"
+
+-- { dg-final { scan-assembler-times "\\.rodata\\.str" 1 } }
+-- { dg-final { scan-assembler-times "\\.string" 1 } }
+-- { dg-final { scan-assembler-times "\"ABCD\"" 1 } }
Index: gcc/testsuite/gnat.dg/string_merge2.adb
===================================================================
--- gcc/testsuite/gnat.dg/string_merge2.adb	(revision 0)
+++ gcc/testsuite/gnat.dg/string_merge2.adb	(working copy)
@@ -0,0 +1,19 @@
+-- { dg-do compile }
+-- { dg-options "-O1 -fmerge-all-constants" }
+
+procedure String_Merge2 is
+   procedure Process (X : String);
+   pragma Import (Ada, Process);
+begin
+   Process ("ABCD" & Ascii.NUL);
+end;
+
+-- We expect something like:
+
+-- .section  .rodata.str1.1,"aMS",@progbits,1
+-- .LC1:
+--     .string "ABCD"
+
+-- { dg-final { scan-assembler-times "\\.rodata\\.str" 1 } }
+-- { dg-final { scan-assembler-times "\\.string" 1 } }
+-- { dg-final { scan-assembler-times "\"ABCD\"" 1 } }
Index: gcc/testsuite/gcc.dg/merge-all-constants-1.c
===================================================================
--- gcc/testsuite/gcc.dg/merge-all-constants-1.c	(revision 263072)
+++ gcc/testsuite/gcc.dg/merge-all-constants-1.c	(working copy)
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-w -O2 -fmerge-all-constants" } */
 
-const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str1[36] = "\000123456789abcdefghijklmnopqrstuvwxyz";
 const char str2[38] = "0123456789abcdefghijklmnopqrstuvwxyz";
-const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str3[10] = "\000123456789abcdefghijklmnopqrstuvwxyz";
 
-/* { dg-final { scan-assembler-not "\.rodata\.str" } } */
+/* { dg-final { scan-assembler-not "\\.rodata\\.str" } } */
Index: gcc/testsuite/gcc.dg/merge-all-constants-2.c
===================================================================
--- gcc/testsuite/gcc.dg/merge-all-constants-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/merge-all-constants-2.c	(working copy)
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-w -O2 -fmerge-all-constants" } */
+
+const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str2[37] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz";
+
+/* { dg-final { scan-assembler-not "\\.rodata\[\n\r\]" } } */

Reply via email to