Hi,
I realized that the previous version did not handle
zero terminated Ada constants correctly as in
$ cat hello.adb
procedure Hello is
procedure puts (S : String) with Import, Convention => C;
X : String(1..8) := "abcdefg" & Ascii.Nul;
begin
puts ("1234" & Ascii.Nul );
puts (X);
end;
accidentally strings were doubly nul-terminated, because I forgot to
handle merge string sections in assemble_constant_contents, and
because get_constant_size increased the string literal by one.
Fixed, and added a positive test case for the merging non-zero terminated
strings in C.
Boot-strapped and regtested on x86_64-pc-linux-gnu.
Is it OK for trunk (after pre-condition patches)?
Thanks
Bernd.
gcc:
2018-08-04 Bernd Edlinger <[email protected]>
* varasm.c (output_constant): Add new parameter merge_strings.
Make strings properly zero terminated in merge string sections.
(mergeable_string_section): Don't fail if the last char is non-zero.
(assemble_variable_contents): Handle merge string sections.
(assemble_variable): Likewise.
(assemble_constant_contents): Likewise.
(output_constant_def_contents): Likewise.
(get_constant_size): Remove special handling of STRING_CSTs.
(redundant_nul_term_p): New helper function.
testsuite:
2018-08-04 Bernd Edlinger <[email protected]>
* gcc.dg/merge-all-constants-1.c: Adjust test.
* gcc.dg/merge-all-constants-2.c: New test.
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c (revision 263072)
+++ gcc/varasm.c (working copy)
@@ -111,7 +111,8 @@ static int compare_constant (const tree, const tre
static void output_constant_def_contents (rtx);
static void output_addressed_constants (tree);
static unsigned HOST_WIDE_INT output_constant (tree, unsigned HOST_WIDE_INT,
- unsigned int, bool);
+ unsigned int, bool,
+ bool = false);
static void globalize_decl (tree);
static bool decl_readonly_section_1 (enum section_category);
#ifdef BSS_SECTION_ASM_OP
@@ -827,7 +828,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUS
unit = GET_MODE_SIZE (mode);
/* Check for embedded NUL characters. */
- for (i = 0; i < len; i += unit)
+ for (i = 0; i < len - unit; i += unit)
{
for (j = 0; j < unit; j++)
if (str[i + j] != '\0')
@@ -2117,7 +2118,7 @@ assemble_noswitch_variable (tree decl, const char
static void
assemble_variable_contents (tree decl, const char *name,
- bool dont_output_data)
+ bool dont_output_data, bool merge_strings = false)
{
/* Do any machine/system dependent processing of the object. */
#ifdef ASM_DECLARE_OBJECT_NAME
@@ -2140,7 +2141,7 @@ assemble_variable_contents (tree decl, const char
output_constant (DECL_INITIAL (decl),
tree_to_uhwi (DECL_SIZE_UNIT (decl)),
get_variable_align (decl),
- false);
+ false, merge_strings);
else
/* Leave space for it. */
assemble_zeros (tree_to_uhwi (DECL_SIZE_UNIT (decl)));
@@ -2316,7 +2317,9 @@ assemble_variable (tree decl, int top_level ATTRIB
switch_to_section (sect);
if (align > BITS_PER_UNIT)
ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
- assemble_variable_contents (decl, name, dont_output_data);
+ assemble_variable_contents (decl, name, dont_output_data,
+ sect->common.flags & SECTION_MERGE
+ && sect->common.flags & SECTION_STRINGS);
if (asan_protected)
{
unsigned HOST_WIDE_INT int size
@@ -3300,12 +3303,7 @@ get_constant_section (tree exp, unsigned int align
static HOST_WIDE_INT
get_constant_size (tree exp)
{
- HOST_WIDE_INT size;
-
- size = int_size_in_bytes (TREE_TYPE (exp));
- if (TREE_CODE (exp) == STRING_CST)
- size = MAX (TREE_STRING_LENGTH (exp), size);
- return size;
+ return int_size_in_bytes (TREE_TYPE (exp));
}
/* Subroutine of output_constant_def:
@@ -3468,7 +3466,8 @@ maybe_output_constant_def_contents (struct constan
constant's alignment in bits. */
static void
-assemble_constant_contents (tree exp, const char *label, unsigned int align)
+assemble_constant_contents (tree exp, const char *label, unsigned int align,
+ bool merge_strings = false)
{
HOST_WIDE_INT size;
@@ -3478,7 +3477,7 @@ static void
targetm.asm_out.declare_constant_name (asm_out_file, label, exp, size);
/* Output the value of EXP. */
- output_constant (exp, size, align, false);
+ output_constant (exp, size, align, false, merge_strings);
targetm.asm_out.decl_end ();
}
@@ -3519,10 +3518,13 @@ output_constant_def_contents (rtx symbol)
|| (VAR_P (decl) && DECL_IN_CONSTANT_POOL (decl))
? DECL_ALIGN (decl)
: symtab_node::get (decl)->definition_alignment ());
- switch_to_section (get_constant_section (exp, align));
+ section *sect = get_constant_section (exp, align);
+ switch_to_section (sect);
if (align > BITS_PER_UNIT)
ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (align / BITS_PER_UNIT));
- assemble_constant_contents (exp, XSTR (symbol, 0), align);
+ assemble_constant_contents (exp, XSTR (symbol, 0), align,
+ sect->common.flags & SECTION_MERGE
+ && sect->common.flags & SECTION_STRINGS);
if (asan_protected)
{
HOST_WIDE_INT size = get_constant_size (exp);
@@ -4786,6 +4789,19 @@ static unsigned HOST_WIDE_INT
output_constructor (tree, unsigned HOST_WIDE_INT, unsigned int, bool,
oc_outer_state *);
+/* Find out if a string P of length LEN and character size UNIT
+ has double nul termination.
+ The string is already known to have a nul termination at
+ offset LEN. Find out if there is already a NUL termination
+ at offset LEN - UNIT. */
+
+static bool
+redundant_nul_term_p (const char *p, unsigned len, unsigned unit)
+{
+ gcc_assert(len >= unit);
+ return !memcmp (p + len, p + len - unit, unit);
+}
+
/* Output assembler code for constant EXP, with no label.
This includes the pseudo-op such as ".int" or ".byte", and a newline.
Assumes output_addressed_constants has been done on EXP already.
@@ -4812,7 +4850,7 @@ output_constructor (tree, unsigned HOST_WIDE_INT,
static unsigned HOST_WIDE_INT
output_constant (tree exp, unsigned HOST_WIDE_INT size, unsigned int align,
- bool reverse)
+ bool reverse, bool merge_strings /* = false */)
{
enum tree_code code;
unsigned HOST_WIDE_INT thissize;
@@ -4940,8 +4978,13 @@ output_constant (tree exp, unsigned HOST_WIDE_INT
case CONSTRUCTOR:
return output_constructor (exp, size, align, reverse, NULL);
case STRING_CST:
- thissize
- = MIN ((unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp), size);
+ thissize = (unsigned HOST_WIDE_INT)TREE_STRING_LENGTH (exp);
+ if (thissize > size && merge_strings
+ && !redundant_nul_term_p (TREE_STRING_POINTER (exp),
+ size, thissize - size))
+ ;
+ else
+ thissize = MIN (thissize, size);
gcc_checking_assert (check_string_literal (exp, size));
assemble_string (TREE_STRING_POINTER (exp), thissize);
break;
Index: gcc/testsuite/gcc.dg/merge-all-constants-1.c
===================================================================
--- gcc/testsuite/gcc.dg/merge-all-constants-1.c (revision 263072)
+++ gcc/testsuite/gcc.dg/merge-all-constants-1.c (working copy)
@@ -1,8 +1,8 @@
/* { dg-do compile } */
/* { dg-options "-w -O2 -fmerge-all-constants" } */
-const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str1[36] = "\000123456789abcdefghijklmnopqrstuvwxyz";
const char str2[38] = "0123456789abcdefghijklmnopqrstuvwxyz";
-const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str3[10] = "\000123456789abcdefghijklmnopqrstuvwxyz";
/* { dg-final { scan-assembler-not "\.rodata\.str" } } */
Index: gcc/testsuite/gcc.dg/merge-all-constants-2.c
===================================================================
--- gcc/testsuite/gcc.dg/merge-all-constants-2.c (revision 0)
+++ gcc/testsuite/gcc.dg/merge-all-constants-2.c (working copy)
--- /dev/null 2018-07-24 12:25:16.122475518 +0200
+++ gcc/testsuite/gcc.dg/merge-all-constants-2.c 2018-08-07 10:14:35.384190377 +0200
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-w -O2 -fmerge-all-constants" } */
+
+const char str1[36] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str2[37] = "0123456789abcdefghijklmnopqrstuvwxyz";
+const char str3[10] = "0123456789abcdefghijklmnopqrstuvwxyz";
+
+/* { dg-final { scan-assembler-not "\.rodata\[\n\r\]" } } */