On February 16, 2019 8:19:06 AM GMT+01:00, Jakub Jelinek <ja...@redhat.com> 
wrote:
>Hi!
>
>On the following testcase, we've regressed in bar since 8.x: in 8.x
>store merging came up with mem = 64-bit constant, but starting with the
>change to transform {0,1,2,3,4,5,6,7} char initializers into STRING_CSTs,
>we don't do that anymore.  The mem = STRING_CST expansion can do that,
>but only if there are no embedded zeros.  The following patch improves
>it even for embedded zeros, by using a new callback for the
>can_store_by_pieces/store_by_pieces calls which knows how to handle
>STRING_CST.  We don't need strlen in that case, can use TREE_STRING_LENGTH
>instead.  Additionally, if the STRING_CST is slightly shorter than the
>destination region, it might generate better code by trying to
>store_by_pieces it all in one go (bytes from STRING_CST until the last one,
>followed by artificially added zeros), and only if that doesn't seem to be
>beneficial (e.g. very small STRING_CST followed by kilobytes of zeros)
>does it go for the store_by_pieces of STRING_CST (rounded up to the next
>STORE_MAX_PIECES) followed by a clear_storage.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 

>2019-02-16  Jakub Jelinek  <ja...@redhat.com>
>
>       PR rtl-optimization/66152
>       * builtins.h (c_readstr): Declare.
>       * builtins.c (c_readstr): Remove forward declaration.  Add
>       null_terminated_p argument, if false, read all bytes from the
>       string instead of stopping after '\0'.
>       * expr.c (string_cst_read_str): New function.
>       (store_expr): Use string_cst_read_str instead of
>       builtin_strncpy_read_str.  Try to store by pieces the whole
>       exp_len first, and only if that fails, split it up into
>       store by pieces followed by clear_storage.  Formatting fix.
>
>       * gcc.target/i386/pr66152.c: New test.
>
>--- gcc/builtins.h.jj  2019-02-14 08:06:37.878546571 +0100
>+++ gcc/builtins.h     2019-02-15 11:33:50.208180171 +0100
>@@ -103,6 +103,7 @@ struct c_strlen_data
> };
> 
>extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1);
>+extern rtx c_readstr (const char *, scalar_int_mode, bool = true);
> extern void expand_builtin_setjmp_setup (rtx, rtx);
> extern void expand_builtin_setjmp_receiver (rtx);
> extern void expand_builtin_update_setjmp_buf (rtx);
>--- gcc/builtins.c.jj  2019-02-11 20:58:48.509965578 +0100
>+++ gcc/builtins.c     2019-02-15 11:37:00.046029652 +0100
>@@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_
> /* Non-zero if __builtin_constant_p should be folded right away.  */
> bool force_folding_builtin_constant_p;
> 
>-static rtx c_readstr (const char *, scalar_int_mode);
> static int target_char_cast (tree, char *);
> static rtx get_memory_rtx (tree, tree);
> static int apply_args_size (void);
>@@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st
> }
> 
> /* Return a constant integer corresponding to target reading
>-   GET_MODE_BITSIZE (MODE) bits from string constant STR.  */
>-
>-static rtx
>-c_readstr (const char *str, scalar_int_mode mode)
>+   GET_MODE_BITSIZE (MODE) bits from string constant STR.  If
>+   NULL_TERMINATED_P, reading stops after '\0' character, all further
>ones
>+   are assumed to be zero, otherwise it reads as many characters
>+   as needed.  */
>+
>+rtx
>+c_readstr (const char *str, scalar_int_mode mode,
>+         bool null_terminated_p/*=true*/)
> {
>   HOST_WIDE_INT ch;
>   unsigned int i, j;
>@@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m
>       j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1;
>       j *= BITS_PER_UNIT;
> 
>-      if (ch)
>+      if (ch || !null_terminated_p)
>       ch = (unsigned char) str[i];
> tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT);
>     }
>--- gcc/expr.c.jj      2019-02-08 20:00:40.309835608 +0100
>+++ gcc/expr.c 2019-02-15 11:37:18.715719809 +0100
>@@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from)
>   return maybe_expand_insn (code, 2, ops);
> }
> 
>+/* Helper function for store_expr storing of STRING_CST.  */
>+
>+static rtx
>+string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode
>mode)
>+{
>+  tree str = (tree) data;
>+
>+  gcc_assert (offset >= 0);
>+  if (offset >= TREE_STRING_LENGTH (str))
>+    return const0_rtx;
>+
>+  if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode)
>+      > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str))
>+    {
>+      char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode));
>+      size_t l = TREE_STRING_LENGTH (str) - offset;
>+      memcpy (p, TREE_STRING_POINTER (str) + offset, l);
>+      memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
>+      return c_readstr (p, mode, false);
>+    }
>+
>+  return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
>+}
>+
> /* Generate code for computing expression EXP,
>    and storing the value into TARGET.
> 
>@@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from)
> 
> rtx
> store_expr (tree exp, rtx target, int call_param_p,
>-                      bool nontemporal, bool reverse)
>+          bool nontemporal, bool reverse)
> {
>   rtx temp;
>   rtx alt_rtl = NULL_RTX;
>@@ -5606,36 +5630,32 @@ store_expr (tree exp, rtx target, int ca
>       if (TREE_STRING_LENGTH (str) <= 0)
>       goto normal_expr;
> 
>-      str_copy_len = strlen (TREE_STRING_POINTER (str));
>-      if (str_copy_len < TREE_STRING_LENGTH (str) - 1)
>-      goto normal_expr;
>+      if (can_store_by_pieces (exp_len, string_cst_read_str, (void *)
>str,
>+                             MEM_ALIGN (target), false))
>+      {
>+        store_by_pieces (target, exp_len, string_cst_read_str, (void *)
>str,
>+                         MEM_ALIGN (target), false, RETURN_BEGIN);
>+        return NULL_RTX;
>+      }
> 
>       str_copy_len = TREE_STRING_LENGTH (str);
>-      if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0
>-        && TREE_STRING_POINTER (str)[TREE_STRING_LENGTH (str) - 1] == '\0')
>+      if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0)
>       {
>         str_copy_len += STORE_MAX_PIECES - 1;
>         str_copy_len &= ~(STORE_MAX_PIECES - 1);
>       }
>-      str_copy_len = MIN (str_copy_len, exp_len);
>-      if (!can_store_by_pieces (str_copy_len,
>builtin_strncpy_read_str,
>-                              CONST_CAST (char *, TREE_STRING_POINTER (str)),
>-                              MEM_ALIGN (target), false))
>+      if (str_copy_len >= exp_len)
>       goto normal_expr;
> 
>-      dest_mem = target;
>+      if (!can_store_by_pieces (str_copy_len, string_cst_read_str,
>+                              (void *) str, MEM_ALIGN (target), false))
>+      goto normal_expr;
> 
>-      memop_ret retmode = exp_len > str_copy_len ? RETURN_END :
>RETURN_BEGIN;
>-      dest_mem = store_by_pieces (dest_mem,
>-                                str_copy_len, builtin_strncpy_read_str,
>-                                CONST_CAST (char *,
>-                                            TREE_STRING_POINTER (str)),
>-                                MEM_ALIGN (target), false,
>-                                retmode);
>-      if (exp_len > str_copy_len)
>-      clear_storage (adjust_address (dest_mem, BLKmode, 0),
>-                     GEN_INT (exp_len - str_copy_len),
>-                     BLOCK_OP_NORMAL);
>+      dest_mem = store_by_pieces (target, str_copy_len,
>string_cst_read_str,
>+                                (void *) str, MEM_ALIGN (target), false,
>+                                RETURN_END);
>+      clear_storage (adjust_address (dest_mem, BLKmode, 0),
>+                   GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL);
>       return NULL_RTX;
>     }
>   else
>--- gcc/testsuite/gcc.target/i386/pr66152.c.jj 2019-02-15
>11:55:56.212164557 +0100
>+++ gcc/testsuite/gcc.target/i386/pr66152.c    2019-02-15
>11:56:47.769308378 +0100
>@@ -0,0 +1,25 @@
>+/* PR rtl-optimization/66152 */
>+/* { dg-do compile } */
>+/* { dg-options "-O2" } */
>+/* { dg-final { scan-assembler "movabs\[^\n\r]*506097522914230528" {
>target { ! ia32 } } } } */
>+/* { dg-final { scan-assembler "movabs\[^\n\r]*505813836079825408" {
>target { ! ia32 } } } } */
>+/* { dg-final { scan-assembler "mov\[^\n\r]*50462976" { target ia32 }
>} } */
>+/* { dg-final { scan-assembler "mov\[^\n\r]*117835012" { target ia32 }
>} } */
>+/* { dg-final { scan-assembler "mov\[^\n\r]*100925952" { target ia32 }
>} } */
>+/* { dg-final { scan-assembler "mov\[^\n\r]*117768961" { target ia32 }
>} } */
>+
>+void foo (char *);
>+
>+void
>+bar (void)
>+{
>+  char a[] = {0,1,2,3,4,5,6,7};
>+  foo (a);
>+}
>+
>+void
>+baz (void)
>+{
>+  char a[8] = "\0\2\4\6\1\3\5\7";
>+  foo (a);
>+}
>
>       Jakub

Reply via email to