On February 16, 2019 8:19:06 AM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote: >Hi! > >On the following testcase, we've regressed in bar since 8.x, in 8.x >store merging came up with mem = 64-bit constant, but starting with the >change to transform {0,1,2,3,4,5,6,7} char initializers into >STRING_CSTs, >we don't do that anymore. The mem = STRING_CST expansion can do that, >but only if there are no embedded zeros. The following patch improves >it even for embedded zeros, by using a new callback for the >can_store_by_pieces/store_by_pieces calls which knows how to handle >STRING_CST. We don't need strlen in that case, can use TREE_STRING_LENGTH >instead. Additionally, if the STRING_CST is slightly shorter than the >destination region, it might generate better code by trying to >store_by_pieces it all in one go (bytes from STRING_CST until the last >one, >followed by artificially added zeros) and only if that doesn't seem to >be >beneficial (e.g. very small STRING_CST followed by kilobytes of zeros) >goes for the store_by_pieces of STRING_CST (rounded up to next >STORE_MAX_PIECES) followed by a clear_storage. > >Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Richard. >2019-02-16 Jakub Jelinek <ja...@redhat.com> > > PR rtl-optimization/66152 > * builtins.h (c_readstr): Declare. > * builtins.c (c_readstr): Remove forward declaration. Add > null_terminated_p argument, if false, read all bytes from the > string instead of stopping after '\0'. > * expr.c (string_cst_read_str): New function. > (store_expr): Use string_cst_read_str instead of > builtin_strncpy_read_str. Try to store by pieces the whole > exp_len first, and only if that fails, split it up into > store by pieces followed by clear_storage. Formatting fix. > > * gcc.target/i386/pr66152.c: New test. > >--- gcc/builtins.h.jj 2019-02-14 08:06:37.878546571 +0100 >+++ gcc/builtins.h 2019-02-15 11:33:50.208180171 +0100 >@@ -103,6 +103,7 @@ struct c_strlen_data > }; > >extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1); >+extern rtx c_readstr (const char *, scalar_int_mode, bool = true); > extern void expand_builtin_setjmp_setup (rtx, rtx); > extern void expand_builtin_setjmp_receiver (rtx); > extern void expand_builtin_update_setjmp_buf (rtx); >--- gcc/builtins.c.jj 2019-02-11 20:58:48.509965578 +0100 >+++ gcc/builtins.c 2019-02-15 11:37:00.046029652 +0100 >@@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_ > /* Non-zero if __builtin_constant_p should be folded right away. */ > bool force_folding_builtin_constant_p; > >-static rtx c_readstr (const char *, scalar_int_mode); > static int target_char_cast (tree, char *); > static rtx get_memory_rtx (tree, tree); > static int apply_args_size (void); >@@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st > } > > /* Return a constant integer corresponding to target reading >- GET_MODE_BITSIZE (MODE) bits from string constant STR. */ >- >-static rtx >-c_readstr (const char *str, scalar_int_mode mode) >+ GET_MODE_BITSIZE (MODE) bits from string constant STR. 
If >+ NULL_TERMINATED_P, reading stops after '\0' character, all further >ones >+ are assumed to be zero, otherwise it reads as many characters >+ as needed. */ >+ >+rtx >+c_readstr (const char *str, scalar_int_mode mode, >+ bool null_terminated_p/*=true*/) > { > HOST_WIDE_INT ch; > unsigned int i, j; >@@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m > j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1; > j *= BITS_PER_UNIT; > >- if (ch) >+ if (ch || !null_terminated_p) > ch = (unsigned char) str[i]; > tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT); > } >--- gcc/expr.c.jj 2019-02-08 20:00:40.309835608 +0100 >+++ gcc/expr.c 2019-02-15 11:37:18.715719809 +0100 >@@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from) > return maybe_expand_insn (code, 2, ops); > } > >+/* Helper function for store_expr storing of STRING_CST. */ >+ >+static rtx >+string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode >mode) >+{ >+ tree str = (tree) data; >+ >+ gcc_assert (offset >= 0); >+ if (offset >= TREE_STRING_LENGTH (str)) >+ return const0_rtx; >+ >+ if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode) >+ > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str)) >+ { >+ char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode)); >+ size_t l = TREE_STRING_LENGTH (str) - offset; >+ memcpy (p, TREE_STRING_POINTER (str) + offset, l); >+ memset (p + l, '\0', GET_MODE_SIZE (mode) - l); >+ return c_readstr (p, mode, false); >+ } >+ >+ return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false); >+} >+ > /* Generate code for computing expression EXP, > and storing the value into TARGET. 
> >@@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from) > > rtx > store_expr (tree exp, rtx target, int call_param_p, >- bool nontemporal, bool reverse) >+ bool nontemporal, bool reverse) > { > rtx temp; > rtx alt_rtl = NULL_RTX; >@@ -5606,36 +5630,32 @@ store_expr (tree exp, rtx target, int ca > if (TREE_STRING_LENGTH (str) <= 0) > goto normal_expr; > >- str_copy_len = strlen (TREE_STRING_POINTER (str)); >- if (str_copy_len < TREE_STRING_LENGTH (str) - 1) >- goto normal_expr; >+ if (can_store_by_pieces (exp_len, string_cst_read_str, (void *) >str, >+ MEM_ALIGN (target), false)) >+ { >+ store_by_pieces (target, exp_len, string_cst_read_str, (void *) >str, >+ MEM_ALIGN (target), false, RETURN_BEGIN); >+ return NULL_RTX; >+ } > > str_copy_len = TREE_STRING_LENGTH (str); >- if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0 >- && TREE_STRING_POINTER (str)[TREE_STRING_LENGTH (str) - 1] == '\0') >+ if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) > { > str_copy_len += STORE_MAX_PIECES - 1; > str_copy_len &= ~(STORE_MAX_PIECES - 1); > } >- str_copy_len = MIN (str_copy_len, exp_len); >- if (!can_store_by_pieces (str_copy_len, >builtin_strncpy_read_str, >- CONST_CAST (char *, TREE_STRING_POINTER (str)), >- MEM_ALIGN (target), false)) >+ if (str_copy_len >= exp_len) > goto normal_expr; > >- dest_mem = target; >+ if (!can_store_by_pieces (str_copy_len, string_cst_read_str, >+ (void *) str, MEM_ALIGN (target), false)) >+ goto normal_expr; > >- memop_ret retmode = exp_len > str_copy_len ? 
RETURN_END : >RETURN_BEGIN; >- dest_mem = store_by_pieces (dest_mem, >- str_copy_len, builtin_strncpy_read_str, >- CONST_CAST (char *, >- TREE_STRING_POINTER (str)), >- MEM_ALIGN (target), false, >- retmode); >- if (exp_len > str_copy_len) >- clear_storage (adjust_address (dest_mem, BLKmode, 0), >- GEN_INT (exp_len - str_copy_len), >- BLOCK_OP_NORMAL); >+ dest_mem = store_by_pieces (target, str_copy_len, >string_cst_read_str, >+ (void *) str, MEM_ALIGN (target), false, >+ RETURN_END); >+ clear_storage (adjust_address (dest_mem, BLKmode, 0), >+ GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL); > return NULL_RTX; > } > else >--- gcc/testsuite/gcc.target/i386/pr66152.c.jj 2019-02-15 >11:55:56.212164557 +0100 >+++ gcc/testsuite/gcc.target/i386/pr66152.c 2019-02-15 >11:56:47.769308378 +0100 >@@ -0,0 +1,25 @@ >+/* PR rtl-optimization/66152 */ >+/* { dg-do compile } */ >+/* { dg-options "-O2" } */ >+/* { dg-final { scan-assembler "movabs\[^\n\r]*506097522914230528" { >target { ! ia32 } } } } */ >+/* { dg-final { scan-assembler "movabs\[^\n\r]*505813836079825408" { >target { ! ia32 } } } } */ >+/* { dg-final { scan-assembler "mov\[^\n\r]*50462976" { target ia32 } >} } */ >+/* { dg-final { scan-assembler "mov\[^\n\r]*117835012" { target ia32 } >} } */ >+/* { dg-final { scan-assembler "mov\[^\n\r]*100925952" { target ia32 } >} } */ >+/* { dg-final { scan-assembler "mov\[^\n\r]*117768961" { target ia32 } >} } */ >+ >+void foo (char *); >+ >+void >+bar (void) >+{ >+ char a[] = {0,1,2,3,4,5,6,7}; >+ foo (a); >+} >+ >+void >+baz (void) >+{ >+ char a[8] = "\0\2\4\6\1\3\5\7"; >+ foo (a); >+} > > Jakub