On March 14, 2015 10:04:53 AM GMT+01:00, Jakub Jelinek <ja...@redhat.com> wrote:
>Hi!
>
>This issue is practically the same as PR63341, except in this case it is for
>dr_explicit_realign rather than dr_explicit_realign_optimized, and the bump
>isn't passed through multiple functions and thus is easier to fix.
>
>Without the patch we use (dataptr & -16) for the first load and
>((dataptr + 12) & -16) for the second load, which works just fine if the
>elements are properly aligned (4 byte at least), but in this case we have
>underaligned accesses (coming from folding of memcpy in this testcase, and
>from 4 byte loads combined together recognized by bswap pass in the original
>source), and so we really want to use ((dataptr + 15) & -16), otherwise
>if we are unlucky we might read the same memory twice even when dataptr
>is not 16 byte aligned.
>
>Bootstrapped/regtested on
>{x86_64,i686,aarch64,powerpc64{,le},s390{,x}}-linux, ok for trunk?
OK. Thanks, Richard. >2015-03-14 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/65369 > * tree-vect-stmts.c (vectorizable_load) <case dr_explicit_realign>: > Set bump to vs * TYPE_SIZE_UNIT (elem_type) - 1 instead of > (vs - 1) * TYPE_SIZE_UNIT (elem_type). > > * gcc.c-torture/execute/pr65369.c: New test. > >--- gcc/tree-vect-stmts.c.jj 2015-03-09 08:05:13.000000000 +0100 >+++ gcc/tree-vect-stmts.c 2015-03-13 17:27:30.613529768 +0100 >@@ -6468,9 +6468,8 @@ vectorizable_load (gimple stmt, gimple_s > case dr_explicit_realign: > { > tree ptr, bump; >- tree vs_minus_1; > >- vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1); >+ tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype)); > > if (compute_in_loop) > msq = vect_setup_realignment (first_stmt, gsi, >@@ -6499,8 +6498,9 @@ vectorizable_load (gimple stmt, gimple_s > vect_finish_stmt_generation (stmt, new_stmt, gsi); > msq = new_temp; > >- bump = size_binop (MULT_EXPR, vs_minus_1, >+ bump = size_binop (MULT_EXPR, vs, > TYPE_SIZE_UNIT (elem_type)); >+ bump = size_binop (MINUS_EXPR, bump, size_one_node); > ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump); > new_stmt = gimple_build_assign > (NULL_TREE, BIT_AND_EXPR, ptr, >--- gcc/testsuite/gcc.c-torture/execute/pr65369.c.jj 2015-03-13 >17:37:10.926175685 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr65369.c 2015-03-13 >17:35:40.000000000 +0100 >@@ -0,0 +1,45 @@ >+/* PR tree-optimization/65369 */ >+ >+static const char data[] = >+ "12345678901234567890123456789012345678901234567890" >+ "123456789012345678901234567890"; >+ >+__attribute__ ((noinline)) >+static void foo (const unsigned int *buf) >+{ >+ if (__builtin_memcmp (buf, data, 64)) >+ __builtin_abort (); >+} >+ >+__attribute__ ((noinline)) >+static void bar (const unsigned char *block) >+{ >+ unsigned int buf[16]; >+ __builtin_memcpy (buf + 0, block + 0, 4); >+ __builtin_memcpy (buf + 1, block + 4, 4); >+ __builtin_memcpy (buf + 2, block + 8, 4); >+ __builtin_memcpy (buf + 3, block + 
12, 4); >+ __builtin_memcpy (buf + 4, block + 16, 4); >+ __builtin_memcpy (buf + 5, block + 20, 4); >+ __builtin_memcpy (buf + 6, block + 24, 4); >+ __builtin_memcpy (buf + 7, block + 28, 4); >+ __builtin_memcpy (buf + 8, block + 32, 4); >+ __builtin_memcpy (buf + 9, block + 36, 4); >+ __builtin_memcpy (buf + 10, block + 40, 4); >+ __builtin_memcpy (buf + 11, block + 44, 4); >+ __builtin_memcpy (buf + 12, block + 48, 4); >+ __builtin_memcpy (buf + 13, block + 52, 4); >+ __builtin_memcpy (buf + 14, block + 56, 4); >+ __builtin_memcpy (buf + 15, block + 60, 4); >+ foo (buf); >+} >+ >+int >+main () >+{ >+ unsigned char input[sizeof data + 16] __attribute__((aligned (16))); >+ __builtin_memset (input, 0, sizeof input); >+ __builtin_memcpy (input + 1, data, sizeof data); >+ bar (input + 1); >+ return 0; >+} > > Jakub