On 06/11/15 10:46, Kyrill Tkachov wrote: > Hi all, > > In this wrong-code PR the vector setmem expansion and > arm_block_set_aligned_vect in particular > use the wrong offset when calling adjust_automodify_address. In the attached > testcase during the > initial zeroing out we get two V16QI stores, but they both are recorded by > adjust_automodify_address > as modifying x+0 rather than x+0 and x+12 (the total size to be written is > 28). > > This led to the scheduling pass moving the store from "x.g = 2;" to before > the zeroing stores. > > This patch fixes the problem by keeping track of the offset to which stores > are emitted and > passing it to adjust_automodify_address as appropriate. > > From inspection I see arm_block_set_unaligned_vect also has this issue so I > performed the same > fix in that function as well. > > Bootstrapped and tested on arm-none-linux-gnueabihf. > > Ok for trunk? > > This bug appears on GCC 5 too and I'm currently testing this patch there. > Ok to backport to GCC 5 as well?
> > Thanks, > Kyrill > > 2015-11-06 Kyrylo Tkachov <kyrylo.tkac...@arm.com> > > PR target/68143 > * config/arm/arm.c (arm_block_set_unaligned_vect): Keep track of > offset from dstbase and use it appropriately in > adjust_automodify_address. > (arm_block_set_aligned_vect): Likewise. > > 2015-11-06 Kyrylo Tkachov <kyrylo.tkac...@arm.com> > > PR target/68143 > * gcc.target/arm/pr68143_1.c: New test. Sorry about the delay in reviewing this. There's nothing arm specific about this test - I'd just put this in gcc.c-torture/execute, there are enough auto-testers with neon on that will show up issues if this starts failing. Ok with that change. Ramana > > arm-setmem-offset.patch > > > commit 78c6989a7af1df672ea227057180d79d717ed5f3 > Author: Kyrylo Tkachov <kyrylo.tkac...@arm.com> > Date: Wed Oct 28 17:29:18 2015 +0000 > > [ARM] Properly update memory offsets when expanding setmem > > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index 66e8afc..adf3143 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -29268,7 +29268,7 @@ arm_block_set_unaligned_vect (rtx dstbase, > rtx (*gen_func) (rtx, rtx); > machine_mode mode; > unsigned HOST_WIDE_INT v = value; > - > + unsigned int offset = 0; > gcc_assert ((align & 0x3) != 0); > nelt_v8 = GET_MODE_NUNITS (V8QImode); > nelt_v16 = GET_MODE_NUNITS (V16QImode); > @@ -29289,7 +29289,7 @@ arm_block_set_unaligned_vect (rtx dstbase, > return false; > > dst = copy_addr_to_reg (XEXP (dstbase, 0)); > - mem = adjust_automodify_address (dstbase, mode, dst, 0); > + mem = adjust_automodify_address (dstbase, mode, dst, offset); > > v = sext_hwi (v, BITS_PER_WORD); > val_elt = GEN_INT (v); > @@ -29306,7 +29306,11 @@ arm_block_set_unaligned_vect (rtx dstbase, > { > emit_insn ((*gen_func) (mem, reg)); > if (i + 2 * nelt_mode <= length) > - emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode))); > + { > + emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode))); > + offset += nelt_mode; > + mem = adjust_automodify_address (dstbase, mode, dst, offset); > + } > } > > /* If there are not less than nelt_v8 bytes leftover, we must be in > @@ -29317,6 +29321,9 @@ arm_block_set_unaligned_vect (rtx dstbase, > if (i + nelt_v8 < length) > { > emit_insn (gen_add2_insn (dst, GEN_INT (length - i))); > + offset += length - i; > + mem = adjust_automodify_address (dstbase, mode, dst, offset); > + > /* We are shifting bytes back, set the alignment accordingly. */ > if ((length & 1) != 0 && align >= 2) > set_mem_align (mem, BITS_PER_UNIT); > @@ -29327,12 +29334,13 @@ arm_block_set_unaligned_vect (rtx dstbase, > else if (i < length && i + nelt_v8 >= length) > { > if (mode == V16QImode) > - { > - reg = gen_lowpart (V8QImode, reg); > - mem = adjust_automodify_address (dstbase, V8QImode, dst, 0); > - } > + reg = gen_lowpart (V8QImode, reg); > + > emit_insn (gen_add2_insn (dst, GEN_INT ((length - i) > + (nelt_mode - nelt_v8)))); > + offset += (length - i) + (nelt_mode - nelt_v8); > + mem = adjust_automodify_address (dstbase, V8QImode, dst, offset); > + > /* We are shifting bytes back, set the alignment accordingly. */ > if ((length & 1) != 0 && align >= 2) > set_mem_align (mem, BITS_PER_UNIT); > @@ -29359,6 +29367,7 @@ arm_block_set_aligned_vect (rtx dstbase, > rtx rval[MAX_VECT_LEN]; > machine_mode mode; > unsigned HOST_WIDE_INT v = value; > + unsigned int offset = 0; > > gcc_assert ((align & 0x3) == 0); > nelt_v8 = GET_MODE_NUNITS (V8QImode); > @@ -29390,14 +29399,15 @@ arm_block_set_aligned_vect (rtx dstbase, > /* Handle first 16 bytes specially using vst1:v16qi instruction. */ > if (mode == V16QImode) > { > - mem = adjust_automodify_address (dstbase, mode, dst, 0); > + mem = adjust_automodify_address (dstbase, mode, dst, offset); > emit_insn (gen_movmisalignv16qi (mem, reg)); > i += nelt_mode; > /* Handle (8, 16) bytes leftover using vst1:v16qi again. */ > if (i + nelt_v8 < length && i + nelt_v16 > length) > { > emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode))); > - mem = adjust_automodify_address (dstbase, mode, dst, 0); > + offset += length - nelt_mode; > + mem = adjust_automodify_address (dstbase, mode, dst, offset); > /* We are shifting bytes back, set the alignment accordingly. */ > if ((length & 0x3) == 0) > set_mem_align (mem, BITS_PER_UNIT * 4); > @@ -29419,7 +29429,7 @@ arm_block_set_aligned_vect (rtx dstbase, > for (; (i + nelt_mode <= length); i += nelt_mode) > { > addr = plus_constant (Pmode, dst, i); > - mem = adjust_automodify_address (dstbase, mode, addr, i); > + mem = adjust_automodify_address (dstbase, mode, addr, offset + i); > emit_move_insn (mem, reg); > } > > @@ -29428,8 +29438,8 @@ arm_block_set_aligned_vect (rtx dstbase, > if (i + UNITS_PER_WORD == length) > { > addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD); > - mem = adjust_automodify_address (dstbase, mode, > - addr, i - UNITS_PER_WORD); > + offset += i - UNITS_PER_WORD; > + mem = adjust_automodify_address (dstbase, mode, addr, offset); > /* We are shifting 4 bytes back, set the alignment accordingly. */ > if (align > UNITS_PER_WORD) > set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD); > @@ -29441,7 +29451,8 @@ arm_block_set_aligned_vect (rtx dstbase, > else if (i < length) > { > emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode))); > - mem = adjust_automodify_address (dstbase, mode, dst, 0); > + offset += length - nelt_mode; > + mem = adjust_automodify_address (dstbase, mode, dst, offset); > /* We are shifting bytes back, set the alignment accordingly. */ > if ((length & 1) == 0) > set_mem_align (mem, BITS_PER_UNIT * 2); > diff --git a/gcc/testsuite/gcc.target/arm/pr68143_1.c > b/gcc/testsuite/gcc.target/arm/pr68143_1.c > new file mode 100644 > index 0000000..323473f > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/pr68143_1.c > @@ -0,0 +1,36 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target arm_neon_hw } */ > +/* { dg-options "-O3 -mcpu=cortex-a57" } */ > +/* { dg-add-options arm_neon } */ > + > +#define NULL 0 > + > +struct stuff > +{ > + int a; > + int b; > + int c; > + int d; > + int e; > + char *f; > + int g; > +}; > + > +void __attribute__ ((noinline)) > +bar (struct stuff *x) > +{ > + if (x->g != 2) > + __builtin_abort (); > +} > + > +int > +main (int argc, char** argv) > +{ > + struct stuff x = {0, 0, 0, 0, 0, NULL, 0}; > + x.a = 100; > + x.d = 100; > + x.g = 2; > + /* Struct should now look like {100, 0, 0, 100, 0, 0, 0, 2}. */ > + bar (&x); > + return 0; > +} >