On Thu, 6 Jun 2019 at 16:54, Richard Sandiford <richard.sandif...@arm.com> wrote: > > Szabolcs Nagy <szabolcs.n...@arm.com> writes: > > On 03/06/2019 08:26, Prathamesh Kulkarni wrote: > >> +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c > >> @@ -0,0 +1,32 @@ > >> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ > >> +/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 > >> --save-temps" } */ > >> + > >> +/* Case 5.2: Interleaved elements and constants. */ > >> + > >> +#include <stdint.h> > >> + > >> +typedef int32_t vnx4si __attribute__((vector_size (32))); > >> + > >> +__attribute__((noipa)) > >> +vnx4si foo(int a, int b, int c, int d) > >> +{ > >> + return (vnx4si) { a, 1, b, 2, c, 3, d, 4 }; > >> +} > >> + > >> +/* > >> +foo: > >> +.LFB0: > >> + .cfi_startproc > >> + ptrue p0.s, vl8 > >> + mov z0.s, w3 > >> + adrp x3, .LANCHOR0 > >> + insr z0.s, w2 > >> + add x3, x3, :lo12:.LANCHOR0 > >> + insr z0.s, w1 > >> + ld1w z1.s, p0/z, [x3] > >> + insr z0.s, w0 > >> + zip1 z0.s, z0.s, z1.s > >> + ret > >> +*/ > >> + > >> +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), > >> w3\n\tadrp\t(x[0-9]+), \.LANCHOR0\n\tinsr\t\1, w2\n\tadd\t\2, \2, > >> :lo12:\.LANCHOR0\n\tinsr\t\1, w1\n\tld1w\t(z[0-9]+\.s), p[0-9]+/z, > >> \[\2\]\n\tinsr\t\1, w0\n\tzip1\t\1, \1, \3} } } */ > > > > this fails with tiny model when i'm testing aarch64-none-elf > > > > $ make check-c 'RUNTESTFLAGS=--target_board=aarch64-elf-qemu{-mcmodel=tiny} > > aarch64-sve.exp=init_8.c' > > ... 
> > FAIL: gcc.target/aarch64/sve/init_8.c -march=armv8.2-a+sve scan-assembler > > \\tmov\\t(z[0-9]+\\.s), w3\\n\\tadrp\\t(x[0-9]+), > > \\.LANCHOR0\\n\\tinsr\\t\\1, w2\\n\\tadd\\t\\2, \\2, > > :lo12:\\.LANCHOR0\\n\\tinsr\\t\\1, w1\\n\\tld1w\\t(z[0-9]+\\.s), p[0-9]+/z, > > \\[\\2\\]\\n\\tinsr\\t\\1, w0\\n\\tzip1\\t\\1, \\1, \\3 > > > > i think you need conditional scan asm for { target aarch64_small } > > and { target aarch64_tiny } or just skip the test for tiny, > > Maybe we should remove the address calculation and replace the ld1w > address with \[[^]]*\]. All that really matters for this test is that > the vector is loaded from memory. > > > but even then matching exact register name and instruction scheduling > > seems fragile. > > The only hard-coded register names are the parameters, which are > guaranteed by the ABI. Testing for those should be fine. > > The dg-options pass -fno-schedule-insns, but I guess they should > also pass -fno-schedule-insns2. Or maybe just use -O instead. > We can always revisit this later if even that isn't enough to make > the order stable. Thanks for the suggestions. Passing -fno-schedule-insns2 does seem to make the order stable. For init_1.c to init_4.c there were no intervening instructions, and for the remaining tests, the patch passes -fno-schedule-insns2 and adjusts the dg-final scan-assembler patterns accordingly. I verified the tests pass with -mcmodel=tiny. OK to commit?
Thanks, Prathamesh > > Richard
2019-06-07 Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> * gcc.target/aarch64/sve/init_5.c: Pass -fno-schedule-insns2. Update assembly in comments and adjust dg-scan. * gcc.target/aarch64/sve/init_6.c: Likewise. * gcc.target/aarch64/sve/init_7.c: Likewise. * gcc.target/aarch64/sve/init_8.c: Likewise. * gcc.target/aarch64/sve/init_9.c: Likewise. * gcc.target/aarch64/sve/init_10.c: Likewise. * gcc.target/aarch64/sve/init_11.c: Likewise. * gcc.target/aarch64/sve/init_12.c: Likewise. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c index 9d6e2dfc876..08437e5d8f1 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 5.4: Interleaved repeating elements and non-repeating elements. 
*/ @@ -17,13 +17,14 @@ vnx4si foo(int a, int b, int c, int f) foo: .LFB0: .cfi_startproc - mov z0.s, w2 mov z1.s, w3 + mov z0.s, w2 insr z0.s, w1 - ptrue p0.s, vl8 insr z0.s, w0 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tmov\t(z[0-9]+\.s), w2\n.*\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c index e50cd54ef13..786765dbfb7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 5.5: Interleaved repeating elements and trailing same elements. 
*/ @@ -18,11 +18,12 @@ foo: .LFB0: .cfi_startproc mov z0.s, w1 - mov z1.s, w2 insr z0.s, w0 - ptrue p0.s, vl8 + mov z1.s, w2 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w1\n\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w0\n.*\tzip1\t\1, \1, \2} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w1\n\tinsr\t\1, w0\n\tmov\t(z[0-9]+\.s), w2\n\tzip1\t\1, \1, \2} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c index 21d9e764360..e65b1af475c 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 5.5: Interleaved repeating elements and trailing same elements. 
*/ @@ -17,14 +17,15 @@ vnx4si foo(int a, int b, int f) foo: .LFB0: .cfi_startproc - mov z0.s, w0 mov z1.s, w2 + mov z0.s, w0 insr z0.s, w1 - ptrue p0.s, vl8 insr z0.s, w1 insr z0.s, w1 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tmov\t(z[0-9]+\.s), w0\n.*\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tzip1\t\2, \2, \1} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tmov\t(z[0-9]+\.s), w0\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tzip1\t\2, \2, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c index e7fbdd1a2aa..acab6d2d405 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 3: Trailing same element. 
*/ @@ -18,10 +18,11 @@ foo: .LFB0: .cfi_startproc mov z0.s, w2 - ptrue p0.s, vl8 insr z0.s, w1 insr z0.s, w0 + ptrue p0.s, vl8 + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n.*\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c index f6f3da5958d..fd6d4b9b85a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 3: Trailing same element. */ @@ -18,11 +18,12 @@ foo: .LFB0: .cfi_startproc mov z0.s, w2 - ptrue p0.s, vl8 insr z0.s, w1 insr z0.s, w0 rev z0.s, z0.s + ptrue p0.s, vl8 + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n.*\tinsr\t\1, w1\n\tinsr\t\1, w0\n\trev\t\1, \1} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0\n\trev\t\1, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c index e3104a35f13..cf6926d3a73 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 5.1: All elements. 
*/ @@ -18,7 +18,6 @@ foo: .LFB0: .cfi_startproc mov z0.s, w7 - ptrue p0.s, vl8 insr z0.s, w6 insr z0.s, w5 insr z0.s, w4 @@ -26,7 +25,9 @@ foo: insr z0.s, w2 insr z0.s, w1 insr z0.s, w0 + ptrue p0.s, vl8 + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w7\n.*\tinsr\t\1, w6\n\tinsr\t\1, w5\n\tinsr\t\1, w4\n\tinsr\t\1, w3\n\tinsr\t\1, w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w7\n\tinsr\t\1, w6\n\tinsr\t\1, w5\n\tinsr\t\1, w4\n\tinsr\t\1, w3\n\tinsr\t\1, w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c index 7ff3e0849cc..b3ed32e4c82 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 5.2: Interleaved elements and constants. 
*/ @@ -18,15 +18,16 @@ foo: .LFB0: .cfi_startproc ptrue p0.s, vl8 + adrp x4, .LANCHOR0 + add x4, x4, :lo12:.LANCHOR0 + ld1w z1.s, p0/z, [x4] mov z0.s, w3 - adrp x3, .LANCHOR0 insr z0.s, w2 - add x3, x3, :lo12:.LANCHOR0 insr z0.s, w1 - ld1w z1.s, p0/z, [x3] insr z0.s, w0 zip1 z0.s, z0.s, z1.s + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tadrp\t(x[0-9]+), \.LANCHOR0\n\tinsr\t\1, w2\n\tadd\t\2, \2, :lo12:\.LANCHOR0\n\tinsr\t\1, w1\n\tld1w\t(z[0-9]+\.s), p[0-9]+/z, \[\2\]\n\tinsr\t\1, w0\n\tzip1\t\1, \1, \3} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-9]+/z, \[x[0-9]+\]\n\tmov\t(z[0-9]+\.s), w3\n\tinsr\t\2, w2\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c index 4d3c59b3bf8..333bd4f2852 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -msve-vector-bits=256 --save-temps" } */ /* Case 5.3: Repeated elements. */ @@ -19,9 +19,10 @@ foo: .cfi_startproc mov z0.s, w0 mov z1.s, w1 - ptrue p0.s, vl8 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 + st1w z0.s, p0, [x8] ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w0\n\tmov\t(z[0-9]+\.s), w1\n.*\tzip1\t\1, \1, \2} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w0\n\tmov\t(z[0-9]+\.s), w1\n\tzip1\t\1, \1, \2} } } */