[gcc r15-323] RISC-V: Add test for sraiw-31 special case

2024-05-08 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:dd388198b8be52ab378c935fc517a269e0ba741c

commit r15-323-gdd388198b8be52ab378c935fc517a269e0ba741c
Author: Christoph Müllner 
Date:   Tue May 7 22:59:44 2024 +0200

RISC-V: Add test for sraiw-31 special case

We already optimize a sign-extension of a right-shift by 31 in
si3_extend.  Let's add a test for that (similar to
zero-extend-1.c).

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sign-extend-1.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/sign-extend-1.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sign-extend-1.c 
b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
new file mode 100644
index ..e9056ec0d424
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { riscv64*-*-* } } } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+
+signed long
+foo1 (int i)
+{
+  return i >> 31;
+}
+/* { dg-final { scan-assembler "sraiw\ta\[0-9\],a\[0-9\],31" } } */
+
+/* { dg-final { scan-assembler-not "srai\t" } } */
+/* { dg-final { scan-assembler-not "srli\t" } } */
+/* { dg-final { scan-assembler-not "srliw\t" } } */


[gcc r15-338] RISC-V: Add tests for cpymemsi expansion

2024-05-09 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:4d38e88227ea48e559a2f354c0e62d372e181b82

commit r15-338-g4d38e88227ea48e559a2f354c0e62d372e181b82
Author: Christoph Müllner 
Date:   Thu Apr 11 12:07:10 2024 +0200

RISC-V: Add tests for cpymemsi expansion

cpymemsi expansion has been available for RISC-V since the initial port.
However, there are no tests to detect regressions.
This patch adds such tests.

Three of the tests target the expansion requirements (known length and
alignment). One test reuses an existing memcpy test from the by-pieces
framework (gcc/testsuite/gcc.dg/torture/inline-mem-cpy-1.c).

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cpymemsi-1.c: New test.
* gcc.target/riscv/cpymemsi-2.c: New test.
* gcc.target/riscv/cpymemsi-3.c: New test.
* gcc.target/riscv/cpymemsi.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/cpymemsi-1.c |  9 ++
 gcc/testsuite/gcc.target/riscv/cpymemsi-2.c | 42 
 gcc/testsuite/gcc.target/riscv/cpymemsi-3.c | 43 +
 gcc/testsuite/gcc.target/riscv/cpymemsi.c   | 22 +++
 4 files changed, 116 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
new file mode 100644
index ..983b564ccaf7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-march=rv32gc -save-temps -g0 -fno-lto" { target { rv32 } } } 
*/
+/* { dg-options "-march=rv64gc -save-temps -g0 -fno-lto" { target { rv64 } } } 
*/
+/* { dg-additional-options "-DRUN_FRACTION=11" { target simulator } } */
+/* { dg-timeout-factor 2 } */
+
+#include "../../gcc.dg/memcmp-1.c"
+/* Yeah, this memcmp test exercises plenty of memcpy, more than any of the
+   memcpy tests.  */
diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-2.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-2.c
new file mode 100644
index ..833d1c044876
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-2.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+#include 
+#define aligned32 __attribute__ ((aligned (32)))
+
+const char myconst15[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst23[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst31[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+
+/* No expansion (unknown alignment) */
+#define MY_MEM_CPY_N(N)\
+void my_mem_cpy_##N (char *b1, const char *b2) \
+{  \
+  __builtin_memcpy (b1, b2, N);\
+}
+
+/* No expansion (unknown alignment) */
+#define MY_MEM_CPY_CONST_N(N)  \
+void my_mem_cpy_const_##N (char *b1)   \
+{  \
+  __builtin_memcpy (b1, myconst##N, sizeof(myconst##N));\
+}
+
+MY_MEM_CPY_N(15)
+MY_MEM_CPY_CONST_N(15)
+
+MY_MEM_CPY_N(23)
+MY_MEM_CPY_CONST_N(23)
+
+MY_MEM_CPY_N(31)
+MY_MEM_CPY_CONST_N(31)
+
+/* { dg-final { scan-assembler-times "\t(call|tail)\tmemcpy" 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-3.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-3.c
new file mode 100644
index ..803765195b24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-3.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+#include 
+#define aligned32 __attribute__ ((aligned (32)))
+
+const char myconst15[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst23[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst31[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+
+#define MY_MEM_CPY_ALIGNED_N(N)\
+void my_mem_cpy_aligned_##N(char *b1, const char *b2)  \
+{  \
+  b1 = __

[gcc r15-505] RISC-V: Add test cases for cpymem expansion

2024-05-15 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:00029408387e9cc64e135324c22d15cd5a70e946

commit r15-505-g00029408387e9cc64e135324c22d15cd5a70e946
Author: Christoph Müllner 
Date:   Wed May 1 16:54:42 2024 +0200

RISC-V: Add test cases for cpymem expansion

We have two mechanisms in the RISC-V backend that expand the
cpymem pattern: a) by-pieces, b) riscv_expand_block_move()
in riscv-string.cc. The by-pieces framework has higher priority
and emits a sequence of up to 15 instructions
(see use_by_pieces_infrastructure_p() for more details).

As a rule of thumb, by-pieces emits alternating load/store sequences
and the cpymem expansion in the backend emits a sequence of loads
followed by a sequence of stores.

Let's add some test cases to document the current behaviour
and to have tests to identify regressions.

Signed-off-by: Christoph Müllner 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cpymem-32-ooo.c: New test.
* gcc.target/riscv/cpymem-32.c: New test.
* gcc.target/riscv/cpymem-64-ooo.c: New test.
* gcc.target/riscv/cpymem-64.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c | 131 +++
 gcc/testsuite/gcc.target/riscv/cpymem-32.c | 138 +
 gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c | 129 +++
 gcc/testsuite/gcc.target/riscv/cpymem-64.c | 138 +
 4 files changed, 536 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c 
b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
new file mode 100644
index ..33fb9891d823
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -0,0 +1,131 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -mtune=generic-ooo" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-allow-blank-lines-in-output 1 } */
+
+#define COPY_N(N)  \
+void copy_##N (void *to, void *from)   \
+{  \
+  __builtin_memcpy (to, from, N);  \
+}
+
+#define COPY_ALIGNED_N(N)  \
+void copy_aligned_##N (void *to, void *from)   \
+{  \
+  to = __builtin_assume_aligned(to, sizeof(long)); \
+  from = __builtin_assume_aligned(from, sizeof(long)); \
+  __builtin_memcpy (to, from, N);  \
+}
+
+/*
+**copy_7:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],6\([at][0-9]\)
+**sb\t[at][0-9],6\([at][0-9]\)
+**...
+*/
+COPY_N(7)
+
+/*
+**copy_aligned_7:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],6\([at][0-9]\)
+**sb\t[at][0-9],6\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(7)
+
+/*
+**copy_8:
+**...
+**lw\ta[0-9],0\(a[0-9]\)
+**sw\ta[0-9],0\(a[0-9]\)
+**...
+*/
+COPY_N(8)
+
+/*
+**copy_aligned_8:
+**...
+**lw\ta[0-9],0\(a[0-9]\)
+**sw\ta[0-9],0\(a[0-9]\)
+**...
+*/
+COPY_ALIGNED_N(8)
+
+/*
+**copy_11:
+**...
+**lbu\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],10\([at][0-9]\)
+**...
+**sb\t[at][0-9],0\([at][0-9]\)
+**...
+**sb\t[at][0-9],10\([at][0-9]\)
+**...
+*/
+COPY_N(11)
+
+/*
+**copy_aligned_11:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**...
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],10\([at][0-9]\)
+**sb\t[at][0-9],10\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(11)
+
+/*
+**copy_15:
+**...
+**(call|tail)\tmemcpy
+**...
+*/
+COPY_N(15)
+
+/*
+**copy_aligned_15:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**...
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],14\([at][0-9]\)
+**sb\t[at][0-9],14\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(15)
+
+/*
+**copy_27:
+**...
+**(call|tail)\tmemcpy
+**...
+*/
+COPY_N(27)
+
+/*
+**copy_aligned_27:
+**...
+**lw\t[at][0-9],20\([at][0-9]\)
+**...
+**sw\t[at][0-9],20\([at][0-9]\)
+**...
+**lbu\t[at][0-9],26\([at][0-9]\)
+**sb\t[at][0-9],26\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(27)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32.c 
b/gcc/testsuite/gcc.target/riscv/cpymem-32.c
new file mode 100644
index ..44ba14a1d51f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32.c
@@ -0,0 +1,138 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -mtune=rocket" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-allow-blank-lines-in-outpu

[gcc r15-514] RISC-V: Test cbo.zero expansion for rv32

2024-05-15 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:5609d77e683944439fae38323ecabc44a1eb4671

commit r15-514-g5609d77e683944439fae38323ecabc44a1eb4671
Author: Christoph Müllner 
Date:   Wed May 15 01:34:54 2024 +0200

RISC-V: Test cbo.zero expansion for rv32

We had an issue when expanding via cmo-zero for RV32.
This was fixed upstream, but we don't have an RV32 test.
Therefore, this patch introduces such a test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-zicboz-zic64-1.c: Fix for rv32.

Signed-off-by: Christoph Müllner 

Diff:
---
 .../gcc.target/riscv/cmo-zicboz-zic64-1.c  | 37 +++---
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
index 6d4535287d08..9192b391b11d 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
@@ -1,24 +1,9 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zic64b_zicboz -mabi=lp64d" } */
+/* { dg-options "-march=rv32gc_zic64b_zicboz" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zic64b_zicboz" { target { rv64 } } } */
 /* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
-/* { dg-final { check-function-bodies "**" "" } } */
-/* { dg-allow-blank-lines-in-output 1 } */
 
-/*
-**clear_buf_123:
-**...
-**cbo\.zero\t0\(a[0-9]+\)
-**sd\tzero,64\(a[0-9]+\)
-**sd\tzero,72\(a[0-9]+\)
-**sd\tzero,80\(a[0-9]+\)
-**sd\tzero,88\(a[0-9]+\)
-**sd\tzero,96\(a[0-9]+\)
-**sd\tzero,104\(a[0-9]+\)
-**sd\tzero,112\(a[0-9]+\)
-**sh\tzero,120\(a[0-9]+\)
-**sb\tzero,122\(a[0-9]+\)
-**...
-*/
+// 1x cbo.zero, 7x sd (rv64) or 14x sw (rv32), 1x sh, 1x sb
 int
 clear_buf_123 (void *p)
 {
@@ -26,17 +11,17 @@ clear_buf_123 (void *p)
   __builtin_memset (p, 0, 123);
 }
 
-/*
-**clear_buf_128:
-**...
-**cbo\.zero\t0\(a[0-9]+\)
-**addi\ta[0-9]+,a[0-9]+,64
-**cbo\.zero\t0\(a[0-9]+\)
-**...
-*/
+// 2x cbo.zero, 1x addi 64
 int
 clear_buf_128 (void *p)
 {
   p = __builtin_assume_aligned(p, 64);
   __builtin_memset (p, 0, 128);
 }
+
+/* { dg-final { scan-assembler-times "cbo\.zero\t" 3 } } */
+/* { dg-final { scan-assembler-times "addi\ta\[0-9\]+,a\[0-9\]+,64" 1 } } */
+/* { dg-final { scan-assembler-times "sd\t" 7 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "sw\t" 14 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "sh\t" 1 } } */
+/* { dg-final { scan-assembler-times "sb\t" 1 } } */


[gcc r15-572] RISC-V: testsuite: Drop march-string in cmpmemsi/cpymemsi tests

2024-05-16 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:b8b82bb05c10544da05cd0d3d39e6bc3763a8d9f

commit r15-572-gb8b82bb05c10544da05cd0d3d39e6bc3763a8d9f
Author: Christoph Müllner 
Date:   Thu May 16 09:53:47 2024 +0200

RISC-V: testsuite: Drop march-string in cmpmemsi/cpymemsi tests

The tests cmpmemsi-1.c and cpymemsi-1.c are execution ("dg-do run")
tests, which do not have any restrictions on the enabled extensions.
Further, none of the other listed options are required.
Let's drop the options, so that the tests can also be executed on
non-f and non-d targets.  However, we need to set options to the
defaults without '-ansi', because the included test file uses the
'asm' keyword, which is not part of ANSI C.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmpmemsi-1.c: Drop options.
* gcc.target/riscv/cpymemsi-1.c: Likewise.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c | 3 +--
 gcc/testsuite/gcc.target/riscv/cpymemsi-1.c | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c 
b/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
index d7e0bc474073..698f27d89fbf 100644
--- a/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
@@ -1,6 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=rv32gc_zbb -save-temps -g0 -fno-lto" { target { rv32 } 
} } */
-/* { dg-options "-march=rv64gc_zbb -save-temps -g0 -fno-lto" { target { rv64 } 
} } */
+/* { dg-options "-pedantic-errors" } */
 /* { dg-timeout-factor 2 } */
 
 #include "../../gcc.dg/memcmp-1.c"
diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
index 983b564ccaf7..30e9f119bedc 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
@@ -1,7 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=rv32gc -save-temps -g0 -fno-lto" { target { rv32 } } } 
*/
-/* { dg-options "-march=rv64gc -save-temps -g0 -fno-lto" { target { rv64 } } } 
*/
-/* { dg-additional-options "-DRUN_FRACTION=11" { target simulator } } */
+/* { dg-options "-pedantic-errors" } */
 /* { dg-timeout-factor 2 } */
 
 #include "../../gcc.dg/memcmp-1.c"


[gcc r15-1909] RISC-V: testsuite: Properly gate LTO tests

2024-07-09 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:0717d50fc4ff983b79093bdef43b04e4584cc3cd

commit r15-1909-g0717d50fc4ff983b79093bdef43b04e4584cc3cd
Author: Christoph Müllner 
Date:   Fri Jul 5 09:53:34 2024 +0200

RISC-V: testsuite: Properly gate LTO tests

There are two test cases with the following skip directive:
  dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" }
This reads as: skip if both '-flto' and '-fno-fat-lto-objects'
are present.  This is not the case if only '-flto' is present.

Since both tests depend on instruction sequences (one does
check-function-bodies, the other tests for an assembler error
message), they won't work reliably with fat LTO objects.

Let's change the skip line to gate the test on '-flto'
to avoid failing tests like this:

FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto   
check-function-bodies interrupt
FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto 
-flto-partition=none   check-function-bodies interrupt
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 9)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 9)

gcc/testsuite/ChangeLog:

* gcc.target/riscv/interrupt-misaligned.c: Remove
"-fno-fat-lto-objects" from skip condition.
* gcc.target/riscv/pr93202.c: Likewise.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c | 2 +-
 gcc/testsuite/gcc.target/riscv/pr93202.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c 
b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
index b5f8e6c2bbef..912f180e4d65 100644
--- a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
+++ b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=rv64gc -mabi=lp64d -fno-schedule-insns 
-fno-schedule-insns2" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 /*  Make sure no stack offset are misaligned.
 **  interrupt:
diff --git a/gcc/testsuite/gcc.target/riscv/pr93202.c 
b/gcc/testsuite/gcc.target/riscv/pr93202.c
index 5501191ea52c..5de003fac421 100644
--- a/gcc/testsuite/gcc.target/riscv/pr93202.c
+++ b/gcc/testsuite/gcc.target/riscv/pr93202.c
@@ -1,7 +1,7 @@
 /* PR inline-asm/93202 */
 /* { dg-do compile { target fpic } } */
 /* { dg-options "-fpic" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 void
 foo (void)


[gcc r15-2043] RISC-V: Attribute parser: Use alloca() instead of new + std::unique_ptr

2024-07-15 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:5040c273484d7123a40a99cdeb434cecbd17a2e9

commit r15-2043-g5040c273484d7123a40a99cdeb434cecbd17a2e9
Author: Christoph Müllner 
Date:   Fri Jul 5 04:48:15 2024 +0200

RISC-V: Attribute parser: Use alloca() instead of new + std::unique_ptr

Allocating an object on the heap with new, wrapping it in a
std::unique_ptr and finally getting the buffer via buf.get()
is a correct way to allocate a buffer that is automatically
freed on return.  However, a simple invocation of alloca()
does the same with less overhead.

gcc/ChangeLog:

* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Replace new + std::unique_ptr by alloca().
(riscv_process_one_target_attr): Likewise.
(riscv_process_target_attr): Likewise.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/riscv-target-attr.cc | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index 0bbe7df25d19..3d7753f64574 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -109,8 +109,7 @@ riscv_target_attr_parser::parse_arch (const char *str)
 {
   /* Parsing the extension list like "+[,+]*".  */
   size_t len = strlen (str);
-  std::unique_ptr buf (new char[len+1]);
-  char *str_to_check = buf.get ();
+  char *str_to_check = (char *) alloca (len + 1);
   strcpy (str_to_check, str);
   const char *token = strtok_r (str_to_check, ",", &str_to_check);
   m_subset_list = riscv_cmdline_subset_list ()->clone ();
@@ -247,8 +246,7 @@ riscv_process_one_target_attr (char *arg_str,
   return false;
 }
 
-  std::unique_ptr buf (new char[len+1]);
-  char *str_to_check = buf.get();
+  char *str_to_check = (char *) alloca (len + 1);
   strcpy (str_to_check, arg_str);
 
   char *arg = strchr (str_to_check, '=');
@@ -334,8 +332,7 @@ riscv_process_target_attr (tree fndecl, tree args, 
location_t loc,
   return false;
 }
 
-  std::unique_ptr buf (new char[len+1]);
-  char *str_to_check = buf.get ();
+  char *str_to_check = (char *) alloca (len + 1);
   strcpy (str_to_check, TREE_STRING_POINTER (args));
 
   /* Used to catch empty spaces between semi-colons i.e.


[gcc r15-2046] Revert "RISC-V: Attribute parser: Use alloca() instead of new + std::unique_ptr"

2024-07-15 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:eb0c163aada970b8351067b17121f013fc58dbc9

commit r15-2046-geb0c163aada970b8351067b17121f013fc58dbc9
Author: Christoph Müllner 
Date:   Mon Jul 15 23:42:39 2024 +0200

Revert "RISC-V: Attribute parser: Use alloca() instead of new + 
std::unique_ptr"

This reverts commit 5040c273484d7123a40a99cdeb434cecbd17a2e9.

Diff:
---
 gcc/config/riscv/riscv-target-attr.cc | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index 57235c9c0a7e..1645a6692177 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -101,7 +101,8 @@ riscv_target_attr_parser::parse_arch (const char *str)
 {
   /* Parsing the extension list like "+[,+]*".  */
   size_t len = strlen (str);
-  char *str_to_check = (char *) alloca (len + 1);
+  std::unique_ptr buf (new char[len+1]);
+  char *str_to_check = buf.get ();
   strcpy (str_to_check, str);
   const char *token = strtok_r (str_to_check, ",", &str_to_check);
   const char *local_arch_str = global_options.x_riscv_arch_string;
@@ -253,7 +254,8 @@ riscv_process_one_target_attr (char *arg_str,
   return false;
 }
 
-  char *str_to_check = (char *) alloca (len + 1);
+  std::unique_ptr buf (new char[len+1]);
+  char *str_to_check = buf.get();
   strcpy (str_to_check, arg_str);
 
   char *arg = strchr (str_to_check, '=');
@@ -339,7 +341,8 @@ riscv_process_target_attr (tree args, location_t loc)
   return false;
 }
 
-  char *str_to_check = (char *) alloca (len + 1);
+  std::unique_ptr buf (new char[len+1]);
+  char *str_to_check = buf.get ();
   strcpy (str_to_check, TREE_STRING_POINTER (args));
 
   /* Used to catch empty spaces between semi-colons i.e.


[gcc r14-10428] RISC-V: testsuite: Properly gate LTO tests

2024-07-16 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:ea5907d6d458b1c9318814c96ebb277c7c8505f5

commit r14-10428-gea5907d6d458b1c9318814c96ebb277c7c8505f5
Author: Christoph Müllner 
Date:   Fri Jul 5 09:53:34 2024 +0200

RISC-V: testsuite: Properly gate LTO tests

There are two test cases with the following skip directive:
  dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" }
This reads as: skip if both '-flto' and '-fno-fat-lto-objects'
are present.  This is not the case if only '-flto' is present.

Since both tests depend on instruction sequences (one does
check-function-bodies, the other tests for an assembler error
message), they won't work reliably with fat LTO objects.

Let's change the skip line to gate the test on '-flto'
to avoid failing tests like this:

FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto   
check-function-bodies interrupt
FAIL: gcc.target/riscv/interrupt-misaligned.c   -O2 -flto 
-flto-partition=none   check-function-bodies interrupt
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto   (test for errors, line 9)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 10)
FAIL: gcc.target/riscv/pr93202.c   -O2 -flto -flto-partition=none   (test 
for errors, line 9)

gcc/testsuite/ChangeLog:

* gcc.target/riscv/interrupt-misaligned.c: Remove
"-fno-fat-lto-objects" from skip condition.
* gcc.target/riscv/pr93202.c: Likewise.

(cherry picked from commit 0717d50fc4ff983b79093bdef43b04e4584cc3cd)
Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c | 2 +-
 gcc/testsuite/gcc.target/riscv/pr93202.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c 
b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
index b5f8e6c2bbef..912f180e4d65 100644
--- a/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
+++ b/gcc/testsuite/gcc.target/riscv/interrupt-misaligned.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -march=rv64gc -mabi=lp64d -fno-schedule-insns 
-fno-schedule-insns2" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 /*  Make sure no stack offset are misaligned.
 **  interrupt:
diff --git a/gcc/testsuite/gcc.target/riscv/pr93202.c 
b/gcc/testsuite/gcc.target/riscv/pr93202.c
index 5501191ea52c..5de003fac421 100644
--- a/gcc/testsuite/gcc.target/riscv/pr93202.c
+++ b/gcc/testsuite/gcc.target/riscv/pr93202.c
@@ -1,7 +1,7 @@
 /* PR inline-asm/93202 */
 /* { dg-do compile { target fpic } } */
 /* { dg-options "-fpic" } */
-/* { dg-skip-if "" { *-*-* } { "-flto -fno-fat-lto-objects" } } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
 
 void
 foo (void)


[gcc r15-2243] RISC-V: Disable Zba optimization pattern if XTheadMemIdx is enabled

2024-07-24 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:9817d29cd66762893782a52b2c304c5083bc0023

commit r15-2243-g9817d29cd66762893782a52b2c304c5083bc0023
Author: Christoph Müllner 
Date:   Tue Jul 23 14:48:02 2024 +0200

RISC-V: Disable Zba optimization pattern if XTheadMemIdx is enabled

It is possible that the Zba optimization pattern zero_extendsidi2_bitmanip
matches for an XTheadMemIdx INSN with the effect of emitting an invalid
instruction as reported in PR116035.

The pattern above is used to emit a zext.w instruction to zero-extend
SI mode registers to DI mode.  Similar functionality can be achieved
by XTheadBb's th.extu instruction.  And indeed, we have the equivalent
pattern in thead.md (zero_extendsidi2_th_extu).  However, that pattern
depends on !TARGET_XTHEADMEMIDX.  To compensate for that, there are
specific patterns that ensure that zero-extension instructions can still
be emitted (th_memidx_bb_zero_extendsidi2 and friends).

While we could implement something similar (th_memidx_zba_zero_extendsidi2),
it would only make sense if there existed real HW that implements Zba
and XTheadMemIdx, but not XTheadBb.  Unless such a machine exists, let's
simply disable zero_extendsidi2_bitmanip if XTheadMemIdx is available.

PR target/116035

gcc/ChangeLog:

* config/riscv/bitmanip.md: Disable zero_extendsidi2_bitmanip
for XTheadMemIdx.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116035-1.c: New test.
* gcc.target/riscv/pr116035-2.c: New test.

Reported-by: Patrick O'Neill 
Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/bitmanip.md|  2 +-
 gcc/testsuite/gcc.target/riscv/pr116035-1.c | 29 +
 gcc/testsuite/gcc.target/riscv/pr116035-2.c | 26 ++
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index d262430485e7..9fc5215d6e35 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -22,7 +22,7 @@
 (define_insn "*zero_extendsidi2_bitmanip"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_ZBA"
+  "TARGET_64BIT && TARGET_ZBA && !TARGET_XTHEADMEMIDX"
   "@
zext.w\t%0,%1
lwu\t%0,%1"
diff --git a/gcc/testsuite/gcc.target/riscv/pr116035-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116035-1.c
new file mode 100644
index ..bc45941ff8f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116035-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv64g_zba_xtheadmemidx" { target { rv64 } } } */
+/* { dg-options "-march=rv32g_zba_xtheadmemidx" { target { rv32 } } } */
+
+void a(long);
+unsigned b[11];
+void c()
+{
+  for (int d = 0; d < 11; ++d)
+a(b[d]);
+}
+
+#if __riscv_xlen == 64
+unsigned long zext64_32(unsigned int u32)
+{
+  /* Missed optimization for Zba+XTheadMemIdx.  */
+  return u32; //zext.w a0, a0
+}
+#endif
+
+/* { dg-final { scan-assembler "th.lwuia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target rv64 } } } */
+/* { dg-final { scan-assembler "th.lwia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target rv32 } } } */
+
+/* { dg-final { scan-assembler-not "lwu\t\[a-x0-9\]+,\(\[a-x0-9\]+\),4,0" } } 
*/
+
+/* Missed optimizations for Zba+XTheadMemIdx.  */
+/* { dg-final { scan-assembler "zext.w\t" { target rv64 xfail rv64 } } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/pr116035-2.c 
b/gcc/testsuite/gcc.target/riscv/pr116035-2.c
new file mode 100644
index ..2c1a96948605
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116035-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv64g_xtheadbb_xtheadmemidx" { target { rv64 } } } */
+/* { dg-options "-march=rv32g_xtheadbb_xtheadmemidx" { target { rv32 } } } */
+
+void a(long);
+unsigned b[11];
+void c()
+{
+  for (int d = 0; d < 11; ++d)
+a(b[d]);
+}
+
+#if __riscv_xlen == 64
+unsigned long zext64_32(unsigned int u32)
+{
+return u32; //th.extu a0, a0, 31, 0
+}
+#endif
+
+/* { dg-final { scan-assembler "th.lwuia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target { rv64 } } } } */
+/* { dg-final { scan-assembler "th.lwia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target { rv32 } } } } */
+
+/* { dg-final { scan-assembler-not "lwu\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" } 
} */
+
+/* { dg-final { scan-assembler "th.extu\t" { target rv64 } } } */


[gcc r14-10506] RISC-V: Disable Zba optimization pattern if XTheadMemIdx is enabled

2024-07-24 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:ab0386679fef35c544d139270436c63026e00ff2

commit r14-10506-gab0386679fef35c544d139270436c63026e00ff2
Author: Christoph Müllner 
Date:   Tue Jul 23 14:48:02 2024 +0200

RISC-V: Disable Zba optimization pattern if XTheadMemIdx is enabled

It is possible that the Zba optimization pattern zero_extendsidi2_bitmanip
matches for an XTheadMemIdx INSN with the effect of emitting an invalid
instruction as reported in PR116035.

The pattern above is used to emit a zext.w instruction to zero-extend
SI mode registers to DI mode.  Similar functionality can be achieved
by XTheadBb's th.extu instruction.  And indeed, we have the equivalent
pattern in thead.md (zero_extendsidi2_th_extu).  However, that pattern
depends on !TARGET_XTHEADMEMIDX.  To compensate for that, there are
specific patterns that ensure that zero-extension instructions can still
be emitted (th_memidx_bb_zero_extendsidi2 and friends).

While we could implement something similar (th_memidx_zba_zero_extendsidi2),
it would only make sense if there existed real HW that implements Zba
and XTheadMemIdx, but not XTheadBb.  Unless such a machine exists, let's
simply disable zero_extendsidi2_bitmanip if XTheadMemIdx is available.

PR target/116035

gcc/ChangeLog:

* config/riscv/bitmanip.md: Disable zero_extendsidi2_bitmanip
for XTheadMemIdx.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116035-1.c: New test.
* gcc.target/riscv/pr116035-2.c: New test.

(cherry picked from commit 9817d29cd66762893782a52b2c304c5083bc0023)
Reported-by: Patrick O'Neill 
Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/bitmanip.md|  2 +-
 gcc/testsuite/gcc.target/riscv/pr116035-1.c | 29 +
 gcc/testsuite/gcc.target/riscv/pr116035-2.c | 26 ++
 3 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index ccda25c01c1b..0612e69fcb3b 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -22,7 +22,7 @@
 (define_insn "*zero_extendsidi2_bitmanip"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_ZBA"
+  "TARGET_64BIT && TARGET_ZBA && !TARGET_XTHEADMEMIDX"
   "@
zext.w\t%0,%1
lwu\t%0,%1"
diff --git a/gcc/testsuite/gcc.target/riscv/pr116035-1.c 
b/gcc/testsuite/gcc.target/riscv/pr116035-1.c
new file mode 100644
index ..bc45941ff8f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116035-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv64g_zba_xtheadmemidx" { target { rv64 } } } */
+/* { dg-options "-march=rv32g_zba_xtheadmemidx" { target { rv32 } } } */
+
+void a(long);
+unsigned b[11];
+void c()
+{
+  for (int d = 0; d < 11; ++d)
+a(b[d]);
+}
+
+#if __riscv_xlen == 64
+unsigned long zext64_32(unsigned int u32)
+{
+  /* Missed optimization for Zba+XTheadMemIdx.  */
+  return u32; //zext.w a0, a0
+}
+#endif
+
+/* { dg-final { scan-assembler "th.lwuia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target rv64 } } } */
+/* { dg-final { scan-assembler "th.lwia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target rv32 } } } */
+
+/* { dg-final { scan-assembler-not "lwu\t\[a-x0-9\]+,\(\[a-x0-9\]+\),4,0" } } 
*/
+
+/* Missed optimizations for Zba+XTheadMemIdx.  */
+/* { dg-final { scan-assembler "zext.w\t" { target rv64 xfail rv64 } } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/pr116035-2.c 
b/gcc/testsuite/gcc.target/riscv/pr116035-2.c
new file mode 100644
index ..2c1a96948605
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116035-2.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv64g_xtheadbb_xtheadmemidx" { target { rv64 } } } */
+/* { dg-options "-march=rv32g_xtheadbb_xtheadmemidx" { target { rv32 } } } */
+
+void a(long);
+unsigned b[11];
+void c()
+{
+  for (int d = 0; d < 11; ++d)
+a(b[d]);
+}
+
+#if __riscv_xlen == 64
+unsigned long zext64_32(unsigned int u32)
+{
+return u32; //th.extu a0, a0, 31, 0
+}
+#endif
+
+/* { dg-final { scan-assembler "th.lwuia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target { rv64 } } } } */
+/* { dg-final { scan-assembler "th.lwia\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" { 
target { rv32 } } } } */
+
+/* { dg-final { scan-assembler-not "lwu\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),4,0" } 
} */
+
+/* { dg-final { scan-assembler "th.extu\t" { target rv64 } } } */


[gcc r13-8462] riscv: xtheadmempair: Fix CFA reg notes

2024-03-18 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:d9d51e0552693bf1bcf6f23d53d058d60d72416e

commit r13-8462-gd9d51e0552693bf1bcf6f23d53d058d60d72416e
Author: Christoph Müllner 
Date:   Mon Apr 24 23:09:06 2023 +0200

riscv: xtheadmempair: Fix CFA reg notes

The current implementation triggers an assertion in
dwarf2out_frame_debug_cfa_offset() under certain circumstances.
The standard code uses REG_FRAME_RELATED_EXPR notes instead
of REG_CFA_OFFSET notes when saving registers on the stack.
So let's do this as well.

gcc/ChangeLog:

PR target/114160
* config/riscv/thead.cc (th_mempair_save_regs):
Emit REG_FRAME_RELATED_EXPR notes in prologue.

(cherry picked from commit 93973e4c5d3bcde1f84cad3b42a8c36e23900d19)

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/thead.cc | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/thead.cc b/gcc/config/riscv/thead.cc
index 75203805310..d7e3cf80d9b 100644
--- a/gcc/config/riscv/thead.cc
+++ b/gcc/config/riscv/thead.cc
@@ -368,8 +368,12 @@ th_mempair_save_regs (rtx operands[4])
   rtx set2 = gen_rtx_SET (operands[2], operands[3]);
   rtx insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set1, 
set2)));
   RTX_FRAME_RELATED_P (insn) = 1;
-  add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set1));
-  add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set2));
+
+  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ copy_rtx (set1), REG_NOTES (insn));
+
+  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+ copy_rtx (set2), REG_NOTES (insn));
 }
 
 /* Similar like riscv_restore_reg, but restores two registers from memory


[gcc r14-9607] RISC-V: Don't add fractional LMUL types to V_VLS for XTheadVector

2024-03-21 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:fd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24

commit r14-9607-gfd5e5dda8d79d62396f56d4fdd628b4bc5f9fa24
Author: Christoph Müllner 
Date:   Thu Mar 21 15:40:49 2024 +0100

RISC-V: Don't add fractional LMUL types to V_VLS for XTheadVector

The expansion of `memset` (via expand_builtin_memset_args())
uses clear_by_pieces() and store_by_pieces() to avoid calls
to the C runtime. To check if a type can be used for that purpose
the function by_pieces_mode_supported_p() tests if a `mov` and
a `vec_duplicate` INSN can be expanded by the backend.

The `vec_duplicate` expansion takes arguments of type `V_VLS`.
The `mov` expansions take arguments of type `V`, `VB`, `VT`,
`VLS_AVL_IMM`, and `VLS_AVL_REG`. Some of these types (in fact
not types but type iterators) include fractional LMUL types.
E.g. `V_VLS` includes `V`, which includes `VI`, which includes
`RVVMF2QI`.

This results in an attempt to use fractional LMUL-types for
the `memset` expansion resulting in an ICE for XTheadVector,
because that extension cannot handle fractional LMULs.

This patch addresses this issue by splitting the definition
of the `VI` mode iterator into `VI_NOFRAC` (without fractional
LMUL types) and `VI_FRAC` (only fractional LMUL types).
Further, it defines `V_VLS` such that `VI_FRAC` types are only
included if XTheadVector is not enabled.

The effect is demonstrated by a new test case that shows
that the by-pieces framework now emits `sb` instructions
instead of triggering an ICE.

Signed-off-by: Christoph Müllner 

PR target/114194

gcc/ChangeLog:

* config/riscv/vector-iterators.md: Split VI into VI_FRAC and 
VI_NOFRAC.
Only include VI_FRAC in V_VLS without TARGET_XTHEADVECTOR.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/xtheadvector/pr114194.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/vector-iterators.md   | 19 +---
 .../gcc.target/riscv/rvv/xtheadvector/pr114194.c   | 56 ++
 2 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index c2ea7e8b10a..a24e1bf078f 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -108,17 +108,24 @@
   UNSPECV_FRM_RESTORE_EXIT
 ])
 
-(define_mode_iterator VI [
-  RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN 
> 32")
-
-  RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
-
-  RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+;; Subset of VI with fractional LMUL types
+(define_mode_iterator VI_FRAC [
+  RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
+  RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+  (RVVMF2SI "TARGET_MIN_VLEN > 32")
+])
 
+;; Subset of VI with non-fractional LMUL types
+(define_mode_iterator VI_NOFRAC [
+  RVVM8QI RVVM4QI RVVM2QI RVVM1QI
+  RVVM8HI RVVM4HI RVVM2HI RVVM1HI
+  RVVM8SI RVVM4SI RVVM2SI RVVM1SI
   (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
   (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
 ])
 
+(define_mode_iterator VI [ VI_NOFRAC (VI_FRAC "!TARGET_XTHEADVECTOR") ])
+
 ;; This iterator is the same as above but with TARGET_VECTOR_ELEN_FP_16
 ;; changed to TARGET_ZVFH.  TARGET_VECTOR_ELEN_FP_16 is also true for
 ;; TARGET_ZVFHMIN while we actually want to disable all instructions apart
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c 
b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c
new file mode 100644
index 000..fc2d1349425
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/xtheadvector/pr114194.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_xtheadvector" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_xtheadvector" { target { rv64 } } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** foo0_1:
+** sb\tzero,0([a-x0-9]+)
+** ret
+*/
+void foo0_1 (void *p)
+{
+  __builtin_memset (p, 0, 1);
+}
+
+/*
+** foo0_7:
+** sb\tzero,0([a-x0-9]+)
+** sb\tzero,1([a-x0-9]+)
+** sb\tzero,2([a-x0-9]+)
+** sb\tzero,3([a-x0-9]+)
+** sb\tzero,4([a-x0-9]+)
+** sb\tzero,5([a-x0-9]+)
+** sb\tzero,6([a-x0-9]+)
+** ret
+*/
+void foo0_7 (void *p)
+{
+  __builtin_memset (p, 0, 7);
+}
+
+/*
+** foo1_1:
+** li\t[a-x0-9]+,1
+** sb\t[a-x0-9]+,0([a-x0-9]+)
+** ret
+*/
+void foo1_1 (void *p)
+{
+  __builtin_memset (p, 1, 1);
+}
+
+/*
+** foo1_5:
+** li\t[a-x0-9]+,1
+** sb\t[a-x0-9]+,0([a-x0-9]+)
+** sb\t[a-x0-9]+,1([a-x0-9]+)
+** sb\t[a-x0-9]+,2([a-x0-9]+)
+** sb\t[a-x0-9]+,3([a-x0-9]+)
+** sb\t[a-x0-9]+,4([a-x0-9]+)
+** ret
+*/
+void foo1_5 (void *p)
+{
+  __builtin_memset (p, 1, 5);
+}


[gcc r15-44] RISC-V: Fix parsing of Zic* extensions

2024-04-29 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:285300eb928b171236e895f28c960ad02dcb0d67

commit r15-44-g285300eb928b171236e895f28c960ad02dcb0d67
Author: Christoph Müllner 
Date:   Mon Apr 29 00:46:06 2024 +0200

RISC-V: Fix parsing of Zic* extensions

The extension parsing table entries for a range of Zic* extensions
do not match the mask definitions in riscv.opt.
This results in broken TARGET_ZIC* macros, because the values of
riscv_zi_subext and riscv_zicmo_subext are set incorrectly.

This patch fixes this by moving Zic64b into riscv_zicmo_subext
and all other affected Zic* extensions to riscv_zi_subext.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: Move ziccamoa, ziccif,
zicclsm, and ziccrse into riscv_zi_subext.
* config/riscv/riscv.opt: Define MASK_ZIC64B for
riscv_zicmo_subext.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 8 
 gcc/config/riscv/riscv.opt  | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 43b7549e3ec..8cc0e727737 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1638,15 +1638,15 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
 
   {"zihintntl", &gcc_options::x_riscv_zi_subext, MASK_ZIHINTNTL},
   {"zihintpause", &gcc_options::x_riscv_zi_subext, MASK_ZIHINTPAUSE},
+  {"ziccamoa", &gcc_options::x_riscv_zi_subext, MASK_ZICCAMOA},
+  {"ziccif", &gcc_options::x_riscv_zi_subext, MASK_ZICCIF},
+  {"zicclsm", &gcc_options::x_riscv_zi_subext, MASK_ZICCLSM},
+  {"ziccrse", &gcc_options::x_riscv_zi_subext, MASK_ZICCRSE},
 
   {"zicboz", &gcc_options::x_riscv_zicmo_subext, MASK_ZICBOZ},
   {"zicbom", &gcc_options::x_riscv_zicmo_subext, MASK_ZICBOM},
   {"zicbop", &gcc_options::x_riscv_zicmo_subext, MASK_ZICBOP},
   {"zic64b", &gcc_options::x_riscv_zicmo_subext, MASK_ZIC64B},
-  {"ziccamoa", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCAMOA},
-  {"ziccif", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCIF},
-  {"zicclsm", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCLSM},
-  {"ziccrse", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCRSE},
 
   {"zve32x",   &gcc_options::x_target_flags, MASK_VECTOR},
   {"zve32f",   &gcc_options::x_target_flags, MASK_VECTOR},
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index b14888e9816..ee824756381 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -237,8 +237,6 @@ Mask(ZIHINTPAUSE) Var(riscv_zi_subext)
 
 Mask(ZICOND)  Var(riscv_zi_subext)
 
-Mask(ZIC64B)  Var(riscv_zi_subext)
-
 Mask(ZICCAMOA)Var(riscv_zi_subext)
 
 Mask(ZICCIF)  Var(riscv_zi_subext)
@@ -390,6 +388,8 @@ Mask(ZICBOM) Var(riscv_zicmo_subext)
 
 Mask(ZICBOP) Var(riscv_zicmo_subext)
 
+Mask(ZIC64B) Var(riscv_zicmo_subext)
+
 TargetVariable
 int riscv_zf_subext


[gcc r15-2319] RISC-V: xtheadmemidx: Fix mode test for pre/post-modify addressing

2024-07-25 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:a86c0cb9379e7b86625908a0250cf698276e9e02

commit r15-2319-ga86c0cb9379e7b86625908a0250cf698276e9e02
Author: Christoph Müllner 
Date:   Wed Jul 24 14:10:01 2024 +0200

RISC-V: xtheadmemidx: Fix mode test for pre/post-modify addressing

auto_inc_dec (-O3) performs optimizations like the following
if RVV and XTheadMemIdx are enabled.

(insn 23 20 27 3 (set (mem:V4QI (reg:DI 136 [ ivtmp.13 ]) [0 MEM  [(char *)_39]+0 S4 A32])
(reg:V4QI 168)) "gcc/testsuite/gcc.target/riscv/pr116033.c":12:27 
3183 {*movv4qi}
 (nil))
(insn 40 39 41 3 (set (reg:DI 136 [ ivtmp.13 ])
(plus:DI (reg:DI 136 [ ivtmp.13 ])
(const_int 20 [0x14]))) 5 {adddi3}
 (nil))
>
(insn 23 20 27 3 (set (mem:V4QI (post_modify:DI (reg:DI 136 [ ivtmp.13 ])
(plus:DI (reg:DI 136 [ ivtmp.13 ])
(const_int 20 [0x14]))) [0 MEM  [(char 
*)_39]+0 S4 A32])
(reg:V4QI 168)) "gcc/testsuite/gcc.target/riscv/pr116033.c":12:27 
3183 {*movv4qi}
 (expr_list:REG_INC (reg:DI 136 [ ivtmp.13 ])
(nil)))

The reason why the pass believes that this is legal is
that the mode test in th_memidx_classify_address_modify()
requires INTEGRAL_MODE_P (mode), which includes vector modes.

Let's restrict the mode test such that only MODE_INT is allowed.

PR target/116033

gcc/ChangeLog:

* config/riscv/thead.cc (th_memidx_classify_address_modify):
Fix mode test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116033.c: New test.

Reported-by: Patrick O'Neill 
Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/thead.cc |  6 ++
 gcc/testsuite/gcc.target/riscv/pr116033.c | 16 
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/thead.cc b/gcc/config/riscv/thead.cc
index 951b60888596..6f5edeb7e0ac 100644
--- a/gcc/config/riscv/thead.cc
+++ b/gcc/config/riscv/thead.cc
@@ -453,10 +453,8 @@ th_memidx_classify_address_modify (struct 
riscv_address_info *info, rtx x,
   if (!TARGET_XTHEADMEMIDX)
 return false;
 
-  if (!TARGET_64BIT && mode == DImode)
-return false;
-
-  if (!(INTEGRAL_MODE_P (mode) && GET_MODE_SIZE (mode).to_constant () <= 8))
+  if (GET_MODE_CLASS (mode) != MODE_INT
+  || GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
 return false;
 
   if (GET_CODE (x) != POST_MODIFY
diff --git a/gcc/testsuite/gcc.target/riscv/pr116033.c 
b/gcc/testsuite/gcc.target/riscv/pr116033.c
new file mode 100644
index ..881922da0260
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116033.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv64gv_xtheadmemidx" { target { rv64 } } } */
+/* { dg-options "-march=rv32gv_xtheadmemidx" { target { rv32 } } } */
+
+char arr_3[20][20];
+void init()
+{
+  for (int i_0 = 0; i_0 < 20; ++i_0)
+for (int i_1 = 0; i_0 < 20; ++i_0)
+  for (int i_1 = 0; i_1 < 20; ++i_0)
+for (int i_1 = 0; i_1 < 20; ++i_1)
+  arr_3[i_0][i_1] = i_1;
+}
+
+/* { dg-final { scan-assembler-not 
"vse8.v\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),\[0-9\]+,\[0-9\]+" } } */


[gcc r14-10558] RISC-V: xtheadmemidx: Fix mode test for pre/post-modify addressing

2024-08-05 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:eccf707e5ceb7e405ffe4edfbcae2f769b8386cf

commit r14-10558-geccf707e5ceb7e405ffe4edfbcae2f769b8386cf
Author: Christoph Müllner 
Date:   Wed Jul 24 14:10:01 2024 +0200

RISC-V: xtheadmemidx: Fix mode test for pre/post-modify addressing

auto_inc_dec (-O3) performs optimizations like the following
if RVV and XTheadMemIdx are enabled.

(insn 23 20 27 3 (set (mem:V4QI (reg:DI 136 [ ivtmp.13 ]) [0 MEM  [(char *)_39]+0 S4 A32])
(reg:V4QI 168)) "gcc/testsuite/gcc.target/riscv/pr116033.c":12:27 
3183 {*movv4qi}
 (nil))
(insn 40 39 41 3 (set (reg:DI 136 [ ivtmp.13 ])
(plus:DI (reg:DI 136 [ ivtmp.13 ])
(const_int 20 [0x14]))) 5 {adddi3}
 (nil))
>
(insn 23 20 27 3 (set (mem:V4QI (post_modify:DI (reg:DI 136 [ ivtmp.13 ])
(plus:DI (reg:DI 136 [ ivtmp.13 ])
(const_int 20 [0x14]))) [0 MEM  [(char 
*)_39]+0 S4 A32])
(reg:V4QI 168)) "gcc/testsuite/gcc.target/riscv/pr116033.c":12:27 
3183 {*movv4qi}
 (expr_list:REG_INC (reg:DI 136 [ ivtmp.13 ])
(nil)))

The reason why the pass believes that this is legal is
that the mode test in th_memidx_classify_address_modify()
requires INTEGRAL_MODE_P (mode), which includes vector modes.

Let's restrict the mode test such that only MODE_INT is allowed.

PR target/116033

gcc/ChangeLog:

* config/riscv/thead.cc (th_memidx_classify_address_modify):
Fix mode test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr116033.c: New test.

(cherry picked from commit a86c0cb9379e7b86625908a0250cf698276e9e02)
Reported-by: Patrick O'Neill 
Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/thead.cc |  6 ++
 gcc/testsuite/gcc.target/riscv/pr116033.c | 16 
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/thead.cc b/gcc/config/riscv/thead.cc
index 951b60888596..6f5edeb7e0ac 100644
--- a/gcc/config/riscv/thead.cc
+++ b/gcc/config/riscv/thead.cc
@@ -453,10 +453,8 @@ th_memidx_classify_address_modify (struct 
riscv_address_info *info, rtx x,
   if (!TARGET_XTHEADMEMIDX)
 return false;
 
-  if (!TARGET_64BIT && mode == DImode)
-return false;
-
-  if (!(INTEGRAL_MODE_P (mode) && GET_MODE_SIZE (mode).to_constant () <= 8))
+  if (GET_MODE_CLASS (mode) != MODE_INT
+  || GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
 return false;
 
   if (GET_CODE (x) != POST_MODIFY
diff --git a/gcc/testsuite/gcc.target/riscv/pr116033.c 
b/gcc/testsuite/gcc.target/riscv/pr116033.c
new file mode 100644
index ..881922da0260
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr116033.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv64gv_xtheadmemidx" { target { rv64 } } } */
+/* { dg-options "-march=rv32gv_xtheadmemidx" { target { rv32 } } } */
+
+char arr_3[20][20];
+void init()
+{
+  for (int i_0 = 0; i_0 < 20; ++i_0)
+for (int i_1 = 0; i_0 < 20; ++i_0)
+  for (int i_1 = 0; i_1 < 20; ++i_0)
+for (int i_1 = 0; i_1 < 20; ++i_1)
+  arr_3[i_0][i_1] = i_1;
+}
+
+/* { dg-final { scan-assembler-not 
"vse8.v\t\[a-x0-9\]+,\\(\[a-x0-9\]+\\),\[0-9\]+,\[0-9\]+" } } */


[gcc r15-2820] Rearrange SLP nodes with duplicate statements [PR98138]

2024-08-08 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:ab18785840d7b8afd9f716bab9d1eab415bc4fe9

commit r15-2820-gab18785840d7b8afd9f716bab9d1eab415bc4fe9
Author: Manolis Tsamis 
Date:   Tue Jun 25 08:00:04 2024 -0700

Rearrange SLP nodes with duplicate statements [PR98138]

This change checks when a two_operators SLP node has multiple occurrences of
the same statement (e.g. {A, B, A, B, ...}) and tries to rearrange the operands
so that there are no duplicates. Two vec_perm expressions are then introduced
to recreate the original ordering. These duplicates can appear due to how
two_operators nodes are handled, and they prevent vectorization in some cases.

This targets the vectorization of the SPEC2017 x264 pixel_satd functions.
In some processors a larger than 10% improvement on x264 has been observed.

PR tree-optimization/98138

gcc/ChangeLog:

* tree-vect-slp.cc: Avoid duplicates in two_operators nodes.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vect-slp-two-operator.c: New test.

Diff:
---
 .../gcc.target/aarch64/vect-slp-two-operator.c |  36 +++
 gcc/tree-vect-slp.cc   | 114 +
 2 files changed, 150 insertions(+)

diff --git a/gcc/testsuite/gcc.target/aarch64/vect-slp-two-operator.c 
b/gcc/testsuite/gcc.target/aarch64/vect-slp-two-operator.c
new file mode 100644
index ..b6b093ffc349
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-slp-two-operator.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect 
-fdump-tree-vect-details" } */
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+
+#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) {\
+int t0 = s0 + s1;\
+int t1 = s0 - s1;\
+int t2 = s2 + s3;\
+int t3 = s2 - s3;\
+d0 = t0 + t2;\
+d1 = t1 + t3;\
+d2 = t0 - t2;\
+d3 = t1 - t3;\
+}
+
+void sink(uint32_t tmp[4][4]);
+
+int x264_pixel_satd_8x4( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+{
+uint32_t tmp[4][4];
+int sum = 0;
+for( int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2 )
+{
+uint32_t a0 = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
+uint32_t a1 = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
+uint32_t a2 = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
+uint32_t a3 = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
+HADAMARD4( tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0,a1,a2,a3 );
+}
+sink(tmp);
+}
+
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 5f0d9e51c325..43ecd2689701 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2437,6 +2437,95 @@ out:
   }
   swap = NULL;
 
+  bool has_two_operators_perm = false;
+  auto_vec two_op_perm_indices[2];
+  vec two_op_scalar_stmts[2] = {vNULL, vNULL};
+
+  if (two_operators && oprnds_info.length () == 2 && group_size > 2)
+{
+  unsigned idx = 0;
+  hash_map seen;
+  vec new_oprnds_info
+   = vect_create_oprnd_info (1, group_size);
+  bool success = true;
+
+  enum tree_code code = ERROR_MARK;
+  if (oprnds_info[0]->def_stmts[0]
+ && is_a (oprnds_info[0]->def_stmts[0]->stmt))
+   code = gimple_assign_rhs_code (oprnds_info[0]->def_stmts[0]->stmt);
+
+  for (unsigned j = 0; j < group_size; ++j)
+   {
+ FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
+   {
+ stmt_vec_info stmt_info = oprnd_info->def_stmts[j];
+ if (!stmt_info || !stmt_info->stmt
+ || !is_a (stmt_info->stmt)
+ || gimple_assign_rhs_code (stmt_info->stmt) != code
+ || skip_args[i])
+   {
+ success = false;
+ break;
+   }
+
+ bool exists;
+ unsigned &stmt_idx
+   = seen.get_or_insert (stmt_info->stmt, &exists);
+
+ if (!exists)
+   {
+ new_oprnds_info[0]->def_stmts.safe_push (stmt_info);
+ new_oprnds_info[0]->ops.safe_push (oprnd_info->ops[j]);
+ stmt_idx = idx;
+ idx++;
+   }
+
+ two_op_perm_indices[i].safe_push (stmt_idx);
+   }
+
+ if (!success)
+   break;
+   }
+
+  if (success && idx == group_size)
+   {
+ if (dump_enabled_p ())
+   {
+ dump_printf_loc (MSG_NOTE, vect_location,
+  "Replace two_operators operands:\n");
+
+ FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
+   {
+ dump_printf_loc (MSG_NOTE, vect_location,
+  "Operand %u:\n", i);
+ for (unsigne

[gcc r15-2825] RISC-V: testsuite: xtheadfmemidx: Rename test and add similar Zfa test

2024-08-08 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:8e6bc6dd2bb476fa97586b477bc98c670a3fcaf0

commit r15-2825-g8e6bc6dd2bb476fa97586b477bc98c670a3fcaf0
Author: Christoph Müllner 
Date:   Tue Aug 6 07:24:07 2024 +0200

RISC-V: testsuite: xtheadfmemidx: Rename test and add similar Zfa test

Test file xtheadfmemidx-medany.c has been added in b79cd204c780 as a
test case that provoked an ICE when loading DFmode registers via two
SImode register loads followed by a SI->DF[63:32] move from XTheadFmv.
Since Zfa is affected in the same way as XTheadFmv, even if both
have slightly different instructions, let's add a test for Zfa as well
and give the tests proper names.

Let's also add a test into the test files that counts the SI->DF moves
from XTheadFmv/Zfa.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/xtheadfmemidx-medany.c: Move to...
* gcc.target/riscv/xtheadfmemidx-xtheadfmv-medany.c: ...here.
* gcc.target/riscv/xtheadfmemidx-zfa-medany.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 ...x-medany.c => xtheadfmemidx-xtheadfmv-medany.c} |  5 +--
 .../gcc.target/riscv/xtheadfmemidx-zfa-medany.c| 39 ++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-medany.c 
b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-xtheadfmv-medany.c
similarity index 71%
rename from gcc/testsuite/gcc.target/riscv/xtheadfmemidx-medany.c
rename to gcc/testsuite/gcc.target/riscv/xtheadfmemidx-xtheadfmv-medany.c
index 0c8060d06329..7c70b7758246 100644
--- a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-medany.c
+++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-xtheadfmv-medany.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-O3" "-Og" "-Os" "-Oz"} } */
-/* { dg-options "-march=rv32gc_xtheadfmemidx_xtheadfmv_xtheadmemidx 
-mabi=ilp32d -mcmodel=medany -O2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv32gc_xtheadfmemidx_xtheadfmv_xtheadmemidx 
-mabi=ilp32d -mcmodel=medany" } */
 
 typedef union {
   double v;
@@ -36,3 +36,4 @@ double foo (int i, int j)
 }
 
 /* { dg-final { scan-assembler-times {\mth\.flrd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mth\.fmv\.hw\.x\M} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-zfa-medany.c 
b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-zfa-medany.c
new file mode 100644
index ..4215eab11951
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/xtheadfmemidx-zfa-medany.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" "-O0" "-O1" "-Og" "-Os" "-Oz" } } */
+/* { dg-options "-march=rv32gc_zfa_xtheadfmemidx_xtheadmemidx -mabi=ilp32d 
-mcmodel=medany" } */
+
+typedef union {
+  double v;
+  unsigned w;
+} my_t;
+
+double z;
+
+double foo (int i, int j)
+{
+
+  if (j)
+{
+  switch (i)
+   {
+   case 0:
+ return 1;
+   case 1:
+ return 0;
+   case 2:
+ return 3.0;
+   }
+}
+
+  if (i == 1)
+{
+  my_t u;
+  u.v = z;
+  u.w = 1;
+  z = u.v;
+}
+  return z;
+}
+
+/* { dg-final { scan-assembler-times {\mth\.flrd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mfmvp\.d\.x\M} 3 } } */


[gcc r15-1351] riscv: Allocate enough space to strcpy() string

2024-06-15 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:6762d5738b02d84ad3f51e89979b48acb68db65b

commit r15-1351-g6762d5738b02d84ad3f51e89979b48acb68db65b
Author: Christoph Müllner 
Date:   Fri Jun 14 20:37:04 2024 +0200

riscv: Allocate enough space to strcpy() string

I triggered an ICE on Ubuntu 24.04 when compiling code that uses
function attributes. Looking into the sources shows that we have
a systematic issue in the attribute handling code:
* we determine the length with strlen() (excluding the terminating null)
* we allocate a buffer with this length
* we copy the original string using strcpy() (incl. the terminating null)

To quote the man page of strcpy():
"The programmer is responsible for allocating a destination buffer
large enough, that is, strlen(src) + 1."

The ICE looks like this:

*** buffer overflow detected ***: terminated
xtheadmempair_bench.c:14:1: internal compiler error: Aborted
   14 | {
  | ^
0xaf3b99 crash_signal
/home/ubuntu/src/gcc/scaleff/gcc/toplev.cc:319
0xe5b957 strcpy
/usr/include/riscv64-linux-gnu/bits/string_fortified.h:79
0xe5b957 riscv_process_target_attr

/home/ubuntu/src/gcc/scaleff/gcc/config/riscv/riscv-target-attr.cc:339
0xe5baaf riscv_process_target_attr

/home/ubuntu/src/gcc/scaleff/gcc/config/riscv/riscv-target-attr.cc:314
0xe5bc5f riscv_option_valid_attribute_p(tree_node*, tree_node*, tree_node*, 
int)

/home/ubuntu/src/gcc/scaleff/gcc/config/riscv/riscv-target-attr.cc:389
0x6a31e5 handle_target_attribute
/home/ubuntu/src/gcc/scaleff/gcc/c-family/c-attribs.cc:5915
0x5d3a07 decl_attributes(tree_node**, tree_node*, int, tree_node*)
/home/ubuntu/src/gcc/scaleff/gcc/attribs.cc:900
0x5db403 c_decl_attributes
/home/ubuntu/src/gcc/scaleff/gcc/c/c-decl.cc:5501
0x5e8965 start_function(c_declspecs*, c_declarator*, tree_node*)
/home/ubuntu/src/gcc/scaleff/gcc/c/c-decl.cc:10562
0x6318ed c_parser_declaration_or_fndef
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:2914
0x63a8ad c_parser_external_declaration
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:2048
0x63b219 c_parser_translation_unit
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:1902
0x63b219 c_parse_file()
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:27277
0x68fec5 c_common_parse_file()
/home/ubuntu/src/gcc/scaleff/gcc/c-family/c-opts.cc:1311
Please submit a full bug report, with preprocessed source (by using 
-freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

gcc/ChangeLog:

* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Fix allocation size of buffer.
(riscv_process_one_target_attr): Likewise.
(riscv_process_target_attr): Likewise.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/riscv-target-attr.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index 1a73d69bf50d..19eb7b06d548 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -109,7 +109,7 @@ riscv_target_attr_parser::parse_arch (const char *str)
 {
   /* Parsing the extension list like "+[,+]*".  */
   size_t len = strlen (str);
-  std::unique_ptr buf (new char[len]);
+  std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get ();
   strcpy (str_to_check, str);
   const char *token = strtok_r (str_to_check, ",", &str_to_check);
@@ -247,7 +247,7 @@ riscv_process_one_target_attr (char *arg_str,
   return false;
 }
 
-  std::unique_ptr buf (new char[len]);
+  std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get();
   strcpy (str_to_check, arg_str);
 
@@ -334,7 +334,7 @@ riscv_process_target_attr (tree fndecl, tree args, 
location_t loc,
   return false;
 }
 
-  std::unique_ptr buf (new char[len]);
+  std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get ();
   strcpy (str_to_check, TREE_STRING_POINTER (args));


[gcc r14-10313] riscv: Allocate enough space to strcpy() string

2024-06-15 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:3fe255fd3f9b7f93c69d11d4bb0963793fc5edd4

commit r14-10313-g3fe255fd3f9b7f93c69d11d4bb0963793fc5edd4
Author: Christoph Müllner 
Date:   Fri Jun 14 20:37:04 2024 +0200

riscv: Allocate enough space to strcpy() string

I triggered an ICE on Ubuntu 24.04 when compiling code that uses
function attributes. Looking into the sources shows that we have
a systematic issue in the attribute handling code:
* we determine the length with strlen() (excluding the terminating null)
* we allocate a buffer with this length
* we copy the original string using strcpy() (incl. the terminating null)

To quote the man page of strcpy():
"The programmer is responsible for allocating a destination buffer
large enough, that is, strlen(src) + 1."

The ICE looks like this:

*** buffer overflow detected ***: terminated
xtheadmempair_bench.c:14:1: internal compiler error: Aborted
   14 | {
  | ^
0xaf3b99 crash_signal
/home/ubuntu/src/gcc/scaleff/gcc/toplev.cc:319
0xe5b957 strcpy
/usr/include/riscv64-linux-gnu/bits/string_fortified.h:79
0xe5b957 riscv_process_target_attr

/home/ubuntu/src/gcc/scaleff/gcc/config/riscv/riscv-target-attr.cc:339
0xe5baaf riscv_process_target_attr

/home/ubuntu/src/gcc/scaleff/gcc/config/riscv/riscv-target-attr.cc:314
0xe5bc5f riscv_option_valid_attribute_p(tree_node*, tree_node*, tree_node*, 
int)

/home/ubuntu/src/gcc/scaleff/gcc/config/riscv/riscv-target-attr.cc:389
0x6a31e5 handle_target_attribute
/home/ubuntu/src/gcc/scaleff/gcc/c-family/c-attribs.cc:5915
0x5d3a07 decl_attributes(tree_node**, tree_node*, int, tree_node*)
/home/ubuntu/src/gcc/scaleff/gcc/attribs.cc:900
0x5db403 c_decl_attributes
/home/ubuntu/src/gcc/scaleff/gcc/c/c-decl.cc:5501
0x5e8965 start_function(c_declspecs*, c_declarator*, tree_node*)
/home/ubuntu/src/gcc/scaleff/gcc/c/c-decl.cc:10562
0x6318ed c_parser_declaration_or_fndef
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:2914
0x63a8ad c_parser_external_declaration
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:2048
0x63b219 c_parser_translation_unit
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:1902
0x63b219 c_parse_file()
/home/ubuntu/src/gcc/scaleff/gcc/c/c-parser.cc:27277
0x68fec5 c_common_parse_file()
/home/ubuntu/src/gcc/scaleff/gcc/c-family/c-opts.cc:1311
Please submit a full bug report, with preprocessed source (by using 
-freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

gcc/ChangeLog:

* config/riscv/riscv-target-attr.cc 
(riscv_target_attr_parser::parse_arch):
Fix allocation size of buffer.
(riscv_process_one_target_attr): Likewise.
(riscv_process_target_attr): Likewise.

(cherry picked from commit 6762d5738b02d84ad3f51e89979b48acb68db65b)
Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/config/riscv/riscv-target-attr.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-target-attr.cc 
b/gcc/config/riscv/riscv-target-attr.cc
index 1a73d69bf50d..19eb7b06d548 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -109,7 +109,7 @@ riscv_target_attr_parser::parse_arch (const char *str)
 {
   /* Parsing the extension list like "+[,+]*".  */
   size_t len = strlen (str);
-  std::unique_ptr buf (new char[len]);
+  std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get ();
   strcpy (str_to_check, str);
   const char *token = strtok_r (str_to_check, ",", &str_to_check);
@@ -247,7 +247,7 @@ riscv_process_one_target_attr (char *arg_str,
   return false;
 }
 
-  std::unique_ptr buf (new char[len]);
+  std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get();
   strcpy (str_to_check, arg_str);
 
@@ -334,7 +334,7 @@ riscv_process_target_attr (tree fndecl, tree args, 
location_t loc,
   return false;
 }
 
-  std::unique_ptr buf (new char[len]);
+  std::unique_ptr buf (new char[len+1]);
   char *str_to_check = buf.get ();
   strcpy (str_to_check, TREE_STRING_POINTER (args));


[gcc r15-6386] RISC-V: List valid -mtune options only once

2024-12-20 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:8af296c290216e03bc20e7291e64c19e0d94cfd6

commit r15-6386-g8af296c290216e03bc20e7291e64c19e0d94cfd6
Author: Christoph Müllner 
Date:   Thu Dec 19 20:59:36 2024 +0100

RISC-V: List valid -mtune options only once

This patch ensures that the list of valid -mtune options
does not contain entries more than once.
The -mtune option accepts CPU identifiers as well as
tuning identifiers and there are cases where a CPU and
its tuning have the same identifier.

PR116347

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc 
(riscv_get_valid_option_values):
Skip adding mtune entries that are already in the list.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 4c9a72d1180a..2f85bb21a4c0 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -2437,7 +2437,19 @@ riscv_get_valid_option_values (int option_code,
 
const riscv_cpu_info *cpu_info = &riscv_cpu_tables[0];
for (;cpu_info->name; ++cpu_info)
- v.safe_push (cpu_info->name);
+ {
+   /* Skip duplicates.  */
+   bool skip = false;
+   int i;
+   const char *str;
+   FOR_EACH_VEC_ELT (v, i, str)
+ {
+   if (!strcmp (str, cpu_info->name))
+ skip = true;
+ }
+   if (!skip)
+ v.safe_push (cpu_info->name);
+ }
   }
   break;
 case OPT_mcpu_:


[gcc r15-6387] forwprop: Fix lane handling for VEC_PERM sequence blending

2024-12-20 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:eee2891312a9b42acabcc82739604c9fa8421757

commit r15-6387-geee2891312a9b42acabcc82739604c9fa8421757
Author: Christoph Müllner 
Date:   Thu Dec 5 20:39:25 2024 +0100

forwprop: Fix lane handling for VEC_PERM sequence blending

In PR117830 a miscompilation of 464.h264ref was reported.
An analysis showed that wrong code was generated because of
unsatisfied assumptions.  This patch addresses these issues.

The first assumption was that we could independently analyze the two
vec-perms at the start of a vec-perm-simplify sequence and use the
information later for calculating a final vec-perm selector that
utilizes fewer lanes.  However, this information does not help much,
because for changing the selector entry, we need to ensure that both
elements of the operand vectors v_1 and v_2 remain equal.
This is addressed by removing the function get_vect_selector_index_map
and checking for this equality in the loop where we create the new
selector.

The calculation of the selector vector for the blended sequence
assumed that the indices of the selector vector of the narrowed
sequences are increasing.  This assumption does not hold in general.
This was fixed by allowing a wrap-around when searching for an empty
lane.

Further, there was an issue in the calculation of the selector vector
entries for the second sequence.  The code did not consider that the
lanes of the second sequence could have been moved.

A relevant property of this patch is that it introduces a
couple of nested loops, where the outer loop iterates from
i=0..nelts and the inner loop iterates from j=0..i.
To avoid performance concerns, a check is introduced that
ensures nelts won't exceed 4 lanes.

The added test case is derived from h264ref (the other cases from the
benchmark have the same structure and don't provide additional coverage).

Bootstrapped and regression-tested on x86-64 and aarch64.
Further, tested on CPU 2006 h264ref and CPU 2017 x264.

PR117830

gcc/ChangeLog:

* tree-ssa-forwprop.cc (get_vect_selector_index_map): Removed.
(recognise_vec_perm_simplify_seq): Fix calculation of vec-perm
selectors of narrowed sequence.
(calc_perm_vec_perm_simplify_seqs): Fixing calculation of
vec-perm selectors of the blended sequence.
(process_vec_perm_simplify_seq_list): Add whitespace to dump
string to avoid badly formatted dump output.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/vector-11.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/vector-11.c |  38 ++
 gcc/tree-ssa-forwprop.cc  | 203 ++
 2 files changed, 162 insertions(+), 79 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vector-11.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vector-11.c
new file mode 100644
index ..e4102d318d29
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vector-11.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-forwprop1-details -Wno-psabi" } */
+
+typedef int vec __attribute__((vector_size (4 * sizeof (int;
+
+void f1 (vec *p_v_in, vec *p_v_out_1, vec *p_v_out_2)
+{
+  vec sel00 = { 2, 3, 2, 2 };
+  vec sel01 = { 1, 0, 1, 1 };
+  vec sel10 = { 3, 2, 3, 3 };
+  vec sel11 = { 0, 1, 0, 0 };
+  vec sel = { 0, 5, 2, 7 };
+  vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
+  vec v_in = *p_v_in;
+
+  /* First vec perm sequence.  */
+  v_1 = __builtin_shuffle (v_in, v_in, sel00);
+  v_2 = __builtin_shuffle (v_in, v_in, sel01);
+  v_x = v_2 - v_1;
+  v_y = v_1 + v_2;
+  v_out_1 = __builtin_shuffle (v_y, v_x, sel);
+
+  /* Second vec perm sequence.  */
+  v_1 = __builtin_shuffle (v_in, v_in, sel10);
+  v_2 = __builtin_shuffle (v_in, v_in, sel11);
+  v_x = v_2 - v_1;
+  v_y = v_1 + v_2;
+  v_out_2 = __builtin_shuffle (v_y, v_x, sel);
+
+  /* Won't blend because the narrowed sequence
+ utilizes three of the four lanes.  */
+
+  *p_v_out_1 = v_out_1;
+  *p_v_out_2 = v_out_2;
+}
+
+/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop1" { target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 7, 2, 6 }" "forwprop1" { 
target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 7cae08f0d798..dae8c2f435bc 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3479,41 +3479,6 @@ fwprop_ssa_val (tree name)
   return name;
 }
 
-/* Get an index map from the provided vector permute selector
-   and return the number of unique indices.
-   E.g.: { 1, 3, 1, 3 } -> <0, 1, 0, 1>, 2
-{ 0, 2, 0, 2 } -> <0, 1, 0, 1>, 2
-{ 3, 2, 1, 0 } -> <0, 1, 2, 3>, 4.  */
-
-static 

[gcc r15-6396] testsuite: Add tests for PR118149

2024-12-20 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:7c50564627e42c619cc64c09cce8a42fd7932166

commit r15-6396-g7c50564627e42c619cc64c09cce8a42fd7932166
Author: Christoph Müllner 
Date:   Fri Dec 20 14:46:51 2024 +0100

testsuite: Add tests for PR118149

A recent bugfix (eee2891312) for PR117830 also addressed PR118149.
This patch adds two test cases for PR118149.
These tests are different than other tests in that one of the
vec-perm selectors contains indices in descending order (1, 1, 0, 0),
which is the root cause for the ICE observed in PR118149.

PR118149

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr118149-2.c: New test.
* gcc.dg/tree-ssa/pr118149.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/pr118149-2.c | 37 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr118149.c   | 20 
 2 files changed, 57 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118149-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr118149-2.c
new file mode 100644
index ..31f3d7e0dc74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118149-2.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-forwprop1-details -Wno-psabi" } */
+/* { dg-additional-options "-msse2" { target i?86-*-* x86_64-*-* } } */
+
+typedef int vec __attribute__((vector_size (4 * sizeof (float;
+
+void f1 (vec *p_v_in, vec *p_v_out_1, vec *p_v_out_2)
+{
+  vec sel00 = { 1, 1, 3, 3 };
+  vec sel01 = { 0, 0, 2, 2 };
+  vec sel10 = { 3, 3, 2, 2 };
+  vec sel11 = { 1, 1, 0, 0 };
+  vec sel = { 0, 1, 6, 7 };
+  vec v_1, v_2, v_x, v_y, v_out_1, v_out_2;
+  vec v_in = *p_v_in;
+
+  /* First vec perm sequence.  */
+  v_1 = __builtin_shuffle (v_in, v_in, sel00);
+  v_2 = __builtin_shuffle (v_in, v_in, sel01);
+  v_x = v_2 - v_1;
+  v_y = v_1 + v_2;
+  v_out_1 = __builtin_shuffle (v_y, v_x, sel);
+
+  /* Second vec perm sequence.  */
+  v_1 = __builtin_shuffle (v_in, v_in, sel10);
+  v_2 = __builtin_shuffle (v_in, v_in, sel11);
+  v_x = v_2 - v_1;
+  v_y = v_1 + v_2;
+  v_out_2 = __builtin_shuffle (v_y, v_x, sel);
+
+  *p_v_out_1 = v_out_1;
+  *p_v_out_2 = v_out_2;
+}
+
+/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop1" { target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 0, 0, 6, 6 }" "forwprop1" { 
target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 7, 7 }" "forwprop1" { 
target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118149.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr118149.c
new file mode 100644
index ..f471877f6611
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118149.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-forwprop4-details -Wno-psabi" } */
+/* { dg-additional-options "-msse2" { target i?86-*-* x86_64-*-* } } */
+
+float *fastconv_parse_dst;
+
+void fastconv_parse ()
+{
+  float r3k = fastconv_parse_dst[1] - fastconv_parse_dst[3],
+i0k = fastconv_parse_dst[4] + fastconv_parse_dst[6],
+i1k = fastconv_parse_dst[4] - fastconv_parse_dst[6],
+i2k = fastconv_parse_dst[5] + fastconv_parse_dst[7];
+  fastconv_parse_dst[1] = fastconv_parse_dst[0];
+  fastconv_parse_dst[4] = fastconv_parse_dst[5] = i0k - i2k;
+  fastconv_parse_dst[6] = fastconv_parse_dst[7] = i1k + r3k;
+}
+
+/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop4" { target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 0, 0, 6, 6 }" "forwprop4" { 
target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 7, 7 }" "forwprop4" { 
target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */


[gcc r15-5563] forwprop: Try to blend two isomorphic VEC_PERM sequences

2024-11-21 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:1c4d39ada33d3655db088a0e5c90a296da794a55

commit r15-5563-g1c4d39ada33d3655db088a0e5c90a296da794a55
Author: Christoph Müllner 
Date:   Wed Nov 13 00:44:43 2024 +0100

forwprop: Try to blend two isomorphic VEC_PERM sequences

This extends forwprop by yet another VEC_PERM optimization:
It attempts to blend two isomorphic vector sequences by using the
redundancy in the lane utilization in these sequences.
This redundancy in lane utilization comes from the way how specific
scalar statements end up vectorized: two VEC_PERMs on top, binary operations
on both of them, and a final VEC_PERM to create the result.
Here is an example of this sequence:

  v_in = {e0, e1, e2, e3}
  v_1 = VEC_PERM <v_in, v_in, {0, 2, 0, 2}>
  // v_1 = {e0, e2, e0, e2}
  v_2 = VEC_PERM <v_in, v_in, {1, 3, 1, 3}>
  // v_2 = {e1, e3, e1, e3}

  v_x = v_1 + v_2
  // v_x = {e0+e1, e2+e3, e0+e1, e2+e3}
  v_y = v_1 - v_2
  // v_y = {e0-e1, e2-e3, e0-e1, e2-e3}

  v_out = VEC_PERM <v_x, v_y, {0, 1, 6, 7}>
  // v_out = {e0+e1, e2+e3, e0-e1, e2-e3}

To remove the redundancy, lanes 2 and 3 can be freed, which allows to
change the last statement into:
  v_out' = VEC_PERM <v_x, v_y, {0, 1, 4, 5}>
  // v_out' = {e0+e1, e2+e3, e0-e1, e2-e3}

The cost of eliminating the redundancy in the lane utilization is that
lowering the VEC PERM expression could get more expensive because of
tighter packing of the lanes.  Therefore this optimization is not done
alone, but only in case we identify two such sequences that can be
blended.

Once all candidate sequences have been identified, we try to blend them,
so that we can use the freed lanes for the second sequence.
On success we convert 2x (2x BINOP + 1x VEC_PERM) to
2x VEC_PERM + 2x BINOP + 2x VEC_PERM traded for 4x VEC_PERM + 2x BINOP.

The implemented transformation reuses (rewrites) the statements
of the first sequence and the last VEC_PERM of the second sequence.
The remaining four statements of the second sequence are left untouched
and will be eliminated by DCE later.

This targets x264_pixel_satd_8x4, which calculates the sum of absolute
transformed differences (SATD) using Hadamard transformation.
We have seen 8% speedup on SPEC's x264 on a 5950X (x86-64) and 7%
speedup on an AArch64 machine.

Bootstrapped and reg-tested on x86-64 and AArch64 (all languages).

gcc/ChangeLog:

* tree-ssa-forwprop.cc (struct _vec_perm_simplify_seq): New data
structure to store analysis results of a vec perm simplify sequence.
(get_vect_selector_index_map): Helper to get an index map from the
provided vector permute selector.
(recognise_vec_perm_simplify_seq): Helper to recognise a
vec perm simplify sequence.
(narrow_vec_perm_simplify_seq): Helper to pack the lanes more
tight.
(can_blend_vec_perm_simplify_seqs_p): Test if two vec perm
sequences can be blended.
(calc_perm_vec_perm_simplify_seqs): Helper to calculate the new
permutation indices.
(blend_vec_perm_simplify_seqs): Helper to blend two vec perm
simplify sequences.
(process_vec_perm_simplify_seq_list): Helper to process a list
of vec perm simplify sequences.
(append_vec_perm_simplify_seq_list): Helper to add a vec perm
simplify sequence to the list.
(pass_forwprop::execute): Integrate new functionality.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/satd-hadamard.c: New test.
* gcc.dg/tree-ssa/vector-10.c: New test.
* gcc.dg/tree-ssa/vector-8.c: New test.
* gcc.dg/tree-ssa/vector-9.c: New test.
* gcc.target/aarch64/sve/satd-hadamard.c: New test.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c  |  43 ++
 gcc/testsuite/gcc.dg/tree-ssa/vector-10.c  | 122 +
 gcc/testsuite/gcc.dg/tree-ssa/vector-8.c   |  34 ++
 gcc/testsuite/gcc.dg/tree-ssa/vector-9.c   |  34 ++
 .../gcc.target/aarch64/sve/satd-hadamard.c |   3 +
 gcc/tree-ssa-forwprop.cc   | 584 -
 6 files changed, 819 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c 
b/gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c
new file mode 100644
index ..576ef01628cc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-forwprop4-details" } */
+
+#include 
+
+#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) {\
+int t0 = s0 + s1;\
+int t1 = s0 - s1;\
+int t2 = s2 + s3;\
+int t3 = s2 - s3;\
+d0 = t0 + t2;\
+d1 = t1 + t3;\
+d2 = t0 - t2;\
+d3 = t1 - t3;\
+}
+
+int
+x264_pixel_satd_8x4_sim

[gcc r15-5573] testsuite: tree-ssa: Limit targets for vec perm tests

2024-11-21 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:ae0d842f3e7a119b21a000824b10920614088684

commit r15-5573-gae0d842f3e7a119b21a000824b10920614088684
Author: Christoph Müllner 
Date:   Thu Nov 21 16:53:49 2024 +0100

testsuite: tree-ssa: Limit targets for vec perm tests

Recently added test cases assume optimized code generation for certain
vectorized code.  However, this optimization might not be applied if
the backends don't support the optimized permutation.

The tests are confirmed to work on aarch64 and x86-64, so this
patch restricts the tests accordingly.

Tested on x86-64.

PR117728

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/satd-hadamard.c: Restrict to aarch64 and x86-64.
* gcc.dg/tree-ssa/vector-8.c: Likewise.
* gcc.dg/tree-ssa/vector-9.c: Likewise.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c | 2 +-
 gcc/testsuite/gcc.dg/tree-ssa/vector-8.c  | 4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/vector-9.c  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c 
b/gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c
index 576ef01628cc..7a22772f2e63 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/satd-hadamard.c
@@ -40,4 +40,4 @@ x264_pixel_satd_8x4_simplified (uint8_t *pix1, int i_pix1, 
uint8_t *pix2, int i_
   return (((uint16_t)sum) + ((uint32_t)sum>>16)) >> 1;
 }
 
-/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop4" } } 
*/
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop4" { 
target { aarch64*-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vector-8.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vector-8.c
index bc2269065e4f..3a7b62b640d6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vector-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vector-8.c
@@ -30,5 +30,5 @@ void f (vec *p_v_in_1, vec *p_v_in_2, vec *p_v_out_1, vec 
*p_v_out_2)
   *p_v_out_2 = v_out_2;
 }
 
-/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop1" } } */
-/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop1" } } 
*/
+/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop1" { target { aarch64*-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop1" { 
target { aarch64*-*-* x86_64-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vector-9.c 
b/gcc/testsuite/gcc.dg/tree-ssa/vector-9.c
index e5f898e02814..ba34fb163d67 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vector-9.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vector-9.c
@@ -30,5 +30,5 @@ void f (vec *p_v_in_1, vec *p_v_in_2, vec *p_v_out_1, vec 
*p_v_out_2)
   *p_v_out_2 = v_out_2;
 }
 
-/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop1" } } */
-/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop1" } } 
*/
+/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop1" { target { aarch64*-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 2, 3, 6, 7 }" "forwprop1" { 
target { aarch64*-*-* x86_64-*-* } } } } */


[gcc r15-6422] testsuite: Don't test pr118149.c on AArch64

2024-12-22 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:b43bb6591f7f934f9807a2cae3b53fdbe8d27169

commit r15-6422-gb43bb6591f7f934f9807a2cae3b53fdbe8d27169
Author: Christoph Müllner 
Date:   Sun Dec 22 23:02:03 2024 +0100

testsuite: Don't test pr118149.c on AArch64

Recently two test cases for PR118149 have been added.
While pr118149-2.c works well for AArch64, pr118149.c fails
because the expected optimization in forwprop4 cannot be applied
as SLP vectorization does not happen.
This patch fixes this issue by disabling the check on AArch64.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr118149.c: Disable for AArch64.

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.dg/tree-ssa/pr118149.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118149.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr118149.c
index f471877f6611..c9a427c4a07f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr118149.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118149.c
@@ -15,6 +15,6 @@ void fastconv_parse ()
   fastconv_parse_dst[6] = fastconv_parse_dst[7] = i1k + r3k;
 }
 
-/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop4" { target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
-/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 0, 0, 6, 6 }" "forwprop4" { 
target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
-/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 7, 7 }" "forwprop4" { 
target { aarch64*-*-* i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "Vec perm simplify sequences have been blended" 
"forwprop4" { target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 0, 0, 6, 6 }" "forwprop4" { 
target { i?86-*-* x86_64-*-* } } } } */
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 7, 7 }" "forwprop4" { 
target { i?86-*-* x86_64-*-* } } } } */


[gcc r15-6912] testsuite: i386: Fix expected vectorization in pr105493.c

2025-01-15 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:120a37008222bf6fe17658af3d1ba1b384642905

commit r15-6912-g120a37008222bf6fe17658af3d1ba1b384642905
Author: Christoph Müllner 
Date:   Tue Jan 14 13:20:50 2025 +0100

testsuite: i386: Fix expected vectorization in pr105493.c

As reported in PR117079, commit ab18785840d7b8 broke the test pr105493.c.
The test code contains two loops, where the first one is expected to be
vectorized.  The commit that broke that vectorization was the first of
several that enabled vectorization of both loops.
Now, that GCC can vectorize the whole function, let's adjust this test
to expect vectorization of both loops by ensuring that we don't write
to the helper-array 'tmp'.

Signed-off-by: Christoph Müllner 

PR target/117079

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr105493.c: Fix expected vectorization

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/testsuite/gcc.target/i386/pr105493.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/pr105493.c 
b/gcc/testsuite/gcc.target/i386/pr105493.c
index c6fd16753cd9..c2b1a8f466ed 100644
--- a/gcc/testsuite/gcc.target/i386/pr105493.c
+++ b/gcc/testsuite/gcc.target/i386/pr105493.c
@@ -45,7 +45,5 @@ foo ( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
 return (((uint16_t)sum) + ((uint32_t)sum>>16)) >> 1;
 }
 
-
-/* The first loop should be vectorized, which will eliminate redundant stores
-   and loads.  */
-/* { dg-final { scan-tree-dump-times "  MEM  
\\\[\[\^\]\]\*\\\] = " 4 "slp1" } } */
+/* All loops should be vectorized.  */
+/* { dg-final { scan-tree-dump-times "MEM\[^\n\]*tmp\[^\n\]*= " 0 "slp1" } } */


[gcc r15-6946] forwprop: Eliminate redundant calls to to_constant()

2025-01-16 Thread Christoph Müllner via Gcc-cvs
https://gcc.gnu.org/g:57de373426e27395c0ef581c8a8300ec74c1bb59

commit r15-6946-g57de373426e27395c0ef581c8a8300ec74c1bb59
Author: Christoph Müllner 
Date:   Wed Jan 15 14:53:27 2025 +0100

forwprop: Eliminate redundant calls to to_constant()

When extracting the amount of vector elements, we currently
first check if the value is a constant with is_constant(),
followed by obtaining the value with to_constant(),
which internally calls is_constant() again.
We can address this by using is_constant (T*), which also
provides the constant value.

gcc/ChangeLog:

* tree-ssa-forwprop.cc (recognise_vec_perm_simplify_seq):
Eliminate redundant calls to to_constant().

Signed-off-by: Christoph Müllner 

Diff:
---
 gcc/tree-ssa-forwprop.cc | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 0d62f2bf60db..2f82f0633883 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -3530,6 +3530,8 @@ fwprop_ssa_val (tree name)
 static bool
 recognise_vec_perm_simplify_seq (gassign *stmt, vec_perm_simplify_seq *seq)
 {
+  unsigned HOST_WIDE_INT nelts;
+
   gcc_checking_assert (stmt);
   gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
   basic_block bb = gimple_bb (stmt);
@@ -3539,15 +3541,13 @@ recognise_vec_perm_simplify_seq (gassign *stmt, 
vec_perm_simplify_seq *seq)
   tree v_y = gimple_assign_rhs2 (stmt);
   tree sel = gimple_assign_rhs3 (stmt);
 
-  if (!VECTOR_CST_NELTS (sel).is_constant ()
+  if (!VECTOR_CST_NELTS (sel).is_constant (&nelts)
   || TREE_CODE (v_x) != SSA_NAME
   || TREE_CODE (v_y) != SSA_NAME
   || !has_single_use (v_x)
   || !has_single_use (v_y))
 return false;
 
-  unsigned int nelts = VECTOR_CST_NELTS (sel).to_constant ();
-
   /* Don't analyse sequences with many lanes.  */
   if (nelts > 4)
 return false;
@@ -3614,12 +3614,12 @@ recognise_vec_perm_simplify_seq (gassign *stmt, 
vec_perm_simplify_seq *seq)
   || v_in != gimple_assign_rhs2 (v_2_stmt))
 return false;
 
-  if (!VECTOR_CST_NELTS (v_1_sel).is_constant ()
-  || !VECTOR_CST_NELTS (v_2_sel).is_constant ())
+  unsigned HOST_WIDE_INT v_1_nelts, v_2_nelts;
+  if (!VECTOR_CST_NELTS (v_1_sel).is_constant (&v_1_nelts)
+  || !VECTOR_CST_NELTS (v_2_sel).is_constant (&v_2_nelts))
 return false;
 
-  if (nelts != VECTOR_CST_NELTS (v_1_sel).to_constant ()
-  || nelts != VECTOR_CST_NELTS (v_2_sel).to_constant ())
+  if (nelts != v_1_nelts || nelts != v_2_nelts)
 return false;
 
   /* Create the new selector.  */