[gcc r15-834] [PATCH] libcpp: Correct typo 'r' -> '\r'

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:87463737b9942249ceb0d0d60050adf452f44f7c

commit r15-834-g87463737b9942249ceb0d0d60050adf452f44f7c
Author: Peter Damianov 
Date:   Sun May 26 08:06:14 2024 -0600

[PATCH] libcpp: Correct typo 'r' -> '\r'

libcpp/ChangeLog:
* lex.cc (do_peek_prev): Correct typo in argument to 
__builtin_expect()

Signed-off-by: Peter Damianov 

Diff:
---
 libcpp/lex.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcpp/lex.cc b/libcpp/lex.cc
index c9e44e6..de752bdc9c8 100644
--- a/libcpp/lex.cc
+++ b/libcpp/lex.cc
@@ -5038,7 +5038,7 @@ do_peek_prev (const unsigned char *peek, const unsigned 
char *bound)
 
   unsigned char c = *--peek;
   if (__builtin_expect (c == '\n', false)
-  || __builtin_expect (c == 'r', false))
+  || __builtin_expect (c == '\r', false))
 {
   if (peek == bound)
return peek;


[gcc r15-835] [to-be-committed][RISC-V] Generate nearby constant, then adjust to our final desired constant

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:95660223c434000a42957cf6cabed0236bb4bae8

commit r15-835-g95660223c434000a42957cf6cabed0236bb4bae8
Author: Jeff Law 
Date:   Sun May 26 10:54:18 2024 -0600

[to-be-committed][RISC-V] Generate nearby constant, then adjust to our 
final desired constant

Next step in constant synthesis work.

For some cases it can be advantageous to generate a constant near our 
target,
then do a final addi to fully synthesize C.

The idea is that while our target C may require N instructions to 
synthesize,
C' may only require N-2 (or fewer) instructions.  Thus there's budget to 
adjust
C' into C ending up with a better sequence than if we tried to generate C
directly.

So as an example:

> unsigned long foo_0xf7fe77ff(void) { return 0xf7fe77ffUL; 
}

This is currently 5 instructions on the trunk:

> li  a0,-4096
> addia0,a0,2047
> bclri   a0,a0,31
> bclri   a0,a0,32
> bclri   a0,a0,43

But we can do better by first synthesizing 0xf7fe7800 which is just 
3
instructions.  Then we can subtract 1 from the result.  That gives us this
sequence:

> li  a0,-16789504
> sllia0,a0,19
> addia0,a0,-2048
> addia0,a0,-1

These cases are relatively easy to find once you know what you're looking 
for.
I kept the full set found by the testing code yesterday, mostly because 
some of
them show different patterns for generating C', thus showing generality in 
the
overall synthesis implementation.  While all these tests have 0x7ff in their
low bits, that's just an artifact of the test script.  The methodology will
work for a variety of other cases.

gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Try generating
a nearby simpler constant, then using a final addi to set low
bits properly.

gcc/testsuite

* gcc.target/riscv/synthesis-7.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc|   20 +
 gcc/testsuite/gcc.target/riscv/synthesis-7.c | 1531 ++
 2 files changed, 1551 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 92935275aaa..0ebdd696a95 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -963,6 +963,26 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
}
 }
 
+  /* We might be able to generate a constant close to our target
+ then a final ADDI to get the desired constant.  */
+  if (cost > 2
+  && (value & 0xfff) != 0
+  && (value & 0x1800) == 0x1000)
+{
+  HOST_WIDE_INT adjustment = -(0x800 - (value & 0xfff));
+  alt_cost = 1 + riscv_build_integer_1 (alt_codes,
+   value - adjustment, mode);
+
+  if (alt_cost < cost)
+   {
+ alt_codes[alt_cost - 1].code = PLUS;
+ alt_codes[alt_cost - 1].value = adjustment;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+   }
+}
+
   /* Final cases, particularly focused on bseti.  */
   if (cost > 2 && TARGET_ZBS)
 {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-7.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-7.c
new file mode 100644
index 000..1611c964758
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-7.c
@@ -0,0 +1,1531 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 7571 } } */
+
+unsigned long foo_0xf7fe77ff(void) { return 0xf7fe77ffUL; }
+unsigned long foo_0xeffe77ff(void) { return 0xeffe77ffUL; }
+unsigned long foo_0xdffe77ff(void) { return 0xdffe77ffUL; }
+unsigned long foo_0xbffe77ff(void) { return 0xbffe77ffUL; }
+unsigned long foo_0x7ffe77ff(void) { return 0x7ffe77ffUL; }
+unsigned long foo_0xfffefffe77ff(void) { return 0xfffefffe77ffUL; }
+unsigned long foo_0xfffdfffe77ff(void) { return 0xfffdfffe77ffUL; }
+unsigned long foo_

[gcc r15-836] doc: Quote singular '=' signs

2024-05-26 Thread Gerald Pfeifer via Gcc-cvs
https://gcc.gnu.org/g:53d919873c56cec0e7354907e8da3d8dba158a69

commit r15-836-g53d919873c56cec0e7354907e8da3d8dba158a69
Author: Gerald Pfeifer 
Date:   Sun May 26 23:48:36 2024 +0200

doc: Quote singular '=' signs

gcc:
* doc/extend.texi (Attribute Syntax): Use @samp{=} instead of 
@code{=}.
(Extended Asm): Ditto.

Diff:
---
 gcc/doc/extend.texi | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8786249fb6f..00449bd9630 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10026,7 +10026,7 @@ labelled in C90 or C99, so the ambiguity does not arise 
there.
 @subsubheading Enumerator Attributes
 
 In GNU C, an attribute specifier list may appear as part of an enumerator.
-The attribute goes after the enumeration constant, before @code{=}, if
+The attribute goes after the enumeration constant, before @samp{=}, if
 present.  The optional attribute in the enumerator appertains to the
 enumeration constant.  It is not possible to place the attribute after
 the constant expression, if present.
@@ -10108,7 +10108,7 @@ the @code{noreturn} attribute applies to all the 
functions
 declared; the @code{format} attribute only applies to @code{d1}.
 
 An attribute specifier list may appear immediately before the comma,
-@code{=} or semicolon terminating the declaration of an identifier other
+@samp{=}, or semicolon terminating the declaration of an identifier other
 than a function definition.  Such attribute specifiers apply
 to the declared object or function.  Where an
 assembler name for an object or function is specified (@pxref{Asm
@@ -11804,7 +11804,7 @@ stop:
 
 The following artificial example shows an @code{asm goto} that sets
 up an output only on one path inside the @code{asm goto}.  Usage of
-constraint modifier @code{=} instead of @code{+} would be wrong as
+constraint modifier @samp{=} instead of @samp{+} would be wrong as
 @code{factor} is used on all paths from the @code{asm goto}.
 
 @example


[gcc r15-837] go: Move web references from golang.org to go.dev.

2024-05-26 Thread Gerald Pfeifer via Gcc-cvs
https://gcc.gnu.org/g:a06df6644a4403bb63b0ab68532ea67b938d8baf

commit r15-837-ga06df6644a4403bb63b0ab68532ea67b938d8baf
Author: Gerald Pfeifer 
Date:   Mon May 27 00:23:38 2024 +0200

go: Move web references from golang.org to go.dev.

gcc/go:
* gccgo.texi (Top): Move a web reference from golang.org to go.dev.
(C Interoperability): Move a web reference from golang.org to
pkg.go.dev.

Diff:
---
 gcc/go/gccgo.texi | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/go/gccgo.texi b/gcc/go/gccgo.texi
index a83d0fc3f58..361f4e2ee55 100644
--- a/gcc/go/gccgo.texi
+++ b/gcc/go/gccgo.texi
@@ -80,7 +80,7 @@ This manual describes how to use @command{gccgo}, the GNU 
compiler for
 the Go programming language.  This manual is specifically about
 @command{gccgo}.  For more information about the Go programming
 language in general, including language specifications and standard
-package documentation, see @uref{https://golang.org/}.
+package documentation, see @uref{https://go.dev}.
 
 @menu
 * Copying:: The GNU General Public License.
@@ -389,8 +389,7 @@ or with C++ code compiled using @code{extern "C"}.
 
 This information is provided largely for documentation purposes.  For
 ordinary use it is best to build programs with the go tool and then
-use @code{import "C"}, as described at
-@url{https://golang.org/cmd/cgo}.
+use @code{import "C"}, as described at @url{https://pkg.go.dev/cmd/cgo}.
 
 @menu
 * C Type Interoperability:: How C and Go types match up.


[gcc r15-838] [to-be-committed] [RISC-V] Try inverting for constant synthesis

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:3a915d6ad5fc3e0fadd14e54515b48b1d655c5a4

commit r15-838-g3a915d6ad5fc3e0fadd14e54515b48b1d655c5a4
Author: Jeff Law 
Date:   Sun May 26 17:54:51 2024 -0600

[to-be-committed] [RISC-V] Try inverting for constant synthesis

So there's another class of constants we're failing to synthesize well.
Specifically those where we can invert our original constant C into C' and 
C'
takes at least 2 fewer instructions to synthesize than C.  In that case we 
can
initially generate C', then use xori with the constant -1 to flip all the 
bits
resulting in our target constant.

I've only seen this trigger when the final synthesis is li+srli+xori. The
original synthesis took on various 4 or 5 instruction forms.

Most of the methods we use to improve constant synthesis are in
riscv_build_integer_1.  I originally tried to put this code in there. But
that'll end up with infinite recursion due to some other ADDI related code
which wants to flip bits and try synthesis.

So this was put into riscv_build_integer and recurses into 
riscv_build_integer.
This isn't unprecedented, just a bit different than most of the other 
synthesis
implementation bits.

This doesn't depend on any extensions.  So it should help any rv64 system.

gcc/

* config/riscv/riscv.cc (riscv_build_integer_1): Verify there
are no bits left to set in the constant when generating bseti.
(riscv_build_integer): Synthesize ~value and if it's cheap use it
with a trailing xori with -1.

gcc/testsuite

* gcc.target/riscv/synthesis-8.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc| 27 +-
 gcc/testsuite/gcc.target/riscv/synthesis-8.c | 34 
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0ebdd696a95..401ddc9eeb7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1022,7 +1022,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
 
   /* If LUI+ADDI+BSETI resulted in a more efficient
 sequence, then use it.  */
-  if (i < cost)
+  if (value == 0 && i < cost)
{
  memcpy (codes, alt_codes, sizeof (alt_codes));
  cost = i;
@@ -1074,6 +1074,31 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
}
 }
 
+  /* See if we can generate the inverted constant, then use
+ not to get the desired constant.
+
+ This can't be in riscv_build_integer_1 as it'll mutually
+ recurse with another case in there.  And it has to recurse
+ into riscv_build_integer so we get the trailing 0s case
+ above.  */
+  if (cost > 2 && value < 0)
+{
+  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+  int alt_cost;
+
+  HOST_WIDE_INT nval = ~value;
+  alt_cost = 1 + riscv_build_integer (alt_codes, nval, mode);
+  if (alt_cost < cost)
+   {
+ alt_codes[alt_cost - 1].code = XOR;
+ alt_codes[alt_cost - 1].value = -1;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+   }
+}
+
+
   if (!TARGET_64BIT
   && (value > INT32_MAX || value < INT32_MIN))
 {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-8.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
new file mode 100644
index 000..2bcdb4e774d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 72 } } */
+
+unsigned long foo_0xc00077ff(void) { return 0xc00077ffUL; }
+unsigned long foo_0xc000b7ff(void) { return 0xc000b7ffUL; }
+unsigned long foo_0xc00137ff(void) { return 0xc00137ffUL; }
+unsigned long foo_0xc00237ff(void) { return 0xc00237ffUL; }
+unsigned long foo_0xc00437ff(void) { return 0xc00437ffUL; }
+unsigned long foo_0xc00837ff(void) { return 0xc00837ffUL; }
+unsigned long foo

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] [v2] More logical op simplifications in simplify-rtx.cc

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:fee06b936580108cee30467ad5ae98be5c8d1a4f

commit fee06b936580108cee30467ad5ae98be5c8d1a4f
Author: Jeff Law 
Date:   Sat May 25 12:39:05 2024 -0600

[committed] [v2] More logical op simplifications in simplify-rtx.cc

This is a revamp of what started as a target specific patch.

Basically xalan (corrected, I originally thought it was perlbench) has a 
bitset
implementation with a bit of an oddity.  Specifically setBit will clear the 
bit
before it is set:

> if (bitToSet < 32)
> {
> fBits1 &= ~mask;
> fBits1 |= mask;
> }
>  else
> {
> fBits2 &= ~mask;
> fBits2 |= mask;
> }
We can clean this up pretty easily in RTL with a small bit of code in
simplify-rtx.  While xalan doesn't have other cases, we can synthesize tests
pretty easily and handle them as well.

It turns out we don't actually have to recognize this stuff at the bit 
level,
just standard logical identities are sufficient.  For example

(X | Y) & ~Y -> X & ~Y

Andrew P. might poke at this at the gimple level.  The type changes kind of 
get
in the way in gimple but he's much better at match.pd than I am, so if he 
wants
to chase it from the gimple side, I'll fully support that.

Bootstrapped and regression tested on x86.  Also run through my tester on 
its
embedded targets.

Pushing to the trunk.

gcc/

* simplify-rtx.cc (simplify_context::simplify_binary_operation_1): 
Handle
more logical simplifications.

gcc/testsuite/

* g++.target/riscv/redundant-bitmap-1.C: New test.
* g++.target/riscv/redundant-bitmap-2.C: New test.
* g++.target/riscv/redundant-bitmap-3.C: New test.
* g++.target/riscv/redundant-bitmap-4.C: New test.

(cherry picked from commit 05daf617ea22e1d818295ed2d037456937e23530)

Diff:
---
 gcc/simplify-rtx.cc| 29 ++
 .../g++.target/riscv/redundant-bitmap-1.C  | 14 +++
 .../g++.target/riscv/redundant-bitmap-2.C  | 14 +++
 .../g++.target/riscv/redundant-bitmap-3.C  | 14 +++
 .../g++.target/riscv/redundant-bitmap-4.C  | 14 +++
 5 files changed, 85 insertions(+)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index dceaa1c..bb562c3af2c 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3549,6 +3549,12 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
return tem;
}
 
+  /* Convert (ior (and (not A) B) A) into A | B.  */
+  if (GET_CODE (op0) == AND
+ && GET_CODE (XEXP (op0, 0)) == NOT
+ && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
+   return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+
   tem = simplify_byte_swapping_operation (code, mode, op0, op1);
   if (tem)
return tem;
@@ -3801,6 +3807,12 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
return tem;
}
 
+  /* Convert (xor (and (not A) B) A) into A | B.  */
+  if (GET_CODE (op0) == AND
+ && GET_CODE (XEXP (op0, 0)) == NOT
+ && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
+   return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+
   tem = simplify_byte_swapping_operation (code, mode, op0, op1);
   if (tem)
return tem;
@@ -4006,6 +4018,23 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
  && rtx_equal_p (op1, XEXP (XEXP (op0, 1), 0)))
return simplify_gen_binary (AND, mode, op1, XEXP (op0, 0));
 
+  /* (and (ior/xor (X Y) (not Y)) -> X & ~Y */
+  if ((GET_CODE (op0) == IOR || GET_CODE (op0) == XOR)
+ && GET_CODE (op1) == NOT
+ && rtx_equal_p (XEXP (op1, 0), XEXP (op0, 1)))
+   return simplify_gen_binary (AND, mode, XEXP (op0, 0),
+   simplify_gen_unary (NOT, mode,
+   XEXP (op1, 0),
+   mode));
+  /* (and (ior/xor (Y X) (not Y)) -> X & ~Y */
+  if ((GET_CODE (op0) == IOR || GET_CODE (op0) == XOR)
+ && GET_CODE (op1) == NOT
+ && rtx_equal_p (XEXP (op1, 0), XEXP (op0, 0)))
+   return simplify_gen_binary (AND, mode, XEXP (op0, 1),
+   simplify_gen_unary (NOT, mode,
+   XEXP (op1, 0),
+   mode));
+
   /* Convert (and (ior A C) (ior B C)) into (ior (and A B) C).  */
   if (GET_CODE (op0) == GET_CODE (op1)
  && (GET_CODE (op0) == AND
diff --git a/gcc/testsuite/g++.target/riscv/redundant-bitmap-1.C 
b/gcc/testsuite/g++.ta

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed] [RISC-V] Try inverting for constant synthesis

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:19868f0990e87159025646bf99cf461c07e12563

commit 19868f0990e87159025646bf99cf461c07e12563
Author: Jeff Law 
Date:   Sun May 26 17:54:51 2024 -0600

[to-be-committed] [RISC-V] Try inverting for constant synthesis

So there's another class of constants we're failing to synthesize well.
Specifically those where we can invert our original constant C into C' and 
C'
takes at least 2 fewer instructions to synthesize than C.  In that case we 
can
initially generate C', then use xori with the constant -1 to flip all the 
bits
resulting in our target constant.

I've only seen this trigger when the final synthesis is li+srli+xori. The
original synthesis took on various 4 or 5 instruction forms.

Most of the methods we use to improve constant synthesis are in
riscv_build_integer_1.  I originally tried to put this code in there. But
that'll end up with infinite recursion due to some other ADDI related code
which wants to flip bits and try synthesis.

So this was put into riscv_build_integer and recurses into 
riscv_build_integer.
This isn't unprecedented, just a bit different than most of the other 
synthesis
implementation bits.

This doesn't depend on any extensions.  So it should help any rv64 system.

gcc/

* config/riscv/riscv.cc (riscv_build_integer_1): Verify there
are no bits left to set in the constant when generating bseti.
(riscv_build_integer): Synthesize ~value and if it's cheap use it
with a trailing xori with -1.

gcc/testsuite

* gcc.target/riscv/synthesis-8.c: New test.

(cherry picked from commit 3a915d6ad5fc3e0fadd14e54515b48b1d655c5a4)

Diff:
---
 gcc/config/riscv/riscv.cc| 27 +-
 gcc/testsuite/gcc.target/riscv/synthesis-8.c | 34 
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0ebdd696a95..401ddc9eeb7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1022,7 +1022,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
 
   /* If LUI+ADDI+BSETI resulted in a more efficient
 sequence, then use it.  */
-  if (i < cost)
+  if (value == 0 && i < cost)
{
  memcpy (codes, alt_codes, sizeof (alt_codes));
  cost = i;
@@ -1074,6 +1074,31 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
}
 }
 
+  /* See if we can generate the inverted constant, then use
+ not to get the desired constant.
+
+ This can't be in riscv_build_integer_1 as it'll mutually
+ recurse with another case in there.  And it has to recurse
+ into riscv_build_integer so we get the trailing 0s case
+ above.  */
+  if (cost > 2 && value < 0)
+{
+  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+  int alt_cost;
+
+  HOST_WIDE_INT nval = ~value;
+  alt_cost = 1 + riscv_build_integer (alt_codes, nval, mode);
+  if (alt_cost < cost)
+   {
+ alt_codes[alt_cost - 1].code = XOR;
+ alt_codes[alt_cost - 1].value = -1;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+   }
+}
+
+
   if (!TARGET_64BIT
   && (value > INT32_MAX || value < INT32_MIN))
 {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-8.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
new file mode 100644
index 000..2bcdb4e774d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 72 } } */
+
+unsigned long foo_0xc00077ff(void) { return 0xc00077ffUL; }
+unsigned long foo_0xc000b7ff(void) { return 0xc000b7ffUL; }
+unsigned long foo_0xc00137ff(void) { return 0xc00137ffUL; }
+unsigned long foo_0xc00237ff(void) { return 0xc00237ffUL; }
+unsigned long foo_0xc00437ff(void) { return 0xc00437ffUL; }
+unsigned long foo_0xc

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed][RISC-V] Generate nearby constant, then adjust to our final desired constant

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f69c19f64e96e3cb04767d2f41191df114d4b309

commit f69c19f64e96e3cb04767d2f41191df114d4b309
Author: Jeff Law 
Date:   Sun May 26 10:54:18 2024 -0600

[to-be-committed][RISC-V] Generate nearby constant, then adjust to our 
final desired constant

Next step in constant synthesis work.

For some cases it can be advantageous to generate a constant near our 
target,
then do a final addi to fully synthesize C.

The idea is that while our target C may require N instructions to 
synthesize,
C' may only require N-2 (or fewer) instructions.  Thus there's budget to 
adjust
C' into C ending up with a better sequence than if we tried to generate C
directly.

So as an example:

> unsigned long foo_0xf7fe77ff(void) { return 0xf7fe77ffUL; 
}

This is currently 5 instructions on the trunk:

> li  a0,-4096
> addia0,a0,2047
> bclri   a0,a0,31
> bclri   a0,a0,32
> bclri   a0,a0,43

But we can do better by first synthesizing 0xf7fe7800 which is just 
3
instructions.  Then we can subtract 1 from the result.  That gives us this
sequence:

> li  a0,-16789504
> sllia0,a0,19
> addia0,a0,-2048
> addia0,a0,-1

These cases are relatively easy to find once you know what you're looking 
for.
I kept the full set found by the testing code yesterday, mostly because 
some of
them show different patterns for generating C', thus showing generality in 
the
overall synthesis implementation.  While all these tests have 0x7ff in their
low bits, that's just an artifact of the test script.  The methodology will
work for a variety of other cases.

gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Try generating
a nearby simpler constant, then using a final addi to set low
bits properly.

gcc/testsuite

* gcc.target/riscv/synthesis-7.c: New test.

(cherry picked from commit 95660223c434000a42957cf6cabed0236bb4bae8)

Diff:
---
 gcc/config/riscv/riscv.cc|   20 +
 gcc/testsuite/gcc.target/riscv/synthesis-7.c | 1531 ++
 2 files changed, 1551 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 92935275aaa..0ebdd696a95 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -963,6 +963,26 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
}
 }
 
+  /* We might be able to generate a constant close to our target
+ then a final ADDI to get the desired constant.  */
+  if (cost > 2
+  && (value & 0xfff) != 0
+  && (value & 0x1800) == 0x1000)
+{
+  HOST_WIDE_INT adjustment = -(0x800 - (value & 0xfff));
+  alt_cost = 1 + riscv_build_integer_1 (alt_codes,
+   value - adjustment, mode);
+
+  if (alt_cost < cost)
+   {
+ alt_codes[alt_cost - 1].code = PLUS;
+ alt_codes[alt_cost - 1].value = adjustment;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+   }
+}
+
   /* Final cases, particularly focused on bseti.  */
   if (cost > 2 && TARGET_ZBS)
 {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-7.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-7.c
new file mode 100644
index 000..1611c964758
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-7.c
@@ -0,0 +1,1531 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 7571 } } */
+
+unsigned long foo_0xf7fe77ff(void) { return 0xf7fe77ffUL; }
+unsigned long foo_0xeffe77ff(void) { return 0xeffe77ffUL; }
+unsigned long foo_0xdffe77ff(void) { return 0xdffe77ffUL; }
+unsigned long foo_0xbffe77ff(void) { return 0xbffe77ffUL; }
+unsigned long foo_0x7ffe77ff(void) { return 0x7ffe77ffUL; }
+unsigned long foo_0xfffefffe77ff(void) { return 0xfffefffe77ffUL; }
+unsigned long foo_0xfffdff

[gcc/riscv/heads/gcc-14-with-riscv-opts] (141 commits) [to-be-committed] [RISC-V] Try inverting for constant synth

2024-05-26 Thread Jeff Law via Gcc-cvs
The branch 'riscv/heads/gcc-14-with-riscv-opts' was updated to point to:

 7f716ba0f6c... [to-be-committed] [RISC-V] Try inverting for constant synth

It previously pointed to:

 19868f0990e... [to-be-committed] [RISC-V] Try inverting for constant synth

Diff:

!!! WARNING: THE FOLLOWING COMMITS ARE NO LONGER ACCESSIBLE (LOST):
---

  19868f0... [to-be-committed] [RISC-V] Try inverting for constant synth
  f69c19f... [to-be-committed][RISC-V] Generate nearby constant, then ad
  fee06b9... [committed] [v2] More logical op simplifications in simplif
  a08b5d4... [to-be-committed,v2,RISC-V] Use bclri in constant synthesis
  4e981cc... RISC-V: Enable vectorization for vect-early-break_124-pr114
  106d603... RISC-V: avoid LUI based const mat in alloca epilogue expans
  259f9f2... RISC-V: avoid LUI based const mat in prologue/epilogue expa
  97fb62e... Regenerate riscv.opt.urls and i386.opt.urls
  5ef9011... DSE: Fix ICE after allow vector type in get_stored_val
  08aaf0d... [to-be-committed][RISC-V][PR target/115142] Do not create i
  1b074bd... RISC-V: Implement -m{,no}fence-tso
  03f61ba... [to-be-committed,RISC-V] Improve some shift-add sequences
  a544526... RISC-V: Fix "Nan-box the result of movbf on soft-bf16"
  af9118f... RISC-V: Modify _Bfloat16 to __bf16
  db2b829... RISC-V: Implement IFN SAT_ADD for both the scalar and vecto
  d6cb9a0... RISC-V: Add initial cost handling for segment loads/stores.
  17dfc97... internal-fn: Do not force vcond_mask operands to reg.
  586e678... RISC-V: Cleanup some temporally files [NFC]
  c1ad575... RISC-V: Enable vectorizable early exit testsuite
  b1aab03... RISC-V: Implement vectorizable early exit with vcond_mask_l
  4ec3a6b... Vect: Support loop len in vectorizable early exit
  674362d... Vect: Support new IFN SAT_ADD for unsigned vector int
  51b69c8... Internal-fn: Support new IFN SAT_ADD for unsigned scalar in
  faf2f9e... RISC-V: testsuite: Drop march-string in cmpmemsi/cpymemsi t
  67195fb... RISC-V: Add Zvfbfwma extension to the -march= option
  45c5684... Add missing hunk in recent change.
  72e6ff2... [v2,2/2] RISC-V: strcmp expansion: Use adjust_address() for
  d57... [v2,1/2] RISC-V: Add cmpmemsi expansion
  f3d5808... RISC-V: Test cbo.zero expansion for rv32
  59e6343... RISC-V: Allow by-pieces to do overlapping accesses in block
  ad0413b... RISC-V: add tests for overlapping mem ops
  69408db... RISC-V: Allow unaligned accesses in cpymemsi expansion
  0dcd2d2... RISC-V: Add test cases for cpymem expansion
  75a0630... [committed] Fix rv32 issues with recent zicboz work
  9de3210... [to-be-committed,RISC-V] Remove redundant AND in shift-add 
  de257cc... RISC-V: avoid LUI based const materialization ... [part of 
  f9a0426... [PATCH 3/3] RISC-V: Add memset-zero expansion to cbo.zero
  0db572d... [PATCH 2/3] RISC-V: testsuite: Make cmo tests LTO safe
  5b00e29... [1/3] expr: Export clear_by_pieces()
  0c1a07d... RISC-V: Fix format issue for trailing operator [NFC]
  c6ed1bc... [to-be-committed,RISC-V] Improve AND with some constants
  7d135c5... RISC-V: Bugfix ICE for RVV intrinisc vfw on _Float16 scalar
  8be088a... [to-be-committed,RISC-V] Improve single inverted bit extrac
  71edaf6... [to-be-committed,RISC-V] Improve usage of slli.uw in consta
  a39fd3b... [to-be-committed] RISC-V Fix minor regression in synthesis 
  28db6a1... [RISC-V] Use shNadd for constant synthesis
  11c50b6... RISC-V: Fix typos in code or comment [NFC]
  13d1b47... [committed] [RISC-V] Provide splitting guidance to combine 
  788ed48... RISC-V: Make full-vec-move1.c test robust for optimization
  2bb25f9... RISC-V: Add tests for cpymemsi expansion
  b59bc76... [PATCH v1 1/1] RISC-V: Nan-box the result of movbf on soft-
  f273ad2... [RISC-V][V2] Fix incorrect if-then-else nesting of Zbs usag
  b6dc4a5... RISC-V: Cover sign-extensions in lshr3_zero_exten
  38fc117... RISC-V: Add zero_extract support for rv64gc
  9041a04... RISC-V: Cover sign-extensions in lshrsi3_zero_extend_2
  9a81321... RISC-V: Add test for sraiw-31 special case
  39c05d2... [committed][RISC-V] Turn on overlap_op_by_pieces for generi
  07f793b... [committed] [RISC-V] Allow uarchs to set TARGET_OVERLAP_OP_
  8b5321b... [RISC-V] [PATCH v2] Enable inlining str* by default
  c396fa6... [PATCH 1/1] RISC-V: Add Zfbfmin extension to the -march= op
  cd0e329... RISC-V: Add testcase for PR114749.
  af9ec7b... [RISC-V] Add support for _Bfloat16
  6b0b012... RISC-V: Document -mcmodel=large
  3ccf8a5... So another constant synthesis improvement.
  3d0f38b... RISC-V: miscll comment fixes [NFC]
  9ba5c71... [committed][RISC-V] Fix nearbyint failure on rv32 and forma
  0099c20... [RFA][RISC-V] Improve constant synthesis for constants with
  63c91bf... [committed] [RISC-V] Don't run new rounding tests on newlib
  1985c80... [committed] [RISC-V] Trivial pattern cleanup
  305a5d2... [committed] [RISC-V] Fix detection of store pair fusion cas
  7fd192a... This is

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add -X to link spec

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:0761c4e460775424643c45a09558b8b85ad6b0a4

commit 0761c4e460775424643c45a09558b8b85ad6b0a4
Author: Fangrui Song 
Date:   Fri Apr 26 18:14:33 2024 -0700

RISC-V: Add -X to link spec

--discard-locals (-X) instructs the linker to remove local .L* symbols,
which occur a lot due to label differences for linker relaxation. The
arm port has a similar need and passes -X to ld.

In contrast, the RISC-V port does not pass -X to ld and rely on the
default --discard-locals in GNU ld's riscv port. The arm way is more
conventional (compiler driver instead of the linker customizes the
default linker behavior) and works with lld.

gcc/ChangeLog:

* config/riscv/elf.h (LINK_SPEC): Add -X.
* config/riscv/freebsd.h (LINK_SPEC): Add -X.
* config/riscv/linux.h (LINK_SPEC): Add -X.

Diff:
---
 gcc/config/riscv/elf.h | 1 +
 gcc/config/riscv/freebsd.h | 1 +
 gcc/config/riscv/linux.h   | 1 +
 3 files changed, 3 insertions(+)

diff --git a/gcc/config/riscv/elf.h b/gcc/config/riscv/elf.h
index f533764d9f8..c97f13c0cca 100644
--- a/gcc/config/riscv/elf.h
+++ b/gcc/config/riscv/elf.h
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 #define LINK_SPEC "\
 -melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv \
 %{mno-relax:--no-relax} \
+-X \
 %{mbig-endian:-EB} \
 %{mlittle-endian:-EL} \
 %{shared}"
diff --git a/gcc/config/riscv/freebsd.h b/gcc/config/riscv/freebsd.h
index bd08a985285..5dd4d51c42b 100644
--- a/gcc/config/riscv/freebsd.h
+++ b/gcc/config/riscv/freebsd.h
@@ -44,6 +44,7 @@ along with GCC; see the file COPYING3.  If not see
   %{p:%nconsider using `-pg' instead of `-p' with gprof (1)}   \
   %{v:-V}  \
   %{assert*} %{R*} %{rpath*} %{defsym*}\
+  -X   \
   %{mbig-endian:-EB}   \
   %{mlittle-endian:-EL}\
   %{shared:-Bshareable %{h*} %{soname*}}   \
diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h
index 15851f653bc..3c356227134 100644
--- a/gcc/config/riscv/linux.h
+++ b/gcc/config/riscv/linux.h
@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3.  If not see
 #define LINK_SPEC "\
 -melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \
 %{mno-relax:--no-relax} \
+-X \
 %{mbig-endian:-EB} \
 %{mlittle-endian:-EL} \
 %{shared} \


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix parsing of Zic* extensions

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:fe3b44c779a51ebf0d5c017d4c676b6cc41932ae

commit fe3b44c779a51ebf0d5c017d4c676b6cc41932ae
Author: Christoph Müllner 
Date:   Mon Apr 29 00:46:06 2024 +0200

RISC-V: Fix parsing of Zic* extensions

The extension parsing table entries for a range of Zic* extensions
does not match the mask definition in riscv.opt.
This results in broken TARGET_ZIC* macros, because the values of
riscv_zi_subext and riscv_zicmo_subext are set wrong.

This patch fixes this by moving Zic64b into riscv_zicmo_subext
and all other affected Zic* extensions to riscv_zi_subext.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: Move ziccamoa, ziccif,
zicclsm, and ziccrse into riscv_zi_subext.
* config/riscv/riscv.opt: Define MASK_ZIC64B for
riscv_ziccmo_subext.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 285300eb928b171236e895f28c960ad02dcb0d67)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc | 8 
 gcc/config/riscv/riscv.opt  | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 43b7549e3ec..8cc0e727737 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1638,15 +1638,15 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
 
   {"zihintntl", &gcc_options::x_riscv_zi_subext, MASK_ZIHINTNTL},
   {"zihintpause", &gcc_options::x_riscv_zi_subext, MASK_ZIHINTPAUSE},
+  {"ziccamoa", &gcc_options::x_riscv_zi_subext, MASK_ZICCAMOA},
+  {"ziccif", &gcc_options::x_riscv_zi_subext, MASK_ZICCIF},
+  {"zicclsm", &gcc_options::x_riscv_zi_subext, MASK_ZICCLSM},
+  {"ziccrse", &gcc_options::x_riscv_zi_subext, MASK_ZICCRSE},
 
   {"zicboz", &gcc_options::x_riscv_zicmo_subext, MASK_ZICBOZ},
   {"zicbom", &gcc_options::x_riscv_zicmo_subext, MASK_ZICBOM},
   {"zicbop", &gcc_options::x_riscv_zicmo_subext, MASK_ZICBOP},
   {"zic64b", &gcc_options::x_riscv_zicmo_subext, MASK_ZIC64B},
-  {"ziccamoa", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCAMOA},
-  {"ziccif", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCIF},
-  {"zicclsm", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCLSM},
-  {"ziccrse", &gcc_options::x_riscv_zicmo_subext, MASK_ZICCRSE},
 
   {"zve32x",   &gcc_options::x_target_flags, MASK_VECTOR},
   {"zve32f",   &gcc_options::x_target_flags, MASK_VECTOR},
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index b14888e9816..ee824756381 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -237,8 +237,6 @@ Mask(ZIHINTPAUSE) Var(riscv_zi_subext)
 
 Mask(ZICOND)  Var(riscv_zi_subext)
 
-Mask(ZIC64B)  Var(riscv_zi_subext)
-
 Mask(ZICCAMOA)Var(riscv_zi_subext)
 
 Mask(ZICCIF)  Var(riscv_zi_subext)
@@ -390,6 +388,8 @@ Mask(ZICBOM) Var(riscv_zicmo_subext)
 
 Mask(ZICBOP) Var(riscv_zicmo_subext)
 
+Mask(ZIC64B) Var(riscv_zicmo_subext)
+
 TargetVariable
 int riscv_zf_subext


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] This is almost exclusively Jivan's work. His original post:

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:1f741dd358b094b57ca820cadf046b3fe6fbe736

commit 1f741dd358b094b57ca820cadf046b3fe6fbe736
Author: Jivan Hakobyan 
Date:   Tue Apr 30 09:44:02 2024 -0600

This is almost exclusively Jivan's work.  His original post:

> https://www.mail-archive.com/gcc-patches@gcc.gnu.org/msg336483.html

This patch is primarily meant to improve the code we generate for FP 
rounding
such as ceil/floor.  It also addresses some unnecessary sign extensions in 
the
same areas.

RISC-V's FP conversions have a bit of undesirable behavior that makes them
non-suitable as-is for ceil/floor and other related functions. These
deficiencies are addressed in the Zfa extension, but there's no reason not 
to
pick up a nice improvement when we can.

Basically we can still use the basic FP conversions for floor/ceil and 
friends
when we don't care about inexact exceptions by checking for the special 
cases
first, then emitting the conversion when the special cases don't apply.  
That's
still much faster than calling into glibc.

The redundant sign extensions are eliminated using the same trick Jivan 
added
last year, just in a few more places ;-)

This eliminates roughly 10% of the dynamic instruction count for imagick.  
But
more importantly it's about a 17% performance improvement for that workload
within spec.

This has been bootstrapped as well as regression tested in a cross 
environment.
It's also successfully built & run specint/specfp correctly.

Pushing to the trunk and the coordination branch momentarily.

gcc/
* config/riscv/iterators.md (fix_ops, fix_uns): New iterators.
(RINT, rint_pattern, rint_rm): Remove unused iterators.
* config/riscv/riscv-protos.h (get_fp_rounding_coefficient): 
Prototype.
* config/riscv/riscv-v.cc (get_fp_rounding_coefficient): Externalize,
giving it external linkage.
* config/riscv/riscv.md (UNSPEC_LROUND): Remove.
(fix_trunc2): Replace with ...
(_truncsi2): New expander & associated insn.
(_truncsi2_ext): New insn.
(_truncdi2): Likewise.
(l2): Replace with ...
(lrintsi2): New expander and associated insn.
(lrintsi2_ext, lrintdi2): New insns.
(2): Replace with
(lsi2): New expander and associated insn.
(lsi2_sext): New insn.
(ldi2): Likewise.
(2): New expander.

gcc/testsuite/
* gcc.target/riscv/fix.c: New test.
* gcc.target/riscv/round.c: New test.
* gcc.target/riscv/round_32.c: New test.
* gcc.target/riscv/round_64.c: New test.

(cherry picked from commit f652a35877e32d470d649d1aee5d94fa0169a478)

Diff:
---
 gcc/config/riscv/iterators.md |  12 +-
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv-v.cc   |   2 +-
 gcc/config/riscv/riscv.md | 211 +++---
 gcc/testsuite/gcc.target/riscv/fix.c  |  34 +
 gcc/testsuite/gcc.target/riscv/round.c| 144 
 gcc/testsuite/gcc.target/riscv/round_32.c |  22 
 gcc/testsuite/gcc.target/riscv/round_64.c |  23 
 8 files changed, 427 insertions(+), 22 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index a7694137685..75e119e407a 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -196,6 +196,13 @@
 
 (define_code_iterator bitmanip_rotate [rotate rotatert])
 
+;; These code iterators allow the signed and unsigned fix operations to use
+;; the same template.
+(define_code_iterator fix_ops [fix unsigned_fix])
+
+(define_code_attr fix_uns [(fix "fix") (unsigned_fix "fixuns")])
+
+
 ;; ---
 ;; Code Attributes
 ;; ---
@@ -312,11 +319,6 @@
 ;; Int Iterators.
 ;; ---
 
-;; Iterator and attributes for floating-point rounding instructions.
-(define_int_iterator RINT [UNSPEC_LRINT UNSPEC_LROUND])
-(define_int_attr rint_pattern [(UNSPEC_LRINT "rint") (UNSPEC_LROUND "round")])
-(define_int_attr rint_rm [(UNSPEC_LRINT "dyn") (UNSPEC_LROUND "rmm")])
-
 ;; Iterator and attributes for quiet comparisons.
 (define_int_iterator QUIET_COMPARISON [UNSPEC_FLT_QUIET UNSPEC_FLE_QUIET])
 (define_int_attr quiet_pattern [(UNSPEC_FLT_QUIET "lt") (UNSPEC_FLE_QUIET 
"le")])
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5d46a29d8b7..e5aebf3fc3d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -711,6 +711,7 @@ bool gather_scatter_valid_offset_p (machine_mode);
 HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
 bool whole_reg

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Refine the condition for add additional vars in RVV cost model

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:0285854739ff710005770f8e956197f34fe9b967

commit 0285854739ff710005770f8e956197f34fe9b967
Author: demin.han 
Date:   Tue Mar 26 16:52:12 2024 +0800

RISC-V: Refine the condition for add additional vars in RVV cost model

The adjacent_dr_p check is a sufficient but not necessary condition for contiguous 
access.
So unnecessary live-ranges are added and result in a smaller LMUL.

This patch uses MEMORY_ACCESS_TYPE as a condition and constrains segment
load/store.

Tested on RV64 and no regression.

PR target/114506

gcc/ChangeLog:

* config/riscv/riscv-vector-costs.cc 
(non_contiguous_memory_access_p): Rename
(need_additional_vector_vars_p): Rename and refine condition

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/pr114506.c: New test.

Signed-off-by: demin.han 
(cherry picked from commit ca2f531cc5db4f1020d4329976610356033e0246)

Diff:
---
 gcc/config/riscv/riscv-vector-costs.cc | 23 ++
 .../gcc.dg/vect/costmodel/riscv/rvv/pr114506.c | 23 ++
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index d27bb68a7b9..4582b0db425 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -563,14 +563,24 @@ get_store_value (gimple *stmt)
 return gimple_assign_rhs1 (stmt);
 }
 
-/* Return true if it is non-contiguous load/store.  */
+/* Return true if addtional vector vars needed.  */
 static bool
-non_contiguous_memory_access_p (stmt_vec_info stmt_info)
+need_additional_vector_vars_p (stmt_vec_info stmt_info)
 {
   enum stmt_vec_info_type type
 = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
-  return ((type == load_vec_info_type || type == store_vec_info_type)
- && !adjacent_dr_p (STMT_VINFO_DATA_REF (stmt_info)));
+  if (type == load_vec_info_type || type == store_vec_info_type)
+{
+  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+   return true;
+
+  machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
+  int lmul = riscv_get_v_regno_alignment (mode);
+  if (DR_GROUP_SIZE (stmt_info) * lmul > RVV_M8)
+   return true;
+}
+  return false;
 }
 
 /* Return the LMUL of the current analysis.  */
@@ -739,10 +749,7 @@ update_local_live_ranges (
  stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
  enum stmt_vec_info_type type
= STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
- if (non_contiguous_memory_access_p (stmt_info)
- /* LOAD_LANES/STORE_LANES doesn't need a perm indice.  */
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
-  != VMAT_LOAD_STORE_LANES)
+ if (need_additional_vector_vars_p (stmt_info))
{
  /* For non-adjacent load/store STMT, we will potentially
 convert it into:
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c 
b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
new file mode 100644
index 000..a88d24b2d2d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/pr114506.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-mrvv-max-lmul=dynamic -fdump-tree-vect-details" } */
+
+float a[32000], b[32000], c[32000], d[32000];
+float aa[256][256], bb[256][256], cc[256][256];
+
+void
+s2275 ()
+{
+  for (int i = 0; i < 256; i++)
+{
+  for (int j = 0; j < 256; j++)
+   {
+ aa[j][i] = aa[j][i] + bb[j][i] * cc[j][i];
+   }
+  a[i] = b[i] + c[i] * d[i];
+}
+}
+
+/* { dg-final { scan-assembler-times {e32,m8} 1 } } */
+/* { dg-final { scan-assembler-not {e32,m4} } } */
+/* { dg-final { scan-assembler-not {csrr} } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it 
has unexpected spills" "vect" } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] [RISC-V] Fix detection of store pair fusion cases

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:241fbf90eb65e11a028aa72e0b48529fecb3f3c9

commit 241fbf90eb65e11a028aa72e0b48529fecb3f3c9
Author: Jeff Law 
Date:   Wed May 1 11:28:41 2024 -0600

[committed] [RISC-V] Fix detection of store pair fusion cases

We've got the ability to count the number of store pair fusions happening in
the front-end of the pipeline.  When comparing some code from last year vs 
the
current trunk we saw a fairly dramatic drop.

The problem is the store pair fusion detection code was actively harmful 
due to
a minor bug in checking offsets.   So instead of pairing up 8 byte stores 
such
as sp+0 with sp+8, it tried to pair up sp+8 and sp+16.

Given uarch sensitivity I didn't try to pull together a testcase.  But we 
could
certainly see the undesirable behavior in benchmarks as simplistic as 
dhrystone
up through spec2017.

Anyway, bootstrapped a while back.  Also verified through our performance
counters that store pair fusion rates are back up.  Regression tested with
crosses a few minutes ago.

gcc/
* config/riscv/riscv.cc (riscv_macro_fusion_pair_p): Break out
tests for easier debugging in store pair fusion case.  Fix offset
check in same.

(cherry picked from commit fad93e7617ce1aafb006983a71b6edc9ae1eb2d1)

Diff:
---
 gcc/config/riscv/riscv.cc | 57 ++-
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0f62b295b96..24d1ead3902 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8874,26 +8874,43 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn 
*curr)
  extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, 
&offset_prev);
  extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, 
&offset_curr);
 
- /* The two stores must be contained within opposite halves of the same
-16 byte aligned block of memory.  We know that the stack pointer 
and
-the frame pointer have suitable alignment.  So we just need to 
check
-the offsets of the two stores for suitable alignment.
-
-Originally the thought was to check MEM_ALIGN, but that was 
reporting
-incorrect alignments, even for SP/FP accesses, so we gave up on 
that
-approach.  */
- if (base_prev != NULL_RTX
- && base_curr != NULL_RTX
- && REG_P (base_prev)
- && REG_P (base_curr)
- && REGNO (base_prev) == REGNO (base_curr)
- && (REGNO (base_prev) == STACK_POINTER_REGNUM
- || REGNO (base_prev) == HARD_FRAME_POINTER_REGNUM)
- && ((INTVAL (offset_prev) == INTVAL (offset_curr) + 8
-  && (INTVAL (offset_prev) % 16) == 0)
- || ((INTVAL (offset_curr) == INTVAL (offset_prev) + 8)
- && (INTVAL (offset_curr) % 16) == 0)))
-   return true;
+ /* Fail if we did not find both bases.  */
+ if (base_prev == NULL_RTX || base_curr == NULL_RTX)
+   return false;
+
+ /* Fail if either base is not a register.  */
+ if (!REG_P (base_prev) || !REG_P (base_curr))
+   return false;
+
+ /* Fail if the bases are not the same register.  */
+ if (REGNO (base_prev) != REGNO (base_curr))
+   return false;
+
+ /* Originally the thought was to check MEM_ALIGN, but that was
+reporting incorrect alignments, even for SP/FP accesses, so we
+gave up on that approach.  Instead just check for stack/hfp
+which we know are aligned.  */
+ if (REGNO (base_prev) != STACK_POINTER_REGNUM
+ && REGNO (base_prev) != HARD_FRAME_POINTER_REGNUM)
+   return false;
+
+ /* The two stores must be contained within opposite halves of the
+same 16 byte aligned block of memory.  We know that the stack
+pointer and the frame pointer have suitable alignment.  So we
+just need to check the offsets of the two stores for suitable
+alignment.  */
+ /* Get the smaller offset into OFFSET_PREV.  */
+ if (INTVAL (offset_prev) > INTVAL (offset_curr))
+   std::swap (offset_prev, offset_curr);
+
+ /* If the smaller offset (OFFSET_PREV) is not 16 byte aligned,
+then fail.  */
+ if ((INTVAL (offset_prev) % 16) != 0)
+   return false;
+
+ /* The higher offset must be 8 bytes more than the lower
+offset.  */
+ return (INTVAL (offset_prev) + 8 == INTVAL (offset_curr));
}
 }


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] [RISC-V] Don't run new rounding tests on newlib risc-v targets

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d3c8cb96e4e2a04dc3765467fc609a12ca6e6e25

commit d3c8cb96e4e2a04dc3765467fc609a12ca6e6e25
Author: Jeff Law 
Date:   Thu May 2 08:42:32 2024 -0600

[committed] [RISC-V] Don't run new rounding tests on newlib risc-v targets

The new round_32.c and round_64.c tests depend on the optimizers to 
recognize
the conversions feeding the floor/ceil calls and convert them into ceilf,
floorf and the like.

Those transformations only occur when the target indicates the C library has
the appropriate routines (fnclass == function_c99_misc).  While newlib has
these routines, they are not exposed as available to the compiler and thus 
the
transformations the tests depend on do not happen. Naturally the scan-tests 
then
fail.

gcc/testsuite
* gcc.target/riscv/round_32.c: Add require-effective-target glibc.
* gcc.target/riscv/round_64.c: Likewise.

(cherry picked from commit 1e29da0b6508b23a7a6b14a7fb643b917a195003)

Diff:
---
 gcc/testsuite/gcc.target/riscv/round_32.c | 1 +
 gcc/testsuite/gcc.target/riscv/round_64.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/round_32.c 
b/gcc/testsuite/gcc.target/riscv/round_32.c
index f9fea70ad55..88ff77aff2e 100644
--- a/gcc/testsuite/gcc.target/riscv/round_32.c
+++ b/gcc/testsuite/gcc.target/riscv/round_32.c
@@ -1,4 +1,5 @@
 /* { dg-do compile { target { riscv32*-*-* } } } */
+/* { dg-require-effective-target glibc } */
 /* { dg-options "-march=rv32gc -mabi=ilp32d -fno-math-errno 
-funsafe-math-optimizations -fno-inline" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
 
diff --git a/gcc/testsuite/gcc.target/riscv/round_64.c 
b/gcc/testsuite/gcc.target/riscv/round_64.c
index e79690979a5..5e13bccdcd2 100644
--- a/gcc/testsuite/gcc.target/riscv/round_64.c
+++ b/gcc/testsuite/gcc.target/riscv/round_64.c
@@ -1,4 +1,5 @@
 /* { dg-do compile { target { riscv64*-*-* } } } */
+/* { dg-require-effective-target glibc } */
 /* { dg-options "-march=rv64gc -mabi=lp64d -fno-math-errno 
-funsafe-math-optimizations -fno-inline" } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] [RISC-V] Trivial pattern cleanup

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:1659526b1e18375f3a604245a0769dfa4b5548ab

commit 1659526b1e18375f3a604245a0769dfa4b5548ab
Author: Jeff Law 
Date:   Wed May 1 12:43:37 2024 -0600

[committed] [RISC-V] Trivial pattern cleanup

As I was reviewing and cleaning up some internal work, I noticed a 
particular
idiom being used elsewhere in the RISC-V backend.

Specifically the use of explicit subregs when an adjustment to the
match_operand would be sufficient.

Let's take this example from the and-not splitter:

>  (define_split
>[(set (match_operand:X 0 "register_operand")
> (and:X (not:X (lshiftrt:X (match_operand:X 1 "register_operand")
>   (subreg:QI (match_operand:X 2 
"register_operand") 0)))
>(const_int 1)))]

Note the explicit subreg.  We can instead use a match_operand with QImode.
This ever-so-slightly simplifies the machine description.

It also means that if we have a QImode object lying around (say we loaded it
from memory in QImode), we can use it directly rather than first extending 
it
to X, then truncing to QI.  So we end up with simpler RTL and in rare cases
improve the code we generate.

When used in a define_split or define_insn_and_split we need to make 
suitable
adjustments to the split RTL.

Bootstrapped a while back.  Just re-tested with a cross.

gcc/
* config/riscv/bitmanip.md (splitter to use w-form division): Remove
explicit subregs.
(zero extended bitfield extraction): Similarly.
* config/riscv/thead.md (*th_memidx_operand): Similarly.

(cherry picked from commit 76ca6e1f8b1524b82a871ce29cf58c79e5e77e2b)

Diff:
---
 gcc/config/riscv/bitmanip.md | 9 +
 gcc/config/riscv/thead.md| 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index ccda25c01c1..ad3ad758959 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -50,11 +50,11 @@
(sign_extend:DI (div:SI (plus:SI (ashift:SI (subreg:SI 
(match_operand:DI 1 "register_operand") 0)
(match_operand:QI 2 
"imm123_operand"))
 (subreg:SI (match_operand:DI 3 
"register_operand") 0))
-   (subreg:SI (match_operand:DI 4 
"register_operand") 0
+   (match_operand:SI 4 "register_operand"
(clobber (match_operand:DI 5 "register_operand"))]
   "TARGET_64BIT && TARGET_ZBA"
[(set (match_dup 5) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) 
(match_dup 3)))
-(set (match_dup 0) (sign_extend:DI (div:SI (subreg:SI (match_dup 5) 0) 
(subreg:SI (match_dup 4) 0])
+(set (match_dup 0) (sign_extend:DI (div:SI (subreg:SI (match_dup 5) 0) 
(match_dup 4])
 
 ; Zba does not provide W-forms of sh[123]add(.uw)?, which leads to an
 ; interesting irregularity: we can generate a signed 32-bit result
@@ -722,13 +722,14 @@
 (define_split
   [(set (match_operand:X 0 "register_operand")
(and:X (not:X (lshiftrt:X (match_operand:X 1 "register_operand")
- (subreg:QI (match_operand:X 2 
"register_operand") 0)))
+ (match_operand:QI 2 "register_operand")))
   (const_int 1)))]
   "TARGET_ZBS"
   [(set (match_dup 0) (zero_extract:X (match_dup 1)
  (const_int 1)
  (match_dup 2)))
-   (set (match_dup 0) (xor:X (match_dup 0) (const_int 1)))])
+   (set (match_dup 0) (xor:X (match_dup 0) (const_int 1)))]
+  "operands[2] = gen_lowpart (mode, operands[2]);")
 
 ;; We can create a polarity-reversed mask (i.e. bit N -> { set = 0, clear = -1 
})
 ;; using a bext(i) followed by an addi instruction.
diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
index 5c7d4beb1b6..a47fe6f28b8 100644
--- a/gcc/config/riscv/thead.md
+++ b/gcc/config/riscv/thead.md
@@ -466,12 +466,12 @@
 (define_insn_and_split "*th_memidx_operand"
   [(set (match_operand:DI 0 "register_operand" "=r")
  (ashift:DI
-   (zero_extend:DI (subreg:SI (match_operand:DI 1 "register_operand" "r") 
0))
+   (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand 2 "const_int_operand" "n")))]
   "TARGET_64BIT && TARGET_XTHEADMEMIDX && lra_in_progress"
   "#"
   ""
-  [(set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 1) 0)))
+  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))
(set (match_dup 0) (ashift:DI (match_dup 0) (match_dup 2)))]
   ""
   [(set_attr "type" "bitmanip")])


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RFA][RISC-V] Improve constant synthesis for constants with 2 bits set

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:43130081438c423616549bf3a6b0649b84baddfc

commit 43130081438c423616549bf3a6b0649b84baddfc
Author: Jeff Law 
Date:   Thu May 2 14:06:22 2024 -0600

[RFA][RISC-V] Improve constant synthesis for constants with 2 bits set

In doing some preparation work for using zbkb's pack instructions for 
constant
synthesis I figured it would be wise to get a sense of how well our constant
synthesis is actually working and address any clear issues.

So the first glaring inefficiency is in our handling of constants with a 
small
number of bits set.  Let's start with just two bits set.   There are 2016
distinct constants in that space (rv64).  With Zbs enabled the absolute 
worst
we should ever do is two instructions (bseti+bseti).  Yet we have 503 cases
where we're generating 3+ instructions when there's just two bits set in the
constant.  A constant like 0x80001000 generates 4 instructions!

This patch adds bseti (and indirectly binvi if we needed it) as a first 
class
citizen for constant synthesis.  There's two components to this change.

First, we can't generate an IOR with a constant like (1 << 45) as an 
operand.
The IOR/XOR define_insn is in riscv.md.  The constant argument for those
patterns must match an arith_operand which means its not really usable for
generating bseti directly in the cases we care about (at least one of the 
bits
will be in the 32..63 range and thus won't match arith_operand).

We have a few things we could do.  One would be to extend the existing 
pattern
to incorporate bseti cases.  But I suspect folks like the separation of the
base architecture (riscv.md) from the Zb* extensions (bitmanip.md).  We 
could
also try to generate the RTL for bseti
directly, bypassing gen_fmt_ee (which forces undesirable constants into 
registers based on the predicate of the appropriate define_insn). Neither of 
these seemed particularly appealing to me.

So what I've done instead is to make ior/xor a define_expand and have the
expander allow a wider set of constant operands when Zbs is enabled.  That
allows us to keep the bulk of Zb* support inside bitmanip.md and continue to
use gen_fmt_ee in the constant synthesis paths.

Note the code generation in this case is designed to first set as many bits 
as
we can with lui, then with addi since those can both set multiple bits at a
time.  If there are any residual bits left to set we can emit bseti
instructions up to the current cost ceiling.

This results in fixing all of the 503 2-bit set cases where we emitted too 
many
instructions.  It also significantly helps other scenarios with more bits 
set.

The testcase I'm including verifies the number of instructions we generate 
for
the full set of 2016 possible cases.  Obviously this won't be possible as we
increase the number of bits (there are something like 48k cases with just 3
bits set).

gcc/

* config/riscv/predicates.md (arith_or_zbs_operand): New predicate.
* config/riscv/riscv.cc (riscv_build_integer_one): Use bseti to set
single bits when profitable.
* config/riscv/riscv.md (*3): Renamed with '*' prefix.
(3): New expander for IOR/XOR.

gcc/testsuite
* gcc.target/riscv/synthesis-1.c: New test.

Diff:
---
 gcc/config/riscv/predicates.md   |8 +
 gcc/config/riscv/riscv.cc|   65 +-
 gcc/config/riscv/riscv.md|   17 +-
 gcc/testsuite/gcc.target/riscv/synthesis-1.c | 2034 ++
 4 files changed, 2110 insertions(+), 14 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 539e0f7379b..e7d797d4dbf 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -386,6 +386,14 @@
   (and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (UINTVAL (op))")))
 
+;; Register, small constant or single bit constant for use in
+;; bseti/binvi.
+(define_predicate "arith_or_zbs_operand"
+  (ior (match_operand 0 "const_arith_operand")
+   (match_operand 0 "register_operand")
+   (and (match_test "TARGET_ZBS")
+   (match_operand 0 "single_bit_mask_operand"
+
 (define_predicate "not_single_bit_mask_operand"
   (and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))")))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 24d1ead3902..8ed9df8126a 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -725,6 +725,9 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
   HOST_WIDE_INT low_part = CONST_LOW_PART (value);
   int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
   struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+  int upper_trailing_ones = ctz

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: miscll comment fixes [NFC]

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4751ac4274c4a8b21f35325aae6794c1626f16bc

commit 4751ac4274c4a8b21f35325aae6794c1626f16bc
Author: Vineet Gupta 
Date:   Tue Mar 1 03:45:19 2022 -0800

RISC-V: miscll comment fixes [NFC]

gcc/ChangeLog:
* config/riscv/riscv.cc: Comment updates.
* config/riscv/riscv.h: Ditto.

Signed-off-by: Vineet Gupta 
(cherry picked from commit 467ca4a195e26dba77e7f62cc1a3d45a4e541c72)

Diff:
---
 gcc/config/riscv/riscv.cc | 6 --
 gcc/config/riscv/riscv.h  | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 8ed9df8126a..44945d47fd6 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1258,7 +1258,9 @@ riscv_legitimate_constant_p (machine_mode mode 
ATTRIBUTE_UNUSED, rtx x)
   return riscv_const_insns (x) > 0;
 }
 
-/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM.
+   Return true if X cannot (or should not) be spilled to the
+   constant pool.  */
 
 static bool
 riscv_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
@@ -8624,7 +8626,7 @@ riscv_modes_tieable_p (machine_mode mode1, machine_mode 
mode2)
   && GET_MODE_CLASS (mode2) == MODE_FLOAT));
 }
 
-/* Implement CLASS_MAX_NREGS.  */
+/* Implement TARGET_CLASS_MAX_NREGS.  */
 
 static unsigned char
 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 7797e67317a..58d0b09bf7d 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -315,7 +315,7 @@ ASM_MISA_SPEC
- FRAME_POINTER_REGNUM
- 1 vl register
- 1 vtype register
-   - 30 unused registers for future expansion
+   - 28 unused registers for future expansion
- 32 vector registers  */
 
 #define FIRST_PSEUDO_REGISTER 128


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed][RISC-V] Fix nearbyint failure on rv32 and formatting nits

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:733526b306158f6e68c8ef9ad7bf6adb04f4e894

commit 733526b306158f6e68c8ef9ad7bf6adb04f4e894
Author: Jeff Law 
Date:   Thu May 2 17:13:12 2024 -0600

[committed][RISC-V] Fix nearbyint failure on rv32 and formatting nits

The CI system tripped an execution failure for rv32 with the ceil/round 
patch.

The fundamental problem is the FP->INT step in these sequences requires the
input size to match the output size.  The output size was based on 
rv32/rv64.
Meaning that we'd try to do DF->SI->DF.

That doesn't preserve the semantics we want in at least two ways.

The net is we can't use this trick for DF values on rv32.  While inside the
code I realized we had a similar problem for HF modes.  HF modes we can 
support
only for Zfa.  So I fixed that proactively.

The CI system also pointed out various formatting nits.  I think this fixes 
all
but one overly long line.

Note I could have factored the TARGET_ZFA test.  But I think as-written it's
clearer what the desired cases to transform are.

gcc/
* config/riscv/riscv.md (2): Adjust
condition to match what can be properly implemented.  Fix various
formatting issues.
(lsi2_sext): Fix formatting

(cherry picked from commit 8367c996e55b2c54aeee25e446357a1015a1d11d)

Diff:
---
 gcc/config/riscv/riscv.md | 65 +--
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b9b0acf92c7..d4676507b45 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2077,8 +2077,8 @@
 (define_insn "lsi2_sext"
   [(set (match_operand:DI   0 "register_operand" "=r")
 (sign_extend:DI (unspec:SI
-[(match_operand:ANYF 1 "register_operand" " f")]
-  ROUND)))]
+[(match_operand:ANYF 1 "register_operand" " f")]
+ ROUND)))]
   "TARGET_64BIT && (TARGET_HARD_FLOAT || TARGET_ZFINX)"
   "fcvt.w. %0,%1,"
   [(set_attr "type" "fcvt_f2i")
@@ -2094,13 +2094,25 @@
   [(set_attr "type" "fcvt_f2i")
(set_attr "mode" "")])
 
+;; There are a couple non-obvious restrictions to be aware of.
+;;
+;; We'll do a FP-INT conversion in the sequence.  But we don't
+;; have a .l (64bit) variant of those instructions for rv32.
+;; To preserve proper semantics we must reject DFmode inputs
+;; for rv32 unless Zfa is enabled.
+;;
+;; The ANYF iterator allows HFmode.  We don't have all the
+;; necessary patterns defined for HFmode.  So restrict HFmode
+;; to TARGET_ZFA.
 (define_expand "2"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
-(unspec:ANYF
-[(match_operand:ANYF 1 "register_operand" " f")]
-ROUND))]
-  "TARGET_HARD_FLOAT && (TARGET_ZFA
- || flag_fp_int_builtin_inexact || 
!flag_trapping_math)"
+   (unspec:ANYF
+   [(match_operand:ANYF 1 "register_operand" " f")]
+   ROUND))]
+  "(TARGET_HARD_FLOAT
+&& (TARGET_ZFA || flag_fp_int_builtin_inexact || !flag_trapping_math)
+&& (TARGET_ZFA || TARGET_64BIT || mode != DFmode)
+&& (TARGET_ZFA || mode != HFmode))"
 {
   if (TARGET_ZFA)
 emit_insn (gen__zfa2 (operands[0],
@@ -2116,7 +2128,7 @@
 
   riscv_emit_move (tmp_reg, operands[1]);
   riscv_emit_move (coeff_reg,
-   riscv_vector::get_fp_rounding_coefficient 
(mode));
+  riscv_vector::get_fp_rounding_coefficient 
(mode));
   emit_insn (gen_abs2 (abs_reg, operands[1]));
 
   riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg);
@@ -2126,29 +2138,20 @@
 
   emit_label (label);
   switch (mode)
-{
-case SFmode:
-  reg = gen_reg_rtx (SImode);
-  emit_insn (gen_lsfsi2 (reg, operands[1]));
-  emit_insn (gen_floatsisf2 (abs_reg, reg));
-  break;
-case DFmode:
-  if (TARGET_64BIT)
-{
-  reg = gen_reg_rtx (DImode);
-  emit_insn (gen_ldfdi2 (reg, operands[1]));
-  emit_insn (gen_floatdidf2 (abs_reg, reg));
-}
-  else
-{
-  reg = gen_reg_rtx (SImode);
-  emit_insn (gen_ldfsi2 (reg, operands[1]));
-  emit_insn (gen_floatsidf2 (abs_reg, reg));
-}
-  break;
-default:
-  gcc_unreachable ();
-}
+   {
+   case SFmode:
+ reg = gen_reg_rtx (SImode);
+ emit_insn (gen_lsfsi2 (reg, operands[1]));
+ emit_insn (gen_floatsisf2 (abs_reg, reg));
+ break;
+   case DFmode:
+ reg = gen_reg_rtx (DImode);
+ emit_insn (gen_ldfdi2 (reg, operands[1]));
+ emit_insn (gen_floatdidf2 (abs_reg, reg));
+ break;
+   default:
+ gcc_unreachable ();
+   }
 
   emit_insn (gen_copysign3 (tm

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] So another constant synthesis improvement.

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:6c0a8dff816db539a523f9feab9d19cb2bf13f94

commit 6c0a8dff816db539a523f9feab9d19cb2bf13f94
Author: Jeff Law 
Date:   Mon May 6 15:27:43 2024 -0600

So another constant synthesis improvement.

In this patch we're looking at cases where we'd like to be able to use
lui+slli, but can't because of the sign extending nature of lui on
TARGET_64BIT.  For example: 0x800110020UL.  The trunk currently 
generates 4
instructions for that constant, when it can be done with 3 
(lui+slli.uw+addi).

When Zba is enabled, we can use lui+slli.uw as the slli.uw masks off the 
bits
32..63 before shifting, giving us the precise semantics we want.

I strongly suspect we'll want to do the same for a set of constants with
lui+add.uw, lui+shNadd.uw, so you'll see the beginnings of generalizing 
support
for lui followed by a "uw" instruction.

The new test just tests the set of cases that showed up while exploring a
particular space of the constant synthesis problem.  It's not meant to be
exhaustive (failure to use shadd when profitable).

gcc/

* config/riscv/riscv.cc (riscv_integer_op): Add field tracking if we
want to use a "uw" instruction variant.
(riscv_build_integer_1): Initialize the new field in various places.
Use lui+slli.uw for some constants.
(riscv_move_integer): Handle slli.uw.

gcc/testsuite/

* gcc.target/riscv/synthesis-2.c: New test.

(cherry picked from commit 975bb17e2f6bc90d366237ab1c5dc9b8df2dee69)

Diff:
---
 gcc/config/riscv/riscv.cc|   60 +-
 gcc/testsuite/gcc.target/riscv/synthesis-2.c | 1481 ++
 2 files changed, 1539 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 44945d47fd6..6f1c67bf3f7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -249,6 +249,7 @@ struct riscv_arg_info {
where A is an accumulator, each CODE[i] is a binary rtl operation
and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
 struct riscv_integer_op {
+  bool use_uw;
   enum rtx_code code;
   unsigned HOST_WIDE_INT value;
 };
@@ -734,6 +735,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
   /* Simply ADDI or LUI.  */
   codes[0].code = UNKNOWN;
   codes[0].value = value;
+  codes[0].use_uw = false;
   return 1;
 }
   if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
@@ -741,6 +743,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
   /* Simply BSETI.  */
   codes[0].code = UNKNOWN;
   codes[0].value = value;
+  codes[0].use_uw = false;
 
   /* RISC-V sign-extends all 32bit values that live in a 32bit
 register.  To avoid paradoxes, we thus need to use the
@@ -769,6 +772,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
{
  alt_codes[alt_cost-1].code = PLUS;
  alt_codes[alt_cost-1].value = low_part;
+ alt_codes[alt_cost-1].use_uw = false;
  memcpy (codes, alt_codes, sizeof (alt_codes));
  cost = alt_cost;
}
@@ -782,6 +786,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
{
  alt_codes[alt_cost-1].code = XOR;
  alt_codes[alt_cost-1].value = low_part;
+ alt_codes[alt_cost-1].use_uw = false;
  memcpy (codes, alt_codes, sizeof (alt_codes));
  cost = alt_cost;
}
@@ -792,17 +797,37 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
 {
   int shift = ctz_hwi (value);
   unsigned HOST_WIDE_INT x = value;
+  bool use_uw = false;
   x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);
 
   /* Don't eliminate the lower 12 bits if LUI might apply.  */
-  if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << 
IMM_BITS))
+  if (shift > IMM_BITS
+ && !SMALL_OPERAND (x)
+ && (LUI_OPERAND (x << IMM_BITS)
+ || (TARGET_64BIT
+ && TARGET_ZBA
+ && LUI_OPERAND ((x << IMM_BITS)
+ & ~HOST_WIDE_INT_C (0x8000)
shift -= IMM_BITS, x <<= IMM_BITS;
 
+  /* Adjust X if it isn't a LUI operand in isolation, but we can use
+a subsequent "uw" instruction form to mask off the undesirable
+bits.  */
+  if (!LUI_OPERAND (x)
+ && TARGET_64BIT
+ && TARGET_ZBA
+ && LUI_OPERAND (x & ~HOST_WIDE_INT_C (0x8000UL)))
+   {
+ x = sext_hwi (x, 32);
+ use_uw = true;
+   }
+
   alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
   if (alt_cost < cost)
{
  alt_codes[alt_cost-1].code = ASHIFT;
  alt_codes[alt_cost-1].value = shift;
+ alt_codes[alt_cost-1].

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Document -mcmodel=large

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:fdaac11cd9b4680ef895754de9ef09493eb2182f

commit fdaac11cd9b4680ef895754de9ef09493eb2182f
Author: Palmer Dabbelt 
Date:   Mon May 6 15:34:26 2024 -0600

RISC-V: Document -mcmodel=large

  This slipped through the cracks.  Probably also NEWS-worthy.

gcc/ChangeLog:

* doc/invoke.texi (RISC-V): Add -mcmodel=large.

(cherry picked from commit 6ffea3e37380860507cce08af42a997fbdb5d754)

Diff:
---
 gcc/doc/invoke.texi | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9456ced468a..dc4c5a3189d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1252,7 +1252,7 @@ See RS/6000 and PowerPC Options.
 -msave-restore  -mno-save-restore
 -mshorten-memrefs  -mno-shorten-memrefs
 -mstrict-align  -mno-strict-align
--mcmodel=medlow  -mcmodel=medany
+-mcmodel=medlow  -mcmodel=medany -mcmodel=large
 -mexplicit-relocs  -mno-explicit-relocs
 -mrelax  -mno-relax
 -mriscv-attribute  -mno-riscv-attribute
@@ -31049,6 +31049,11 @@ The code generated by the medium-any code model is 
position-independent, but is
 not guaranteed to function correctly when linked into position-independent
 executables or libraries.
 
+@opindex -mcmodel=large
+@item -mcmodel=large
+Generate code for a large code model, which has no restrictions on size or
+placement of symbols.
+
 @item -mexplicit-relocs
 @itemx -mno-exlicit-relocs
 Use or do not use assembler relocation operators when dealing with symbolic


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add testcase for PR114749.

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:03d11c2142d2b375161fa50d367c2bb9bfe742bd

commit 03d11c2142d2b375161fa50d367c2bb9bfe742bd
Author: Robin Dapp 
Date:   Mon May 6 15:51:37 2024 -0600

RISC-V: Add testcase for PR114749.

this adds a test case for PR114749.
Going to commit as obvious unless somebody complains.

Regards
 Robin

gcc/testsuite/ChangeLog:

PR tree-optimization/114749

* gcc.target/riscv/rvv/autovec/pr114749.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114749.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114749.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114749.c
new file mode 100644
index 000..6733b0481a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr114749.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl128b -mabi=lp64d -fwhole-program -O3 
-mrvv-vector-bits=zvl" } */
+
+extern int a[];
+extern char b[];
+int c = 24;
+_Bool d[24][24][24];
+_Bool (*e)[24][24] = d;
+int main() {
+  for (short f = 0; f < 24; f += 3)
+for (unsigned g = 0; g < (char)c; g += 2) {
+  a[f] = 0;
+  b[g] |= ({ e[f][f][f]; });
+}
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] Add support for _Bfloat16

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:1c61e73ff62e1fc84de2bda1c35215b3d058699c

commit 1c61e73ff62e1fc84de2bda1c35215b3d058699c
Author: Xiao Zeng 
Date:   Mon May 6 15:39:12 2024 -0600

[RISC-V] Add support for _Bfloat16

1 At point ,
  BF16 has already been completed "post public review".

2 LLVM has also added support for RISCV BF16 in
   and
  .

3 According to the discussion 
,
  this use __bf16 and use DF16b in riscv_mangle_type like x86.

Below test are passed for this patch
* The riscv fully regression test.

gcc/ChangeLog:

* config/riscv/iterators.md: New mode iterator HFBF.
* config/riscv/riscv-builtins.cc (riscv_init_builtin_types):
Initialize data type _Bfloat16.
* config/riscv/riscv-modes.def (FLOAT_MODE): New.
(ADJUST_FLOAT_FORMAT): New.
* config/riscv/riscv.cc (riscv_mangle_type): Support for BFmode.
(riscv_scalar_mode_supported_p): Ditto.
(riscv_libgcc_floating_mode_supported_p): Ditto.
(riscv_init_libfuncs): Set the conversion method for BFmode and
HFmode.
(riscv_block_arith_comp_libfuncs_for_mode): Set the arithmetic
and comparison libfuncs for the mode.
* config/riscv/riscv.md (mode" ): Add BF.
(movhf): Support for BFmode.
(mov): Ditto.
(*movhf_softfloat): Ditto.
(*mov_softfloat): Ditto.

libgcc/ChangeLog:

* config/riscv/sfp-machine.h (_FP_NANFRAC_B): New.
(_FP_NANSIGN_B): Ditto.
* config/riscv/t-softfp32: Add support for BF16 libfuncs.
* config/riscv/t-softfp64: Ditto.
* soft-fp/floatsibf.c: For si -> bf16.
* soft-fp/floatunsibf.c: For unsi -> bf16.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/bf16_arithmetic.c: New test.
* gcc.target/riscv/bf16_call.c: New test.
* gcc.target/riscv/bf16_comparison.c: New test.
* gcc.target/riscv/bf16_float_libcall_convert.c: New test.
* gcc.target/riscv/bf16_integer_libcall_convert.c: New test.

Co-authored-by: Jin Ma 
(cherry picked from commit 8c7cee80eb50792e57d514be1418c453ddd1073e)

Diff:
---
 gcc/config/riscv/iterators.md  |  2 +
 gcc/config/riscv/riscv-builtins.cc | 16 +
 gcc/config/riscv/riscv-modes.def   |  3 +
 gcc/config/riscv/riscv.cc  | 64 +++--
 gcc/config/riscv/riscv.md  | 24 +++
 gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c   | 42 +++
 gcc/testsuite/gcc.target/riscv/bf16_call.c | 12 
 gcc/testsuite/gcc.target/riscv/bf16_comparison.c   | 36 ++
 .../gcc.target/riscv/bf16_float_libcall_convert.c  | 57 +++
 .../riscv/bf16_integer_libcall_convert.c   | 81 ++
 libgcc/config/riscv/sfp-machine.h  |  3 +
 libgcc/config/riscv/t-softfp32 | 10 ++-
 libgcc/config/riscv/t-softfp64 |  3 +-
 libgcc/soft-fp/floatsibf.c | 45 
 libgcc/soft-fp/floatunsibf.c   | 45 
 15 files changed, 407 insertions(+), 36 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 75e119e407a..32e1b140305 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -75,6 +75,8 @@
 ;; Iterator for floating-point modes that can be loaded into X registers.
 (define_mode_iterator SOFTF [SF (DF "TARGET_64BIT") (HF "TARGET_ZFHMIN")])
 
+;; Iterator for floating-point modes of BF16
+(define_mode_iterator HFBF [HF BF])
 
 ;; ---
 ;; Mode attributes
diff --git a/gcc/config/riscv/riscv-builtins.cc 
b/gcc/config/riscv/riscv-builtins.cc
index d457e306dd1..4c08834288a 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -230,6 +230,7 @@ static GTY(()) int riscv_builtin_decl_index[NUM_INSN_CODES];
   riscv_builtin_decls[riscv_builtin_decl_index[(CODE)]]
 
 tree riscv_float16_type_node = NULL_TREE;
+tree riscv_bfloat16_type_node = NULL_TREE;
 
 /* Return the function type associated with function prototype TYPE.  */
 
@@ -273,6 +274,21 @@ riscv_init_builtin_types (void)
   if (!maybe_get_identifier ("_Float16"))
 lang_hooks.types.register_builtin_type (riscv_float16_type_node,
"_Float16");
+
+  /* Provide the _Bfloat16 type and bfloat16_type_node if needed.  */
+  if (!bfloat16_type_node)
+{
+  riscv_bfloat16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (riscv_bfloa

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 1/1] RISC-V: Add Zfbfmin extension to the -march= option

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a38ee93f25444aea92a24e5cd9ebb948b92dcdc3

commit a38ee93f25444aea92a24e5cd9ebb948b92dcdc3
Author: Xiao Zeng 
Date:   Mon May 6 15:57:37 2024 -0600

[PATCH 1/1] RISC-V: Add Zfbfmin extension to the -march= option

This patch would like to add new sub extension (aka Zfbfmin) to the
-march= option. It introduces a new data type BF16.

1 The Zfbfmin extension depend on 'F', and the FLH, FSH, FMV.X.H, and
FMV.H.X instructions as defined in the Zfh extension.

2 The Zfhmin extension includes the following instructions from the
Zfh extension: FLH, FSH, FMV.X.H, FMV.H.X, FCVT.S.H, and FCVT.H.S.

3 Zfhmin extension depend on 'F'.

4 Simply put, just make Zfbfmin dependent on Zfhmin.

Perhaps in the future, we could propose making the FLH, FSH, FMV.X.H, and
FMV.H.X instructions an independent extension to achieve precise dependency
relationships for the Zfbfmin.

You can locate more information about Zfbfmin from below spec doc.




gcc/
* common/config/riscv/riscv-common.cc (riscv_implied_info): zfbfmin
implies zfhmin.
(riscv_ext_version_table, riscv_ext_flag_table): Add zfbfmin.
* config/riscv/riscv.opt (ZFBFMIN): Add option.

gcc/testsuite/
* gcc.target/riscv/arch-35.c: New test.
* gcc.target/riscv/arch-36.c: New test.
* gcc.target/riscv/predef-34.c: New test.
* gcc.target/riscv/predef-35.c: New test.

(cherry picked from commit 35224ead63732a3550ba4b1332c06e9dc7999c31)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc|  3 ++
 gcc/config/riscv/riscv.opt |  2 ++
 gcc/testsuite/gcc.target/riscv/arch-35.c   |  5 
 gcc/testsuite/gcc.target/riscv/arch-36.c   |  5 
 gcc/testsuite/gcc.target/riscv/predef-34.c | 47 ++
 gcc/testsuite/gcc.target/riscv/predef-35.c | 47 ++
 6 files changed, 109 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 8cc0e727737..fb76017ffbc 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -155,6 +155,7 @@ static const riscv_implied_info_t riscv_implied_info[] =
   {"zvksed", "zve32x"},
   {"zvksh",  "zve32x"},
 
+  {"zfbfmin", "zfhmin"},
   {"zfh", "zfhmin"},
   {"zfhmin", "f"},
 
@@ -331,6 +332,7 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zvl32768b", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvl65536b", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zfbfmin",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zfh",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zfhmin",ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfbfmin",  ISA_SPEC_CLASS_NONE, 1, 0},
@@ -1698,6 +1700,7 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   {"zvl32768b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL32768B},
   {"zvl65536b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL65536B},
 
+  {"zfbfmin",   &gcc_options::x_riscv_zf_subext, MASK_ZFBFMIN},
   {"zfhmin",&gcc_options::x_riscv_zf_subext, MASK_ZFHMIN},
   {"zfh",   &gcc_options::x_riscv_zf_subext, MASK_ZFH},
   {"zvfbfmin",  &gcc_options::x_riscv_zf_subext, MASK_ZVFBFMIN},
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index ee824756381..7cca1c4aab2 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -393,6 +393,8 @@ Mask(ZIC64B) Var(riscv_zicmo_subext)
 TargetVariable
 int riscv_zf_subext
 
+Mask(ZFBFMIN)  Var(riscv_zf_subext)
+
 Mask(ZFHMIN)  Var(riscv_zf_subext)
 
 Mask(ZFH) Var(riscv_zf_subext)
diff --git a/gcc/testsuite/gcc.target/riscv/arch-35.c 
b/gcc/testsuite/gcc.target/riscv/arch-35.c
new file mode 100644
index 000..6c783769666
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-35.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32i_zfbfmin -mabi=ilp32f" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-36.c 
b/gcc/testsuite/gcc.target/riscv/arch-36.c
new file mode 100644
index 000..cbdccf12807
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-36.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64i_zfbfmin -mabi=lp64f" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/predef-34.c 
b/gcc/testsuite/gcc.target/riscv/predef-34.c
new file mode 100644
index 000..0a993271f7f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-34.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32i_zfbfmin -mabi=ilp32f -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 32
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i)
+#error "__riscv_i"
+#e

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] [PATCH v2] Enable inlining str* by default

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:411f7cc1778500ed2c5cfa26e3217b4b61a5

commit 411f7cc1778500ed2c5cfa26e3217b4b61a5
Author: Jeff Law 
Date:   Tue May 7 11:43:09 2024 -0600

[RISC-V] [PATCH v2] Enable inlining str* by default

So with Chrstoph's patches from late 2022 we've had the ability to inline
strlen, and str[n]cmp (scalar).  However, we never actually turned this
capability on by default!

This patch flips the those default to allow inlinining by default.  It also
fixes one bug exposed by our internal testing when NBYTES is zero for 
strncmp.
I don't think that case happens enough to try and optimize it, we just 
disable
inline expansion for that instance.

This has been bootstrapped and regression tested on rv64gc at various times 
as
well as cross tested on rv64gc more times than I can probably count (we've 
have
this patch internally for a while).  More importantly, I just successfully
tested it on rv64gc and rv32gcv elf configurations with the trunk

gcc/

* config/riscv/riscv-string.cc (riscv_expand_strcmp): Do not inline
strncmp with zero size.
(emit_strcmp_scalar_compare_subword): Adjust rotation for rv32 vs 
rv64.
* config/riscv/riscv.opt (riscv_inline_strcmp): Enable by default.
(riscv_inline_strncmp, riscv_inline_strlen): Likewise.

gcc/testsuite

* gcc.target/riscv/zbb-strlen-disabled-2.c: Turn off inlining.

(cherry picked from commit 1139f38e798181572121657e5b267a9698edb62f)

Diff:
---
 gcc/config/riscv/riscv-string.cc   | 9 -
 gcc/config/riscv/riscv.opt | 6 +++---
 gcc/testsuite/gcc.target/riscv/zbb-strlen-disabled-2.c | 4 ++--
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index b09b51d7526..41cb061c746 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -153,7 +153,7 @@ emit_strcmp_scalar_compare_subword (rtx data1, rtx data2, 
rtx orc1,
   rtx imask = gen_rtx_CONST_INT (Xmode, im);
   rtx m_reg = gen_reg_rtx (Xmode);
   emit_insn (gen_rtx_SET (m_reg, imask));
-  do_rotr3 (m_reg, m_reg, GEN_INT (64 - cmp_bytes * BITS_PER_UNIT));
+  do_rotr3 (m_reg, m_reg, GEN_INT (BITS_PER_WORD - cmp_bytes * BITS_PER_UNIT));
   do_and3 (data1, m_reg, data1);
   do_and3 (data2, m_reg, data2);
   if (TARGET_ZBB)
@@ -497,6 +497,13 @@ riscv_expand_strcmp (rtx result, rtx src1, rtx src2,
return false;
   nbytes = UINTVAL (bytes_rtx);
 
+  /* If NBYTES is zero the result of strncmp will always be zero,
+but that would require special casing in the caller.  So for
+now just don't do an inline expansion.  This probably rarely
+happens in practice, but it is tested by the testsuite.  */
+  if (nbytes == 0)
+   return false;
+
   /* We don't emit parts of a strncmp() call.  */
   if (nbytes > compare_max)
return false;
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7cca1c4aab2..1252834aec5 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -517,15 +517,15 @@ Target Var(TARGET_INLINE_SUBWORD_ATOMIC) Init(1)
 Always inline subword atomic operations.
 
 minline-strcmp
-Target Var(riscv_inline_strcmp) Init(0)
+Target Var(riscv_inline_strcmp) Init(1)
 Inline strcmp calls if possible.
 
 minline-strncmp
-Target Var(riscv_inline_strncmp) Init(0)
+Target Var(riscv_inline_strncmp) Init(1)
 Inline strncmp calls if possible.
 
 minline-strlen
-Target Var(riscv_inline_strlen) Init(0)
+Target Var(riscv_inline_strlen) Init(1)
 Inline strlen calls if possible.
 
 -param=riscv-strcmp-inline-limit=
diff --git a/gcc/testsuite/gcc.target/riscv/zbb-strlen-disabled-2.c 
b/gcc/testsuite/gcc.target/riscv/zbb-strlen-disabled-2.c
index a481068aa0c..1295aeb0086 100644
--- a/gcc/testsuite/gcc.target/riscv/zbb-strlen-disabled-2.c
+++ b/gcc/testsuite/gcc.target/riscv/zbb-strlen-disabled-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv32gc_zbb" { target { rv32 } } } */
-/* { dg-options "-march=rv64gc_zbb" { target { rv64 } } } */
+/* { dg-options "-mno-inline-strlen -march=rv32gc_zbb" { target { rv32 } } } */
+/* { dg-options "-mno-inline-strlen -march=rv64gc_zbb" { target { rv64 } } } */
 /* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
 
 typedef long unsigned int size_t;


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] [RISC-V] Allow uarchs to set TARGET_OVERLAP_OP_BY_PIECES_P

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:aeadd95aeb5f6e6e0e4f1a98c93a11754f536472

commit aeadd95aeb5f6e6e0e4f1a98c93a11754f536472
Author: Christoph Müllner 
Date:   Tue May 7 15:16:21 2024 -0600

[committed] [RISC-V] Allow uarchs to set TARGET_OVERLAP_OP_BY_PIECES_P

This is almost exclusively work from the VRULL team.

As we've discussed in the Tuesday meeting in the past, we'd like to have a 
knob
in the tuning structure to indicate that overlapped stores during
move_by_pieces expansion of memcpy & friends are acceptable.

This patch adds the that capability in our tuning structure.  It's off for 
all
the uarchs upstream, but we have been using it inside Ventana for our uarch
with success.  So technically it's NFC upstream, but puts in the 
infrastructure
multiple organizations likely need.

gcc/

* config/riscv/riscv.cc (struct riscv_tune_param): Add new
"overlap_op_by_pieces" field.
(rocket_tune_info, sifive_7_tune_info): Set it.
(sifive_p400_tune_info, sifive_p600_tune_info): Likewise.
(thead_c906_tune_info, xiangshan_nanhu_tune_info): Likewise.
(generic_ooo_tune_info, optimize_size_tune_info): Likewise.
(riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): define.

gcc/testsuite/

* gcc.target/riscv/memcpy-nonoverlapping.c: New test.
* gcc.target/riscv/memset-nonoverlapping.c: New test.

(cherry picked from commit 300393484dbfa9fd3891174ea47aa3fb41915abc)

Diff:
---
 gcc/config/riscv/riscv.cc  | 18 
 .../gcc.target/riscv/memcpy-nonoverlapping.c   | 54 ++
 .../gcc.target/riscv/memset-nonoverlapping.c   | 45 ++
 3 files changed, 117 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 545e68566dc..a9b57d41184 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -288,6 +288,7 @@ struct riscv_tune_param
   unsigned short fmv_cost;
   bool slow_unaligned_access;
   bool use_divmod_expansion;
+  bool overlap_op_by_pieces;
   unsigned int fusible_ops;
   const struct cpu_vector_cost *vec_costs;
 };
@@ -427,6 +428,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   8,   /* fmv_cost */
   true,/* 
slow_unaligned_access */
   false,   /* use_divmod_expansion */
+  false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,   /* fusible_ops */
   NULL,/* vector cost */
 };
@@ -444,6 +446,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   8,   /* fmv_cost */
   true,/* 
slow_unaligned_access */
   false,   /* use_divmod_expansion */
+  false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,   /* fusible_ops */
   NULL,/* vector cost */
 };
@@ -461,6 +464,7 @@ static const struct riscv_tune_param sifive_p400_tune_info 
= {
   4,   /* fmv_cost */
   true,/* 
slow_unaligned_access */
   false,   /* use_divmod_expansion */
+  false,   /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,/* vector cost */
 };
@@ -478,6 +482,7 @@ static const struct riscv_tune_param sifive_p600_tune_info 
= {
   4,   /* fmv_cost */
   true,/* 
slow_unaligned_access */
   false,   /* use_divmod_expansion */
+  false,   /* overlap_op_by_pieces */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,/* vector cost */
 };
@@ -495,6 +500,7 @@ static const struct riscv_tune_param thead_c906_tune_info = 
{
   8,   /* fmv_cost */
   false,/* slow_unaligned_access */
   false,   /* use_divmod_expansion */
+  false,   /* overlap_op_by_pieces */
   RISCV_FUSE_NOTHING,   /* fusible_ops */
   NULL,/* vector cost */
 };
@@ -512,6 +518,7 @@ static const struct riscv_tune_param 
xiangshan_nanhu_tune_info = {
   3,   /* fmv_cost */
   true,

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed][RISC-V] Turn on overlap_op_by_pieces for generic-ooo tuning

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:30096ff9c4e5b8f31a0f65841db44f3b4fd71fda

commit 30096ff9c4e5b8f31a0f65841db44f3b4fd71fda
Author: Jeff Law 
Date:   Tue May 7 15:34:16 2024 -0600

[committed][RISC-V] Turn on overlap_op_by_pieces for generic-ooo tuning

Per quick email exchange with Palmer.  Given the triviality, I'm just 
pushing
it.

gcc/
* config/riscv/riscv.cc (generic_ooo_tune_info): Turn on
overlap_op_by_pieces.

(cherry picked from commit 9f14f1978260148d4d6208dfd73df1858e623758)

Diff:
---
 gcc/config/riscv/riscv.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index a9b57d41184..62207b6b227 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -536,7 +536,7 @@ static const struct riscv_tune_param generic_ooo_tune_info 
= {
   4,   /* fmv_cost */
   false,   /* slow_unaligned_access */
   false,   /* use_divmod_expansion */
-  false,   /* overlap_op_by_pieces */
+  true,/* overlap_op_by_pieces 
*/
   RISCV_FUSE_NOTHING,   /* fusible_ops */
   &generic_vector_cost,/* vector cost */
 };


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add test for sraiw-31 special case

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:929ef4d2fa08eabbf645c86ffe12ebbfb8219190

commit 929ef4d2fa08eabbf645c86ffe12ebbfb8219190
Author: Christoph Müllner 
Date:   Tue May 7 22:59:44 2024 +0200

RISC-V: Add test for sraiw-31 special case

We already optimize a sign-extension of a right-shift by 31 in
si3_extend.  Let's add a test for that (similar to
zero-extend-1.c).

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sign-extend-1.c: New test.

Signed-off-by: Christoph Müllner 
(cherry picked from commit dd388198b8be52ab378c935fc517a269e0ba741c)

Diff:
---
 gcc/testsuite/gcc.target/riscv/sign-extend-1.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sign-extend-1.c 
b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
new file mode 100644
index 000..e9056ec0d42
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { riscv64*-*-* } } } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+
+signed long
+foo1 (int i)
+{
+  return i >> 31;
+}
+/* { dg-final { scan-assembler "sraiw\ta\[0-9\],a\[0-9\],31" } } */
+
+/* { dg-final { scan-assembler-not "srai\t" } } */
+/* { dg-final { scan-assembler-not "srli\t" } } */
+/* { dg-final { scan-assembler-not "srliw\t" } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Cover sign-extensions in lshrsi3_zero_extend_2

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:31ab40016ae2864e4aa7741a1e3b6d867a6779cc

commit 31ab40016ae2864e4aa7741a1e3b6d867a6779cc
Author: Christoph Müllner 
Date:   Tue May 7 23:26:02 2024 +0200

RISC-V: Cover sign-extensions in lshrsi3_zero_extend_2

The pattern lshrsi3_zero_extend_2 extracts the MSB bits of the lower
32-bit word and zero-extends it back to DImode.
This is realized using srliw, which operates on 32-bit registers.

The same optimization can be applied to sign-extensions when emitting
a sraiw instead of the srliw.

Given these two optimizations are so similar, this patch simply
converts the existing one to also cover the sign-extension case as well.

gcc/ChangeLog:

* config/riscv/iterators.md (sraiw): New code iterator 
'any_extract'.
New code attribute 'extract_sidi_shift'.
* config/riscv/riscv.md (*lshrsi3_zero_extend_2): Rename to...
(*lshrsi3_extend_2):...this and add support for sign-extensions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/sign-extend-1.c: Test sraiw 24 and sraiw 16.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 4e46a3537ff57938a0d98fa524ac2fff8b08ae6d)

Diff:
---
 gcc/config/riscv/iterators.md  |  6 ++
 gcc/config/riscv/riscv.md  |  9 +
 gcc/testsuite/gcc.target/riscv/sign-extend-1.c | 14 ++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 32e1b140305..c5ca01f382a 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -150,6 +150,12 @@
 ;; to use the same template.
 (define_code_iterator any_extend [sign_extend zero_extend])
 
+;; These code iterators allow unsigned and signed extraction to be generated
+;; from the same template.
+(define_code_iterator any_extract [sign_extract zero_extract])
+(define_code_attr extract_sidi_shift [(sign_extract "sraiw")
+ (zero_extract "srliw")])
+
 ;; This code iterator allows the two right shift instructions to be
 ;; generated from the same template.
 (define_code_iterator any_shiftrt [ashiftrt lshiftrt])
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 24558682eb8..b7fc13e4e61 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2765,16 +2765,17 @@
   [(set_attr "type" "shift")
(set_attr "mode" "SI")])
 
-;; Canonical form for a zero-extend of a logical right shift.
-(define_insn "*lshrsi3_zero_extend_2"
+;; Canonical form for a sign/zero-extend of a logical right shift.
+;; Special case: extract MSB bits of lower 32-bit word
+(define_insn "*lshrsi3_extend_2"
   [(set (match_operand:DI   0 "register_operand" "=r")
-   (zero_extract:DI (match_operand:DI  1 "register_operand" " r")
+   (any_extract:DI (match_operand:DI  1 "register_operand" " r")
 (match_operand 2 "const_int_operand")
 (match_operand 3 "const_int_operand")))]
   "(TARGET_64BIT && (INTVAL (operands[3]) > 0)
 && (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))"
 {
-  return "srliw\t%0,%1,%3";
+  return "\t%0,%1,%3";
 }
   [(set_attr "type" "shift")
(set_attr "mode" "SI")])
diff --git a/gcc/testsuite/gcc.target/riscv/sign-extend-1.c 
b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
index e9056ec0d42..d8c18dd1aaa 100644
--- a/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
+++ b/gcc/testsuite/gcc.target/riscv/sign-extend-1.c
@@ -9,6 +9,20 @@ foo1 (int i)
 }
 /* { dg-final { scan-assembler "sraiw\ta\[0-9\],a\[0-9\],31" } } */
 
+signed char
+sub2 (long i)
+{
+  return i >> 24;
+}
+/* { dg-final { scan-assembler "sraiw\ta\[0-9\],a\[0-9\],24" } } */
+
+signed short
+sub3 (long i)
+{
+  return i >> 16;
+}
+/* { dg-final { scan-assembler "sraiw\ta\[0-9\],a\[0-9\],16" } } */
+
 /* { dg-final { scan-assembler-not "srai\t" } } */
 /* { dg-final { scan-assembler-not "srli\t" } } */
 /* { dg-final { scan-assembler-not "srliw\t" } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add zero_extract support for rv64gc

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:76f36d93c3107544c6dbdffa616599ce3fdb44eb

commit 76f36d93c3107544c6dbdffa616599ce3fdb44eb
Author: Christoph Müllner 
Date:   Mon May 6 12:33:32 2024 +0200

RISC-V: Add zero_extract support for rv64gc

The combiner attempts to optimize a zero-extension of a logical right shift
using zero_extract. We already utilize this optimization for those cases
that result in a single instruction.  Let's add an insn_and_split
pattern that also matches the generic case, where we can emit an
optimized sequence of a slli/srli.

Tested with SPEC CPU 2017 (rv64gc).

PR target/111501

gcc/ChangeLog:

* config/riscv/riscv.md (*lshr3_zero_extend_4): New
pattern for zero-extraction.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/extend-shift-helpers.h: New test.
* gcc.target/riscv/pr111501.c: New test.
* gcc.target/riscv/zero-extend-rshift-32.c: New test.
* gcc.target/riscv/zero-extend-rshift-64.c: New test.
* gcc.target/riscv/zero-extend-rshift.c: New test.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 3b9c760072c7792cbae6f38894756d2b96c2fd8c)

Diff:
---
 gcc/config/riscv/riscv.md  |  30 ++
 .../gcc.target/riscv/extend-shift-helpers.h|  26 +
 gcc/testsuite/gcc.target/riscv/pr111501.c  |  21 
 .../gcc.target/riscv/zero-extend-rshift-32.c   |  13 +++
 .../gcc.target/riscv/zero-extend-rshift-64.c   |  17 +++
 .../gcc.target/riscv/zero-extend-rshift.c  | 115 +
 6 files changed, 222 insertions(+)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b7fc13e4e61..58bf7712277 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2793,6 +2793,36 @@
   [(set_attr "type" "shift")
(set_attr "mode" "SI")])
 
+;; Canonical form for a zero-extend of a logical right shift.
+;; Special cases are handled above.
+;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
+(define_insn_and_split "*lshr3_zero_extend_4"
+  [(set (match_operand:GPR 0 "register_operand" "=r")
+(zero_extract:GPR
+   (match_operand:GPR 1 "register_operand" " r")
+   (match_operand 2 "const_int_operand")
+   (match_operand 3 "const_int_operand")))
+   (clobber (match_scratch:GPR  4 "=&r"))]
+  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
+   && !TARGET_XTHEADBB"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 4)
+ (ashift:GPR (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+ (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
+{
+  int regbits = GET_MODE_BITSIZE (GET_MODE (operands[0])).to_constant ();
+  int sizebits = INTVAL (operands[2]);
+  int startbits = INTVAL (operands[3]);
+  int lshamt = regbits - sizebits - startbits;
+  int rshamt = lshamt + startbits;
+  operands[2] = GEN_INT (lshamt);
+  operands[3] = GEN_INT (rshamt);
+}
+  [(set_attr "type" "shift")
+   (set_attr "mode" "")])
+
 ;; Handle AND with 2^N-1 for N from 12 to XLEN.  This can be split into
 ;; two logical shifts.  Otherwise it requires 3 instructions: lui,
 ;; xor/addi/srli, and.
diff --git a/gcc/testsuite/gcc.target/riscv/extend-shift-helpers.h 
b/gcc/testsuite/gcc.target/riscv/extend-shift-helpers.h
new file mode 100644
index 000..4853fe490d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/extend-shift-helpers.h
@@ -0,0 +1,26 @@
+#ifndef EXTEND_SHIFT_HELPERS_H
+#define EXTEND_SHIFT_HELPERS_H
+
+#define RT_EXT_CT_RSHIFT_N_AT(RTS,RT,CTS,CT,N,ATS,AT)  \
+RTS RT \
+RTS##_##RT##_ext_##CTS##_##CT##_rshift_##N##_##ATS##_##AT(ATS AT v)\
+{  \
+return (CTS CT)(v >> N);   \
+}
+
+#define ULONG_EXT_USHORT_RSHIFT_N_ULONG(N) \
+   RT_EXT_CT_RSHIFT_N_AT(unsigned,long,unsigned,short,N,unsigned,long)
+
+#define ULONG_EXT_UINT_RSHIFT_N_ULONG(N) \
+   RT_EXT_CT_RSHIFT_N_AT(unsigned,long,unsigned,int,N,unsigned,long)
+
+#define UINT_EXT_USHORT_RSHIFT_N_UINT(N) \
+   RT_EXT_CT_RSHIFT_N_AT(unsigned,int,unsigned,short,N,unsigned,int)
+
+#define UINT_EXT_USHORT_RSHIFT_N_ULONG(N) \
+   RT_EXT_CT_RSHIFT_N_AT(unsigned,int,unsigned,short,N,unsigned,long)
+
+#define ULONG_EXT_USHORT_RSHIFT_N_UINT(N) \
+   RT_EXT_CT_RSHIFT_N_AT(unsigned,long,unsigned,short,N,unsigned,int)
+
+#endif /* EXTEND_SHIFT_HELPERS_H */
diff --git a/gcc/testsuite/gcc.target/riscv/pr111501.c 
b/gcc/testsuite/gcc.target/riscv/pr111501.c
new file mode 100644
index 000..db48c34ce9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr111501.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } 

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Cover sign-extensions in lshr3_zero_extend_4

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d3660f7816fbc403a97492545a687b0651fe3429

commit d3660f7816fbc403a97492545a687b0651fe3429
Author: Christoph Müllner 
Date:   Tue May 7 22:23:26 2024 +0200

RISC-V: Cover sign-extensions in lshr3_zero_extend_4

The lshr3_zero_extend_4 pattern targets bit extraction
with zero-extension. This pattern represents the canonical form
of zero-extensions of a logical right shift.

The same optimization can be applied to sign-extensions.
Given the two optimizations are so similar, this patch converts
the existing one to also cover the sign-extension case.

gcc/ChangeLog:

* config/riscv/iterators.md (ashiftrt): New code attribute
'extract_shift' and adding extractions to optab.
* config/riscv/riscv.md (*lshr3_zero_extend_4): Rename 
to...
(*3):...this and add support for
sign-extensions.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/extend-shift-helpers.h: Add helpers for
sign-extension.
* gcc.target/riscv/sign-extend-rshift-32.c: New test.
* gcc.target/riscv/sign-extend-rshift-64.c: New test.
* gcc.target/riscv/sign-extend-rshift.c: New test.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 3ee30d7981987b86bd6a9a2675e26fadec48e5cd)

Diff:
---
 gcc/config/riscv/iterators.md  |   4 +
 gcc/config/riscv/riscv.md  |  25 +++--
 .../gcc.target/riscv/extend-shift-helpers.h|  20 
 .../gcc.target/riscv/sign-extend-rshift-32.c   |  17 +++
 .../gcc.target/riscv/sign-extend-rshift-64.c   |  17 +++
 .../gcc.target/riscv/sign-extend-rshift.c  | 123 +
 6 files changed, 198 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index c5ca01f382a..8a9d1986b4a 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -155,6 +155,8 @@
 (define_code_iterator any_extract [sign_extract zero_extract])
 (define_code_attr extract_sidi_shift [(sign_extract "sraiw")
  (zero_extract "srliw")])
+(define_code_attr extract_shift [(sign_extract "ashiftrt")
+(zero_extract "lshiftrt")])
 
 ;; This code iterator allows the two right shift instructions to be
 ;; generated from the same template.
@@ -261,6 +263,8 @@
 (us_minus "ussub")
 (sign_extend "extend")
 (zero_extend "zero_extend")
+(sign_extract "extract")
+(zero_extract "zero_extract")
 (fix "fix_trunc")
 (unsigned_fix "fixuns_trunc")])
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 58bf7712277..620a1b3bd32 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2793,24 +2793,33 @@
   [(set_attr "type" "shift")
(set_attr "mode" "SI")])
 
-;; Canonical form for a zero-extend of a logical right shift.
-;; Special cases are handled above.
-;; Skip for single-bit extraction (Zbs/XTheadBs) and th.extu (XTheadBb)
-(define_insn_and_split "*lshr3_zero_extend_4"
+;; Canonical form for a extend of a logical shift right (sign/zero extraction).
+;; Special cases, that are ignored (handled elsewhere):
+;; * Single-bit extraction (Zbs/XTheadBs)
+;; * Single-bit extraction (Zicondops/XVentanaCondops)
+;; * Single-bit extraction (SFB)
+;; * Extraction instruction th.ext(u) (XTheadBb)
+;; * lshrsi3_extend_2 (see above)
+(define_insn_and_split "*3"
   [(set (match_operand:GPR 0 "register_operand" "=r")
-(zero_extract:GPR
+(any_extract:GPR
(match_operand:GPR 1 "register_operand" " r")
(match_operand 2 "const_int_operand")
(match_operand 3 "const_int_operand")))
(clobber (match_scratch:GPR  4 "=&r"))]
-  "!((TARGET_ZBS || TARGET_XTHEADBS) && (INTVAL (operands[2]) == 1))
-   && !TARGET_XTHEADBB"
+  "!((TARGET_ZBS || TARGET_XTHEADBS || TARGET_ZICOND
+  || TARGET_XVENTANACONDOPS || TARGET_SFB_ALU)
+ && (INTVAL (operands[2]) == 1))
+   && !TARGET_XTHEADBB
+   && !(TARGET_64BIT
+&& (INTVAL (operands[3]) > 0)
+&& (INTVAL (operands[2]) + INTVAL (operands[3]) == 32))"
   "#"
   "&& reload_completed"
   [(set (match_dup 4)
  (ashift:GPR (match_dup 1) (match_dup 2)))
(set (match_dup 0)
- (lshiftrt:GPR (match_dup 4) (match_dup 3)))]
+ (:GPR (match_dup 4) (match_dup 3)))]
 {
   int regbits = GET_MODE_BITSIZE (GET_MODE (operands[0])).to_constant ();
   int sizebits = INTVAL (operands[2]);
diff --git a/gcc/testsuite/gcc.target/riscv/extend-shift-helpers.h 
b/gcc/testsuite/gcc.target/riscv/extend-shift-helpers.h
index 4853fe490d8..720672de242 100644
--- a/gcc/testsuite/gcc.target/riscv/extend-shift-helpers.h
+++ b/gcc/testsuite/gcc.target/riscv/extend-shift-helpe

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V][V2] Fix incorrect if-then-else nesting of Zbs usage in constant synthesis

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b30496ab6f0706b373ca68cea832fe13dd0c0e59

commit b30496ab6f0706b373ca68cea832fe13dd0c0e59
Author: Jeff Law 
Date:   Wed May 8 13:44:00 2024 -0600

[RISC-V][V2] Fix incorrect if-then-else nesting of Zbs usage in constant 
synthesis

Reposting without the patch that ignores whitespace.  The CI system doesn't
like including both patches, that'll generate a failure to apply and none of
the tests actually get run.

So I managed to goof the if-then-else level of the bseti bits last week.  
They
were supposed to be a last ditch effort to improve the result, but ended up
inside a conditional where they don't really belong.  I almost always use 
Zba,
Zbb and Zbs together, so it slipped by.

So it's NFC if you always test with Zbb and Zbs enabled together.  But if 
you
enabled Zbs without Zbb you'd see a failure to use bseti.

gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Fix incorrect
if-then-else nesting of Zbs code.

(cherry picked from commit 1c234097487927a4388ddcc690b63597bb3a90dc)

Diff:
---
 gcc/config/riscv/riscv.cc | 81 ---
 1 file changed, 41 insertions(+), 40 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 62207b6b227..633b55f9707 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -878,50 +878,51 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
  codes[1].use_uw = false;
  cost = 2;
}
-  /* Final cases, particularly focused on bseti.  */
-  else if (cost > 2 && TARGET_ZBS)
-   {
- int i = 0;
+}
 
- /* First handle any bits set by LUI.  Be careful of the
-SImode sign bit!.  */
- if (value & 0x7800)
-   {
- alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
- alt_codes[i].value = value & 0x7800;
- alt_codes[i].use_uw = false;
- value &= ~0x7800;
- i++;
-   }
+  /* Final cases, particularly focused on bseti.  */
+  if (cost > 2 && TARGET_ZBS)
+{
+  int i = 0;
 
- /* Next, any bits we can handle with addi.  */
- if (value & 0x7ff)
-   {
- alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
- alt_codes[i].value = value & 0x7ff;
- alt_codes[i].use_uw = false;
- value &= ~0x7ff;
- i++;
-   }
+  /* First handle any bits set by LUI.  Be careful of the
+SImode sign bit!.  */
+  if (value & 0x7800)
+   {
+ alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
+ alt_codes[i].value = value & 0x7800;
+ alt_codes[i].use_uw = false;
+ value &= ~0x7800;
+  i++;
+   }
 
- /* And any residuals with bseti.  */
- while (i < cost && value)
-   {
- HOST_WIDE_INT bit = ctz_hwi (value);
- alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
- alt_codes[i].value = 1UL << bit;
- alt_codes[i].use_uw = false;
- value &= ~(1ULL << bit);
- i++;
-   }
+  /* Next, any bits we can handle with addi.  */
+  if (value & 0x7ff)
+   {
+ alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
+ alt_codes[i].value = value & 0x7ff;
+ alt_codes[i].use_uw = false;
+ value &= ~0x7ff;
+ i++;
+   }
 
- /* If LUI+ADDI+BSETI resulted in a more efficient
-sequence, then use it.  */
- if (i < cost)
-   {
- memcpy (codes, alt_codes, sizeof (alt_codes));
- cost = i;
-   }
+  /* And any residuals with bseti.  */
+  while (i < cost && value)
+   {
+ HOST_WIDE_INT bit = ctz_hwi (value);
+ alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
+ alt_codes[i].value = 1UL << bit;
+ alt_codes[i].use_uw = false;
+ value &= ~(1ULL << bit);
+ i++;
+   }
+
+  /* If LUI+ADDI+BSETI resulted in a more efficient
+sequence, then use it.  */
+  if (i < cost)
+   {
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = i;
}
 }


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH v1 1/1] RISC-V: Nan-box the result of movbf on soft-bf16

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:6c5e2178e97baaba8ca156bc705222eae9a52f17

commit 6c5e2178e97baaba8ca156bc705222eae9a52f17
Author: Xiao Zeng 
Date:   Wed May 8 14:00:58 2024 -0600

[PATCH v1 1/1] RISC-V: Nan-box the result of movbf on soft-bf16

1 This patch implements the Nan-box of bf16.

2 Please refer to the Nan-box implementation of hf16 in:



3 The discussion about Nan-box can be found on the website:



4 Below test are passed for this patch
* The riscv fully regression test.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_legitimize_move): Expand movbf
with Nan-boxing value.
* config/riscv/riscv.md (*movbf_softfloat_boxing): New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/_Bfloat16-nanboxing.c: New test.

(cherry picked from commit ce51e6727c9d69bbab0e766c449e60fd41f5f2f9)

Diff:
---
 gcc/config/riscv/riscv.cc  | 52 --
 gcc/config/riscv/riscv.md  | 12 -
 .../gcc.target/riscv/_Bfloat16-nanboxing.c | 38 
 3 files changed, 77 insertions(+), 25 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 633b55f9707..2eac67b0ce0 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3130,35 +3130,39 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
 }
 
   /* In order to fit NaN boxing, expand
- (set FP_REG (reg:HF src))
+ (set FP_REG (reg:HF/BF src))
  to
  (set (reg:SI/DI mask) (const_int -65536)
- (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF src) 0)))
+ (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF/BF src) 0)))
  (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
- (set (reg:HF dest) (unspec:HF [ (reg:SI/DI temp) ] UNSPEC_FMV_SFP16_X))
+ (set (reg:HF/BF dest) (unspec:HF/BF[ (reg:SI/DI temp) ]
+   UNSPEC_FMV_SFP16_X/UNSPEC_FMV_SBF16_X))
  */
 
- if (TARGET_HARD_FLOAT
- && !TARGET_ZFHMIN && mode == HFmode
- && REG_P (dest) && FP_REG_P (REGNO (dest))
- && REG_P (src) && !FP_REG_P (REGNO (src))
- && can_create_pseudo_p ())
-   {
- rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
- rtx temp = gen_reg_rtx (word_mode);
- emit_insn (gen_extend_insn (temp,
-simplify_gen_subreg (HImode, src, mode, 0),
-word_mode, HImode, 1));
- if (word_mode == SImode)
-   emit_insn (gen_iorsi3 (temp, mask, temp));
- else
-   emit_insn (gen_iordi3 (temp, mask, temp));
-
- riscv_emit_move (dest, gen_rtx_UNSPEC (HFmode, gen_rtvec (1, temp),
-   UNSPEC_FMV_SFP16_X));
-
- return true;
-   }
+  if (TARGET_HARD_FLOAT
+  && ((!TARGET_ZFHMIN && mode == HFmode)
+ || (!TARGET_ZFBFMIN && mode == BFmode))
+  && REG_P (dest) && FP_REG_P (REGNO (dest))
+  && REG_P (src) && !FP_REG_P (REGNO (src))
+  && can_create_pseudo_p ())
+{
+  rtx mask = force_reg (word_mode, gen_int_mode (-65536, word_mode));
+  rtx temp = gen_reg_rtx (word_mode);
+  emit_insn (gen_extend_insn (temp,
+ simplify_gen_subreg (HImode, src, mode, 0),
+ word_mode, HImode, 1));
+  if (word_mode == SImode)
+   emit_insn (gen_iorsi3 (temp, mask, temp));
+  else
+   emit_insn (gen_iordi3 (temp, mask, temp));
+
+  riscv_emit_move (dest,
+  gen_rtx_UNSPEC (mode, gen_rtvec (1, temp),
+  mode == HFmode ? UNSPEC_FMV_SFP16_X
+ : UNSPEC_FMV_SBF16_X));
+
+  return true;
+}
 
   /* We need to deal with constants that would be legitimate
  immediate_operands but aren't legitimate move_operands.  */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 620a1b3bd32..4d6de992557 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -86,8 +86,9 @@
   ;; String unspecs
   UNSPEC_STRLEN
 
-  ;; Workaround for HFmode without hardware extension
+  ;; Workaround for HFmode and BFmode without hardware extension
   UNSPEC_FMV_SFP16_X
+  UNSPEC_FMV_SBF16_X
 
   ;; XTheadFmv moves
   UNSPEC_XTHEADFMV
@@ -1926,6 +1927,15 @@
   [(set_attr "type" "fmove")
(set_attr "mode" "SF")])
 
+(define_insn "*movbf_softfloat_boxing"
+  [(set (match_operand:BF 0 "register_operand"   "=f")
+   (unspec:BF [(match_operand:X 1 "register_operand" " r")]
+UNSPEC_FMV_SBF16_X))]
+  "!TARGET_ZFBFMIN"
+  "fmv.w.x\t%0,%1"
+  [(set_attr "type" "fmove")
+   (set

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Make full-vec-move1.c test robust for optimization

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:227ec9bfd49a7bf388f5c5ddd4f556e254c7a928

commit 227ec9bfd49a7bf388f5c5ddd4f556e254c7a928
Author: Pan Li 
Date:   Thu May 9 10:56:46 2024 +0800

RISC-V: Make full-vec-move1.c test robust for optimization

While investigating support for early break autovec, we noticed that
the test full-vec-move1.c gets optimized to 'return 0;' in the main
function body.  This happens because the value of type V is somehow a
compile-time constant, so the second loop is treated as
assert (true).

Thus,  the ccp4 pass will eliminate these stmt and just return 0.

typedef int16_t V __attribute__((vector_size (128)));

int main ()
{
  V v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
(v)[i] = i;

  V res = v;
  for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
assert (res[i] == i); // will be optimized to assert (true)
}

This patch introduces an extern function that uses res[i]
to get rid of the ccp4 optimization.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c:
Introduce extern func use to get rid of ccp4 optimization.

Signed-off-by: Pan Li 
(cherry picked from commit b1520d2260c5e0cfcd7a4354fab70f66e2912ff2)

Diff:
---
 .../gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
index d73bad4af6f..fae2ae91572 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls-vlmax/full-vec-move1.c
@@ -2,11 +2,12 @@
 /* { dg-additional-options "-std=c99 -O3 -march=rv64gcv_zvl128b -mabi=lp64d 
-fno-vect-cost-model -mrvv-vector-bits=zvl" } */
 
 #include 
-#include 
 
 /* This would cause us to emit a vl1r.v for VNx4HImode even when
the hardware vector size vl > 64.  */
 
+extern int16_t test_element (int16_t);
+
 typedef int16_t V __attribute__((vector_size (128)));
 
 int main ()
@@ -14,9 +15,10 @@ int main ()
   V v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
 (v)[i] = i;
+
   V res = v;
   for (int i = 0; i < sizeof (v) / sizeof (v[0]); i++)
-assert (res[i] == i);
+test_element (res[i]);
 }
 
 /* { dg-final { scan-assembler-not {vl[1248]r.v} } }  */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add tests for cpymemsi expansion

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:94912842c2baae5ac6a1fe0c0d91330a7376d21f

commit 94912842c2baae5ac6a1fe0c0d91330a7376d21f
Author: Christoph Müllner 
Date:   Thu Apr 11 12:07:10 2024 +0200

RISC-V: Add tests for cpymemsi expansion

cpymemsi expansion has been available for RISC-V since the initial port.
However, there are no tests to detect regressions.
This patch adds such tests.

Three of the tests target the expansion requirements (known length and
alignment). One test reuses an existing memcpy test from the by-pieces
framework (gcc/testsuite/gcc.dg/torture/inline-mem-cpy-1.c).

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cpymemsi-1.c: New test.
* gcc.target/riscv/cpymemsi-2.c: New test.
* gcc.target/riscv/cpymemsi-3.c: New test.
* gcc.target/riscv/cpymemsi.c: New test.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 4d38e88227ea48e559a2f354c0e62d372e181b82)

Diff:
---
 gcc/testsuite/gcc.target/riscv/cpymemsi-1.c |  9 ++
 gcc/testsuite/gcc.target/riscv/cpymemsi-2.c | 42 
 gcc/testsuite/gcc.target/riscv/cpymemsi-3.c | 43 +
 gcc/testsuite/gcc.target/riscv/cpymemsi.c   | 22 +++
 4 files changed, 116 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
new file mode 100644
index 000..983b564ccaf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
@@ -0,0 +1,9 @@
+/* { dg-do run } */
+/* { dg-options "-march=rv32gc -save-temps -g0 -fno-lto" { target { rv32 } } } 
*/
+/* { dg-options "-march=rv64gc -save-temps -g0 -fno-lto" { target { rv64 } } } 
*/
+/* { dg-additional-options "-DRUN_FRACTION=11" { target simulator } } */
+/* { dg-timeout-factor 2 } */
+
+#include "../../gcc.dg/memcmp-1.c"
+/* Yeah, this memcmp test exercises plenty of memcpy, more than any of the
+   memcpy tests.  */
diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-2.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-2.c
new file mode 100644
index 000..833d1c04487
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-2.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+#include 
+#define aligned32 __attribute__ ((aligned (32)))
+
+const char myconst15[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst23[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst31[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+
+/* No expansion (unknown alignment) */
+#define MY_MEM_CPY_N(N)\
+void my_mem_cpy_##N (char *b1, const char *b2) \
+{  \
+  __builtin_memcpy (b1, b2, N);\
+}
+
+/* No expansion (unknown alignment) */
+#define MY_MEM_CPY_CONST_N(N)  \
+void my_mem_cpy_const_##N (char *b1)   \
+{  \
+  __builtin_memcpy (b1, myconst##N, sizeof(myconst##N));\
+}
+
+MY_MEM_CPY_N(15)
+MY_MEM_CPY_CONST_N(15)
+
+MY_MEM_CPY_N(23)
+MY_MEM_CPY_CONST_N(23)
+
+MY_MEM_CPY_N(31)
+MY_MEM_CPY_CONST_N(31)
+
+/* { dg-final { scan-assembler-times "\t(call|tail)\tmemcpy" 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-3.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-3.c
new file mode 100644
index 000..803765195b2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-3.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc" { target { rv64 } } } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Os" "-Og" "-Oz" } } */
+
+#include 
+#define aligned32 __attribute__ ((aligned (32)))
+
+const char myconst15[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst23[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+const char myconst31[] aligned32 = { 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7,
+0, 1, 2, 3, 4, 5, 6, 7 };
+
+#define MY_MEM_CPY_ALIGNED_N(N)\
+void my_mem_cpy_aligned_##N(char *b1, const char *b2)  \
+{

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] [RISC-V] Provide splitting guidance to combine to facilitate shNadd.uw generation

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a4b72b2f9d6e5a560ebb42f485121d66576979ee

commit a4b72b2f9d6e5a560ebb42f485121d66576979ee
Author: Jeff Law 
Date:   Thu May 9 21:07:06 2024 -0600

[committed] [RISC-V] Provide splitting guidance to combine to facilitate
shNadd.uw generation

This fixes a minor code quality issue I found while comparing GCC and LLVM.
Essentially we want to do a bit of re-association to generate shNadd.uw
instructions.

Combine does the right thing and finds all the necessary instructions,
reassociates the operands, combines constants, etc.  Where it fails is
finding
a good split point.  The backend can trivially provide guidance on how to 
split
via a define_split pattern.

This has survived both Ventana's internal CI system (rv64gcb) as well as my 
own
(rv64gc, rv32gcv).

I'll wait for the external CI system to give the all-clear before pushing.

gcc/
* config/riscv/bitmanip.md: Add splitter for shadd feeding another
add instruction.

gcc/testsuite/

* gcc.target/riscv/zba-shadduw.c: New test.

(cherry picked from commit bfb88b1406cdd8d3f97e280b0d63529aa925f18a)

Diff:
---
 gcc/config/riscv/bitmanip.md | 17 ++
 gcc/testsuite/gcc.target/riscv/zba-shadduw.c | 35 
 2 files changed, 52 insertions(+)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index ad3ad758959..d76a72d30e0 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -184,6 +184,23 @@
   [(set_attr "type" "bitmanip")
(set_attr "mode" "DI")])
 
+;; Combine will reassociate the operands in the most useful way here.  We
+;; just have to give it guidance on where to split the result to facilitate
+;; shNadd.uw generation.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+   (plus:DI (plus:DI (and:DI (ashift:DI (match_operand:DI 1 
"register_operand")
+(match_operand:QI 2 
"imm123_operand"))
+ (match_operand 3 
"consecutive_bits32_operand"))
+ (match_operand:DI 4 "register_operand"))
+(match_operand 5 "immediate_operand")))]
+  "TARGET_64BIT && TARGET_ZBA"
+  [(set (match_dup 0)
+   (plus:DI (and:DI (ashift:DI (match_dup 1) (match_dup 2))
+(match_dup 3))
+(match_dup 4)))
+   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 5)))])
+
 ;; ZBB extension.
 
 (define_expand "clzdi2"
diff --git a/gcc/testsuite/gcc.target/riscv/zba-shadduw.c 
b/gcc/testsuite/gcc.target/riscv/zba-shadduw.c
new file mode 100644
index 000..5b77447e681
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba-shadduw.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zba -mabi=lp64" } */
+
+typedef struct simple_bitmap_def
+{
+  unsigned char *popcount;
+  unsigned int n_bits;
+  unsigned int size;
+  unsigned long elms[1];
+} *sbitmap;
+typedef const struct simple_bitmap_def *const_sbitmap;
+
+typedef unsigned long *sbitmap_ptr;
+typedef const unsigned long *const_sbitmap_ptr;
+static unsigned long sbitmap_elt_popcount (unsigned long);
+
+void
+sbitmap_a_or_b (sbitmap dst, const_sbitmap a, const_sbitmap b)
+{
+  unsigned int i, n = dst->size;
+  sbitmap_ptr dstp = dst->elms;
+  const_sbitmap_ptr ap = a->elms;
+  const_sbitmap_ptr bp = b->elms;
+  unsigned char has_popcount = dst->popcount != ((void *) 0);
+
+  for (i = 0; i < n; i++)
+{
+  const unsigned long tmp = *ap++ | *bp++;
+  *dstp++ = tmp;
+}
+}
+
+
+/* { dg-final { scan-assembler "sh3add.uw" } } */
+/* { dg-final { scan-assembler-not {\mslli.uw} } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix typos in code or comment [NFC]

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:73fe7fd03bc4adeac1b5ce7619b58b60e9f5dca6

commit 73fe7fd03bc4adeac1b5ce7619b58b60e9f5dca6
Author: Kito Cheng 
Date:   Tue May 7 10:18:58 2024 +0800

RISC-V: Fix typos in code or comment [NFC]

Just found some typos when fixing bugs and then used aspell to find a few
more typos; this patch doesn't do anything other than fix typos.

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc: Fix typos in comments.
(get_all_predecessors): Ditto.
(pre_vsetvl::m_unknow_info): Rename to...
(pre_vsetvl::m_unknown_info): this.
(pre_vsetvl::compute_vsetvl_def_data): Rename m_unknow_info to
m_unknown_info.
(pre_vsetvl::cleaup): Rename to...
(pre_vsetvl::cleanup): this.
(pre_vsetvl::compute_vsetvl_def_data): Fix typos.
(pass_vsetvl::lazy_vsetvl): Update function name and fix typos.
* config/riscv/riscv.cc: Fix typos in comments.
(struct machine_function): Fix typo in comments.
(riscv_valid_lo_sum_p): Ditto.
(riscv_force_address): Ditto.
(riscv_immediate_operand_p): Ditto.
(riscv_in_small_data_p): Ditto.
(riscv_first_stack_step): Ditto.
(riscv_expand_prologue): Ditto.
(riscv_convert_vector_chunks): Ditto.
(riscv_override_options_internal): Ditto.
(get_common_costs): Ditto.

(cherry picked from commit d83070aebdb810e38f12d008e7a10acf1063f456)

Diff:
---
 gcc/config/riscv/riscv-vsetvl.cc | 64 
 gcc/config/riscv/riscv.cc| 36 +++---
 2 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 48ce757a6ee..bbea2b5fd4f 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -95,7 +95,7 @@ using namespace riscv_vector;
It's a bit different from bitmap_union_of_preds in cfganal.cc. This function
takes into account the case where pred is ENTRY basic block. The main reason
for this difference is to make it easier to insert some special value into
-   the ENTRY base block. For example, vsetvl_info with a status of UNKNOW.  */
+   the ENTRY base block. For example, vsetvl_info with a status of UNKNOWN.  */
 static void
 bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
 {
@@ -126,9 +126,9 @@ bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap 
*src, basic_block b)
   }
 }
 
-/* Compute the reaching defintion in and out based on the gen and KILL
-   informations in each Base Blocks.
-   This function references the compute_avaiable implementation in lcm.cc  */
+/* Compute the reaching definition in and out based on the gen and KILL
+   information's in each Base Blocks.
+   This function references the compute_available implementation in lcm.cc  */
 static void
 compute_reaching_defintion (sbitmap *gen, sbitmap *kill, sbitmap *in,
sbitmap *out)
@@ -719,7 +719,7 @@ get_all_predecessors (basic_block bb)
require SEW and LMUL to be fixed.
Therefore, if the former RVV instruction needs DEMAND_RATIO_P and the latter
instruction needs DEMAND_SEW_LMUL_P and its SEW/LMUL is the same as that of
-   the former instruction, then we can make the minimu demand of the former
+   the former instruction, then we can make the minimum demand of the former
instruction strict to DEMAND_SEW_LMUL_P, and its required SEW and LMUL are
the SEW and LMUL of the latter instruction, and the vsetvl instruction
generated according to the new demand can also be used for the latter
@@ -741,7 +741,7 @@ enum demand_flags : unsigned
 /* We split the demand information into three parts. They are sew and lmul
related (sew_lmul_demand_type), tail and mask policy related
(policy_demand_type) and avl related (avl_demand_type). Then we define three
-   interfaces avaiable_with, compatible_p and merge. avaiable_with is
+   interfaces available_p, compatible_p and merge. available_p is
used to determine whether the two vsetvl infos prev_info and next_info are
available or not. If prev_info is available for next_info, it means that the
RVV insn corresponding to next_info on the path from prev_info to next_info
@@ -1361,17 +1361,17 @@ public:
 
 /* Demand system is the RVV-based VSETVL info analysis tools wrapper.
It defines compatible rules for SEW/LMUL, POLICY and AVL.
-   Also, it provides 3 iterfaces avaiable_p, compatible_p and
+   Also, it provides 3 interfaces available_p, compatible_p and
merge for the VSETVL PASS analysis and optimization.
 
- - avaiable_p: Determine whether the next info can get the
-   avaiable VSETVL status from previous info.
+ - available_p: Determine whether the next info can get the
+   available VSETVL status from previous info.
e.g. bb 2 

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [RISC-V] Use shNadd for constant synthesis

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:972cb5c8dbb56c378c12d2ca88b1940c1c1c1c45

commit 972cb5c8dbb56c378c12d2ca88b1940c1c1c1c45
Author: Jeff Law 
Date:   Fri May 10 13:49:44 2024 -0600

[RISC-V] Use shNadd for constant synthesis

So here's the next idiom to improve constant synthesis.

The basic idea here is to try and use shNadd to generate the constant when 
profitable.

Let's take 0x30801.  Right now that generates:

li  a0,3145728
addia0,a0,1
sllia0,a0,12
addia0,a0,-2047

But we can do better.  The constant is evenly divisible by 9 resulting in
0x5639 which doesn't look terribly interesting.  But that constant can 
be
generated with two instructions, then we can use a sh3add to multiply it by 
9.
So the updated sequence looks like:

li  a0,1431654400
addia0,a0,1593
sh3add  a0,a0,a0

This doesn't trigger a whole lot, but I haven't really set up a test to 
explore
the most likely space where this might be useful.  The tests were found
exploring a different class of constant synthesis problems.

If you were to dive into the before/after you'd see that the shNadd 
interacts
quite nicely with the recent bseti work.   The joys of recursion.

Probably the most controversial thing in here is using the "FMA" opcode to
stand in for when we want to use shNadd.  Essentially when we synthesize a
constant we generate a series of RTL opcodes and constants for emission by
another routine.   We don't really have a way to say we want a shift-add.  
But
you can think of shift-add as a limited form of multiply-accumulate.  It's a
bit of a stretch, but not crazy bad IMHO.

Other approaches would be to store our own enum rather than an RTL opcode.  
Or
store an actual generator function rather than any kind of opcode.

It wouldn't take much pushback over (ab)using FMA in this manner to get me 
to
use our own enums rather than RTL opcodes for this stuff.

gcc/

* config/riscv/riscv.cc (riscv_build_integer_1): Recognize cases 
where
we can use shNadd to improve constant synthesis.
(riscv_move_integer): Handle code generation for shNadd.

gcc/testsuite
* gcc.target/riscv/synthesis-1.c: Also count shNadd instructions.
* gcc.target/riscv/synthesis-3.c: New test.

(cherry picked from commit dbbd059b49edc936769737204f5c270d8d6ff553)

Diff:
---
 gcc/config/riscv/riscv.cc| 42 +++
 gcc/testsuite/gcc.target/riscv/synthesis-1.c |  2 +-
 gcc/testsuite/gcc.target/riscv/synthesis-3.c | 81 
 3 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2860137af71..9c98b1da035 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -880,6 +880,40 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
}
 }
 
+  if (cost > 2 && TARGET_64BIT && TARGET_ZBA)
+{
+  if ((value % 9) == 0
+ && (alt_cost
+ = riscv_build_integer_1 (alt_codes, value / 9, mode) + 1) < cost)
+   {
+  alt_codes[alt_cost - 1].code = FMA;
+  alt_codes[alt_cost - 1].value = 9;
+  alt_codes[alt_cost - 1].use_uw = false;
+  memcpy (codes, alt_codes, sizeof (alt_codes));
+  cost = alt_cost;
+   }
+  if ((value % 5) == 0
+ && (alt_cost
+ = riscv_build_integer_1 (alt_codes, value / 5, mode) + 1) < cost)
+   {
+  alt_codes[alt_cost - 1].code = FMA;
+  alt_codes[alt_cost - 1].value = 5;
+  alt_codes[alt_cost - 1].use_uw = false;
+  memcpy (codes, alt_codes, sizeof (alt_codes));
+  cost = alt_cost;
+   }
+  if ((value % 3) == 0
+ && (alt_cost
+ = riscv_build_integer_1 (alt_codes, value / 3, mode) + 1) < cost)
+   {
+  alt_codes[alt_cost - 1].code = FMA;
+  alt_codes[alt_cost - 1].value = 3;
+  alt_codes[alt_cost - 1].use_uw = false;
+  memcpy (codes, alt_codes, sizeof (alt_codes));
+  cost = alt_cost;
+   }
+}
+
   /* Final cases, particularly focused on bseti.  */
   if (cost > 2 && TARGET_ZBS)
 {
@@ -2542,6 +2576,14 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT 
value,
  x = gen_rtx_fmt_ee (AND, mode, x, GEN_INT (value));
  x = riscv_emit_set (t, x);
}
+ else if (codes[i].code == FMA)
+   {
+ HOST_WIDE_INT value = exact_log2 (codes[i].value - 1);
+ rtx ashift = gen_rtx_fmt_ee (ASHIFT, mode, x, GEN_INT (value));
+ x = gen_rtx_fmt_ee (PLUS, mode, ashift, x);
+ rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
+ x = riscv_emit_set (t, x

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed] RISC-V Fix minor regression in synthesis WRT bseti usage

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4853bebb4dd07ffa6d836f3a8f630188e3180662

commit 4853bebb4dd07ffa6d836f3a8f630188e3180662
Author: Jeff Law 
Date:   Sun May 12 07:05:43 2024 -0600

[to-be-committed] RISC-V Fix minor regression in synthesis WRT bseti usage

Overnight testing showed a small number of cases where constant synthesis 
was
doing something dumb.  Specifically generating more instructions than the
number of bits set in the constant.

It was a minor goof in the recent bseti code.  In the code to first figure 
out
what bits LUI could set, I included one bit outside the space LUI operates.
For some dumb reason I kept thinking in terms of 11 low bits belonging to 
addi,
but it's actually 12 bits.  The net is what we thought should be a single 
LUI
for costing turned into LUI+ADDI.

I didn't let the test run to completion, but over the course of 12 hours it
found 9 cases.  Given we know that the triggers all have 0x800 set, I bet we
could likely find more, but I doubt it's that critical to cover every 
possible
constant that regressed.

gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Fix thinko in 
testing
when lui can be used to set several bits in bseti path.

gcc/testsuite

* gcc.target/riscv/synthesis-4.c: New test

(cherry picked from commit 77a28ed91b2a527b9006ee1a220b468756b43eca)

Diff:
---
 gcc/config/riscv/riscv.cc|  6 ++---
 gcc/testsuite/gcc.target/riscv/synthesis-4.c | 34 
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9c98b1da035..049f8f8cb9f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -921,12 +921,12 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
 
   /* First handle any bits set by LUI.  Be careful of the
 SImode sign bit!.  */
-  if (value & 0x7800)
+  if (value & 0x7000)
{
  alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
- alt_codes[i].value = value & 0x7800;
+ alt_codes[i].value = value & 0x7000;
  alt_codes[i].use_uw = false;
- value &= ~0x7800;
+ value &= ~0x7000;
   i++;
}
 
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-4.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-4.c
new file mode 100644
index 000..328a55b9e6e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-4.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions. 
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 45 } } */
+
+
+unsigned long foo_0x640800(void) { return 0x640800UL; }
+
+unsigned long foo_0xc40800(void) { return 0xc40800UL; }
+
+unsigned long foo_0x1840800(void) { return 0x1840800UL; }
+
+unsigned long foo_0x3040800(void) { return 0x3040800UL; }
+
+unsigned long foo_0x6040800(void) { return 0x6040800UL; }
+
+unsigned long foo_0xc040800(void) { return 0xc040800UL; }
+
+unsigned long foo_0x18040800(void) { return 0x18040800UL; }
+
+unsigned long foo_0x30040800(void) { return 0x30040800UL; }
+
+unsigned long foo_0x60040800(void) { return 0x60040800UL; }


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed, RISC-V] Improve usage of slli.uw in constant synthesis

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a971125e9455f91915d5be5e701c1cfcafed2b94

commit a971125e9455f91915d5be5e701c1cfcafed2b94
Author: Jeff Law 
Date:   Sun May 12 07:12:04 2024 -0600

[to-be-committed,RISC-V] Improve usage of slli.uw in constant synthesis

And an improvement to using slli.uw...

I recently added the ability to use slli.uw in the synthesis path.  That
code was conditional on the right justified constant being a LUI_OPERAND
after sign extending from bit 31 to bit 63.

That code is working fine, but could be improved.  Specifically there's
no reason it shouldn't work for LUI+ADDI under the same circumstances.
So rather than testing the sign extended, right justified constant is a
LUI_OPERAND, we can just test that the right justified constant has
precisely 32 leading zeros.

gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Use slli.uw more.

gcc/testsuite
* gcc.target/riscv/synthesis-5.c: New test.

(cherry picked from commit 83fb5e6f382ea99ca0e2a0afeb25a9f78909f25f)

Diff:
---
 gcc/config/riscv/riscv.cc|   9 +-
 gcc/testsuite/gcc.target/riscv/synthesis-5.c | 294 +++
 2 files changed, 299 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 049f8f8cb9f..a1e5a014bed 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -819,13 +819,14 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
  & ~HOST_WIDE_INT_C (0x8000)
shift -= IMM_BITS, x <<= IMM_BITS;
 
-  /* Adjust X if it isn't a LUI operand in isolation, but we can use
-a subsequent "uw" instruction form to mask off the undesirable
-bits.  */
+  /* If X has bits 32..63 clear and bit 31 set, then go ahead and mark
+it as desiring a "uw" operation for the shift.  That way we can have
+LUI+ADDI to generate the constant, then shift it into position
+clearing out the undesirable bits.  */
   if (!LUI_OPERAND (x)
  && TARGET_64BIT
  && TARGET_ZBA
- && LUI_OPERAND (x & ~HOST_WIDE_INT_C (0x8000UL)))
+ && clz_hwi (x) == 32)
{
  x = sext_hwi (x, 32);
  use_uw = true;
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-5.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-5.c
new file mode 100644
index 000..4d81565b563
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-5.c
@@ -0,0 +1,294 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 556 } } */
+
+unsigned long foo_0x80180001000(void) { return 0x80180001000UL; }
+
+unsigned long foo_0x80280001000(void) { return 0x80280001000UL; }
+
+unsigned long foo_0x80480001000(void) { return 0x80480001000UL; }
+
+unsigned long foo_0x80880001000(void) { return 0x80880001000UL; }
+
+unsigned long foo_0x81080001000(void) { return 0x81080001000UL; }
+
+unsigned long foo_0x82080001000(void) { return 0x82080001000UL; }
+
+unsigned long foo_0x84080001000(void) { return 0x84080001000UL; }
+
+unsigned long foo_0x88080001000(void) { return 0x88080001000UL; }
+
+unsigned long foo_0x90080001000(void) { return 0x90080001000UL; }
+
+unsigned long foo_0xa0080001000(void) { return 0xa0080001000UL; }
+
+unsigned long foo_0x8031000(void) { return 0x8031000UL; }
+
+unsigned long foo_0x8051000(void) { return 0x8051000UL; }
+
+unsigned long foo_0x8091000(void) { return 0x8091000UL; }
+
+unsigned long foo_0x8111000(void) { return 0x8111000UL; }
+
+unsigned long foo_0x8211000(void) { return 0x8211000UL; }
+
+unsigned long foo_0x8411000(void) { return 0x8411000UL; }
+
+unsigned long foo_0x8811000(void) { return 0x8811000UL; }
+
+unsigned long foo_0x9011000(void) { return 0x9011000UL; }
+
+unsigned long foo_0xa011000(void) { return 0xa011000UL; }
+
+unsigned long foo_0xc011000(void) { return 0xc011000UL; }
+
+unsigned long foo_0x8061000(void) { return 0x8061000UL; }
+
+unsigned long foo_0x80a1000(void) { return 0x80a1000UL; }
+
+unsigned long foo_0x8121000(void) { return 0x8121000

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed, RISC-V] Improve single inverted bit extraction - v3

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:ab251bdcabb261d630eef18f022c63ae9b8e3cd3

commit ab251bdcabb261d630eef18f022c63ae9b8e3cd3
Author: Jeff Law 
Date:   Mon May 13 07:14:08 2024 -0600

[to-be-committed,RISC-V] Improve single inverted bit extraction - v3

So this patch fixes a minor code generation inefficiency that (IIRC) the
RAU team discovered a while ago in spec.

If we want the inverted value of a single bit we can use bext to extract
the bit, then seq to invert the value (if viewed as a 0/1 truth value).

The RTL is fairly convoluted, but it's basically a right shift to get
the bit into position, bitwise-not then masking off all but the low bit.
So it's a 3->2 combine, hidden by the fact that and-not is a
define_insn_and_split, so it actually looks like a 2->2 combine.

We've run this through Ventana's internal CI (which includes
zba_zbb_zbs) and I've run it in my own tester (rv64gc, rv32gcv).  I'll
wait for the upstream CI to finish with positive results before pushing.

gcc/
* config/riscv/bitmanip.md (bextseqzdisi): New patterns.

gcc/testsuite/

* gcc.target/riscv/zbs-bext-2.c: New test.
* gcc.target/riscv/zbs-bext.c: Fix one of the possible expected 
sequences.

(cherry picked from commit 0c585c8d0dd85601a8d116ada99126a48c8ce9fd)

Diff:
---
 gcc/config/riscv/.riscv.cc.swo  | Bin 0 -> 417792 bytes
 gcc/config/riscv/bitmanip.md|  43 
 gcc/config/riscv/j  |   0
 gcc/testsuite/gcc.target/riscv/zbs-bext-2.c |  19 
 gcc/testsuite/gcc.target/riscv/zbs-bext.c   |   2 +-
 5 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/.riscv.cc.swo b/gcc/config/riscv/.riscv.cc.swo
new file mode 100644
index 000..77ed37353be
Binary files /dev/null and b/gcc/config/riscv/.riscv.cc.swo differ
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index d76a72d30e0..724511b6df3 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -711,6 +711,49 @@
   "bext\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
 
+;; This is a bext followed by a seqz.  Normally this would be a 3->2 split
+;; But the and-not pattern with a constant operand is a define_insn_and_split,
+;; so this looks like a 2->2 split, which combine rejects.  So implement it
+;; as a define_insn_and_split as well.
+(define_insn_and_split "*bextseqzdisi"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (and:DI
+ (not:DI
+   (subreg:DI
+ (lshiftrt:SI
+   (match_operand:SI 1 "register_operand" "r")
+   (match_operand:QI 2 "register_operand" "r")) 0))
+ (const_int 1)))]
+  "TARGET_64BIT && TARGET_ZBS"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (zero_extract:DI (match_dup 1)
+(const_int 1)
+(zero_extend:DI (match_dup 2
+   (set (match_dup 0) (eq:DI (match_dup 0) (const_int 0)))]
+  "operands[1] = gen_lowpart (word_mode, operands[1]);"
+  [(set_attr "type" "bitmanip")])
+
+(define_insn_and_split "*bextseqzdisi"
+  [(set (match_operand:X 0 "register_operand" "=r")
+   (and:X
+ (not:X
+   (lshiftrt:X
+ (match_operand:X 1 "register_operand" "r")
+ (match_operand:QI 2 "register_operand" "r")))
+ (const_int 1)))]
+  "TARGET_ZBS"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (zero_extract:X (match_dup 1)
+   (const_int 1)
+   (zero_extend:X (match_dup 2
+   (set (match_dup 0) (eq:X (match_dup 0) (const_int 0)))]
+  "operands[1] = gen_lowpart (word_mode, operands[1]);"
+  [(set_attr "type" "bitmanip")])
+
 ;; When performing `(a & (1UL << bitno)) ? 0 : -1` the combiner
 ;; usually has the `bitno` typed as X-mode (i.e. no further
 ;; zero-extension is performed around the bitno).
diff --git a/gcc/config/riscv/j b/gcc/config/riscv/j
new file mode 100644
index 000..e69de29bb2d
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bext-2.c 
b/gcc/testsuite/gcc.target/riscv/zbs-bext-2.c
new file mode 100644
index 000..79f120b2286
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-bext-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+
+_Bool match(const int ch, int fMap) {
+return ((fMap & (1<<(ch))) == 0);
+}
+
+_Bool match2(const int ch, int fMap) {
+return ((fMap & (1UL<<(ch))) == 0);
+}
+
+
+/* { dg-final { scan-assembler-times "bext\t" 2 } } */
+/* { dg-final { scan-assembler-times "seqz\t|xori\t" 2 } } */
+/* { dg-final { scan-assembler-not "sraw\t" } } */
+/* { dg-final { scan-assembler-not "not\t" } } */
+/* { dg-final { scan-assembler-not "andi\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-bext.c 
b/gcc/testsuite/gcc.target/riscv/zbs-

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Bugfix ICE for RVV intrinsic vfw on _Float16 scalar

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:7bee3e55f02b3ae55128b22db20d0030cbeb745e

commit 7bee3e55f02b3ae55128b22db20d0030cbeb745e
Author: Pan Li 
Date:   Sat May 11 15:25:28 2024 +0800

RISC-V: Bugfix ICE for RVV intrinsic vfw on _Float16 scalar

For the vfw vx format RVV intrinsic, the scalar type _Float16 also
requires the zvfh extension.  Unfortunately,  we only check the
vector tree type and miss the scalar _Float16 type checking.  For
example:

vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t 
vl)
{
  return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl);
}

It should report some error message like zvfh extension is required
instead of ICE for unreg insn.

This patch would like to make up such kind of validation for _Float16
in the RVV intrinsic API.  It will report some error like below when
there is no zvfh enabled.

error: built-in function '__riscv_vfwsub_wf_f32mf2(vs2,  rs1,  vl)'
  requires the zvfhmin or zvfh ISA extension

Passed the rv64gcv fully regression tests, included c/c++/fortran.

PR target/114988

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc
(validate_instance_type_required_extensions): New func impl to
validate the intrinsic func type ops.
(expand_builtin): Validate instance type before expand.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr114988-1.c: New test.
* gcc.target/riscv/rvv/base/pr114988-2.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 41b3cf262e61aee9d26380f1c820e0eaae740f50)

Diff:
---
 gcc/config/riscv/riscv-vector-builtins.cc  | 51 ++
 .../gcc.target/riscv/rvv/base/pr114988-1.c |  9 
 .../gcc.target/riscv/rvv/base/pr114988-2.c |  9 
 3 files changed, 69 insertions(+)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 192a6c230d1..3fdb4400d70 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4632,6 +4632,54 @@ gimple_fold_builtin (unsigned int code, 
gimple_stmt_iterator *gsi, gcall *stmt)
   return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold ();
 }
 
+static bool
+validate_instance_type_required_extensions (const rvv_type_info type,
+   tree exp)
+{
+  uint64_t exts = type.required_extensions;
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_16) &&
+!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zvfhmin or zvfh ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_32) &&
+!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zve32f, zve64f, zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_FP_64) &&
+!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  if ((exts & RVV_REQUIRE_ELEN_64) &&
+!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
+{
+  error_at (EXPR_LOCATION (exp),
+   "built-in function %qE requires the "
+   "zve64x, zve64f, zve64d or v ISA extension",
+   exp);
+  return false;
+}
+
+  return true;
+}
+
 /* Expand a call to the RVV function with subcode CODE.  EXP is the call
expression and TARGET is the preferred location for the result.
Return the value of the lhs.  */
@@ -4649,6 +4697,9 @@ expand_builtin (unsigned int code, tree exp, rtx target)
   return target;
 }
 
+  if (!validate_instance_type_required_extensions (rfn.instance.type, exp))
+return target;
+
   return function_expander (rfn.instance, rfn.decl, exp, target).expand ();
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c
new file mode 100644
index 000..b8474804c88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */
+
+#include "riscv_vector.h"
+
+vfloat32mf2_t test_vfwsub_wf_f32mf2(vfloat32mf2_t vs2, _Float16 rs1, size_t vl)
+{
+  return __riscv_vfwsub_wf_f32mf2(vs2, rs1, vl); /* { dg-error {built-in 
function '__riscv_vfwsub_wf_f32mf2\(vs2,  rs1,  vl\)' requires the zvfhmin or 
zvfh ISA extension} } */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr114988-2.c
ne

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix format issue for trailing operator [NFC]

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:4bcc64ea9e0232535a3fd9a2ee41775e78803244

commit 4bcc64ea9e0232535a3fd9a2ee41775e78803244
Author: Pan Li 
Date:   Tue May 14 09:38:55 2024 +0800

RISC-V: Fix format issue for trailing operator [NFC]

This patch would like to fix below format issue of trailing operator.

=== ERROR type #1: trailing operator (4 error(s)) ===
gcc/config/riscv/riscv-vector-builtins.cc:4641:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_16) &&
gcc/config/riscv/riscv-vector-builtins.cc:4651:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_32) &&
gcc/config/riscv/riscv-vector-builtins.cc:4661:39:  if ((exts &
RVV_REQUIRE_ELEN_FP_64) &&
gcc/config/riscv/riscv-vector-builtins.cc:4670:36:  if ((exts &
RVV_REQUIRE_ELEN_64) &&

Passed the ./contrib/check_GNU_style.sh for this patch,  and double
checked there is no other format issue of the original patch.

Committed as format change.

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins.cc
(validate_instance_type_required_extensions): Remove the
operator from the trailing and put it to new line.

Signed-off-by: Pan Li 
(cherry picked from commit b6dc8464e613d1da2b28235bbd2f9c3fd4bc386b)

Diff:
---
 gcc/config/riscv/riscv-vector-builtins.cc | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 3fdb4400d70..c08d87a2680 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -4638,8 +4638,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
 {
   uint64_t exts = type.required_extensions;
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_16) &&
-!TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_16)
+&& !TARGET_VECTOR_ELEN_FP_16_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "
@@ -4648,8 +4648,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_32) &&
-!TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_32)
+&& !TARGET_VECTOR_ELEN_FP_32_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "
@@ -4658,8 +4658,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_FP_64) &&
-!TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_FP_64)
+&& !TARGET_VECTOR_ELEN_FP_64_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the zve64d or v ISA extension",
@@ -4667,8 +4667,8 @@ validate_instance_type_required_extensions (const 
rvv_type_info type,
   return false;
 }
 
-  if ((exts & RVV_REQUIRE_ELEN_64) &&
-!TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
+  if ((exts & RVV_REQUIRE_ELEN_64)
+&& !TARGET_VECTOR_ELEN_64_P (riscv_vector_elen_flags))
 {
   error_at (EXPR_LOCATION (exp),
"built-in function %qE requires the "


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed, RISC-V] Improve AND with some constants

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:2d309988ce6388f5a4531a768ba85a983dc50535

commit 2d309988ce6388f5a4531a768ba85a983dc50535
Author: Jeff Law 
Date:   Mon May 13 17:37:46 2024 -0600

[to-be-committed,RISC-V] Improve AND with some constants

If we have an AND with a constant operand and the constant operand
requires synthesis, then we may be able to generate more efficient code
than we do now.

Essentially the need for constant synthesis gives us a budget for
alternative ways to clear bits, which zext.w can do for bits 32..63
trivially.   So if we clear 32..63  via zext.w, the constant for the
remaining bits to clear may be simple enough to use with andi or bseti.
That will save us an instruction.

This has tested in Ventana's CI system as well as my own.  I'll wait for
the upstream CI tester to report success before committing.

Jeff
gcc/
* config/riscv/bitmanip.md: Add new splitter for AND with
a constant that masks off bits 32..63 and needs synthesis.

gcc/testsuite/

* gcc.target/riscv/zba_zbs_and-1.c: New test.

(cherry picked from commit 158aa1b65ce29d5e58182782de66292c51774d71)

Diff:
---
 gcc/config/riscv/bitmanip.md   | 34 ++
 gcc/testsuite/gcc.target/riscv/zba_zbs_and-1.c | 22 +
 2 files changed, 56 insertions(+)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 724511b6df3..8769a6b818b 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -843,6 +843,40 @@
 }
 [(set_attr "type" "bitmanip")])
 
+;; If we have the ZBA extension, then we can clear the upper half of a 64
+;; bit object with a zext.w.  So if we have AND where the constant would
+;; require synthesis of two or more instructions, but 32->64 sign extension
+;; of the constant is a simm12, then we can use zext.w+andi.  If the adjusted
+;; constant is a single bit constant, then we can use zext.w+bclri
+;;
+;; With the mvconst_internal pattern claiming a single insn to synthesize
+;; constants, this must be a define_insn_and_split.
+(define_insn_and_split ""
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (and:DI (match_operand:DI 1 "register_operand" "r")
+   (match_operand 2 "const_int_operand" "n")))]
+  "TARGET_64BIT
+   && TARGET_ZBA
+   && !paradoxical_subreg_p (operands[1])
+   /* Only profitable if synthesis takes more than one insn.  */
+   && riscv_const_insns (operands[2]) != 1
+   /* We need the upper half to be zero.  */
+   && (INTVAL (operands[2]) & HOST_WIDE_INT_C (0x)) == 0
+   /* And the the adjusted constant must either be something we can
+  implement with andi or bclri.  */
+   && ((SMALL_OPERAND (sext_hwi (INTVAL (operands[2]), 32))
+|| (TARGET_ZBS && popcount_hwi (INTVAL (operands[2])) == 31))
+   && INTVAL (operands[2]) != 0x7fff)"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (zero_extend:DI (match_dup 3)))
+   (set (match_dup 0) (and:DI (match_dup 0) (match_dup 2)))]
+  "{
+ operands[3] = gen_lowpart (SImode, operands[1]);
+ operands[2] = GEN_INT (sext_hwi (INTVAL (operands[2]), 32));
+   }"
+  [(set_attr "type" "bitmanip")])
+
 ;; IF_THEN_ELSE: test for 2 bits of opposite polarity
 (define_insn_and_split "*branch_mask_twobits_equals_singlebit"
   [(set (pc)
diff --git a/gcc/testsuite/gcc.target/riscv/zba_zbs_and-1.c 
b/gcc/testsuite/gcc.target/riscv/zba_zbs_and-1.c
new file mode 100644
index 000..23fd769449e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zba_zbs_and-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+
+unsigned long long w32mem_1(unsigned long long w32)
+{
+return w32 & ~(1U << 0);
+}
+
+unsigned long long w32mem_2(unsigned long long w32)
+{
+return w32 & ~(1U << 30);
+}
+
+unsigned long long w32mem_3(unsigned long long w32)
+{
+return w32 & ~(1U << 31);
+}
+
+/* If we do synthesis, then we'd see an addi.  */
+/* { dg-final { scan-assembler-not "addi\t" } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [1/3] expr: Export clear_by_pieces()

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:94b1fb81dd8f3db24b6db5476bbaa60db81014bd

commit 94b1fb81dd8f3db24b6db5476bbaa60db81014bd
Author: Christoph Müllner 
Date:   Tue May 14 09:19:13 2024 -0600

[1/3] expr: Export clear_by_pieces()

Make clear_by_pieces() available to other parts of the compiler,
similar to store_by_pieces().

gcc/ChangeLog:

* expr.cc (clear_by_pieces): Remove static from clear_by_pieces.
* expr.h (clear_by_pieces): Add prototype for clear_by_pieces.

(cherry picked from commit e6e41b68fd805ab126895a20bb9670442b198f62)

Diff:
---
 gcc/expr.cc | 6 +-
 gcc/expr.h  | 5 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 9f66d479445..1baa39b98eb 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -85,7 +85,6 @@ static void emit_block_move_via_sized_loop (rtx, rtx, rtx, 
unsigned, unsigned);
 static void emit_block_move_via_oriented_loop (rtx, rtx, rtx, unsigned, 
unsigned);
 static rtx emit_block_cmp_via_loop (rtx, rtx, rtx, tree, rtx, bool,
unsigned, unsigned);
-static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
 static rtx_insn *compress_float_constant (rtx, rtx);
 static rtx get_subtarget (rtx);
 static rtx store_field (rtx, poly_int64, poly_int64, poly_uint64, poly_uint64,
@@ -1840,10 +1839,7 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
 return to;
 }
 
-/* Generate several move instructions to clear LEN bytes of block TO.  (A MEM
-   rtx with BLKmode).  ALIGN is maximum alignment we can assume.  */
-
-static void
+void
 clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
 {
   if (len == 0)
diff --git a/gcc/expr.h b/gcc/expr.h
index 64956f63029..75181584108 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -245,6 +245,11 @@ extern bool can_store_by_pieces (unsigned HOST_WIDE_INT,
 extern rtx store_by_pieces (rtx, unsigned HOST_WIDE_INT, by_pieces_constfn,
void *, unsigned int, bool, memop_ret);
 
+/* Generate several move instructions to clear LEN bytes of block TO.  (A MEM
+   rtx with BLKmode).  ALIGN is maximum alignment we can assume.  */
+
+extern void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
+
 /* If can_store_by_pieces passes for worst-case values near MAX_LEN, call
store_by_pieces within conditionals so as to handle variable LEN 
efficiently,
storing VAL, if non-NULL_RTX, or valc instead.  */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 2/3] RISC-V: testsuite: Make cmo tests LTO safe

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:547adc910bdec0abc590444fe2d5d1eead4358a6

commit 547adc910bdec0abc590444fe2d5d1eead4358a6
Author: Christoph Müllner 
Date:   Tue May 14 09:20:18 2024 -0600

[PATCH 2/3] RISC-V: testsuite: Make cmo tests LTO safe

Let's add '\t' to the instruction match pattern to avoid false positive
matches when compiling with -flto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-zicbom-1.c: Add \t to test pattern.
* gcc.target/riscv/cmo-zicbom-2.c: Likewise.
* gcc.target/riscv/cmo-zicbop-1.c: Likewise.
* gcc.target/riscv/cmo-zicbop-2.c: Likewise.
* gcc.target/riscv/cmo-zicboz-1.c: Likewise.
* gcc.target/riscv/cmo-zicboz-2.c: Likewise.

(cherry picked from commit 21855f960141c1811d6a5f6ad3b2065f20d4b353)

Diff:
---
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c | 6 +++---
 gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c | 6 +++---
 gcc/testsuite/gcc.target/riscv/cmo-zicbop-1.c | 6 +++---
 gcc/testsuite/gcc.target/riscv/cmo-zicbop-2.c | 6 +++---
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c | 2 +-
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c | 2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
index 6341f7874d3..02c38e201fa 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-1.c
@@ -24,6 +24,6 @@ void foo3()
 __builtin_riscv_zicbom_cbo_inval((void*)0x111);
 }
 
-/* { dg-final { scan-assembler-times "cbo.clean" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.flush" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.inval" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c
index a04f106c8b0..040b96952bc 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicbom-2.c
@@ -24,6 +24,6 @@ void foo3()
 __builtin_riscv_zicbom_cbo_inval((void*)0x111);
 }
 
-/* { dg-final { scan-assembler-times "cbo.clean" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.flush" 3 } } */
-/* { dg-final { scan-assembler-times "cbo.inval" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.clean\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.flush\t" 3 } } */
+/* { dg-final { scan-assembler-times "cbo.inval\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicbop-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicbop-1.c
index c5d78c1763d..97181154d85 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicbop-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicbop-1.c
@@ -18,6 +18,6 @@ int foo1()
   return __builtin_riscv_zicbop_cbo_prefetchi(1);
 }
 
-/* { dg-final { scan-assembler-times "prefetch.i" 1 } } */
-/* { dg-final { scan-assembler-times "prefetch.r" 4 } } */
-/* { dg-final { scan-assembler-times "prefetch.w" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
+/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicbop-2.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicbop-2.c
index 6576365b39c..4871a97b21a 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicbop-2.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicbop-2.c
@@ -18,6 +18,6 @@ int foo1()
   return __builtin_riscv_zicbop_cbo_prefetchi(1);
 }
 
-/* { dg-final { scan-assembler-times "prefetch.i" 1 } } */
-/* { dg-final { scan-assembler-times "prefetch.r" 4 } } */
-/* { dg-final { scan-assembler-times "prefetch.w" 4 } } */ 
+/* { dg-final { scan-assembler-times "prefetch.i\t" 1 } } */
+/* { dg-final { scan-assembler-times "prefetch.r\t" 4 } } */
+/* { dg-final { scan-assembler-times "prefetch.w\t" 4 } } */ 
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c
index 5eb78ab94b5..63b8782bf89 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-1.c
@@ -10,4 +10,4 @@ void foo1()
 __builtin_riscv_zicboz_cbo_zero((void*)0x121);
 }
 
-/* { dg-final { scan-assembler-times "cbo.zero" 3 } } */ 
+/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */ 
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c
index fdc9c719669..cc3bd505ec0 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-2.c
@@ -10,4 +10,4 @@ void foo1()
 __builtin_riscv_zicboz_cbo_zero((void*)0x121);
 }
 
-/* { dg-final { scan-assembler-times "cbo.zero" 3 } } */ 
+/* { dg-final { scan-assembler-times "cbo.zero\t" 3 } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 3/3] RISC-V: Add memset-zero expansion to cbo.zero

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:80bf573afa3709c9716ead134480872bf88318ca

commit 80bf573afa3709c9716ead134480872bf88318ca
Author: Christoph Müllner 
Date:   Tue May 14 09:21:17 2024 -0600

[PATCH 3/3] RISC-V: Add memset-zero expansion to cbo.zero

The Zicboz extension offers the cbo.zero instruction, which can be used
to zero a memory region corresponding to a cache block.
The Zic64b extension defines the cache block size to 64 byte.
If both extensions are available, it is possible to use cbo.zero
to clear memory, if the alignment and size constraints are met.
This patch implements this.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_block_clear): New 
prototype.
* config/riscv/riscv-string.cc 
(riscv_expand_block_clear_zicboz_zic64b):
New function to expand a block-clear with cbo.zero.
(riscv_expand_block_clear): New RISC-V block-clear expansion 
function.
* config/riscv/riscv.md (setmem): New setmem expansion.

(cherry picked from commit 54ba8d44bbd703bca6984700b4d6f978890097e2)

Diff:
---
 gcc/config/riscv/riscv-protos.h|  1 +
 gcc/config/riscv/riscv-string.cc   | 59 ++
 gcc/config/riscv/riscv.md  | 24 +
 .../gcc.target/riscv/cmo-zicboz-zic64-1.c  | 43 
 4 files changed, 127 insertions(+)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e5aebf3fc3d..255fd6a0de9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -189,6 +189,7 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
+extern bool riscv_expand_block_clear (rtx, rtx);
 
 /* Information about one CPU we know about.  */
 struct riscv_cpu_info {
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 41cb061c746..87f5fdee3c1 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -794,6 +794,65 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
   return false;
 }
 
+/* Expand a block-clear instruction via cbo.zero instructions.  */
+
+static bool
+riscv_expand_block_clear_zicboz_zic64b (rtx dest, rtx length)
+{
+  unsigned HOST_WIDE_INT hwi_length;
+  unsigned HOST_WIDE_INT align;
+  const unsigned HOST_WIDE_INT cbo_bytes = 64;
+
+  gcc_assert (TARGET_ZICBOZ && TARGET_ZIC64B);
+
+  if (!CONST_INT_P (length))
+return false;
+
+  hwi_length = UINTVAL (length);
+  if (hwi_length < cbo_bytes)
+return false;
+
+  align = MEM_ALIGN (dest) / BITS_PER_UNIT;
+  if (align < cbo_bytes)
+return false;
+
+  /* We don't emit loops.  Instead apply move-bytes limitation.  */
+  unsigned HOST_WIDE_INT max_bytes = RISCV_MAX_MOVE_BYTES_STRAIGHT /
+ UNITS_PER_WORD * cbo_bytes;
+  if (hwi_length > max_bytes)
+return false;
+
+  unsigned HOST_WIDE_INT offset = 0;
+  while (offset + cbo_bytes <= hwi_length)
+{
+  rtx mem = adjust_address (dest, BLKmode, offset);
+  rtx addr = force_reg (Pmode, XEXP (mem, 0));
+  emit_insn (gen_riscv_zero_di (addr));
+  offset += cbo_bytes;
+}
+
+  if (offset < hwi_length)
+{
+  rtx mem = adjust_address (dest, BLKmode, offset);
+  clear_by_pieces (mem, hwi_length - offset, align);
+}
+
+  return true;
+}
+
+bool
+riscv_expand_block_clear (rtx dest, rtx length)
+{
+  /* Only use setmem-zero expansion for Zicboz + Zic64b.  */
+  if (!TARGET_ZICBOZ || !TARGET_ZIC64B)
+return false;
+
+  if (optimize_function_for_size_p (cfun))
+return false;
+
+  return riscv_expand_block_clear_zicboz_zic64b (dest, length);
+}
+
 /* --- Vector expanders --- */
 
 namespace riscv_vector {
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 4d6de992557..c45b1129b0a 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2608,6 +2608,30 @@
 FAIL;
 })
 
+;; Fill memory with constant byte.
+;; Argument 0 is the destination
+;; Argument 1 is the constant byte
+;; Argument 2 is the length
+;; Argument 3 is the alignment
+
+(define_expand "setmem"
+  [(parallel [(set (match_operand:BLK 0 "memory_operand")
+  (match_operand:QI 2 "const_int_operand"))
+ (use (match_operand:P 1 ""))
+ (use (match_operand:SI 3 "const_int_operand"))])]
+ ""
+ {
+  /* If value to set is not zero, use the library routine.  */
+  if (operands[2] != const0_rtx)
+FAIL;
+
+  if (riscv_expand_block_clear (operands[0], operands[1]))
+DONE;
+  else
+FAIL;
+})
+
+
 ;; Expand in-line code to clear the instruction cache between operand[0] and
 ;; operand[1].
 (define_expand "clear_cache"
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
new file mode 100644
index 000..c2d79eb7ae6
--- /d

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: avoid LUI based const materialization ... [part of PR/106265]

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b80fa21091b28877984b9cc0cabefb2b5687a07e

commit b80fa21091b28877984b9cc0cabefb2b5687a07e
Author: Vineet Gupta 
Date:   Mon May 13 11:45:55 2024 -0700

RISC-V: avoid LUI based const materialization ... [part of PR/106265]

... if the constant can be represented as sum of two S12 values.
The two S12 values could instead be fused with subsequent ADD insn.
This helps
 - avoid an additional LUI insn
 - side benefits of not clobbering a reg

e.g.
w/o patch w/ patch
long  | |
plus(unsigned long i) | li  a5,4096 |
{ | addia5,a5,-2032 | addi a0, a0, 2047
   return i + 2064;   | add a0,a0,a5| addi a0, a0, 17
} | ret | ret

NOTE: In theory not having const in a standalone reg might seem less
  CSE friendly, but for workloads in consideration these mat are
  from very late LRA reloads and follow on GCSE is not doing much
  currently.

The real benefit however is seen in base+offset computation for array
accesses and especially for stack accesses which are finalized late in
optim pipeline, during LRA register allocation. Often the finalized
offsets trigger LRA reloads resulting in mind boggling repetition of
exact same insn sequence including LUI based constant materialization.

This shaves off 290 billion dynamic instructions (QEMU icounts) in
SPEC 2017 Cactu benchmark which is over 10% of workload. In the rest of
suite, there are an additional 10 billion shaved, with both gains and losses
in indiv workloads as is usual with compiler changes.

 500.perlbench_r-0 |  1,214,534,029,025 | 1,212,887,959,387 |
 500.perlbench_r-1 |740,383,419,739 |   739,280,308,163 |
 500.perlbench_r-2 |692,074,638,817 |   691,118,734,547 |
 502.gcc_r-0   |190,820,141,435 |   190,857,065,988 |
 502.gcc_r-1   |225,747,660,839 |   225,809,444,357 | <- -0.02%
 502.gcc_r-2   |220,370,089,641 |   220,406,367,876 | <- -0.03%
 502.gcc_r-3   |179,111,460,458 |   179,135,609,723 | <- -0.02%
 502.gcc_r-4   |219,301,546,340 |   219,320,416,956 | <- -0.01%
 503.bwaves_r-0|278,733,324,691 |   278,733,323,575 | <- -0.01%
 503.bwaves_r-1|442,397,521,282 |   442,397,519,616 |
 503.bwaves_r-2|344,112,218,206 |   344,112,216,760 |
 503.bwaves_r-3|417,561,469,153 |   417,561,467,597 |
 505.mcf_r |669,319,257,525 |   669,318,763,084 |
 507.cactuBSSN_r   |  2,852,767,394,456 | 2,564,736,063,742 | <+ 10.10%
 508.namd_r|  1,855,884,342,110 | 1,855,881,110,934 |
 510.parest_r  |  1,654,525,521,053 | 1,654,402,859,174 |
 511.povray_r  |  2,990,146,655,619 | 2,990,060,324,589 |
 519.lbm_r |  1,158,337,294,525 | 1,158,337,294,529 |
 520.omnetpp_r |  1,021,765,791,283 | 1,026,165,661,394 |
 521.wrf_r |  1,715,955,652,503 | 1,714,352,737,385 |
 523.xalancbmk_r   |849,846,008,075 |   849,836,851,752 |
 525.x264_r-0  |277,801,762,763 |   277,488,776,427 |
 525.x264_r-1  |927,281,789,540 |   926,751,516,742 |
 525.x264_r-2  |915,352,631,375 |   914,667,785,953 |
 526.blender_r |  1,652,839,180,887 | 1,653,260,825,512 |
 527.cam4_r|  1,487,053,494,925 | 1,484,526,670,770 |
 531.deepsjeng_r   |  1,641,969,526,837 | 1,642,126,598,866 |
 538.imagick_r |  2,098,016,546,691 | 2,097,997,929,125 |
 541.leela_r   |  1,983,557,323,877 | 1,983,531,314,526 |
 544.nab_r |  1,516,061,611,233 | 1,516,061,407,715 |
 548.exchange2_r   |  2,072,594,330,215 | 2,072,591,648,318 |
 549.fotonik3d_r   |  1,001,499,307,366 | 1,001,478,944,189 |
 554.roms_r|  1,028,799,739,111 | 1,028,780,904,061 |
 557.xz_r-0|363,827,039,684 |   363,057,014,260 |
 557.xz_r-1|906,649,112,601 |   905,928,888,732 |
 557.xz_r-2|509,023,898,187 |   508,140,356,932 |
 997.specrand_fr   |402,535,577 |   403,052,561 |
 999.specrand_ir   |402,535,577 |   403,052,561 |

This should still be considered damage control as the real/deeper fix
would be to reduce number of LRA reloads or CSE/anchor those during
LRA constraint sub-pass (re)runs (that's a different PR/114729).

Implementation Details (for posterity)
--
 - basic idea is to have a splitter selected via a new predicate for 
constant
   being a possible sum of two S12 and provide the transform.
   This is however a 2 -> 2 transform which combine can't handle.
   So we specify it using a define_insn_and_split.

 - the initial loose "i" constraint caused LRA to accept invalid insns thus
  

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed, RISC-V] Remove redundant AND in shift-add sequence

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:1db555ae358fd9a98e98c47e8f4a3cfbea155da4

commit 1db555ae358fd9a98e98c47e8f4a3cfbea155da4
Author: Jeff Law 
Date:   Tue May 14 18:17:59 2024 -0600

[to-be-committed,RISC-V] Remove redundant AND in shift-add sequence

So this patch allows us to eliminate a redundant AND in some shift-add
style sequences.   I think the testcase was reduced from xz by the RAU
team, but I'm not highly confident of that.

Specifically the AND is masking off the upper 32 bits of the un-shifted
value and there's an outer SIGN_EXTEND from SI to DI.  However in the
RTL it's working on the post-shifted value, so the constant is left
shifted, so we have to account for that in the pattern's condition.

We can just drop the AND in this case.  So instead we do a 64bit shift,
then a sign extending ADD utilizing the low part of that 64bit shift result.

This has run through Ventana's CI as well as my own.  I'll wait for it
to run through the larger CI system before pushing.

Jeff

gcc/
* config/riscv/riscv.md: Add pattern for sign extended shift-add
sequence with a masked input.

gcc/testsuite

* gcc.target/riscv/shift-add-2.c: New test.

(cherry picked from commit 32ff344d57d56fddb66c4976b5651345d40b7157)

Diff:
---
 gcc/config/riscv/riscv.md| 25 +
 gcc/testsuite/gcc.target/riscv/shift-add-2.c | 16 
 2 files changed, 41 insertions(+)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 893040f2854..ee15c63db10 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4120,6 +4120,31 @@
   [(set_attr "type" "load")
(set (attr "length") (const_int 8))])
 
+;; The AND is redunant here.  It always turns off the high 32 bits  and the
+;; low number of bits equal to the shift count.  Those upper 32 bits will be
+;; reset by the SIGN_EXTEND at the end.
+;;
+;; One could argue combine should have realized this and simplified what it
+;; presented to the backend.  But we can obviously cope with what it gave us.
+(define_insn_and_split ""
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (sign_extend:DI
+ (plus:SI (subreg:SI
+(and:DI
+  (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand 2 "const_int_operand" "n"))
+  (match_operand 3 "const_int_operand" "n")) 0)
+  (match_operand:SI 4 "register_operand" "r"
+   (clobber (match_scratch:DI 5 "=&r"))]
+  "TARGET_64BIT
+   && (INTVAL (operands[3]) | ((1 << INTVAL (operands[2])) - 1)) == 0x"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 5) (ashift:DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 6) (match_dup 4]
+  "{ operands[6] = gen_lowpart (SImode, operands[5]); }"
+  [(set_attr "type" "arith")])
+
 (include "bitmanip.md")
 (include "crypto.md")
 (include "sync.md")
diff --git a/gcc/testsuite/gcc.target/riscv/shift-add-2.c 
b/gcc/testsuite/gcc.target/riscv/shift-add-2.c
new file mode 100644
index 000..87439858e59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-add-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+int sub2(int a, long long b) {
+  b = (b << 32) >> 31;
+  unsigned int x = a + b;
+  return x;
+}
+
+
+/* { dg-final { scan-assembler-times "\tslli\t" 1 } } */
+/* { dg-final { scan-assembler-times "\taddw\t" 1 } } */
+/* { dg-final { scan-assembler-not "\tsrai\t" } } */
+/* { dg-final { scan-assembler-not "\tsh.add\t" } } */
+


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] Fix rv32 issues with recent zicboz work

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:184fca91cc3131368c0bfe2ce84ffa835d8ef08e

commit 184fca91cc3131368c0bfe2ce84ffa835d8ef08e
Author: Jeff Law 
Date:   Tue May 14 22:50:15 2024 -0600

[committed] Fix rv32 issues with recent zicboz work

I should have double-checked the CI system before pushing Christoph's 
patches
for memset-zero.  While I thought I'd checked CI state, I must have been
looking at the wrong patch from Christoph.

Anyway, this fixes the rv32 ICEs and disables one of the tests for rv32.

The test would need a revamp for rv32 as the expected output is all rv64 
code
using "sd" instructions.  I'm just not vested deeply enough into rv32 to 
adjust
the test to work in that environment though it should be fairly trivial to 
copy
the test and provide new expected output if someone cares enough.

Verified this fixes the rv32 failures in my tester:
> New tests that FAIL (6 tests):
>
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O1  
(internal compiler error: in extract_insn, at recog.cc:2812)
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O1  
(test for excess errors)
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O2  
(internal compiler error: in extract_insn, at recog.cc:2812)
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O2  
(test for excess errors)
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O3 -g  
(internal compiler error: in extract_insn, at recog.cc:2812)
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O3 -g  
(test for excess errors)

And after the ICE is fixed, these are eliminated by only running the test 
for
rv64:

> New tests that FAIL (3 tests):
>
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O1   
check-function-bodies clear_buf_123
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O2   
check-function-bodies clear_buf_123
> unix/-march=rv32gcv: gcc: gcc.target/riscv/cmo-zicboz-zic64-1.c   -O3 -g  
 check-function-bodies clear_buf_123

gcc/
* config/riscv/riscv-string.cc
(riscv_expand_block_clear_zicboz_zic64b): Handle rv32 correctly.

gcc/testsuite

* gcc.target/riscv/cmo-zicboz-zic64-1.c: Don't run on rv32.

(cherry picked from commit e410ad74e5e4589aeb666aa298b2f933e7b5d9e7)

Diff:
---
 gcc/config/riscv/riscv-string.cc| 5 -
 gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c | 3 +--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 87f5fdee3c1..b515f44d17a 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -827,7 +827,10 @@ riscv_expand_block_clear_zicboz_zic64b (rtx dest, rtx 
length)
 {
   rtx mem = adjust_address (dest, BLKmode, offset);
   rtx addr = force_reg (Pmode, XEXP (mem, 0));
-  emit_insn (gen_riscv_zero_di (addr));
+  if (TARGET_64BIT)
+   emit_insn (gen_riscv_zero_di (addr));
+  else
+   emit_insn (gen_riscv_zero_si (addr));
   offset += cbo_bytes;
 }
 
diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
index c2d79eb7ae6..6d4535287d0 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
@@ -1,6 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zic64b_zicboz" { target { rv64 } } } */
-/* { dg-options "-march=rv32gc_zic64b_zicboz" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zic64b_zicboz -mabi=lp64d" } */
 /* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
 /* { dg-final { check-function-bodies "**" "" } } */
 /* { dg-allow-blank-lines-in-output 1 } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Allow unaligned accesses in cpymemsi expansion

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:242ebff4075f6dde512f75be4b3e4d0b880dd080

commit 242ebff4075f6dde512f75be4b3e4d0b880dd080
Author: Christoph Müllner 
Date:   Wed May 1 18:50:38 2024 +0200

RISC-V: Allow unaligned accesses in cpymemsi expansion

The RISC-V cpymemsi expansion is called whenever the by-pieces
infrastructure will not take care of the builtin expansion.
The by-pieces infrastructure may emit code that
includes unaligned accesses if riscv_slow_unaligned_access_p
is false.

The RISC-V cpymemsi expansion is handled via riscv_expand_block_move().
The current implementation of this function does not check
riscv_slow_unaligned_access_p and never emits unaligned accesses.

Since by-pieces emits unaligned accesses, it is reasonable to implement
the same behaviour in the cpymemsi expansion. And that's what this patch
is doing.

The patch checks riscv_slow_unaligned_access_p at the entry and sets
the allowed alignment accordingly. This alignment is then propagated
down to the routines that emit the actual instructions.

The changes introduced by this patch can be seen in the adjustments
of the cpymem tests.

gcc/ChangeLog:

* config/riscv/riscv-string.cc (riscv_block_move_straight): Add
parameter align.
(riscv_adjust_block_mem): Replace parameter length by align.
(riscv_block_move_loop): Add parameter align.
(riscv_expand_block_move_scalar): Set alignment properly if the
target has fast unaligned access.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cpymem-32-ooo.c: Adjust for unaligned access.
* gcc.target/riscv/cpymem-64-ooo.c: Likewise.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 04cd8ccaec90405ccf7471252c0e06ba7f5437dc)

Diff:
---
 gcc/config/riscv/riscv-string.cc   | 54 --
 gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c | 20 +++---
 gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c | 14 ++-
 3 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index b515f44d17a..b6cd7032356 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -617,11 +617,13 @@ riscv_expand_strlen (rtx result, rtx src, rtx 
search_char, rtx align)
   return false;
 }
 
-/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+/* Emit straight-line code to move LENGTH bytes from SRC to DEST
+   with accesses that are ALIGN bytes aligned.
Assume that the areas do not overlap.  */
 
 static void
-riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
+riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
+  unsigned HOST_WIDE_INT align)
 {
   unsigned HOST_WIDE_INT offset, delta;
   unsigned HOST_WIDE_INT bits;
@@ -629,8 +631,7 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned 
HOST_WIDE_INT length)
   enum machine_mode mode;
   rtx *regs;
 
-  bits = MAX (BITS_PER_UNIT,
- MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest;
+  bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align));
 
   mode = mode_for_size (bits, MODE_INT, 0).require ();
   delta = bits / BITS_PER_UNIT;
@@ -655,21 +656,20 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned 
HOST_WIDE_INT length)
 {
   src = adjust_address (src, BLKmode, offset);
   dest = adjust_address (dest, BLKmode, offset);
-  move_by_pieces (dest, src, length - offset,
- MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
+  move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN);
 }
 }
 
 /* Helper function for doing a loop-based block operation on memory
-   reference MEM.  Each iteration of the loop will operate on LENGTH
-   bytes of MEM.
+   reference MEM.
 
Create a new base register for use within the loop and point it to
the start of MEM.  Create a new memory reference that uses this
-   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
+   register and has an alignment of ALIGN.  Store them in *LOOP_REG
+   and *LOOP_MEM respectively.  */
 
 static void
-riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
+riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT align,
rtx *loop_reg, rtx *loop_mem)
 {
   *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
@@ -677,15 +677,17 @@ riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT 
length,
   /* Although the new mem does not refer to a known location,
  it does keep up to LENGTH bytes of alignment.  */
   *loop_mem = change_address (mem, BLKmode, *loop_reg);
-  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+  set_mem_align (*loop_mem, align);
 }
 
 /* Move LENGTH bytes from SRC to 

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add test cases for cpymem expansion

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:3993646dcfd9fe06eaca5d0252589487444c7ef5

commit 3993646dcfd9fe06eaca5d0252589487444c7ef5
Author: Christoph Müllner 
Date:   Wed May 1 16:54:42 2024 +0200

RISC-V: Add test cases for cpymem expansion

We have two mechanisms in the RISC-V backend that expand
the cpymem pattern: a) by-pieces, b) riscv_expand_block_move()
in riscv-string.cc. The by-pieces framework has higher priority
and emits a sequence of up to 15 instructions
(see use_by_pieces_infrastructure_p() for more details).

As a rule-of-thumb, by-pieces emits alternating load/store sequences
and the setmem expansion in the backend emits a sequence of loads
followed by a sequence of stores.

Let's add some test cases to document the current behaviour
and to have tests to identify regressions.

Signed-off-by: Christoph Müllner 

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cpymem-32-ooo.c: New test.
* gcc.target/riscv/cpymem-32.c: New test.
* gcc.target/riscv/cpymem-64-ooo.c: New test.
* gcc.target/riscv/cpymem-64.c: New test.

(cherry picked from commit 00029408387e9cc64e135324c22d15cd5a70e946)

Diff:
---
 gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c | 131 +++
 gcc/testsuite/gcc.target/riscv/cpymem-32.c | 138 +
 gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c | 129 +++
 gcc/testsuite/gcc.target/riscv/cpymem-64.c | 138 +
 4 files changed, 536 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c 
b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
new file mode 100644
index 000..33fb9891d82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -0,0 +1,131 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -mtune=generic-ooo" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* { dg-allow-blank-lines-in-output 1 } */
+
+#define COPY_N(N)  \
+void copy_##N (void *to, void *from)   \
+{  \
+  __builtin_memcpy (to, from, N);  \
+}
+
+#define COPY_ALIGNED_N(N)  \
+void copy_aligned_##N (void *to, void *from)   \
+{  \
+  to = __builtin_assume_aligned(to, sizeof(long)); \
+  from = __builtin_assume_aligned(from, sizeof(long)); \
+  __builtin_memcpy (to, from, N);  \
+}
+
+/*
+**copy_7:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],6\([at][0-9]\)
+**sb\t[at][0-9],6\([at][0-9]\)
+**...
+*/
+COPY_N(7)
+
+/*
+**copy_aligned_7:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],6\([at][0-9]\)
+**sb\t[at][0-9],6\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(7)
+
+/*
+**copy_8:
+**...
+**lw\ta[0-9],0\(a[0-9]\)
+**sw\ta[0-9],0\(a[0-9]\)
+**...
+*/
+COPY_N(8)
+
+/*
+**copy_aligned_8:
+**...
+**lw\ta[0-9],0\(a[0-9]\)
+**sw\ta[0-9],0\(a[0-9]\)
+**...
+*/
+COPY_ALIGNED_N(8)
+
+/*
+**copy_11:
+**...
+**lbu\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],10\([at][0-9]\)
+**...
+**sb\t[at][0-9],0\([at][0-9]\)
+**...
+**sb\t[at][0-9],10\([at][0-9]\)
+**...
+*/
+COPY_N(11)
+
+/*
+**copy_aligned_11:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**...
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],10\([at][0-9]\)
+**sb\t[at][0-9],10\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(11)
+
+/*
+**copy_15:
+**...
+**(call|tail)\tmemcpy
+**...
+*/
+COPY_N(15)
+
+/*
+**copy_aligned_15:
+**...
+**lw\t[at][0-9],0\([at][0-9]\)
+**...
+**sw\t[at][0-9],0\([at][0-9]\)
+**...
+**lbu\t[at][0-9],14\([at][0-9]\)
+**sb\t[at][0-9],14\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(15)
+
+/*
+**copy_27:
+**...
+**(call|tail)\tmemcpy
+**...
+*/
+COPY_N(27)
+
+/*
+**copy_aligned_27:
+**...
+**lw\t[at][0-9],20\([at][0-9]\)
+**...
+**sw\t[at][0-9],20\([at][0-9]\)
+**...
+**lbu\t[at][0-9],26\([at][0-9]\)
+**sb\t[at][0-9],26\([at][0-9]\)
+**...
+*/
+COPY_ALIGNED_N(27)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32.c 
b/gcc/testsuite/gcc.target/riscv/cpymem-32.c
new file mode 100644
index 000..44ba14a1d51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32.c
@@ -0,0 +1,138 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv32 } */
+/* { dg-options "-march=rv32gc -mabi=ilp32d -mtune=rocket" } */
+/* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
/* { dg-final { check-function-bodies "**" "" } } */

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: add tests for overlapping mem ops

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:15af00b183cdc5a3884e35d733e9a2aad475a806

commit 15af00b183cdc5a3884e35d733e9a2aad475a806
Author: Christoph Müllner 
Date:   Mon Apr 29 03:06:52 2024 +0200

RISC-V: add tests for overlapping mem ops

A recent patch added the field overlap_op_by_pieces to the struct
riscv_tune_param, which is used by the TARGET_OVERLAP_OP_BY_PIECES_P()
hook. This hook is used by the by-pieces infrastructure to decide
if overlapping memory accesses should be emitted.

The changes in the expansion can be seen in the adjustments of the
cpymem test cases. These tests also reveal a limitation in the
RISC-V cpymem expansion that prevents this optimization as only
by-pieces cpymem expansions emit overlapping memory accesses.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cpymem-32-ooo.c: Adjust for overlapping
access.
* gcc.target/riscv/cpymem-64-ooo.c: Likewise.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 5814437b4fcc550697d6e286f49a2f8b108815bf)

Diff:
---
 gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c | 20 +++-
 gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c | 33 ++
 2 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c 
b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
index 946a773f77a..947d58c30fa 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -24,9 +24,8 @@ void copy_aligned_##N (void *to, void *from)  \
 **...
 **lw\t[at][0-9],0\([at][0-9]\)
 **sw\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],6\([at][0-9]\)
-**sb\t[at][0-9],6\([at][0-9]\)
+**lw\t[at][0-9],3\([at][0-9]\)
+**sw\t[at][0-9],3\([at][0-9]\)
 **...
 */
 COPY_N(7)
@@ -36,9 +35,8 @@ COPY_N(7)
 **...
 **lw\t[at][0-9],0\([at][0-9]\)
 **sw\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],6\([at][0-9]\)
-**sb\t[at][0-9],6\([at][0-9]\)
+**lw\t[at][0-9],3\([at][0-9]\)
+**sw\t[at][0-9],3\([at][0-9]\)
 **...
 */
 COPY_ALIGNED_N(7)
@@ -66,11 +64,10 @@ COPY_ALIGNED_N(8)
 **...
 **...
 **lw\t[at][0-9],0\([at][0-9]\)
-**...
 **sw\t[at][0-9],0\([at][0-9]\)
 **...
-**lbu\t[at][0-9],10\([at][0-9]\)
-**sb\t[at][0-9],10\([at][0-9]\)
+**lw\t[at][0-9],7\([at][0-9]\)
+**sw\t[at][0-9],7\([at][0-9]\)
 **...
 */
 COPY_N(11)
@@ -79,11 +76,10 @@ COPY_N(11)
 **copy_aligned_11:
 **...
 **lw\t[at][0-9],0\([at][0-9]\)
-**...
 **sw\t[at][0-9],0\([at][0-9]\)
 **...
-**lbu\t[at][0-9],10\([at][0-9]\)
-**sb\t[at][0-9],10\([at][0-9]\)
+**lw\t[at][0-9],7\([at][0-9]\)
+**sw\t[at][0-9],7\([at][0-9]\)
 **...
 */
 COPY_ALIGNED_N(11)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c 
b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
index 08a927b9483..108748690cd 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
@@ -24,9 +24,8 @@ void copy_aligned_##N (void *to, void *from)  \
 **...
 **lw\t[at][0-9],0\([at][0-9]\)
 **sw\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],6\([at][0-9]\)
-**sb\t[at][0-9],6\([at][0-9]\)
+**lw\t[at][0-9],3\([at][0-9]\)
+**sw\t[at][0-9],3\([at][0-9]\)
 **...
 */
 COPY_N(7)
@@ -36,9 +35,8 @@ COPY_N(7)
 **...
 **lw\t[at][0-9],0\([at][0-9]\)
 **sw\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],6\([at][0-9]\)
-**sb\t[at][0-9],6\([at][0-9]\)
+**lw\t[at][0-9],3\([at][0-9]\)
+**sw\t[at][0-9],3\([at][0-9]\)
 **...
 */
 COPY_ALIGNED_N(7)
@@ -66,9 +64,8 @@ COPY_ALIGNED_N(8)
 **...
 **ld\t[at][0-9],0\([at][0-9]\)
 **sd\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],10\([at][0-9]\)
-**sb\t[at][0-9],10\([at][0-9]\)
+**lw\t[at][0-9],7\([at][0-9]\)
+**sw\t[at][0-9],7\([at][0-9]\)
 **...
 */
 COPY_N(11)
@@ -77,11 +74,9 @@ COPY_N(11)
 **copy_aligned_11:
 **...
 **ld\t[at][0-9],0\([at][0-9]\)
-**...
 **sd\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],10\([at][0-9]\)
-**sb\t[at][0-9],10\([at][0-9]\)
+**lw\t[at][0-9],7\([at][0-9]\)
+**sw\t[at][0-9],7\([at][0-9]\)
 **...
 */
 COPY_ALIGNED_N(11)
@@ -90,11 +85,9 @@ COPY_ALIGNED_N(11)
 **copy_15:
 **...
 **ld\t[at][0-9],0\([at][0-9]\)
-**...
 **sd\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],14\([at][0-9]\)
-**sb\t[at][0-9],14\([at][0-9]\)
+**ld\t[at][0-9],7\([at][0-9]\)
+**sd\t[at][0-9],7\([at][0-9]\)
 **...
 */
 COPY_N(15)
@@ -103,11 +96,9 @@ COPY_N(15)
 **copy_aligned_15:
 **...
 **ld\t[at][0-9],0\([at][0-9]\)
-**...
 **sd\t[at][0-9],0\([at][0-9]\)
-**...
-**lbu\t[at][0-9],14\([at][0-9]\)
-**sb\t[at][0-9],14\([at][0-9]\)
+**ld\t[at][0-9],7\([at][0-9]\)
+**sd\t[at

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Allow by-pieces to do overlapping accesses in block_move_straight

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:db2ad3d782827fefec35b3cf140a3690033098af

commit db2ad3d782827fefec35b3cf140a3690033098af
Author: Christoph Müllner 
Date:   Mon Apr 29 02:53:20 2024 +0200

RISC-V: Allow by-pieces to do overlapping accesses in block_move_straight

The current implementation of riscv_block_move_straight() emits a couple
of loads/stores with with maximum width (e.g. 8-byte for RV64).
The remainder is handed over to move_by_pieces().
The by-pieces framework utilizes target hooks to decide about the emitted
instructions (e.g. unaligned accesses or overlapping accesses).

Since the current implementation will always request less than XLEN bytes
to be handled by the by-pieces infrastructure, it is impossible that
overlapping memory accesses can ever be emitted (the by-pieces code does
not know of any previous instructions that were emitted by the backend).

This patch changes the implementation of riscv_block_move_straight()
such, that it utilizes the by-pieces framework if the remaining data
is less than 2*XLEN bytes, which is sufficient to enable overlapping
memory accesses (if the requirements for them are given).

The changes in the expansion can be seen in the adjustments of the
cpymem-NN-ooo test cases. The changes in the cpymem-NN tests are
caused by the different instruction ordering of the code emitted
by the by-pieces infrastructure, which emits alternating load/store
sequences.

gcc/ChangeLog:

* config/riscv/riscv-string.cc (riscv_block_move_straight):
Hand over up to 2xXLEN bytes to move_by_pieces().

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cpymem-32-ooo.c: Adjustments for overlapping
access.
* gcc.target/riscv/cpymem-32.c: Adjustments for code emitted by
by-pieces.
* gcc.target/riscv/cpymem-64-ooo.c: Adjustments for overlapping
access.
* gcc.target/riscv/cpymem-64.c: Adjustments for code emitted by
by-pieces.

Signed-off-by: Christoph Müllner 
(cherry picked from commit ad22c607f3e17f2c6ca45699c1d88adaa618c23c)

Diff:
---
 gcc/config/riscv/riscv-string.cc   |  6 +++---
 gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c | 16 
 gcc/testsuite/gcc.target/riscv/cpymem-32.c | 10 --
 gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c |  8 
 gcc/testsuite/gcc.target/riscv/cpymem-64.c |  9 +++--
 5 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index b6cd7032356..96394844bbb 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -637,18 +637,18 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned 
HOST_WIDE_INT length,
   delta = bits / BITS_PER_UNIT;
 
   /* Allocate a buffer for the temporary registers.  */
-  regs = XALLOCAVEC (rtx, length / delta);
+  regs = XALLOCAVEC (rtx, length / delta - 1);
 
   /* Load as many BITS-sized chunks as possible.  Use a normal load if
  the source has enough alignment, otherwise use left/right pairs.  */
-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+  for (offset = 0, i = 0; offset + 2 * delta <= length; offset += delta, i++)
 {
   regs[i] = gen_reg_rtx (mode);
   riscv_emit_move (regs[i], adjust_address (src, mode, offset));
 }
 
   /* Copy the chunks to the destination.  */
-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+  for (offset = 0, i = 0; offset + 2 * delta <= length; offset += delta, i++)
 riscv_emit_move (adjust_address (dest, mode, offset), regs[i]);
 
   /* Mop up any left-over bytes.  */
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c 
b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
index 947d58c30fa..2a48567353a 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -91,8 +91,8 @@ COPY_ALIGNED_N(11)
 **...
 **sw\t[at][0-9],0\([at][0-9]\)
 **...
-**lbu\t[at][0-9],14\([at][0-9]\)
-**sb\t[at][0-9],14\([at][0-9]\)
+**lw\t[at][0-9],11\([at][0-9]\)
+**sw\t[at][0-9],11\([at][0-9]\)
 **...
 */
 COPY_N(15)
@@ -104,8 +104,8 @@ COPY_N(15)
 **...
 **sw\t[at][0-9],0\([at][0-9]\)
 **...
-**lbu\t[at][0-9],14\([at][0-9]\)
-**sb\t[at][0-9],14\([at][0-9]\)
+**lw\t[at][0-9],11\([at][0-9]\)
+**sw\t[at][0-9],11\([at][0-9]\)
 **...
 */
 COPY_ALIGNED_N(15)
@@ -117,8 +117,8 @@ COPY_ALIGNED_N(15)
 **...
 **sw\t[at][0-9],20\([at][0-9]\)
 **...
-**lbu\t[at][0-9],26\([at][0-9]\)
-**sb\t[at][0-9],26\([at][0-9]\)
+**lw\t[at][0-9],23\([at][0-9]\)
+**sw\t[at][0-9],23\([at][0-9]\)
 **...
 */
 COPY_N(27)
@@ -130,8 +130,8 @@ COPY_N(27)
 **...
 **sw\t[at][0-9],20\([at][0-9]\)
 **...
-**lbu\t[at][0-9],26\(

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Test cbo.zero expansion for rv32

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d8f7ba2edaecdaa515d0dfca65934d2a90c63db6

commit d8f7ba2edaecdaa515d0dfca65934d2a90c63db6
Author: Christoph Müllner 
Date:   Wed May 15 01:34:54 2024 +0200

RISC-V: Test cbo.zero expansion for rv32

We had an issue when expanding via cmo-zero for RV32.
This was fixed upstream, but we don't have a RV32 test.
Therefore, this patch introduces such a test.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmo-zicboz-zic64-1.c: Fix for rv32.

Signed-off-by: Christoph Müllner 
(cherry picked from commit 5609d77e683944439fae38323ecabc44a1eb4671)

Diff:
---
 .../gcc.target/riscv/cmo-zicboz-zic64-1.c  | 37 +++---
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c 
b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
index 6d4535287d0..9192b391b11 100644
--- a/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmo-zicboz-zic64-1.c
@@ -1,24 +1,9 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gc_zic64b_zicboz -mabi=lp64d" } */
+/* { dg-options "-march=rv32gc_zic64b_zicboz" { target { rv32 } } } */
+/* { dg-options "-march=rv64gc_zic64b_zicboz" { target { rv64 } } } */
 /* { dg-skip-if "" { *-*-* } {"-O0" "-Os" "-Og" "-Oz" "-flto" } } */
-/* { dg-final { check-function-bodies "**" "" } } */
-/* { dg-allow-blank-lines-in-output 1 } */
 
-/*
-**clear_buf_123:
-**...
-**cbo\.zero\t0\(a[0-9]+\)
-**sd\tzero,64\(a[0-9]+\)
-**sd\tzero,72\(a[0-9]+\)
-**sd\tzero,80\(a[0-9]+\)
-**sd\tzero,88\(a[0-9]+\)
-**sd\tzero,96\(a[0-9]+\)
-**sd\tzero,104\(a[0-9]+\)
-**sd\tzero,112\(a[0-9]+\)
-**sh\tzero,120\(a[0-9]+\)
-**sb\tzero,122\(a[0-9]+\)
-**...
-*/
+// 1x cbo.zero, 7x sd (rv64) or 14x sw (rv32), 1x sh, 1x sb
 int
 clear_buf_123 (void *p)
 {
@@ -26,17 +11,17 @@ clear_buf_123 (void *p)
   __builtin_memset (p, 0, 123);
 }
 
-/*
-**clear_buf_128:
-**...
-**cbo\.zero\t0\(a[0-9]+\)
-**addi\ta[0-9]+,a[0-9]+,64
-**cbo\.zero\t0\(a[0-9]+\)
-**...
-*/
+// 2x cbo.zero, 1x addi 64
 int
 clear_buf_128 (void *p)
 {
   p = __builtin_assume_aligned(p, 64);
   __builtin_memset (p, 0, 128);
 }
+
+/* { dg-final { scan-assembler-times "cbo\.zero\t" 3 } } */
+/* { dg-final { scan-assembler-times "addi\ta\[0-9\]+,a\[0-9\]+,64" 1 } } */
+/* { dg-final { scan-assembler-times "sd\t" 7 { target { rv64 } } } } */
+/* { dg-final { scan-assembler-times "sw\t" 14 { target { rv32 } } } } */
+/* { dg-final { scan-assembler-times "sh\t" 1 } } */
+/* { dg-final { scan-assembler-times "sb\t" 1 } } */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [v2, 1/2] RISC-V: Add cmpmemsi expansion

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:1687d671459b63b7e19fe33d275ebbcddd43381e

commit 1687d671459b63b7e19fe33d275ebbcddd43381e
Author: Christoph Müllner 
Date:   Wed May 15 12:18:20 2024 -0600

[v2,1/2] RISC-V: Add cmpmemsi expansion

GCC has a generic cmpmemsi expansion via the by-pieces framework,
which shows some room for target-specific optimizations.
E.g. for comparing two aligned memory blocks of 15 bytes
we get the following sequence:

my_mem_cmp_aligned_15:
li  a4,0
j   .L2
.L8:
bgeua4,a7,.L7
.L2:
add a2,a0,a4
add a3,a1,a4
lbu a5,0(a2)
lbu a6,0(a3)
addia4,a4,1
li  a7,15// missed hoisting
subwa5,a5,a6
andia5,a5,0xff // useless
beq a5,zero,.L8
lbu a0,0(a2) // loading again!
lbu a5,0(a3) // loading again!
subwa0,a0,a5
ret
.L7:
li  a0,0
ret

Diff first byte: 15 insns
Diff second byte: 25 insns
No diff: 25 insns

Possible improvements:
* unroll the loop and use load-with-displacement to avoid offset increments
* load and compare multiple (aligned) bytes at once
* Use the bitmanip/strcmp result calculation (reverse words and
  synthesize (a2 >= a3) ? 1 : -1 in a branchless sequence)

When applying these improvements we get the following sequence:

my_mem_cmp_aligned_15:
ld  a5,0(a0)
ld  a4,0(a1)
bne a5,a4,.L2
ld  a5,8(a0)
ld  a4,8(a1)
sllia5,a5,8
sllia4,a4,8
bne a5,a4,.L2
li  a0,0
.L3:
sext.w  a0,a0
ret
.L2:
rev8a5,a5
rev8a4,a4
sltua5,a5,a4
neg a5,a5
ori a0,a5,1
j   .L3

Diff first byte: 11 insns
Diff second byte: 16 insns
No diff: 11 insns

This patch implements this improvements.

The tests consist of a execution test (similar to
gcc/testsuite/gcc.dg/torture/inline-mem-cmp-1.c) and a few tests
that test the expansion conditions (known length and alignment).

Similar to the cpymemsi expansion this patch does not introduce any
gating for the cmpmemsi expansion (on top of requiring the known length,
alignment and Zbb).

Bootstrapped and SPEC CPU 2017 tested.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (riscv_expand_block_compare): New
prototype.
* config/riscv/riscv-string.cc (GEN_EMIT_HELPER2): New helper
for zero_extendhi.
(do_load_from_addr): Add support for HI and SI/64 modes.
(do_load): Add helper for zero-extended loads.
(emit_memcmp_scalar_load_and_compare): New helper to emit memcmp.
(emit_memcmp_scalar_result_calculation): Likewise.
(riscv_expand_block_compare_scalar): Likewise.
(riscv_expand_block_compare): New RISC-V expander for memory 
compare.
* config/riscv/riscv.md (cmpmemsi): New cmpmem expansion.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmpmemsi-1.c: New test.
* gcc.target/riscv/cmpmemsi-2.c: New test.
* gcc.target/riscv/cmpmemsi-3.c: New test.
* gcc.target/riscv/cmpmemsi.c: New test.

(cherry picked from commit 4bf1aa1ab90dd487fadc27c86523ec3562b2d2fe)

Diff:
---
 gcc/config/riscv/riscv-protos.h |  1 +
 gcc/config/riscv/riscv-string.cc| 40 +--
 gcc/config/riscv/riscv.md   | 15 ++
 gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c |  6 
 gcc/testsuite/gcc.target/riscv/cmpmemsi-2.c | 42 
 gcc/testsuite/gcc.target/riscv/cmpmemsi-3.c | 43 +
 gcc/testsuite/gcc.target/riscv/cmpmemsi.c   | 22 +++
 7 files changed, 155 insertions(+), 14 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 5c8a52b78a2..565ead1382a 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -189,6 +189,7 @@ rtl_opt_pass * make_pass_avlprop (gcc::context *ctxt);
 rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
 
 /* Routines implemented in riscv-string.c.  */
+extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx);
 extern bool riscv_expand_block_move (rtx, rtx, rtx);
 extern bool riscv_expand_block_clear (rtx, rtx);
 
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 96394844bbb..8f3b6f925e0 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -86,35 +86,47 @@ GEN_EMIT_HELPER2(th_rev) /* do_th_rev2  */
 GEN_EMIT_HELPER

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [v2, 2/2] RISC-V: strcmp expansion: Use adjust_address() for address calculation

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:3411fe01e724eb941b7416e596487c55f59cf9e4

commit 3411fe01e724eb941b7416e596487c55f59cf9e4
Author: Christoph Müllner 
Date:   Wed May 15 12:19:40 2024 -0600

[v2,2/2] RISC-V: strcmp expansion: Use adjust_address() for address 
calculation

We have an arch-independent routine to generate an address with an offset.
Let's use that instead of doing the calculation in the backend.

gcc/ChangeLog:

* config/riscv/riscv-string.cc 
(emit_strcmp_scalar_load_and_compare):
Use adjust_address() to calculate MEM-PLUS pattern.

(cherry picked from commit 1fbbae1d4ba3618a3da829a6d7e11a1606a583b3)

Diff:
---
 gcc/config/riscv/riscv-string.cc | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 8f3b6f925e0..cbb9724d230 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -227,8 +227,6 @@ emit_strcmp_scalar_load_and_compare (rtx result, rtx src1, 
rtx src2,
 rtx final_label)
 {
   const unsigned HOST_WIDE_INT xlen = GET_MODE_SIZE (Xmode);
-  rtx src1_addr = force_reg (Pmode, XEXP (src1, 0));
-  rtx src2_addr = force_reg (Pmode, XEXP (src2, 0));
   unsigned HOST_WIDE_INT offset = 0;
 
   rtx testval = gen_reg_rtx (Xmode);
@@ -246,10 +244,10 @@ emit_strcmp_scalar_load_and_compare (rtx result, rtx 
src1, rtx src2,
   else
load_mode = Xmode;
 
-  rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
-  do_load_from_addr (load_mode, data1, addr1, src1);
-  rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
-  do_load_from_addr (load_mode, data2, addr2, src2);
+  rtx addr1 = adjust_address (src1, load_mode, offset);
+  do_load (load_mode, data1, addr1);
+  rtx addr2 = adjust_address (src2, load_mode, offset);
+  do_load (load_mode, data2, addr2);
 
   if (cmp_bytes == 1)
{


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Add missing hunk in recent change.

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d04ee599f2b7bde5bcef0ea5ff09dbacab214046

commit d04ee599f2b7bde5bcef0ea5ff09dbacab214046
Author: Jeff Law 
Date:   Wed May 15 17:05:24 2024 -0600

Add missing hunk in recent change.

gcc/
* config/riscv/riscv-string.cc: Add missing hunk from last change.

(cherry picked from commit d7e6fe0f72ad41b8361f927d2796dbc275347297)

Diff:
---
 gcc/config/riscv/riscv-string.cc | 177 +++
 1 file changed, 177 insertions(+)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index cbb9724d230..83e7afbd693 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -627,6 +627,183 @@ riscv_expand_strlen (rtx result, rtx src, rtx 
search_char, rtx align)
   return false;
 }
 
+/* Generate the sequence of load and compares for memcmp using Zbb.
+
+   RESULT is the register where the return value of memcmp will be stored.
+   The source pointers are SRC1 and SRC2 (NBYTES bytes to compare).
+   DATA1 and DATA2 are registers where the data chunks will be stored.
+   DIFF_LABEL is the location of the code that calculates the return value.
+   FINAL_LABEL is the location of the code that comes after the calculation
+   of the return value.  */
+
+static void
+emit_memcmp_scalar_load_and_compare (rtx result, rtx src1, rtx src2,
+unsigned HOST_WIDE_INT nbytes,
+rtx data1, rtx data2,
+rtx diff_label, rtx final_label)
+{
+  const unsigned HOST_WIDE_INT xlen = GET_MODE_SIZE (Xmode);
+  unsigned HOST_WIDE_INT offset = 0;
+
+  while (nbytes > 0)
+{
+  unsigned HOST_WIDE_INT cmp_bytes = xlen < nbytes ? xlen : nbytes;
+  machine_mode load_mode;
+
+  /* Special cases to avoid masking of trailing bytes.  */
+  if (cmp_bytes == 1)
+   load_mode = QImode;
+  else if (cmp_bytes == 2)
+   load_mode = HImode;
+  else if (cmp_bytes == 4)
+   load_mode = SImode;
+  else
+   load_mode = Xmode;
+
+  rtx addr1 = adjust_address (src1, load_mode, offset);
+  do_load (load_mode, data1, addr1);
+  rtx addr2 = adjust_address (src2, load_mode, offset);
+  do_load (load_mode, data2, addr2);
+
+  /* Fast-path for a single byte.  */
+  if (cmp_bytes == 1)
+   {
+ rtx tmp = gen_reg_rtx (Xmode);
+ do_sub3 (tmp, data1, data2);
+ emit_insn (gen_movsi (result, gen_lowpart (SImode, tmp)));
+ emit_jump_insn (gen_jump (final_label));
+ emit_barrier (); /* No fall-through.  */
+ return;
+   }
+
+  /* Shift off trailing bytes in words if needed.  */
+  unsigned int load_bytes = GET_MODE_SIZE (load_mode).to_constant ();
+  if (cmp_bytes < load_bytes)
+   {
+ int shamt = (load_bytes - cmp_bytes) * BITS_PER_UNIT;
+ do_ashl3 (data1, data1, GEN_INT (shamt));
+ do_ashl3 (data2, data2, GEN_INT (shamt));
+   }
+
+  /* Break out if data1 != data2 */
+  rtx cond = gen_rtx_NE (VOIDmode, data1, data2);
+  emit_unlikely_jump_insn (gen_cbranch4 (Pmode, cond, data1,
+data2, diff_label));
+  /* Fall-through on equality.  */
+
+  offset += cmp_bytes;
+  nbytes -= cmp_bytes;
+}
+}
+
+/* memcmp result calculation.
+
+   RESULT is the register where the return value will be stored.
+   The two data chunks are in DATA1 and DATA2.  */
+
+static void
+emit_memcmp_scalar_result_calculation (rtx result, rtx data1, rtx data2)
+{
+  /* Get bytes in big-endian order and compare as words.  */
+  do_bswap2 (data1, data1);
+  do_bswap2 (data2, data2);
+  /* Synthesize (data1 >= data2) ? 1 : -1 in a branchless sequence.  */
+  rtx tmp = gen_reg_rtx (Xmode);
+  emit_insn (gen_slt_3 (LTU, Xmode, Xmode, tmp, data1, data2));
+  do_neg2 (tmp, tmp);
+  do_ior3 (tmp, tmp, const1_rtx);
+  emit_insn (gen_movsi (result, gen_lowpart (SImode, tmp)));
+}
+
+/* Expand memcmp using scalar instructions (incl. Zbb).
+
+   RESULT is the register where the return value will be stored.
+   The source pointers are SRC1 and SRC2 (NBYTES bytes to compare).  */
+
+static bool
+riscv_expand_block_compare_scalar (rtx result, rtx src1, rtx src2, rtx nbytes)
+{
+  const unsigned HOST_WIDE_INT xlen = GET_MODE_SIZE (Xmode);
+
+  if (optimize_function_for_size_p (cfun))
+return false;
+
+  /* We don't support big endian.  */
+  if (BYTES_BIG_ENDIAN)
+return false;
+
+  if (!CONST_INT_P (nbytes))
+return false;
+
+  /* We need the rev (bswap) instruction.  */
+  if (!TARGET_ZBB)
+return false;
+
+  unsigned HOST_WIDE_INT length = UINTVAL (nbytes);
+
+  /* Limit to 12-bits (maximum load-offset).  */
+  if (length > IMM_REACH)
+length = IMM_REACH;
+
+  /* We need xlen-aligned memory.  */
+  unsigned HOST_WIDE_INT align = MIN (MEM_ALIGN (src1), MEM_ALIGN (src2));
+  if (align < (xlen * BITS_PER_UNIT))
+return false;

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add Zvfbfwma extension to the -march= option

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:5b202f0c899617507fb325deb7f53611f9561f1e

commit 5b202f0c899617507fb325deb7f53611f9561f1e
Author: Xiao Zeng 
Date:   Wed May 15 10:03:40 2024 +0800

RISC-V: Add Zvfbfwma extension to the -march= option

This patch would like to add new sub extension (aka Zvfbfwma) to the
-march= option. It introduces a new data type BF16.

1 In spec: "Zvfbfwma requires the Zvfbfmin extension and the Zfbfmin 
extension."
  1.1 In EmbeddedProcessor: Zvfbfwma -> Zvfbfmin -> Zve32f
  1.2 In Application Processor: Zvfbfwma -> Zvfbfmin -> V
  1.3 In both scenarios, there are: Zvfbfwma -> Zfbfmin

2 Zvfbfmin's information is in:



3 Zfbfmin's formation is in:



4 Depending on different usage scenarios, the Zvfbfwma extension may
depend on 'V' or 'Zve32f'. This patch only implements dependencies in
scenario of Embedded Processor. This is consistent with the processing
strategy in Zvfbfmin. In scenario of Application Processor, it is
necessary to explicitly indicate the dependent 'V' extension.

5 You can locate more information about Zvfbfwma from below spec doc:



gcc/ChangeLog:

* common/config/riscv/riscv-common.cc:
(riscv_implied_info): Add zvfbfwma item.
(riscv_ext_version_table): Ditto.
(riscv_ext_flag_table): Ditto.
* config/riscv/riscv.opt:
(MASK_ZVFBFWMA): New macro.
(TARGET_ZVFBFWMA): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-37.c: New test.
* gcc.target/riscv/arch-38.c: New test.
* gcc.target/riscv/predef-36.c: New test.
* gcc.target/riscv/predef-37.c: New test.

(cherry picked from commit 38dd4e26e07c6be7cf4d169141ee4f3a03f3a09d)

Diff:
---
 gcc/common/config/riscv/riscv-common.cc|  5 
 gcc/config/riscv/riscv.opt |  2 ++
 gcc/testsuite/gcc.target/riscv/arch-37.c   |  5 
 gcc/testsuite/gcc.target/riscv/arch-38.c   |  5 
 gcc/testsuite/gcc.target/riscv/predef-36.c | 48 ++
 gcc/testsuite/gcc.target/riscv/predef-37.c | 48 ++
 6 files changed, 113 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index fb76017ffbc..88204393fde 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -162,6 +162,8 @@ static const riscv_implied_info_t riscv_implied_info[] =
   {"zfa", "f"},
 
   {"zvfbfmin", "zve32f"},
+  {"zvfbfwma", "zvfbfmin"},
+  {"zvfbfwma", "zfbfmin"},
   {"zvfhmin", "zve32f"},
   {"zvfh", "zve32f"},
   {"zvfh", "zfhmin"},
@@ -336,6 +338,7 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zfh",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zfhmin",ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfbfmin",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zvfbfwma",  ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfhmin",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfh",  ISA_SPEC_CLASS_NONE, 1, 0},
 
@@ -1667,6 +1670,7 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   {"zve64f",   &gcc_options::x_riscv_vector_elen_flags, 
MASK_VECTOR_ELEN_FP_32},
   {"zve64d",   &gcc_options::x_riscv_vector_elen_flags, 
MASK_VECTOR_ELEN_FP_64},
   {"zvfbfmin", &gcc_options::x_riscv_vector_elen_flags, 
MASK_VECTOR_ELEN_BF_16},
+  {"zvfbfwma", &gcc_options::x_riscv_vector_elen_flags, 
MASK_VECTOR_ELEN_BF_16},
   {"zvfhmin",  &gcc_options::x_riscv_vector_elen_flags, 
MASK_VECTOR_ELEN_FP_16},
   {"zvfh", &gcc_options::x_riscv_vector_elen_flags, 
MASK_VECTOR_ELEN_FP_16},
 
@@ -1704,6 +1708,7 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   {"zfhmin",&gcc_options::x_riscv_zf_subext, MASK_ZFHMIN},
   {"zfh",   &gcc_options::x_riscv_zf_subext, MASK_ZFH},
   {"zvfbfmin",  &gcc_options::x_riscv_zf_subext, MASK_ZVFBFMIN},
+  {"zvfbfwma",  &gcc_options::x_riscv_zf_subext, MASK_ZVFBFWMA},
   {"zvfhmin",   &gcc_options::x_riscv_zf_subext, MASK_ZVFHMIN},
   {"zvfh",  &gcc_options::x_riscv_zf_subext, MASK_ZVFH},
 
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 1252834aec5..d209ac896fd 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -401,6 +401,8 @@ Mask(ZFH) Var(riscv_zf_subext)
 
 Mask(ZVFBFMIN) Var(riscv_zf_subext)
 
+Mask(ZVFBFWMA) Var(riscv_zf_subext)
+
 Mask(ZVFHMIN) Var(riscv_zf_subext)
 
 Mask(ZVFH)Var(riscv_zf_subext)
diff --git a/gcc/testsuite/gcc.target/riscv/arch-37.c 
b/gcc/testsuite/gcc.target/riscv/arch-37.c
new file mode 100644
index 000..5b19a73c556
--- /dev/n

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: testsuite: Drop march-string in cmpmemsi/cpymemsi tests

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:423d10a1e064ef86d1c046ee8ed89ed6c5078495

commit 423d10a1e064ef86d1c046ee8ed89ed6c5078495
Author: Christoph Müllner 
Date:   Thu May 16 09:53:47 2024 +0200

RISC-V: testsuite: Drop march-string in cmpmemsi/cpymemsi tests

The tests cmpmemsi-1.c and cpymemsi-1.c are execution ("dg-do run")
tests, which does not have any restrictions for the enabled extensions.
Further, no other listed options are required.
Let's drop the options, so that the test can also be executed on
non-f and non-d targets.  However, we need to set options to the
defaults without '-ansi', because the included test file uses the
'asm' keyword, which is not part of ANSI C.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cmpmemsi-1.c: Drop options.
* gcc.target/riscv/cpymemsi-1.c: Likewise.

Signed-off-by: Christoph Müllner 
(cherry picked from commit b8b82bb05c10544da05cd0d3d39e6bc3763a8d9f)

Diff:
---
 gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c | 3 +--
 gcc/testsuite/gcc.target/riscv/cpymemsi-1.c | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c 
b/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
index d7e0bc47407..698f27d89fb 100644
--- a/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cmpmemsi-1.c
@@ -1,6 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=rv32gc_zbb -save-temps -g0 -fno-lto" { target { rv32 } 
} } */
-/* { dg-options "-march=rv64gc_zbb -save-temps -g0 -fno-lto" { target { rv64 } 
} } */
+/* { dg-options "-pedantic-errors" } */
 /* { dg-timeout-factor 2 } */
 
 #include "../../gcc.dg/memcmp-1.c"
diff --git a/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c 
b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
index 983b564ccaf..30e9f119bed 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymemsi-1.c
@@ -1,7 +1,5 @@
 /* { dg-do run } */
-/* { dg-options "-march=rv32gc -save-temps -g0 -fno-lto" { target { rv32 } } } 
*/
-/* { dg-options "-march=rv64gc -save-temps -g0 -fno-lto" { target { rv64 } } } 
*/
-/* { dg-additional-options "-DRUN_FRACTION=11" { target simulator } } */
+/* { dg-options "-pedantic-errors" } */
 /* { dg-timeout-factor 2 } */
 
 #include "../../gcc.dg/memcmp-1.c"


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:592205a276422c31872bf41764c59589e4a17c85

commit 592205a276422c31872bf41764c59589e4a17c85
Author: Pan Li 
Date:   Wed May 15 10:14:05 2024 +0800

Internal-fn: Support new IFN SAT_ADD for unsigned scalar int

This patch would like to add the middle-end presentation for the
saturation add.  Aka set the result of add to the max when overflow.
It will take the pattern similar as below.

SAT_ADD (x, y) => (x + y) | (-(TYPE)((TYPE)(x + y) < x))

Take uint8_t as example, we will have:

* SAT_ADD (1, 254)   => 255.
* SAT_ADD (1, 255)   => 255.
* SAT_ADD (2, 255)   => 255.
* SAT_ADD (255, 255) => 255.

Given below example for the unsigned scalar integer uint64_t:

uint64_t sat_add_u64 (uint64_t x, uint64_t y)
{
  return (x + y) | (- (uint64_t)((uint64_t)(x + y) < x));
}

Before this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  long unsigned int _1;
  _Bool _2;
  long unsigned int _3;
  long unsigned int _4;
  uint64_t _7;
  long unsigned int _10;
  __complex__ long unsigned int _11;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _11 = .ADD_OVERFLOW (x_5(D), y_6(D));
  _1 = REALPART_EXPR <_11>;
  _10 = IMAGPART_EXPR <_11>;
  _2 = _10 != 0;
  _3 = (long unsigned int) _2;
  _4 = -_3;
  _7 = _1 | _4;
  return _7;
;;succ:   EXIT

}

After this patch:
uint64_t sat_add_uint64_t (uint64_t x, uint64_t y)
{
  uint64_t _7;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _7 = .SAT_ADD (x_5(D), y_6(D)); [tail call]
  return _7;
;;succ:   EXIT
}

The below tests are passed for this patch:
1. The riscv fully regression tests.
2. The aarch64 fully regression tests.
3. The x86 bootstrap tests.
4. The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* internal-fn.cc (commutative_binary_fn_p): Add type IFN_SAT_ADD
to the return true switch case(s).
* internal-fn.def (SAT_ADD):  Add new signed optab SAT_ADD.
* match.pd: Add unsigned SAT_ADD match(es).
* optabs.def (OPTAB_NL): Remove fixed-point limitation for
us/ssadd.
* tree-ssa-math-opts.cc (gimple_unsigned_integer_sat_add): New
extern func decl generated in match.pd match.
(match_saturation_arith): New func impl to match the saturation 
arith.
(math_opts_dom_walker::after_dom_children): Try match saturation
arith when IOR expr.

Signed-off-by: Pan Li 
(cherry picked from commit 52b0536710ff3f3ace72ab00ce9ef6c630cd1183)

Diff:
---
 gcc/internal-fn.cc|  1 +
 gcc/internal-fn.def   |  2 ++
 gcc/match.pd  | 51 +++
 gcc/optabs.def|  4 ++--
 gcc/tree-ssa-math-opts.cc | 32 +
 5 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 0a7053c2286..73045ca8c8c 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4202,6 +4202,7 @@ commutative_binary_fn_p (internal_fn fn)
 case IFN_UBSAN_CHECK_MUL:
 case IFN_ADD_OVERFLOW:
 case IFN_MUL_OVERFLOW:
+case IFN_SAT_ADD:
 case IFN_VEC_WIDEN_PLUS:
 case IFN_VEC_WIDEN_PLUS_LO:
 case IFN_VEC_WIDEN_PLUS_HI:
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 848bb9dbff3..25badbb86e5 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -275,6 +275,8 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | 
ECF_NOTHROW, first,
 DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first,
  smulhrs, umulhrs, binary)
 
+DEF_INTERNAL_SIGNED_OPTAB_FN (SAT_ADD, ECF_CONST, first, ssadd, usadd, binary)
+
 DEF_INTERNAL_COND_FN (ADD, ECF_CONST, add, binary)
 DEF_INTERNAL_COND_FN (SUB, ECF_CONST, sub, binary)
 DEF_INTERNAL_COND_FN (MUL, ECF_CONST, smul, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index 4a0aa80cee1..d6bfb9a32ae 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3045,6 +3045,57 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| POINTER_TYPE_P (itype))
   && wi::eq_p (wi::to_wide (int_cst), wi::max_value (itype))
 
+/* Unsigned Saturation Add */
+(match (usadd_left_part_1 @0 @1)
+ (plus:c @0 @1)
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_left_part_2 @0 @1)
+ (realpart (IFN_ADD_OVERFLOW:c @0 @1))
+ (if (INTEGRAL_TYPE_P (type)
+  && TYPE_UNSIGNED (TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@0))
+  && types_match (type, TREE_TYPE (@1)
+
+(match (usadd_right_part_1 @0 @1)
+ (negate (convert (lt (plus:c @0 @1) @0)))
+ (if (INTEGRAL_TYPE_P (type)
+  

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Vect: Support new IFN SAT_ADD for unsigned vector int

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:210fa5eb9089ee81334d8b315a6d4a0ee26b4a8d

commit 210fa5eb9089ee81334d8b315a6d4a0ee26b4a8d
Author: Pan Li 
Date:   Wed May 15 10:14:06 2024 +0800

Vect: Support new IFN SAT_ADD for unsigned vector int

For vectorize, we leverage the existing vect pattern recog to find
the pattern similar to scalar and let the vectorizer to perform
the rest part for standard name usadd3 in vector mode.
The riscv vector backend have insn "Vector Single-Width Saturating
Add and Subtract" which can be leveraged when expand the usadd3
in vector mode.  For example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _80 = .SELECT_VL (ivtmp_78, POLY_INT_CST [2, 2]);
  ivtmp_58 = _80 * 8;
  vect__4.7_61 = .MASK_LEN_LOAD (vectp_x.5_59, 64B, { -1, ... }, _80, 0);
  vect__6.10_65 = .MASK_LEN_LOAD (vectp_y.8_63, 64B, { -1, ... }, _80, 0);
  vect__7.11_66 = vect__4.7_61 + vect__6.10_65;
  mask__8.12_67 = vect__4.7_61 > vect__7.11_66;
  vect__12.15_72 = .VCOND_MASK (mask__8.12_67, { 18446744073709551615,
... }, vect__7.11_66);
  .MASK_LEN_STORE (vectp_out.16_74, 64B, { -1, ... }, _80, 0, 
vect__12.15_72);
  vectp_x.5_60 = vectp_x.5_59 + ivtmp_58;
  vectp_y.8_64 = vectp_y.8_63 + ivtmp_58;
  vectp_out.16_75 = vectp_out.16_74 + ivtmp_58;
  ivtmp_79 = ivtmp_78 - _80;
  ...
}

After this patch:
void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  ...
  _62 = .SELECT_VL (ivtmp_60, POLY_INT_CST [2, 2]);
  ivtmp_46 = _62 * 8;
  vect__4.7_49 = .MASK_LEN_LOAD (vectp_x.5_47, 64B, { -1, ... }, _62, 0);
  vect__6.10_53 = .MASK_LEN_LOAD (vectp_y.8_51, 64B, { -1, ... }, _62, 0);
  vect__12.11_54 = .SAT_ADD (vect__4.7_49, vect__6.10_53);
  .MASK_LEN_STORE (vectp_out.12_56, 64B, { -1, ... }, _62, 0, 
vect__12.11_54);
  ...
}

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* tree-vect-patterns.cc (gimple_unsigned_integer_sat_add): New
func decl generated by match.pd match.
(vect_recog_sat_add_pattern): New func impl to recog the pattern
for unsigned SAT_ADD.

Signed-off-by: Pan Li 
(cherry picked from commit d4dee347b3fe1982bab26485ff31cd039c9df010)

Diff:
---
 gcc/tree-vect-patterns.cc | 52 +++
 1 file changed, 52 insertions(+)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 87c2acff386..6fd2373644f 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4487,6 +4487,57 @@ vect_recog_mult_pattern (vec_info *vinfo,
   return pattern_stmt;
 }
 
+extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
+
+/*
+ * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
+ *   _7 = _4 + _6;
+ *   _8 = _4 > _7;
+ *   _9 = (long unsigned int) _8;
+ *   _10 = -_9;
+ *   _12 = _7 | _10;
+ *
+ * And then simplied to
+ *   _12 = .SAT_ADD (_4, _6);
+ */
+
+static gimple *
+vect_recog_sat_add_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
+   tree *type_out)
+{
+  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  if (!is_gimple_assign (last_stmt))
+return NULL;
+
+  tree res_ops[2];
+  tree lhs = gimple_assign_lhs (last_stmt);
+
+  if (gimple_unsigned_integer_sat_add (lhs, res_ops, NULL))
+{
+  tree itype = TREE_TYPE (res_ops[0]);
+  tree vtype = get_vectype_for_scalar_type (vinfo, itype);
+
+  if (vtype != NULL_TREE
+   && direct_internal_fn_supported_p (IFN_SAT_ADD, vtype,
+  OPTIMIZE_FOR_BOTH))
+   {
+ *type_out = vtype;
+ gcall *call = gimple_build_call_internal (IFN_SAT_ADD, 2, res_ops[0],
+   res_ops[1]);
+
+ gimple_call_set_lhs (call, vect_recog_temp_ssa_var (itype, NULL));
+ gimple_call_set_nothrow (call, /* nothrow_p */ false);
+ gimple_set_location (call, gimple_location (last_stmt));
+
+ vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt);
+ return call;
+   }
+}
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
 
@@ -6987,6 +7038,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
   { vect_recog_divmod_pattern, "divmod" },
   { vec

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Vect: Support loop len in vectorizable early exit

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:f3aa41387600263a3c05b05623bd430fd7beeed2

commit f3aa41387600263a3c05b05623bd430fd7beeed2
Author: Pan Li 
Date:   Thu May 16 09:58:13 2024 +0800

Vect: Support loop len in vectorizable early exit

This patch adds early break auto-vectorization support for target which
use length on partial vectorization.  Consider this following example:

unsigned vect_a[802];
unsigned vect_b[802];

void test (unsigned x, int n)
{
  for (int i = 0; i < n; i++)
  {
vect_b[i] = x + i;

if (vect_a[i] > x)
  break;

vect_a[i] = x;
  }
}

We use VCOND_MASK_LEN to simulate the generate (mask && i < len + bias).
And then the IR of RVV looks like below:

  ...
  _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
  _55 = (int) _87;
  ...
  mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
  vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
{0, ... }, _87, 0);
  if (vec_len_mask_72 != { 0, ... })
goto ; [5.50%]
  else
goto ; [94.50%]

The below tests are passed for this patch:
1. The riscv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

gcc/ChangeLog:

* tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
the loop len mask.
* tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
vect_gen_loop_len_mask for 1 or more stmt(s).
* tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
for vect_gen_loop_len_mask.

Signed-off-by: Pan Li 
(cherry picked from commit 57f8a2f67c1536be23231808ab00613ab69193ed)

Diff:
---
 gcc/tree-vect-loop.cc  | 27 +++
 gcc/tree-vect-stmts.cc | 17 +++--
 gcc/tree-vectorizer.h  |  4 
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 29c03c246d4..6ff3ca09dc6 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11394,6 +11394,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, 
gimple_stmt_iterator *gsi,
   return loop_len;
 }
 
+/* Generate the tree for the loop len mask and return it.  Given the lens,
+   nvectors, vectype, index and factor to gen the len mask as below.
+
+   tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
+*/
+tree
+vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+   gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
+   unsigned int nvectors, tree vectype, tree stmt,
+   unsigned int index, unsigned int factor)
+{
+  tree all_one_mask = build_all_ones_cst (vectype);
+  tree all_zero_mask = build_zero_cst (vectype);
+  tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, 
index,
+   factor);
+  tree bias = build_int_cst (intQI_type_node,
+LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
+  tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
+  gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
+   all_one_mask, all_zero_mask, len,
+   bias);
+  gimple_call_set_lhs (call, len_mask);
+  gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
+
+  return len_mask;
+}
+
 /* Scale profiling counters by estimation for LOOP which is vectorized
by factor VF.
If FLAT is true, the loop we started with had unrealistically flat
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f8d8636b139..d592dff73e3 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12893,7 +12893,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info 
stmt_info,
 ncopies = vect_get_num_copies (loop_vinfo, vectype);
 
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
 
   /* Now build the new conditional.  Pattern gimple_conds get dropped during
  codegen so we must replace the original insn.  */
@@ -12957,12 +12959,11 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
{
  if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
  OPTIMIZE_FOR_SPEED))
-   return false;
+   vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
  else
vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
}
 
-
   return true;
 }
 
@@ -13015,6 +13016,15 @@ vectorizable_early_exit (vec_info *vinfo, 
stmt_vec_info stmt_info,
  st

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement vectorizable early exit with vcond_mask_len

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a65f9b17080323a8a936ce5a627b7ff53f25030f

commit a65f9b17080323a8a936ce5a627b7ff53f25030f
Author: Pan Li 
Date:   Thu May 16 10:02:40 2024 +0800

RISC-V: Implement vectorizable early exit with vcond_mask_len

After we support the loop lens for the vectorizable,  we would like to
implement the feature for the RISC-V target.  Given below example:

unsigned vect_a[1923];
unsigned vect_b[1923];

void test (unsigned limit, int n)
{
  for (int i = 0; i < n; i++)
{
  vect_b[i] = limit + i;

  if (vect_a[i] > limit)
{
  ret = vect_b[i];
  return ret;
}

  vect_a[i] = limit;
}
}

Before this patch:
  ...
.L8:
  swa3,0(a5)
  addiw a0,a0,1
  addi  a4,a4,4
  addi  a5,a5,4
  beq   a1,a0,.L2
.L4:
  swa0,0(a4)
  lwa2,0(a5)
  bleu  a2,a3,.L8
  ret

After this patch:
  ...
.L5:
  vsetvli   a5,a3,e8,mf4,ta,ma
  vmv1r.v   v4,v2
  vsetvli   t4,zero,e32,m1,ta,ma
  vmv.v.x   v1,a5
  vadd.vv   v2,v2,v1
  vsetvli   zero,a5,e32,m1,ta,ma
  vadd.vv   v5,v4,v3
  slli  a6,a5,2
  vle32.v   v1,0(t1)
  vmsltu.vv v1,v3,v1
  vcpop.m   t4,v1
  beq   t4,zero,.L4
  vmv.x.s   a4,v4
.L3:
  ...

The below tests are passed for this patch:
1. The riscv fully regression tests.

gcc/ChangeLog:

* 
config/riscv/autovec-opt.md(*vcond_mask_len_popcount_):
New pattern of vcond_mask_len_popcount for vector bool mode.
* config/riscv/autovec.md (vcond_mask_len_): New pattern of
vcond_mask_len for vector bool mode.
(cbranch4): New pattern for vector bool mode.
* config/riscv/vector-iterators.md: Add new unspec 
UNSPEC_SELECT_MASK.
* config/riscv/vector.md (@pred_popcount): Add VLS 
mode
to popcount pattern.
(@pred_popcount): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/early-break-1.c: New test.
* gcc.target/riscv/rvv/autovec/early-break-2.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 6c1de786e53a11150feb16ba990d0d6c6fd910db)

Diff:
---
 gcc/config/riscv/autovec-opt.md| 33 
 gcc/config/riscv/autovec.md| 61 ++
 gcc/config/riscv/vector-iterators.md   |  1 +
 gcc/config/riscv/vector.md | 18 +++
 .../gcc.target/riscv/rvv/autovec/early-break-1.c   | 34 
 .../gcc.target/riscv/rvv/autovec/early-break-2.c   | 37 +
 6 files changed, 175 insertions(+), 9 deletions(-)

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 645dc53d868..04f85d8e455 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1436,3 +1436,36 @@
 DONE;
   }
   [(set_attr "type" "vmalu")])
+
+;; Optimization pattern for early break auto-vectorization
+;; vcond_mask_len (mask, ones, zeros, len, bias) + vlmax popcount
+;; -> non vlmax popcount (mask, len)
+(define_insn_and_split "*vcond_mask_len_popcount_"
+  [(set (match_operand:P 0 "register_operand")
+(popcount:P
+ (unspec:VB_VLS [
+  (unspec:VB_VLS [
+   (match_operand:VB_VLS 1 "register_operand")
+   (match_operand:VB_VLS 2 "const_1_operand")
+   (match_operand:VB_VLS 3 "const_0_operand")
+   (match_operand 4 "autovec_length_operand")
+   (match_operand 5 "const_0_operand")] UNSPEC_SELECT_MASK)
+  (match_operand 6 "autovec_length_operand")
+  (const_int 1)
+  (reg:SI VL_REGNUM)
+  (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
+  "TARGET_VECTOR
+   && can_create_pseudo_p ()
+   && riscv_vector::get_vector_mode (Pmode, GET_MODE_NUNITS 
(mode)).exists ()"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+  {
+riscv_vector::emit_nonvlmax_insn (
+   code_for_pred_popcount (mode, Pmode),
+   riscv_vector::CPOP_OP,
+   operands, operands[4]);
+DONE;
+  }
+  [(set_attr "type" "vector")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index aa1ae0fe075..1ee3c8052fb 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2612,3 +2612,64 @@
 DONE;
   }
 )
+
+;; =
+;; == Early break auto-vectorization patterns
+;; =
+
+;; vcond_mask_len (mask, 1s, 0s, len, bias)
+;; => mask[i] = mask[i] && i < len ? 1 : 0
+(define_insn_and_split "vcond_mask_len_"
+  [(set (match_operand:VB 0 "register_operand")
+(unspec: VB [
+ (match_operand:VB 1 "register_operand")
+ (match_operand:VB 2 "const_1_operand")
+ (match_operand:VB 3 "const_0_operand")
+ (match_operand 4 "autovec_l

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Enable vectorizable early exit testsuite

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:2d6625b39c814a9dc860a361a2855d6547163a69

commit 2d6625b39c814a9dc860a361a2855d6547163a69
Author: Pan Li 
Date:   Thu May 16 10:04:10 2024 +0800

RISC-V: Enable vectorizable early exit testsuite

After we supported vectorizable early exit in RISC-V,  we would like to
enable the gcc vect test for vectorizable early exit.

The vect-early-break_124-pr114403.c failed to vectorize for now.
This is because the __builtin_memcpy with 8 bytes failed to be folded into an
int64 assignment during ccp1.  We will improve that first and mark
this as xfail for RISC-V.

The below tests are passed for this patch:
1. The riscv fully regression tests.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-mask-store-1.c: Add pragma novector as it will
have 2 times LOOP VECTORIZED in RISC-V.
* gcc.dg/vect/vect-early-break_124-pr114403.c: Xfail for the
riscv backend.
* lib/target-supports.exp: Add RISC-V backend.

Signed-off-by: Pan Li 
(cherry picked from commit 556e777298dac8574533935000c57335c5232921)

Diff:
---
 gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c  | 2 ++
 gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 2 +-
 gcc/testsuite/lib/target-supports.exp | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c 
b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
index fdd9032da98..2f80bf89e5e 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-mask-store-1.c
@@ -28,6 +28,8 @@ main ()
 
   if (__builtin_memcmp (x, res, sizeof (x)) != 0)
 abort ();
+
+#pragma GCC novector
   for (int i = 0; i < 32; ++i)
 if (flag[i] != 0 && flag[i] != 1)
   abort ();
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 51abf245ccb..101ae1e0eaa 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -2,7 +2,7 @@
 /* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-require-effective-target vect_long_long } */
 
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } 
} */
 
 #include "tree-vect.h"
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 3a55b2a4159..6c828b73ded 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4105,6 +4105,7 @@ proc check_effective_target_vect_early_break { } {
|| [check_effective_target_arm_v8_neon_ok]
|| [check_effective_target_sse4]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v]
}}]
 }
 
@@ -4120,6 +4121,7 @@ proc check_effective_target_vect_early_break_hw { } {
|| [check_effective_target_arm_v8_neon_hw]
|| [check_sse4_hw_available]
|| [istarget amdgcn-*-*]
+   || [check_effective_target_riscv_v_ok]
}}]
 }


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Cleanup some temporally files [NFC]

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:9beeebac68fc0dfb58db7d9cbdcb1b3f3cebf6aa

commit 9beeebac68fc0dfb58db7d9cbdcb1b3f3cebf6aa
Author: Pan Li 
Date:   Fri May 17 07:45:19 2024 +0800

RISC-V: Cleanup some temporally files [NFC]

Just notice some temporally files under gcc/config/riscv,
deleted as useless.

* Empty file j.
* Vim swap file.

gcc/ChangeLog:

* config/riscv/.riscv.cc.swo: Removed.
* config/riscv/j: Removed.

Signed-off-by: Pan Li 
(cherry picked from commit d477d683d5c6db90c80d348c795709ae6444ba7a)

Diff:
---
 gcc/config/riscv/.riscv.cc.swo | Bin 417792 -> 0 bytes
 gcc/config/riscv/j |   0
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/gcc/config/riscv/.riscv.cc.swo b/gcc/config/riscv/.riscv.cc.swo
deleted file mode 100644
index 77ed37353be..000
Binary files a/gcc/config/riscv/.riscv.cc.swo and /dev/null differ
diff --git a/gcc/config/riscv/j b/gcc/config/riscv/j
deleted file mode 100644
index e69de29bb2d..000


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add initial cost handling for segment loads/stores.

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:7a46f679b3ad302431a0fdd1dc30cca9712c92ec

commit 7a46f679b3ad302431a0fdd1dc30cca9712c92ec
Author: Robin Dapp 
Date:   Mon Feb 26 13:09:15 2024 +0100

RISC-V: Add initial cost handling for segment loads/stores.

This patch makes segment loads and stores more expensive.  It adds
segment_permute_2 as well as 3 to 8 cost fields to the common vector
costs and adds handling to adjust_stmt_cost.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (struct common_vector_cost): Add
segment_permute cost.
* config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost):
Handle segment loads/stores.
* config/riscv/riscv.cc: Initialize segment_permute_[2-8] to 1.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c: Adjust test.

(cherry picked from commit e0b9c8ad7098fb08a25a61fe17d4274dd73e5145)

Diff:
---
 gcc/config/riscv/riscv-protos.h|   9 ++
 gcc/config/riscv/riscv-vector-costs.cc | 163 +++--
 gcc/config/riscv/riscv.cc  |  14 ++
 .../gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c   |   4 +-
 4 files changed, 146 insertions(+), 44 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 565ead1382a..004ceb1031b 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -222,6 +222,15 @@ struct common_vector_cost
   const int gather_load_cost;
   const int scatter_store_cost;
 
+  /* Segment load/store permute cost.  */
+  const int segment_permute_2;
+  const int segment_permute_3;
+  const int segment_permute_4;
+  const int segment_permute_5;
+  const int segment_permute_6;
+  const int segment_permute_7;
+  const int segment_permute_8;
+
   /* Cost of a vector-to-scalar operation.  */
   const int vec_to_scalar_cost;
 
diff --git a/gcc/config/riscv/riscv-vector-costs.cc 
b/gcc/config/riscv/riscv-vector-costs.cc
index 4582b0db425..0a88e142a93 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -1052,6 +1052,25 @@ costs::better_main_loop_than_p (const vector_costs 
*uncast_other) const
   return vector_costs::better_main_loop_than_p (other);
 }
 
+/* Returns the group size i.e. the number of vectors to be loaded by a
+   segmented load/store instruction.  Return 0 if it is no segmented
+   load/store.  */
+static int
+segment_loadstore_group_size (enum vect_cost_for_stmt kind,
+ stmt_vec_info stmt_info)
+{
+  if (stmt_info
+  && (kind == vector_load || kind == vector_store)
+  && STMT_VINFO_DATA_REF (stmt_info))
+{
+  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+  if (stmt_info
+ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES)
+   return DR_GROUP_SIZE (stmt_info);
+}
+  return 0;
+}
+
 /* Adjust vectorization cost after calling riscv_builtin_vectorization_cost.
For some statement, we would like to further fine-grain tweak the cost on
top of riscv_builtin_vectorization_cost handling which doesn't have any
@@ -1076,55 +1095,115 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, 
loop_vec_info loop,
 case vector_load:
 case vector_store:
{
- /* Unit-stride vector loads and stores do not have offset addressing
-as opposed to scalar loads and stores.
-If the address depends on a variable we need an additional
-add/sub for each load/store in the worst case.  */
- if (stmt_info && stmt_info->stmt)
+ if (stmt_info && stmt_info->stmt && STMT_VINFO_DATA_REF (stmt_info))
{
- data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
- class loop *father = stmt_info->stmt->bb->loop_father;
- if (!loop && father && !father->inner && father->superloops)
+ /* Segment loads and stores.  When the group size is > 1
+the vectorizer will add a vector load/store statement for
+each vector in the group.  Here we additionally add permute
+costs for each.  */
+ /* TODO: Indexed and ordered/unordered cost.  */
+ int group_size = segment_loadstore_group_size (kind, stmt_info);
+ if (group_size > 1)
+   {
+ switch (group_size)
+   {
+   case 2:
+ if (riscv_v_ext_vector_mode_p (loop->vector_mode))
+   stmt_cost += costs->vla->segment_permute_2;
+ else
+   stmt_cost += costs->vls->segment_permute_2;
+ break;
+   case 3:
+ if (riscv_v_ext_vector_mode_p (loop->vector_mode))
+   stmt_cost += costs->vla->segment_permute_3;
+ else
+   stmt_

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] internal-fn: Do not force vcond_mask operands to reg.

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:12ceb8df30661c10cfba80742a2503636359c79e

commit 12ceb8df30661c10cfba80742a2503636359c79e
Author: Robin Dapp 
Date:   Fri May 10 12:44:44 2024 +0200

internal-fn: Do not force vcond_mask operands to reg.

In order to directly use constants this patch removes force_regs
in the vcond_mask expander.

gcc/ChangeLog:

PR middle-end/113474

* internal-fn.cc (expand_vec_cond_mask_optab_fn):  Remove
force_regs.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr113474.c: New test.

(cherry picked from commit 7ca35f2e430081d6ec91e910002f92d9713350fa)

Diff:
---
 gcc/internal-fn.cc|  3 ---
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c | 13 +
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 73045ca8c8c..9c09026793f 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3165,9 +3165,6 @@ expand_vec_cond_mask_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   rtx_op1 = expand_normal (op1);
   rtx_op2 = expand_normal (op2);
 
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (mode, rtx_op1);
-
   rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
   create_output_operand (&ops[0], target, mode);
   create_input_operand (&ops[1], rtx_op1, mode);
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c
new file mode 100644
index 000..0364bf9f5e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr113474.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target riscv_v } }  */
+/* { dg-additional-options "-std=c99" }  */
+
+void
+foo (int n, int **a)
+{
+  int b;
+  for (b = 0; b < n; b++)
+for (long e = 8; e > 0; e--)
+  a[b][e] = a[b][e] == 15;
+}
+
+/* { dg-final { scan-assembler "vmerge.vim" } }  */


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement IFN SAT_ADD for both the scalar and vector

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:0b3502c3dfe791e6c13de9c2419905519689d0da

commit 0b3502c3dfe791e6c13de9c2419905519689d0da
Author: Pan Li 
Date:   Fri May 17 18:49:46 2024 +0800

RISC-V: Implement IFN SAT_ADD for both the scalar and vector

The patch implement the SAT_ADD in the riscv backend as the
sample for both the scalar and vector.  Given below vector
as example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv   v0,v0,v1
  vmerge.vim  v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  sllia4,a5,3
  sub a3,a3,a5
  add a1,a1,a4
  add a2,a2,a4
  vsaddu.vv   v1,v1,v2  <=  Vector Single-Width Saturating Add
  vse64.v v1,0(a0)
  ...

The below test suites are passed for this patch.
* The riscv fully regression tests.
* The aarch64 fully regression tests.
* The x86 bootstrap tests.
* The x86 fully regression tests.

PR target/51492
PR target/112600

gcc/ChangeLog:

* config/riscv/autovec.md (usadd3): New pattern expand for
the unsigned SAT_ADD in vector mode.
* config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl
to expand usadd3 pattern.
(expand_vec_usadd): Ditto but for vector.
* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit
the vsadd insn.
(expand_vec_usadd): New func impl to expand usadd3 for vector.
* config/riscv/riscv.cc (riscv_expand_usadd): New func impl to
expand usadd3 for scalar.
* config/riscv/riscv.md (usadd3): New pattern expand for
the unsigned SAT_ADD in scalar mode.
* config/riscv/vector.md: Allow VLS mode for vsaddu.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New 
test.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New 
test.
* gcc.target/riscv/sat_arith.h: New test.
* gcc.target/riscv/sat_u_add-1.c: New test.
* gcc.target/riscv/sat_u_add-2.c: New test.
* gcc.target/riscv/sat_u_add-3.c: New test.
* gcc.target/riscv/sat_u_add-4.c: New test.
* gcc.target/riscv/sat_u_add-run-1.c: New test.
* gcc.target/riscv/sat_u_add-run-2.c: New test.
* gcc.target/riscv/sat_u_add-run-3.c: New test.
* gcc.target/riscv/sat_u_add-run-4.c: New test.
* gcc.target/riscv/scalar_sat_binary.h: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 34ed2b4593fa98b613632d0dde30b6ba3e7ecad9)

Diff:
---
 gcc/config/riscv/autovec.md| 17 +
 gcc/config/riscv/riscv-protos.h|  2 +
 gcc/config/riscv/riscv-v.cc| 19 ++
 gcc/config/riscv/riscv.cc  | 55 
 gcc/config/riscv/riscv.md  | 11 
 gcc/config/riscv/vector.md | 12 ++--
 .../riscv/rvv/autovec/binop/vec_sat_binary.h   | 33 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-1.c  | 19 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-2.c  | 20 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-3.c  | 20 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-4.c  | 20 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c  | 75 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c  | 75 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c  | 75 ++
 .../riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c  | 75 ++
 gcc/testsuite/gcc.target/riscv/sat_arith.h | 31 +
 gcc/testsuite/gcc.target/riscv/sat_u_add-1.c   | 19 ++
 gcc/testsuite/gcc.targe

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Modify _Bfloat16 to __bf16

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:a320bcf71489a14a634dbebeccee3b4e932b4ddb

commit a320bcf71489a14a634dbebeccee3b4e932b4ddb
Author: Xiao Zeng 
Date:   Fri May 17 13:48:21 2024 +0800

RISC-V: Modify _Bfloat16 to __bf16

According to the description in:
,
the type representation symbol of BF16 has been corrected.

Kito Cheng pointed out relevant information in the email:


gcc/ChangeLog:

* config/riscv/riscv-builtins.cc (riscv_init_builtin_types):
Modify _Bfloat16 to __bf16.
* config/riscv/riscv.cc (riscv_mangle_type): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/_Bfloat16-nanboxing.c: Move to...
* gcc.target/riscv/__bf16-nanboxing.c: ...here.
* gcc.target/riscv/bf16_arithmetic.c: Modify _Bfloat16 to __bf16.
* gcc.target/riscv/bf16_call.c: Ditto.
* gcc.target/riscv/bf16_comparison.c: Ditto.
* gcc.target/riscv/bf16_float_libcall_convert.c: Ditto.
* gcc.target/riscv/bf16_integer_libcall_convert.c: Ditto.

(cherry picked from commit 6da1d6efde2282e6582c00d1631e7457975ad998)

Diff:
---
 gcc/config/riscv/riscv-builtins.cc   |  6 +++---
 gcc/config/riscv/riscv.cc|  2 +-
 .../riscv/{_Bfloat16-nanboxing.c => __bf16-nanboxing.c}  | 12 ++--
 gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c |  6 +++---
 gcc/testsuite/gcc.target/riscv/bf16_call.c   |  4 ++--
 gcc/testsuite/gcc.target/riscv/bf16_comparison.c |  6 +++---
 gcc/testsuite/gcc.target/riscv/bf16_float_libcall_convert.c  |  2 +-
 .../gcc.target/riscv/bf16_integer_libcall_convert.c  |  2 +-
 8 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/gcc/config/riscv/riscv-builtins.cc 
b/gcc/config/riscv/riscv-builtins.cc
index 4c08834288a..dc54e1a59b5 100644
--- a/gcc/config/riscv/riscv-builtins.cc
+++ b/gcc/config/riscv/riscv-builtins.cc
@@ -275,7 +275,7 @@ riscv_init_builtin_types (void)
 lang_hooks.types.register_builtin_type (riscv_float16_type_node,
"_Float16");
 
-  /* Provide the _Bfloat16 type and bfloat16_type_node if needed.  */
+  /* Provide the __bf16 type and bfloat16_type_node if needed.  */
   if (!bfloat16_type_node)
 {
   riscv_bfloat16_type_node = make_node (REAL_TYPE);
@@ -286,9 +286,9 @@ riscv_init_builtin_types (void)
   else
 riscv_bfloat16_type_node = bfloat16_type_node;
 
-  if (!maybe_get_identifier ("_Bfloat16"))
+  if (!maybe_get_identifier ("__bf16"))
 lang_hooks.types.register_builtin_type (riscv_bfloat16_type_node,
-   "_Bfloat16");
+   "__bf16");
 }
 
 /* Implement TARGET_INIT_BUILTINS.  */
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9ac2be87acd..2be04ec6bc5 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -10276,7 +10276,7 @@ riscv_asan_shadow_offset (void)
 static const char *
 riscv_mangle_type (const_tree type)
 {
-  /* Half-precision float, _Float16 is "DF16_" and _Bfloat16 is "DF16b".  */
+  /* Half-precision float, _Float16 is "DF16_" and __bf16 is "DF16b".  */
   if (SCALAR_FLOAT_TYPE_P (type) && TYPE_PRECISION (type) == 16)
 {
   if (TYPE_MODE (type) == HFmode)
diff --git a/gcc/testsuite/gcc.target/riscv/_Bfloat16-nanboxing.c 
b/gcc/testsuite/gcc.target/riscv/__bf16-nanboxing.c
similarity index 83%
rename from gcc/testsuite/gcc.target/riscv/_Bfloat16-nanboxing.c
rename to gcc/testsuite/gcc.target/riscv/__bf16-nanboxing.c
index 11a73d22234..a9a586c98b9 100644
--- a/gcc/testsuite/gcc.target/riscv/_Bfloat16-nanboxing.c
+++ b/gcc/testsuite/gcc.target/riscv/__bf16-nanboxing.c
@@ -1,14 +1,14 @@
 /* { dg-do compile } */
 /* { dg-options "-march=rv64ifd -mabi=lp64d -mcmodel=medlow -O" } */
 
-_Bfloat16 gvar = 9.87654;
+__bf16 gvar = 9.87654;
 union U
 {
   unsigned short i16;
-  _Bfloat16 f16;
+  __bf16 f16;
 };
 
-_Bfloat16
+__bf16
 test1 (unsigned short input)
 {
   union U tmp;
@@ -16,19 +16,19 @@ test1 (unsigned short input)
   return tmp.f16;
 }
 
-_Bfloat16
+__bf16
 test2 ()
 {
   return 1.234f;
 }
 
-_Bfloat16
+__bf16
 test3 ()
 {
   return gvar;
 }
 
-_Bfloat16
+__bf16
 test ()
 {
   return 0.0f;
diff --git a/gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c 
b/gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c
index 9e485051260..190cc1d574a 100644
--- a/gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c
+++ b/gcc/testsuite/gcc.target/riscv/bf16_arithmetic.c
@@ -5,9 +5,9 @@
 /* 1) bf -> sf  (call  __extendbfsf2)  */
 /* 2) sf1 [+|-|*|/] sf2 (call  __[add|sub|mul|div]sf3)  */
 /* 3) sf -> bf  (call  __truncsfbf2)  */
-extern _Bfloat16 bf;
-extern _Bfloat16 bf1;
-extern _Bfl

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix "Nan-box the result of movbf on soft-bf16"

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:5b632ee99b187761fc6bf02a8c6312278563fbdc

commit 5b632ee99b187761fc6bf02a8c6312278563fbdc
Author: Xiao Zeng 
Date:   Wed May 15 16:23:16 2024 +0800

RISC-V: Fix "Nan-box the result of movbf on soft-bf16"

1 According to unpriv-isa spec:


  1.1 "FMV.H.X moves the half-precision value encoded in IEEE 754-2008
  standard encoding from the lower 16 bits of integer register rs1
  to the floating-point register rd, NaN-boxing the result."
  1.2 "FMV.W.X moves the single-precision value encoded in IEEE 754-2008
  standard encoding from the lower 32 bits of integer register rs1
  to the floating-point register rd. The bits are not modified in the
  transfer, and in particular, the payloads of non-canonical NaNs are 
preserved."

2 When (!TARGET_ZFHMIN == true && TARGET_HARD_FLOAT == true), instruction 
needs
to be added to complete the Nan-box, as done in
"RISC-V: Nan-box the result of movhf on soft-fp16":



3 Consider the "RISC-V: Nan-box the result of movbf on soft-bf16" in:


It ignores that both hf16 and bf16 are 16bits floating-point.

4 zfbfmin -> zfhmin in:



gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_legitimize_move): Optimize movbf
with Nan-boxing value.
* config/riscv/riscv.md (*movhf_softfloat_boxing): Expand movbf
with Nan-boxing value.
(*mov_softfloat_boxing): Ditto.
with Nan-boxing value.
(*movbf_softfloat_boxing): Delete abandoned pattern.

(cherry picked from commit 7422e050f33dd9ee7dcd5a72c80b4e11d61995ce)

Diff:
---
 gcc/config/riscv/riscv.cc | 15 ++-
 gcc/config/riscv/riscv.md | 19 +--
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2be04ec6bc5..7a34b4be873 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3192,13 +3192,12 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
  (set (reg:SI/DI mask) (const_int -65536)
  (set (reg:SI/DI temp) (zero_extend:SI/DI (subreg:HI (reg:HF/BF src) 0)))
  (set (reg:SI/DI temp) (ior:SI/DI (reg:SI/DI mask) (reg:SI/DI temp)))
- (set (reg:HF/BF dest) (unspec:HF/BF[ (reg:SI/DI temp) ]
-   UNSPEC_FMV_SFP16_X/UNSPEC_FMV_SBF16_X))
- */
+ (set (reg:HF/BF dest) (unspec:HF/BF[ (reg:SI/DI temp) ] 
UNSPEC_FMV_FP16_X))
+  */
 
   if (TARGET_HARD_FLOAT
-  && ((!TARGET_ZFHMIN && mode == HFmode)
- || (!TARGET_ZFBFMIN && mode == BFmode))
+  && !TARGET_ZFHMIN
+  && (mode == HFmode || mode == BFmode)
   && REG_P (dest) && FP_REG_P (REGNO (dest))
   && REG_P (src) && !FP_REG_P (REGNO (src))
   && can_create_pseudo_p ())
@@ -3213,10 +3212,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
src)
   else
emit_insn (gen_iordi3 (temp, mask, temp));
 
-  riscv_emit_move (dest,
-  gen_rtx_UNSPEC (mode, gen_rtvec (1, temp),
-  mode == HFmode ? UNSPEC_FMV_SFP16_X
- : UNSPEC_FMV_SBF16_X));
+  riscv_emit_move (dest, gen_rtx_UNSPEC (mode, gen_rtvec (1, temp),
+UNSPEC_FMV_FP16_X));
 
   return true;
 }
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 04f54cedad9..ff4557c1325 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -87,8 +87,7 @@
   UNSPEC_STRLEN
 
   ;; Workaround for HFmode and BFmode without hardware extension
-  UNSPEC_FMV_SFP16_X
-  UNSPEC_FMV_SBF16_X
+  UNSPEC_FMV_FP16_X
 
   ;; XTheadFmv moves
   UNSPEC_XTHEADFMV
@@ -1959,23 +1958,15 @@
(set_attr "type" "fmove,move,load,store,mtc,mfc")
(set_attr "mode" "")])
 
-(define_insn "*movhf_softfloat_boxing"
-  [(set (match_operand:HF 0 "register_operand""=f")
-(unspec:HF [(match_operand:X 1 "register_operand" " r")] 
UNSPEC_FMV_SFP16_X))]
+(define_insn "*mov_softfloat_boxing"
+  [(set (match_operand:HFBF 0 "register_operand"   "=f")
+   (unspec:HFBF [(match_operand:X 1 "register_operand" " r")]
+UNSPEC_FMV_FP16_X))]
   "!TARGET_ZFHMIN"
   "fmv.w.x\t%0,%1"
   [(set_attr "type" "fmove")
(set_attr "mode" "SF")])
 
-(define_insn "*movbf_softfloat_boxing"
-  [(set (match_operand:BF 0 "register_operand"   "=f")
-   (unspec:BF [(match_operand:X 1 "register_operand" " r")]
-UNSPEC_FMV_SBF16_X))]
-  "!TARGET_ZFBFMIN"
-  "f

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed, RISC-V] Improve some shift-add sequences

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e043aa621946842ccbc319e9683ad042abdf3ed5

commit e043aa621946842ccbc319e9683ad042abdf3ed5
Author: Jeff Law 
Date:   Sat May 18 15:08:07 2024 -0600

[to-be-committed,RISC-V] Improve some shift-add sequences

So this is a minor fix/improvement for shift-add sequences.  This was
supposed to help xz in a minor way IIRC.

Combine may present us with (x + C2') << C1 which was canonicalized from
(x << C1) + C2.

Depending on the precise values of C2 and C2' one form may be better
than the other.  We can (somewhat awkwardly) use riscv_const_insns to
test for which sequence would be preferred.

Tested on Ventana's CI system as well as my own.  Waiting on CI results
from Rivos's tester before moving forward.

Jeff
gcc/
* config/riscv/riscv.md: Add new patterns to allow selection
between (x << C1) + C2 vs (x + C2') << C1 depending on the
cost C2 vs C2'.

gcc/testsuite

* gcc.target/riscv/shift-add-1.c: New test.

(cherry picked from commit 3c9c52a1c0fa7af22f769a2116b28a0b7ea18129)

Diff:
---
 gcc/config/riscv/riscv.md| 56 
 gcc/testsuite/gcc.target/riscv/shift-add-1.c | 21 +++
 2 files changed, 77 insertions(+)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index ff4557c1325..78c16adee98 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -4162,6 +4162,62 @@
   }
 )
 
+;; These are forms of (x << C1) + C2, potentially canonicalized from
+;; ((x + C2') << C1.  Depending on the cost to load C2 vs C2' we may
+;; want to go ahead and recognize this form as C2 may be cheaper to
+;; synthesize than C2'.
+;;
+;; It might be better to refactor riscv_const_insns a bit so that we
+;; can have an API that passes integer values around rather than
+;; constructing a lot of garbage RTL.
+;;
+;; The mvconst_internal pattern in effect requires this pattern to
+;; also be a define_insn_and_split due to insn count costing when
+;; splitting in combine.
+(define_insn_and_split ""
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (plus:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
+   (match_operand 2 "const_int_operand" "n"))
+(match_operand 3 "const_int_operand" "n")))
+   (clobber (match_scratch:DI 4 "=&r"))]
+  "(TARGET_64BIT
+&& riscv_const_insns (operands[3])
+&& ((riscv_const_insns (operands[3])
+< riscv_const_insns (GEN_INT (INTVAL (operands[3]) >> INTVAL 
(operands[2]
+   || riscv_const_insns (GEN_INT (INTVAL (operands[3]) >> INTVAL 
(operands[2]))) == 0))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (ashift:DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4) (match_dup 3))
+   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
+  ""
+  [(set_attr "type" "arith")])
+
+(define_insn_and_split ""
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (sign_extend:DI (plus:SI (ashift:SI
+  (match_operand:SI 1 "register_operand" "r")
+  (match_operand 2 "const_int_operand" "n"))
+(match_operand 3 "const_int_operand" "n"
+   (clobber (match_scratch:DI 4 "=&r"))]
+  "(TARGET_64BIT
+&& riscv_const_insns (operands[3])
+&& ((riscv_const_insns (operands[3])
+< riscv_const_insns (GEN_INT (INTVAL (operands[3]) >> INTVAL 
(operands[2]
+   || riscv_const_insns (GEN_INT (INTVAL (operands[3]) >> INTVAL 
(operands[2]))) == 0))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (ashift:DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 4) (match_dup 3))
+   (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 5) (match_dup 6]
+  "{
+ operands[1] = gen_lowpart (DImode, operands[1]);
+ operands[5] = gen_lowpart (SImode, operands[0]);
+ operands[6] = gen_lowpart (SImode, operands[4]);
+   }"
+  [(set_attr "type" "arith")])
+
+
 (include "bitmanip.md")
 (include "crypto.md")
 (include "sync.md")
diff --git a/gcc/testsuite/gcc.target/riscv/shift-add-1.c 
b/gcc/testsuite/gcc.target/riscv/shift-add-1.c
new file mode 100644
index 000..d98875c3271
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/shift-add-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zba_zbb_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+int composeFromSurrogate(const unsigned short high) {
+
+return  ((high - 0xD800) << 10) ;
+}
+
+
+long composeFromSurrogate_2(const unsigned long high) {
+
+return  ((high - 0xD800) << 10) ;
+}
+
+
+/* { dg-final { scan-assembler-times "\tli\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tslli\t" 2 } } */
+/* { dg-final { scan-assembler-times "\taddw\t" 1 } } */
+/* { dg-final { scan-assembler-times "\tadd\t" 1 } } */
+


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement -m{, no}fence-tso

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:48ab926d0426e3e4d682d518b8656a9c6d8b9b96

commit 48ab926d0426e3e4d682d518b8656a9c6d8b9b96
Author: Palmer Dabbelt 
Date:   Sat May 18 15:15:09 2024 -0600

RISC-V: Implement -m{,no}fence-tso

Some processors from T-Head don't implement the `fence.tso` instruction
natively and instead trap to firmware.  This breaks some users who
haven't yet updated the firmware and one could imagine it breaking users
who are trying to build firmware if they're using the C memory model.

So just add an option to disable emitting it, in a similar fashion to
how we allow users to forbid other instructions.

Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1070959
---
I've just smoke tested this one, but

void func(void) { __atomic_thread_fence(__ATOMIC_ACQ_REL); }

generates `fence.tso` without the argument and `fence rw,rw` with
`-mno-fence-tso`, so it seems to be at least mostly there.  I figured
I'd just send it up for comments before putting together the DG bits:
it's kind of a pain to carry around these workarounds for unimplemented
instructions, but it's in HW so there's not much we can do about that.

gcc/ChangeLog:

* config/riscv/riscv.opt: Add -mno-fence-tso.
* config/riscv/sync-rvwmo.md (mem_thread_fence_rvwmo): Respect
-mno-fence-tso.
* doc/invoke.texi (RISC-V): Document -mno-fence-tso.

(cherry picked from commit a6114c2a691112f9cf5b072c21685d2e43c76d81)

Diff:
---
 gcc/config/riscv/riscv.opt | 4 
 gcc/config/riscv/sync-rvwmo.md | 2 +-
 gcc/doc/invoke.texi| 8 
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index d209ac896fd..87f58332016 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -624,3 +624,7 @@ Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS)
 mtls-dialect=
 Target RejectNegative Joined Enum(tls_type) Var(riscv_tls_dialect) 
Init(TLS_TRADITIONAL) Save
 Specify TLS dialect.
+
+mfence-tso
+Target Var(TARGET_FENCE_TSO) Init(1)
+Specifies whether the fence.tso instruction should be used.
diff --git a/gcc/config/riscv/sync-rvwmo.md b/gcc/config/riscv/sync-rvwmo.md
index d4fd26069f7..e639a1e2392 100644
--- a/gcc/config/riscv/sync-rvwmo.md
+++ b/gcc/config/riscv/sync-rvwmo.md
@@ -33,7 +33,7 @@
 if (model == MEMMODEL_SEQ_CST)
return "fence\trw,rw";
 else if (model == MEMMODEL_ACQ_REL)
-   return "fence.tso";
+   return TARGET_FENCE_TSO ? "fence.tso" : "fence\trw,rw";
 else if (model == MEMMODEL_ACQUIRE)
return "fence\tr,rw";
 else if (model == MEMMODEL_RELEASE)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index dc4c5a3189d..1d48d57bcc7 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1243,6 +1243,7 @@ See RS/6000 and PowerPC Options.
 -mplt  -mno-plt
 -mabi=@var{ABI-string}
 -mfdiv  -mno-fdiv
+-mfence-tso  -mno-fence-tso
 -mdiv  -mno-div
 -misa-spec=@var{ISA-spec-string}
 -march=@var{ISA-string}
@@ -30378,6 +30379,13 @@ Do or don't use hardware floating-point divide and 
square root instructions.
 This requires the F or D extensions for floating-point registers.  The default
 is to use them if the specified architecture has these instructions.
 
+@opindex mfence-tso
+@item -mfence-tso
+@itemx -mno-fence-tso
+Do or don't use the @samp{fence.tso} instruction, which is unimplemented on
+some processors (including those from T-Head).  If the @samp{fence.tso}
+instruction is not availiable then a stronger fence will be used instead.
+
 @opindex mdiv
 @item -mdiv
 @itemx -mno-div


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] DSE: Fix ICE after allow vector type in get_stored_val

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:98a48906bfb653d1e18323997d0854bef18986e7

commit 98a48906bfb653d1e18323997d0854bef18986e7
Author: Pan Li 
Date:   Tue Apr 30 09:42:39 2024 +0800

DSE: Fix ICE after allow vector type in get_stored_val

We previously allowed a vector type for get_stored_val when the read is
less than or equal to the store.  Unfortunately, validate_subreg
treats a vector type whose size is less than a vector register as
invalid.  Then we will have an ICE here.

This patch would like to fix it by filtering out the invalid type size,
and making sure the subreg is valid for both the read_mode and store_mode
before performing the real gen_lowpart.

The below test suites are passed for this patch:

* The x86 bootstrap test.
* The x86 regression test.
* The riscv rv64gcv regression test.
* The riscv rv64gc regression test.
* The aarch64 regression test.

gcc/ChangeLog:

* dse.cc (get_stored_val): Make sure read_mode/write_mode
is valid subreg before gen_lowpart.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/bug-6.c: New test.

Signed-off-by: Pan Li 
(cherry picked from commit 88b3f83238087cbe2aa2c51c6054796856f2fb94)

Diff:
---
 gcc/dse.cc  |  4 +++-
 gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c | 22 ++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/gcc/dse.cc b/gcc/dse.cc
index edc7a1dfecf..1596da91da0 100644
--- a/gcc/dse.cc
+++ b/gcc/dse.cc
@@ -1946,7 +1946,9 @@ get_stored_val (store_info *store_info, machine_mode 
read_mode,
 copy_rtx (store_info->const_rhs));
   else if (VECTOR_MODE_P (read_mode) && VECTOR_MODE_P (store_mode)
 && known_le (GET_MODE_BITSIZE (read_mode), GET_MODE_BITSIZE (store_mode))
-&& targetm.modes_tieable_p (read_mode, store_mode))
+&& targetm.modes_tieable_p (read_mode, store_mode)
+&& validate_subreg (read_mode, store_mode, copy_rtx (store_info->rhs),
+   subreg_lowpart_offset (read_mode, store_mode)))
 read_reg = gen_lowpart (read_mode, copy_rtx (store_info->rhs));
   else
 read_reg = extract_low_bits (read_mode, store_mode,
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
new file mode 100644
index 000..5bb00b8f587
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-6.c
@@ -0,0 +1,22 @@
+/* Test that we do not have ice when compile */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize" } */
+
+struct A { float x, y; };
+struct B { struct A u; };
+
+extern void bar (struct A *);
+
+float
+f3 (struct B *x, int y)
+{
+  struct A p = {1.0f, 2.0f};
+  struct A *q = &x[y].u;
+
+  __builtin_memcpy (&q->x, &p.x, sizeof (float));
+  __builtin_memcpy (&q->y, &p.y, sizeof (float));
+
+  bar (&p);
+
+  return x[y].u.x + x[y].u.y;
+}


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed][RISC-V][PR target/115142] Do not create invalid shift-add insn

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:58349b4561b0cc315e60ff5aa2b777a3e80c7f9f

commit 58349b4561b0cc315e60ff5aa2b777a3e80c7f9f
Author: Jeff Law 
Date:   Sun May 19 09:56:16 2024 -0600

[to-be-committed][RISC-V][PR target/115142] Do not create invalid
shift-add insn

The circumstances which triggered this weren't something that should appear 
in
the wild (-ftree-ter, without optimization enabled).  So I wasn't planning 
to
backport.  Obviously if it shows up in another context we can revisit that
decision.

I've run this through my rv32gcv and rv64gc tester.  Waiting on the CI 
system before committing.

PR target/115142
gcc/

* config/riscv/riscv.cc (mem_shadd_or_shadd_rtx_p): Make sure
shifted argument is a register.

gcc/testsuite

* gcc.target/riscv/pr115142.c: New test.

(cherry picked from commit e1ce9c37ed68136a99d44c8301990c184ba41849)

Diff:
---
 gcc/config/riscv/riscv.cc |  1 +
 gcc/testsuite/gcc.target/riscv/pr115142.c | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7a34b4be873..d0c22058b8c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2465,6 +2465,7 @@ mem_shadd_or_shadd_rtx_p (rtx x)
 {
   return ((GET_CODE (x) == ASHIFT
   || GET_CODE (x) == MULT)
+ && register_operand (XEXP (x, 0), GET_MODE (x))
  && CONST_INT_P (XEXP (x, 1))
  && ((GET_CODE (x) == ASHIFT && IN_RANGE (INTVAL (XEXP (x, 1)), 1, 3))
  || (GET_CODE (x) == MULT
diff --git a/gcc/testsuite/gcc.target/riscv/pr115142.c 
b/gcc/testsuite/gcc.target/riscv/pr115142.c
new file mode 100644
index 000..40ba49dfa20
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr115142.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -ftree-ter" } */
+
+long a;
+char b;
+void e() {
+  char f[8][1];
+  b = f[a][a];
+}
+


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Regenerate riscv.opt.urls and i386.opt.urls

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:e42956aafe991efc2128ac0b661f2ddefd11b4f3

commit e42956aafe991efc2128ac0b661f2ddefd11b4f3
Author: Mark Wielaard 
Date:   Mon May 20 13:13:02 2024 +0200

Regenerate riscv.opt.urls and i386.opt.urls

risc-v added an -mfence-tso option. i386 removed Xeon Phi ISA support
options. But the opt.urls files weren't regenerated.

Fixes: a6114c2a6911 ("RISC-V: Implement -m{,no}fence-tso")
Fixes: e1a7e2c54d52 ("i386: Remove Xeon Phi ISA support")

gcc/ChangeLog:

* config/riscv/riscv.opt.urls: Regenerate.
* config/i386/i386.opt.urls: Likewise.

(cherry picked from commit 591bc70139d898c06b1d605ff4fed591ffd2e2e7)

Diff:
---
 gcc/config/i386/i386.opt.urls   | 15 ---
 gcc/config/riscv/riscv.opt.urls |  3 +++
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 81c5bb9a927..40e8a844936 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -238,12 +238,6 @@ UrlSuffix(gcc/x86-Options.html#index-mavx2)
 mavx512f
 UrlSuffix(gcc/x86-Options.html#index-mavx512f)
 
-mavx512pf
-UrlSuffix(gcc/x86-Options.html#index-mavx512pf)
-
-mavx512er
-UrlSuffix(gcc/x86-Options.html#index-mavx512er)
-
 mavx512cd
 UrlSuffix(gcc/x86-Options.html#index-mavx512cd)
 
@@ -262,12 +256,6 @@ UrlSuffix(gcc/x86-Options.html#index-mavx512ifma)
 mavx512vbmi
 UrlSuffix(gcc/x86-Options.html#index-mavx512vbmi)
 
-mavx5124fmaps
-UrlSuffix(gcc/x86-Options.html#index-mavx5124fmaps)
-
-mavx5124vnniw
-UrlSuffix(gcc/x86-Options.html#index-mavx5124vnniw)
-
 mavx512vpopcntdq
 UrlSuffix(gcc/x86-Options.html#index-mavx512vpopcntdq)
 
@@ -409,9 +397,6 @@ UrlSuffix(gcc/x86-Options.html#index-mrdrnd)
 mf16c
 UrlSuffix(gcc/x86-Options.html#index-mf16c)
 
-mprefetchwt1
-UrlSuffix(gcc/x86-Options.html#index-mprefetchwt1)
-
 mfentry
 UrlSuffix(gcc/x86-Options.html#index-mfentry)
 
diff --git a/gcc/config/riscv/riscv.opt.urls b/gcc/config/riscv/riscv.opt.urls
index 2f01ae5d627..e02ef3ee3dd 100644
--- a/gcc/config/riscv/riscv.opt.urls
+++ b/gcc/config/riscv/riscv.opt.urls
@@ -91,3 +91,6 @@ UrlSuffix(gcc/RISC-V-Options.html#index-minline-strlen)
 
 ; skipping UrlSuffix for 'mtls-dialect=' due to finding no URLs
 
+mfence-tso
+UrlSuffix(gcc/RISC-V-Options.html#index-mfence-tso)
+


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: avoid LUI based const mat in prologue/epilogue expansion [PR/105733]

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:5f7595e74bba8ac4ab984ce6b47eac48b82edda2

commit 5f7595e74bba8ac4ab984ce6b47eac48b82edda2
Author: Vineet Gupta 
Date:   Mon May 13 11:46:03 2024 -0700

RISC-V: avoid LUI based const mat in prologue/epilogue expansion [PR/105733]

If the constant used for stack offset can be expressed as sum of two S12
values, the constant need not be materialized (in a reg) and instead the
two S12 bits can be added to instructions involved with frame pointer.
This avoids burning a register and more importantly can often get down
to be 2 insn vs. 3.

The prev patches to generally avoid LUI based const materialization didn't
fix this PR and need this directed fix in function prologue/epilogue
expansion.

This fix doesn't move the needle for SPEC, at all, but it is still a
win considering gcc generates one insn fewer than llvm for the test ;-)

   gcc-13.1 release   |  gcc 230823 |   |
  |g6619b3d4c15c|   This patch  |  
clang/llvm

-
li  t0,-4096 | lit0,-4096  | addi  sp,sp,-2048 | addi 
sp,sp,-2048
addit0,t0,2016   | addi  t0,t0,2032| add   sp,sp,-16   | addi 
sp,sp,-32
li  a4,4096  | add   sp,sp,t0  | add   a5,sp,a0| add  
a1,sp,16
add sp,sp,t0 | addi  a5,sp,-2032   | sbzero,0(a5)  | add  
a0,a0,a1
li  a5,-4096 | add   a0,a5,a0  | addi  sp,sp,2032  | sb   
zero,0(a0)
addia4,a4,-2032  | lit0, 4096  | addi  sp,sp,32| addi 
sp,sp,2032
add a4,a4,a5 | sbzero,2032(a0) | ret   | addi 
sp,sp,48
addia5,sp,16 | addi  t0,t0,-2032   |   | ret
add a5,a4,a5 | add   sp,sp,t0  |
add a0,a5,a0 | ret |
li  t0,4096  |
sd  a5,8(sp) |
sb  zero,2032(a0)|
addit0,t0,-2016  |
add sp,sp,t0 |
ret  |

gcc/ChangeLog:
PR target/105733
* config/riscv/riscv.h: New macros for with aligned offsets.
* config/riscv/riscv.cc (riscv_split_sum_of_two_s12): New
function to split a sum of two s12 values into constituents.
(riscv_expand_prologue): Handle offset being sum of two S12.
(riscv_expand_epilogue): Ditto.
* config/riscv/riscv-protos.h (riscv_split_sum_of_two_s12): New.

gcc/testsuite/ChangeLog:
* gcc.target/riscv/pr105733.c: New Test.
* gcc.target/riscv/rvv/autovec/vls/spill-1.c: Adjust to not
expect LUI 4096.
* gcc.target/riscv/rvv/autovec/vls/spill-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/vls/spill-7.c: Ditto.

Tested-by: Edwin Lu  # pre-commit-CI #1568
Signed-off-by: Vineet Gupta 
(cherry picked from commit f9cfc192ed0127edb7e79818917dd2859fce4d44)

Diff:
---
 gcc/config/riscv/riscv-protos.h|  2 +
 gcc/config/riscv/riscv.cc  | 54 --
 gcc/config/riscv/riscv.h   |  7 +++
 gcc/testsuite/gcc.target/riscv/pr105733.c  | 15 ++
 .../gcc.target/riscv/rvv/autovec/vls/spill-1.c |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-2.c |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-3.c |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-4.c |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-5.c |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-6.c |  4 +-
 .../gcc.target/riscv/rvv/autovec/vls/spill-7.c |  4 +-
 11 files changed, 89 insertions(+), 17 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index c64aae18deb..0704968561b 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -167,6 +167,8 @@ extern void riscv_subword_address (rtx, rtx *, rtx *, rtx 
*, rtx *);
 extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
 extern enum memmodel riscv_union_memmodels (enum memmodel, enum memmodel);
 extern bool riscv_reg_frame_related (rtx);
+extern void riscv_split_sum_of_two_s12 (HOST_WIDE_INT, HOST_WIDE_INT *,
+   HOST_WIDE_INT *);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d0c22058b8c..2ecbcf1d0af 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4075,6 +4075,32 @@ riscv_split_doubleword_move (rtx dest, rtx src)
riscv_emit_move (riscv_s

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: avoid LUI based const mat in alloca epilogue expansion

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:d85def27f51584040cffb571ea476d43a132c59e

commit d85def27f51584040cffb571ea476d43a132c59e
Author: Vineet Gupta 
Date:   Wed Mar 6 15:44:27 2024 -0800

RISC-V: avoid LUI based const mat in alloca epilogue expansion

This is continuing on the prev patch in function epilogue expansion.
Broken out of easy of review.

gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_expand_epilogue): Handle offset
being sum of two S12.

Tested-by: Patrick O'Neill  # pre-commit-CI #1569
Signed-off-by: Vineet Gupta 
(cherry picked from commit 9926c40a902edbc665919d508ef0c36f362f9c41)

Diff:
---
 gcc/config/riscv/riscv.cc | 33 ++---
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 2ecbcf1d0af..85df5b7ab49 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -8111,7 +8111,10 @@ riscv_expand_epilogue (int style)
   need_barrier_p = false;
 
   poly_int64 adjust_offset = -frame->hard_frame_pointer_offset;
+  rtx dwarf_adj = gen_int_mode (adjust_offset, Pmode);
   rtx adjust = NULL_RTX;
+  bool sum_of_two_s12 = false;
+  HOST_WIDE_INT one, two;
 
   if (!adjust_offset.is_constant ())
{
@@ -8123,14 +8126,23 @@ riscv_expand_epilogue (int style)
}
   else
{
- if (!SMALL_OPERAND (adjust_offset.to_constant ()))
+ HOST_WIDE_INT adj_off_value = adjust_offset.to_constant ();
+ if (SMALL_OPERAND (adj_off_value))
+   {
+ adjust = GEN_INT (adj_off_value);
+   }
+ else if (SUM_OF_TWO_S12_ALGN (adj_off_value))
+   {
+ riscv_split_sum_of_two_s12 (adj_off_value, &one, &two);
+ dwarf_adj = adjust = GEN_INT (one);
+ sum_of_two_s12 = true;
+   }
+ else
{
  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode),
-  GEN_INT (adjust_offset.to_constant ()));
+  GEN_INT (adj_off_value));
  adjust = RISCV_PROLOGUE_TEMP (Pmode);
}
- else
-   adjust = GEN_INT (adjust_offset.to_constant ());
}
 
   insn = emit_insn (
@@ -8138,14 +8150,21 @@ riscv_expand_epilogue (int style)
  adjust));
 
   rtx dwarf = NULL_RTX;
-  rtx cfa_adjust_value = gen_rtx_PLUS (
-  Pmode, hard_frame_pointer_rtx,
-  gen_int_mode (-frame->hard_frame_pointer_offset, 
Pmode));
+  rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx,
+  dwarf_adj);
   rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
+
   RTX_FRAME_RELATED_P (insn) = 1;
 
   REG_NOTES (insn) = dwarf;
+
+  if (sum_of_two_s12)
+   {
+ insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+   GEN_INT (two)));
+ RTX_FRAME_RELATED_P (insn) = 1;
+   }
 }
 
   if (use_restore_libcall || use_multi_pop)


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Enable vectorization for vect-early-break_124-pr114403.c

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:cd9b981f3b5124900f44b5e156c773de2f186ced

commit cd9b981f3b5124900f44b5e156c773de2f186ced
Author: xuli 
Date:   Mon May 20 01:56:47 2024 +

RISC-V: Enable vectorization for vect-early-break_124-pr114403.c

Because "targetm.slow_unaligned_access" is set to true by default
(aka -mtune=rocket) for RISC-V, it causes the __builtin_memcpy with
8 bytes to fail to be folded into an int64 assignment during ccp1.

So adding "-mtune=generic-ooo" to the RISC-V target can vectorize
vect-early-break_124-pr114403.c.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-early-break_124-pr114403.c: Enable vectorization
for RISC-V target.

(cherry picked from commit ffab721f3c9ecbb9831844d844ad257b69a77993)

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
index 101ae1e0eaa..610b951b262 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_124-pr114403.c
@@ -1,8 +1,9 @@
 /* { dg-add-options vect_early_break } */
 /* { dg-require-effective-target vect_early_break_hw } */
 /* { dg-require-effective-target vect_long_long } */
+/* { dg-additional-options "-mtune=generic-ooo" { target riscv*-*-* } } */
 
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail riscv*-*-* } } 
} */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
 
 #include "tree-vect.h"
 
@@ -74,4 +75,3 @@ int main ()
 
   return 0;
 }
-


[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed, v2, RISC-V] Use bclri in constant synthesis

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:b6c92195b428de63a4aabcea1a8e00911f2c596d

commit b6c92195b428de63a4aabcea1a8e00911f2c596d
Author: Jeff Law 
Date:   Fri May 24 07:27:00 2024 -0600

[to-be-committed,v2,RISC-V] Use bclri in constant synthesis

Testing with Zbs enabled by default showed a minor logic error.  After
the loop clearing things with bclri, we can only use the sequence if we
were able to clear all the necessary bits.  If any bits are still on,
then the bclr sequence turned out to not be profitable.

--

So this is conceptually similar to how we handled direct generation of
bseti for constant synthesis, but this time for bclr.

In the bclr case, we already have an expander for AND.  So we just
needed to adjust the predicate to accept another class of constant
operands (those with a single bit clear).

With that in place constant synthesis is adjusted so that it counts the
number of bits clear in the high 33 bits of a 64bit word.  If that
number is small relative to the current best cost, then we try to
generate the constant with a lui based sequence for the low half which
implicitly sets the upper 32 bits as well.  Then we bclri one or more of
those upper 33 bits.

So as an example, this code goes from 4 instructions down to 3:

 > unsigned long foo_0xfffbf7ff(void) { return
0xfffbf7ffUL; }

Note the use of 33 bits above.  That's meant to capture cases like this:

 > unsigned long foo_0xfffd77ff(void) { return
0xfffd77ffUL; }

We can use lui+addi+bclri+bclri to synthesize that in 4 instructions
instead of 5.

I'm including a handful of cases covering the two basic ideas above that
were found by the testing code.

And, no, we're not done yet.  I see at least one more notable idiom
missing before exploring zbkb's potential to improve things.

Tested in my tester and waiting on Rivos CI system before moving forward.
gcc/

* config/riscv/predicates.md (arith_operand_or_mode_mask): Renamed 
to..
(arith_or_mode_mask_or_zbs_operand): New predicate.
* config/riscv/riscv.md (and3): Update predicate for operand 
2.
* config/riscv/riscv.cc (riscv_build_integer_1): Use bclri to clear
bits, particularly bits 31..63 when profitable to do so.

gcc/testsuite/

* gcc.target/riscv/synthesis-6.c: New test.

(cherry picked from commit 401994d60ab38ffa9e63f368f0456eb7b08599be)

Diff:
---
 gcc/config/riscv/predicates.md   | 14 ++--
 gcc/config/riscv/riscv.cc| 34 ++
 gcc/config/riscv/riscv.md|  2 +-
 gcc/testsuite/gcc.target/riscv/synthesis-6.c | 95 
 4 files changed, 138 insertions(+), 7 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 8948fbfc363..0fb5729fdcf 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -27,12 +27,6 @@
   (ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
 
-(define_predicate "arith_operand_or_mode_mask"
-  (ior (match_operand 0 "arith_operand")
-   (and (match_code "const_int")
-(match_test "UINTVAL (op) == GET_MODE_MASK (HImode)
-|| UINTVAL (op) == GET_MODE_MASK (SImode)"
-
 (define_predicate "lui_operand"
   (and (match_code "const_int")
(match_test "LUI_OPERAND (INTVAL (op))")))
@@ -398,6 +392,14 @@
   (and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))")))
 
+(define_predicate "arith_or_mode_mask_or_zbs_operand"
+  (ior (match_operand 0 "arith_operand")
+   (and (match_test "TARGET_ZBS")
+   (match_operand 0 "not_single_bit_mask_operand"))
+   (and (match_code "const_int")
+   (match_test "UINTVAL (op) == GET_MODE_MASK (HImode)
+|| UINTVAL (op) == GET_MODE_MASK (SImode)"
+
 (define_predicate "const_si_mask_operand"
   (and (match_code "const_int")
(match_test "(INTVAL (op) & (GET_MODE_BITSIZE (SImode) - 1))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 85df5b7ab49..92935275aaa 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -893,6 +893,40 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
  codes[1].use_uw = false;
  cost = 2;
}
+
+  /* If LUI/ADDI are going to set bits 32..63 and we need a small
+number of them cleared, we might be able to use bclri profitably.
+
+Note we may allow clearing of bit 31 using bclri.  There's a class
+of constants with that bit clear where this helps.  */
+  else if (TARGET_64BIT
+  && TARGET_ZBS
+  && (32 - popcount_hwi (value & HOST_WIDE_INT_C 
(0x8000

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [committed] [v2] More logical op simplifications in simplify-rtx.cc

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:91f41ea3f28d8ba5d5d93dcfcd82c7d94e4e668d

commit 91f41ea3f28d8ba5d5d93dcfcd82c7d94e4e668d
Author: Jeff Law 
Date:   Sat May 25 12:39:05 2024 -0600

[committed] [v2] More logical op simplifications in simplify-rtx.cc

This is a revamp of what started as a target specific patch.

Basically xalan (corrected, I originally thought it was perlbench) has a 
bitset
implementation with a bit of an oddity.  Specifically setBit will clear the 
bit
before it is set:

> if (bitToSet < 32)
> {
> fBits1 &= ~mask;
> fBits1 |= mask;
> }
>  else
> {
> fBits2 &= ~mask;
> fBits2 |= mask;
> }
We can clean this up pretty easily in RTL with a small bit of code in
simplify-rtx.  While xalan doesn't have other cases, we can synthesize tests
pretty easily and handle them as well.

It turns out we don't actually have to recognize this stuff at the bit 
level,
just standard logical identities are sufficient.  For example

(X | Y) & ~Y -> X & ~Y

Andrew P. might poke at this at the gimple level.  The type changes kindof 
get
in the way in gimple but he's much better at match.pd than I am, so if he 
wants
to chase it from the gimple side, I'll fully support that.

Bootstrapped and regression tested on x86.  Also run through my tester on 
its
embedded targets.

Pushing to the trunk.

gcc/

* simplify-rtx.cc (simplify_context::simplify_binary_operation_1): 
Handle
more logical simplifications.

gcc/testsuite/

* g++.target/riscv/redundant-bitmap-1.C: New test.
* g++.target/riscv/redundant-bitmap-2.C: New test.
* g++.target/riscv/redundant-bitmap-3.C: New test.
* g++.target/riscv/redundant-bitmap-4.C: New test.

(cherry picked from commit 05daf617ea22e1d818295ed2d037456937e23530)

Diff:
---
 gcc/simplify-rtx.cc| 29 ++
 .../g++.target/riscv/redundant-bitmap-1.C  | 14 +++
 .../g++.target/riscv/redundant-bitmap-2.C  | 14 +++
 .../g++.target/riscv/redundant-bitmap-3.C  | 14 +++
 .../g++.target/riscv/redundant-bitmap-4.C  | 14 +++
 5 files changed, 85 insertions(+)

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index dceaa1c..bb562c3af2c 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3549,6 +3549,12 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
return tem;
}
 
+  /* Convert (ior (and (not A) B) A) into A | B.  */
+  if (GET_CODE (op0) == AND
+ && GET_CODE (XEXP (op0, 0)) == NOT
+ && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
+   return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+
   tem = simplify_byte_swapping_operation (code, mode, op0, op1);
   if (tem)
return tem;
@@ -3801,6 +3807,12 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
return tem;
}
 
+  /* Convert (xor (and (not A) B) A) into A | B.  */
+  if (GET_CODE (op0) == AND
+ && GET_CODE (XEXP (op0, 0)) == NOT
+ && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
+   return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+
   tem = simplify_byte_swapping_operation (code, mode, op0, op1);
   if (tem)
return tem;
@@ -4006,6 +4018,23 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
  && rtx_equal_p (op1, XEXP (XEXP (op0, 1), 0)))
return simplify_gen_binary (AND, mode, op1, XEXP (op0, 0));
 
+  /* (and (ior/xor (X Y) (not Y)) -> X & ~Y */
+  if ((GET_CODE (op0) == IOR || GET_CODE (op0) == XOR)
+ && GET_CODE (op1) == NOT
+ && rtx_equal_p (XEXP (op1, 0), XEXP (op0, 1)))
+   return simplify_gen_binary (AND, mode, XEXP (op0, 0),
+   simplify_gen_unary (NOT, mode,
+   XEXP (op1, 0),
+   mode));
+  /* (and (ior/xor (Y X) (not Y)) -> X & ~Y */
+  if ((GET_CODE (op0) == IOR || GET_CODE (op0) == XOR)
+ && GET_CODE (op1) == NOT
+ && rtx_equal_p (XEXP (op1, 0), XEXP (op0, 0)))
+   return simplify_gen_binary (AND, mode, XEXP (op0, 1),
+   simplify_gen_unary (NOT, mode,
+   XEXP (op1, 0),
+   mode));
+
   /* Convert (and (ior A C) (ior B C)) into (ior (and A B) C).  */
   if (GET_CODE (op0) == GET_CODE (op1)
  && (GET_CODE (op0) == AND
diff --git a/gcc/testsuite/g++.target/riscv/redundant-bitmap-1.C 
b/gcc/testsuite/g++.ta

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed][RISC-V] Generate nearby constant, then adjust to our final desired constant

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:dbd78bcf532086b55ef133074259ad29d33a400a

commit dbd78bcf532086b55ef133074259ad29d33a400a
Author: Jeff Law 
Date:   Sun May 26 10:54:18 2024 -0600

[to-be-committed][RISC-V] Generate nearby constant, then adjust to our 
final desired constant

Next step in constant synthesis work.

For some cases it can be advantageous to generate a constant near our 
target,
then do a final addi to fully synthesize C.

The idea is that while our target C may require N instructions to 
synthesize,
C' may only require N-2 (or fewer) instructions.  Thus there's budget to 
adjust
C' into C ending up with a better sequence than if we tried to generate C
directly.

So as an example:

> unsigned long foo_0xf7fe77ff(void) { return 0xf7fe77ffUL; 
}

This is currently 5 instructions on the trunk:

> li  a0,-4096
> addia0,a0,2047
> bclri   a0,a0,31
> bclri   a0,a0,32
> bclri   a0,a0,43

But we can do better by first synthesizing 0xf7fe7800 which is just 
3
instructions.  Then we can subtract 1 from the result.  That gives us this
sequence:

> li  a0,-16789504
> sllia0,a0,19
> addia0,a0,-2048
> addia0,a0,-1

These cases are relatively easy to find once you know what you're looking 
for.
I kept the full set found by the testing code yesterday, mostly because 
some of
them show different patterns for generating C', thus showing generality in 
the
overall synthesis implementation.  While all these tests have 0x7ff in their
low bits.  That's just an artifact to the test script.  The methodology will
work for a variety of other cases.

gcc/
* config/riscv/riscv.cc (riscv_build_integer_1): Try generating
a nearby simpler constant, then using a final addi to set low
bits properly.

gcc/testsuite

* gcc.target/riscv/synthesis-7.c: New test.

(cherry picked from commit 95660223c434000a42957cf6cabed0236bb4bae8)

Diff:
---
 gcc/config/riscv/riscv.cc|   20 +
 gcc/testsuite/gcc.target/riscv/synthesis-7.c | 1531 ++
 2 files changed, 1551 insertions(+)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 92935275aaa..0ebdd696a95 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -963,6 +963,26 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
}
 }
 
+  /* We might be able to generate a constant close to our target
+ then a final ADDI to get the desired constant.  */
+  if (cost > 2
+  && (value & 0xfff) != 0
+  && (value & 0x1800) == 0x1000)
+{
+  HOST_WIDE_INT adjustment = -(0x800 - (value & 0xfff));
+  alt_cost = 1 + riscv_build_integer_1 (alt_codes,
+   value - adjustment, mode);
+
+  if (alt_cost < cost)
+   {
+ alt_codes[alt_cost - 1].code = PLUS;
+ alt_codes[alt_cost - 1].value = adjustment;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+   }
+}
+
   /* Final cases, particularly focused on bseti.  */
   if (cost > 2 && TARGET_ZBS)
 {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-7.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-7.c
new file mode 100644
index 000..1611c964758
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-7.c
@@ -0,0 +1,1531 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 7571 } } */
+
+unsigned long foo_0xf7fe77ff(void) { return 0xf7fe77ffUL; }
+unsigned long foo_0xeffe77ff(void) { return 0xeffe77ffUL; }
+unsigned long foo_0xdffe77ff(void) { return 0xdffe77ffUL; }
+unsigned long foo_0xbffe77ff(void) { return 0xbffe77ffUL; }
+unsigned long foo_0x7ffe77ff(void) { return 0x7ffe77ffUL; }
+unsigned long foo_0xfffefffe77ff(void) { return 0xfffefffe77ffUL; }
+unsigned long foo_0xfffdff

[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [to-be-committed] [RISC-V] Try inverting for constant synthesis

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:7f716ba0f6c0360db3643c815d390aee04d2794f

commit 7f716ba0f6c0360db3643c815d390aee04d2794f
Author: Jeff Law 
Date:   Sun May 26 17:54:51 2024 -0600

[to-be-committed] [RISC-V] Try inverting for constant synthesis

So there's another class of constants we're failing to synthesize well.
Specifically those where we can invert our original constant C into C' and 
C'
takes at least 2 fewer instructions to synthesize than C.  In that case we 
can
initially generate C', then use xori with the constant -1 to flip all the 
bits
resulting in our target constant.

I've only seen this trigger when the final synthesis is li+srli+xori. The
original synthesis took on various 4 or 5 instruction forms.

Most of the methods we use to improve constant synthesis are in
riscv_build_integer_1.  I originally tried to put this code in there. But
that'll end up with infinite recursion due to some other ADDI related code
which wants to flip bits and try synthesis.

So this was put into riscv_build_integer and recurses into 
riscv_build_integer.
This isn't unprecedented, just a bit different than most of the other 
synthesis
implementation bits.

This doesn't depend on any extensions.  So it should help any rv64 system.

gcc/

* config/riscv/riscv.cc (riscv_build_integer_1): Verify there
are no bits left to set in the constant when generating bseti.
(riscv_build_integer): Synthesize ~value and if it's cheap use it
with a trailing xori with -1.

gcc/testsuite

* gcc.target/riscv/synthesis-8.c: New test.

(cherry picked from commit 3a915d6ad5fc3e0fadd14e54515b48b1d655c5a4)

Diff:
---
 gcc/config/riscv/riscv.cc| 27 +-
 gcc/testsuite/gcc.target/riscv/synthesis-8.c | 34 
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 0ebdd696a95..401ddc9eeb7 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -1022,7 +1022,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
 
   /* If LUI+ADDI+BSETI resulted in a more efficient
 sequence, then use it.  */
-  if (i < cost)
+  if (value == 0 && i < cost)
{
  memcpy (codes, alt_codes, sizeof (alt_codes));
  cost = i;
@@ -1074,6 +1074,31 @@ riscv_build_integer (struct riscv_integer_op *codes, 
HOST_WIDE_INT value,
}
 }
 
+  /* See if we can generate the inverted constant, then use
+ not to get the desired constant.
+
+ This can't be in riscv_build_integer_1 as it'll mutually
+ recurse with another case in there.  And it has to recurse
+ into riscv_build_integer so we get the trailing 0s case
+ above.  */
+  if (cost > 2 && value < 0)
+{
+  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+  int alt_cost;
+
+  HOST_WIDE_INT nval = ~value;
+  alt_cost = 1 + riscv_build_integer (alt_codes, nval, mode);
+  if (alt_cost < cost)
+   {
+ alt_codes[alt_cost - 1].code = XOR;
+ alt_codes[alt_cost - 1].value = -1;
+ alt_codes[alt_cost - 1].use_uw = false;
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+   }
+}
+
+
   if (!TARGET_64BIT
   && (value > INT32_MAX || value < INT32_MIN))
 {
diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-8.c 
b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
new file mode 100644
index 000..2bcdb4e774d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/synthesis-8.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* We aggressively skip as we really just need to test the basic synthesis
+   which shouldn't vary based on the optimization level.  -O1 seems to work
+   and eliminates the usual sources of extraneous dead code that would throw
+   off the counts.  */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } 
*/
+/* { dg-options "-march=rv64gc_zba_zbb_zbs" } */
+
+/* Rather than test for a specific synthesis of all these constants or
+   having thousands of tests each testing one variant, we just test the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look.  */
+/* { dg-final { scan-assembler-times 
"\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli|srli|xori)" 72 } } */
+
+unsigned long foo_0xc00077ff(void) { return 0xc00077ffUL; }
+unsigned long foo_0xc000b7ff(void) { return 0xc000b7ffUL; }
+unsigned long foo_0xc00137ff(void) { return 0xc00137ffUL; }
+unsigned long foo_0xc00237ff(void) { return 0xc00237ffUL; }
+unsigned long foo_0xc00437ff(void) { return 0xc00437ffUL; }
+unsigned long foo_0xc

[gcc r15-840] Gen-Match: Fix gen_kids_1 right hand braces mis-alignment

2024-05-26 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:5d99cf74c9f748c93ea218eca9bd2f08edfb2a88

commit r15-840-g5d99cf74c9f748c93ea218eca9bd2f08edfb2a88
Author: Pan Li 
Date:   Sat May 25 23:16:50 2024 +0800

Gen-Match: Fix gen_kids_1 right hand braces mis-alignment

Notice some mis-alignment for gen_kids_1 right hand braces as below:

  if ((_q50 == _q20 && ! TREE_SIDE_EFFECTS (...
{
  if ((_q51 == _q21 && ! TREE_SIDE_EFFECTS (...
{
  {
tree captures[2] ATTRIBUTE_UNUSED = {...
{
  res_ops[0] = captures[0];
  res_ops[1] = captures[1];
  if (UNLIKELY (debug_dump)) ...
  return true;
}
  }
}
}
}  // mis-aligned here.
 }

The below test are passed for this patch:
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* genmatch.cc (dt_node::gen_kids_1): Fix mis-aligned indent.

Signed-off-by: Pan Li 

Diff:
---
 gcc/genmatch.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index c982c95b70f..f1e0e7abe0c 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -3428,7 +3428,7 @@ dt_node::gen_kids_1 (FILE *f, int indent, bool gimple, 
int depth,
  child_opname, kid_opname, j);
}
   preds[i]->gen_kids (f, indent + 4, gimple, depth);
-  fprintf (f, "}\n");
+  fprintf_indent (f, indent, "  }\n");
   indent -= 2;
   fprintf_indent (f, indent, "}\n");
 }


[gcc r15-841] x86: Fix Logical Shift Issue in expand_vec_perm_psrlw_psllw_por [PR115146]

2024-05-26 Thread Levy Hsu via Gcc-cvs
https://gcc.gnu.org/g:0022064649d0ec40e97df24279c48842e278fedc

commit r15-841-g0022064649d0ec40e97df24279c48842e278fedc
Author: Levy Hsu 
Date:   Tue May 21 12:47:21 2024 +0930

x86: Fix Logical Shift Issue in expand_vec_perm_psrlw_psllw_por [PR115146]

Replaced arithmetic shifts with logical shifts in 
expand_vec_perm_psrlw_psllw_por to avoid sign bit extension issues. Also 
corrected gen_vlshrv8hi3 to gen_lshrv8hi3 and gen_vashlv8hi3 to gen_ashlv8hi3.

Co-authored-by: H.J. Lu 

gcc/ChangeLog:

PR target/115146
* config/i386/i386-expand.cc (expand_vec_perm_psrlw_psllw_por): 
Replace arithmetic shift
gen_ashrv4hi3 with logic shift gen_lshrv4hi3.
Replace gen_vlshrv8hi3 with gen_lshrv8hi3 and gen_vashlv8hi3 with 
gen_ashlv8hi3.

gcc/testsuite/ChangeLog:

PR target/115146
* g++.target/i386/pr107563-a.C: Append '-mno-sse3' to compile option
to avoid test failure on hosts with SSE3 support.
* g++.target/i386/pr107563-b.C: Append '-mno-sse3' to compile option
to avoid test failure on hosts with SSE3 support.
* gcc.target/i386/pr115146.c: New test.

Diff:
---
 gcc/config/i386/i386-expand.cc |  6 ++---
 gcc/testsuite/g++.target/i386/pr107563-a.C |  4 ++--
 gcc/testsuite/g++.target/i386/pr107563-b.C |  2 +-
 gcc/testsuite/gcc.target/i386/pr115146.c   | 38 ++
 4 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index ec402a78a09..215a998fc26 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -22055,14 +22055,14 @@ expand_vec_perm_psrlw_psllw_por (struct 
expand_vec_perm_d *d)
   if (!TARGET_MMX_WITH_SSE)
return false;
   mode = V4HImode;
-  gen_shr = gen_ashrv4hi3;
+  gen_shr = gen_lshrv4hi3;
   gen_shl = gen_ashlv4hi3;
   gen_or = gen_iorv4hi3;
   break;
 case E_V16QImode:
   mode = V8HImode;
-  gen_shr = gen_vlshrv8hi3;
-  gen_shl = gen_vashlv8hi3;
+  gen_shr = gen_lshrv8hi3;
+  gen_shl = gen_ashlv8hi3;
   gen_or = gen_iorv8hi3;
   break;
 default: return false;
diff --git a/gcc/testsuite/g++.target/i386/pr107563-a.C 
b/gcc/testsuite/g++.target/i386/pr107563-a.C
index 605c1bdf814..c1c332bb948 100755
--- a/gcc/testsuite/g++.target/i386/pr107563-a.C
+++ b/gcc/testsuite/g++.target/i386/pr107563-a.C
@@ -1,8 +1,8 @@
 /* PR target/107563.C */
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-std=c++2b -O3 -msse2" } */
+/* { dg-options "-std=c++2b -O3 -msse2 -mno-sse3" } */
 /* { dg-final { scan-assembler-times "psllw" 1 } } */
-/* { dg-final { scan-assembler-times "psraw" 1 } } */
+/* { dg-final { scan-assembler-times "psrlw" 1 } } */
 /* { dg-final { scan-assembler-times "por" 1 } } */
 
 using temp_vec_type2 [[__gnu__::__vector_size__(8)]] = char;
diff --git a/gcc/testsuite/g++.target/i386/pr107563-b.C 
b/gcc/testsuite/g++.target/i386/pr107563-b.C
index 0ce3e8263bb..d5cc0300f46 100755
--- a/gcc/testsuite/g++.target/i386/pr107563-b.C
+++ b/gcc/testsuite/g++.target/i386/pr107563-b.C
@@ -1,5 +1,5 @@
 /* PR target/107563.C */
-/* { dg-options "-std=c++2b -O3 -msse2" } */
+/* { dg-options "-std=c++2b -O3 -msse2 -mno-sse3" } */
 /* { dg-final { scan-assembler-times "psllw" 1 } } */
 /* { dg-final { scan-assembler-times "psrlw" 1 } } */
 /* { dg-final { scan-assembler-times "por" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr115146.c 
b/gcc/testsuite/gcc.target/i386/pr115146.c
new file mode 100755
index 000..b7706bf391c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr115146.c
@@ -0,0 +1,38 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned char v8qi __attribute__((vector_size (8)));
+
+v8qi res, a;
+
+void __attribute__((noipa))
+foo (void)
+{
+  res = __builtin_shufflevector(a, a, 1, 0, 3, 2, 5, 4, 7, 6);
+}
+
+void
+comp (v8qi a, v8qi b, int n)
+{
+  for (unsigned i = 0; i < n; ++i)
+if ((a)[i] != (b)[i])
+  __builtin_abort ();
+}
+
+#define E0 140
+#define E1 141
+#define E2 142
+#define E3 143
+#define E4 144
+#define E5 145
+#define E6 146
+#define E7 147
+
+int main()
+{
+  a = (v8qi) { E0, E1, E2, E3, E4, E5, E6, E7 };
+  foo ();
+  comp (res, ((v8qi) { E1, E0, E3, E2, E5, E4, E7, E6 }), 8);
+  return 0;
+}
+


[gcc r15-842] [to-be-committed][RISC-V] Reassociate constants in logical ops

2024-05-26 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:160929406f0c44df5b0d377a014ebfe5027fe4e7

commit r15-842-g160929406f0c44df5b0d377a014ebfe5027fe4e7
Author: Lyut Nersisyan 
Date:   Sun May 26 21:24:40 2024 -0600

[to-be-committed][RISC-V] Reassociate constants in logical ops

This patch from Lyut will reassociate operands when we have shifted logical
operations.  This can simplify a constant that may not be fit in a simm12 
into
a form that does fit into a simm12.

The basic work was done by Lyut.  I generalized it to handle XOR/OR.

It stands on its own, but also helps the upcoming Zbkb work from Lyut.

This has survived Ventana's CI system as well as my tester.  Obviously I'll
wait for a verdict from the Rivos CI system before moving forward.

gcc/

* config/riscv/riscv.md (_shift_reverse): New 
pattern.

gcc/testsuite

* gcc.target/riscv/and-shift32.c: New test.
* gcc.target/riscv/and-shift64.c: New test.

Co-authored-by: Jeffrey A Law 

Diff:
---
 gcc/config/riscv/riscv.md| 28 
 gcc/testsuite/gcc.target/riscv/and-shift32.c | 13 +
 gcc/testsuite/gcc.target/riscv/and-shift64.c | 13 +
 3 files changed, 54 insertions(+)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index ab628c60840..fe74b8dcd3b 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2829,6 +2829,34 @@
   [(set_attr "type" "shift")
(set_attr "mode" "SI")])
 
+;; We can reassociate the shift and bitwise operator which may allow us to
+;; reduce the immediate operand of the bitwise operator into a range that
+;; fits in a simm12.
+;;
+;; We need to make sure that shifting does not lose any bits, particularly
+;; for IOR/XOR.  It probably doesn't matter for AND.
+;;
+;; We also don't want to do this if the immediate already fits in a simm12
+;; field.
+(define_insn_and_split "_shift_reverse"
+  [(set (match_operand:X 0 "register_operand" "=r")
+(any_bitwise:X (ashift:X (match_operand:X 1 "register_operand" "r")
+(match_operand 2 "immediate_operand" "n"))
+  (match_operand 3 "immediate_operand" "n")))]
+  "(!SMALL_OPERAND (INTVAL (operands[3]))
+   && SMALL_OPERAND (INTVAL (operands[3]) >> INTVAL (operands[2]))
+   && (popcount_hwi (INTVAL (operands[3]))
+   <= popcount_hwi (INTVAL (operands[3]) >> INTVAL (operands[2]"
+  "#"
+  "&& 1"
+  [(set (match_dup 0) (any_bitwise:X (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (ashift:X (match_dup 0) (match_dup 2)))]
+  {
+operands[3] = GEN_INT (INTVAL (operands[3]) >> INTVAL (operands[2]));
+  }
+  [(set_attr "type" "shift")
+   (set_attr "mode" "")])
+
 ;; Non-canonical, but can be formed by ree when combine is not successful at
 ;; producing one of the two canonical patterns below.
 (define_insn "*lshrsi3_zero_extend_1"
diff --git a/gcc/testsuite/gcc.target/riscv/and-shift32.c 
b/gcc/testsuite/gcc.target/riscv/and-shift32.c
new file mode 100644
index 000..38ee63e8d79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/and-shift32.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-g" } } */
+
+int foo(int a)
+{
+  return (a << 8) & 24320;
+}
+
+/* { dg-final { scan-assembler-times "\\sandi\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sslli\\s" 1 } } */
+/* { dg-final { scan-assembler-not "\\sli\\s" } } */
+/* { dg-final { scan-assembler-not "\\saddi\\s" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/riscv/and-shift64.c 
b/gcc/testsuite/gcc.target/riscv/and-shift64.c
new file mode 100644
index 000..ccfaedd508a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/and-shift64.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-g" } } */
+
+long long foo(long long a)
+{
+  return (a << 8) & 24320;
+}
+
+/* { dg-final { scan-assembler-times "\\sandi\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sslli\\s" 1 } } */
+/* { dg-final { scan-assembler-not "\\sli\\s" } } */
+/* { dg-final { scan-assembler-not "\\saddi\\s" } } */
\ No newline at end of file


[gcc r15-843] vax: Fix descriptions of the FP format options [PR79646]

2024-05-26 Thread Maciej W. Rozycki via Gcc-cvs
https://gcc.gnu.org/g:a7f6543f21303583356fd2d2d1805bffbecc1bc5

commit r15-843-ga7f6543f21303583356fd2d2d1805bffbecc1bc5
Author: Abe Skolnik 
Date:   Mon May 27 05:07:32 2024 +0100

vax: Fix descriptions of the FP format options [PR79646]

Replace "Target" with "Generate" consistently and place a hyphen in
"double-precision" as this is used as an adjective here.

gcc/ChangeLog:

PR target/79646
* config/vax/vax.opt (md, md-float, mg, mg-float): Correct
descriptions.

Diff:
---
 gcc/config/vax/vax.opt | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/vax/vax.opt b/gcc/config/vax/vax.opt
index 2cc66e543fe..fa2be78e9fa 100644
--- a/gcc/config/vax/vax.opt
+++ b/gcc/config/vax/vax.opt
@@ -20,19 +20,19 @@
 
 md
 Target RejectNegative InverseMask(G_FLOAT)
-Target DFLOAT double precision code.
+Generate DFLOAT double-precision code.
 
 md-float
 Target RejectNegative InverseMask(G_FLOAT)
-Target DFLOAT double precision code.
+Generate DFLOAT double-precision code.
 
 mg
 Target RejectNegative Mask(G_FLOAT)
-Generate GFLOAT double precision code.
+Generate GFLOAT double-precision code.
 
 mg-float
 Target RejectNegative Mask(G_FLOAT)
-Generate GFLOAT double precision code.
+Generate GFLOAT double-precision code.
 
 mgnu
 Target RejectNegative InverseMask(UNIX_ASM)


[gcc r15-844] VAX/doc: Fix issues with FP format option documentation

2024-05-26 Thread Maciej W. Rozycki via Gcc-cvs
https://gcc.gnu.org/g:314448fc65f40c98ee8bc02dfb54ea49d2f2c60d

commit r15-844-g314448fc65f40c98ee8bc02dfb54ea49d2f2c60d
Author: Maciej W. Rozycki 
Date:   Mon May 27 05:07:32 2024 +0100

VAX/doc: Fix issues with FP format option documentation

Use the correct names of the D_floating and G_floating data formats as
per the VAX ISA nomenclature[1].  Document the `-md', `-md-float', and
`-mg-float' options.

References:

[1] DEC STD 032-0 "VAX Architecture Standard", Digital Equipment
Corporation, A-DS-EL-00032-00-0 Rev J, December 15, 1989, Section
1.2 "Data Types", pp. 1-7, 1-9

gcc/
* doc/invoke.texi (Option Summary): Add `-md', `-md-float', and
`-mg-float' options.  Reorder, matching VAX Options.
(VAX Options): Reword the description of `-mg' option.  Add
`-md', `-md-float', and `-mg-float' options.

Diff:
---
 gcc/doc/invoke.texi | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c9d8f6b37b6..2cba380718b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1429,7 +1429,7 @@ See RS/6000 and PowerPC Options.
 -mbig-switch}
 
 @emph{VAX Options}
-@gccoptlist{-mg  -mgnu  -munix  -mlra}
+@gccoptlist{-munix  -mgnu  -md  -md-float  -mg  -mg-float  -mlra}
 
 @emph{Visium Options}
 @gccoptlist{-mdebug  -msim  -mfpu  -mno-fpu  -mhard-float  -msoft-float
@@ -34129,9 +34129,19 @@ ranges.
 Do output those jump instructions, on the assumption that the
 GNU assembler is being used.
 
+@opindex md
+@opindex md-float
+@item -md
+@itemx -md-float
+Use the D_floating data format for double-precision floating-point numbers
+instead of G_floating.
+
 @opindex mg
+@opindex mg-float
 @item -mg
-Output code for G-format floating-point numbers instead of D-format.
+@itemx -mg-float
+Use the G_floating data format for double-precision floating-point numbers
+instead of D_floating.
 
 @opindex mlra
 @opindex mno-lra


[gcc r15-845] RISC-V: Fix missing boolean_expression in zmmul extension

2024-05-26 Thread Kito Cheng via Gcc-cvs
https://gcc.gnu.org/g:06bb125521dec5648b725ddee4345b00decfdc77

commit r15-845-g06bb125521dec5648b725ddee4345b00decfdc77
Author: Liao Shihua 
Date:   Fri May 24 13:03:57 2024 +0800

RISC-V: Fix missing boolean_expression in zmmul extension

Update v1->v2
Add testcase for this patch.

Missing boolean_expression TARGET_ZMMUL in riscv_rtx_costs() cause 
different instructions when
multiplying an integer with a constant. ( 
https://github.com/riscv-collab/riscv-gnu-toolchain/issues/1482 )

int foo(int *ib) {
*ib = *ib * 33938;
return 0;
}

rv64im:
lw  a4,0(a1)
li  a5,32768
addiw   a5,a5,1170
mulwa5,a5,a4
sw  a5,0(a1)
ret

rv64i_zmmul:
lw  a4,0(a1)
slliw   a5,a4,5
addwa5,a5,a4
slliw   a5,a5,3
addwa5,a5,a4
slliw   a5,a5,3
addwa5,a5,a4
slliw   a5,a5,3
addwa5,a5,a4
slliw   a5,a5,1
sw  a5,0(a1)
ret

Fixed.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_rtx_costs): Add TARGET_ZMMUL.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zmmul-3.c: New test.

Diff:
---
 gcc/config/riscv/riscv.cc| 2 +-
 gcc/testsuite/gcc.target/riscv/zmmul-3.c | 8 
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 401ddc9eeb7..a99211d56b1 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3832,7 +3832,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int 
outer_code, int opno ATTRIBUTE_UN
 case MULT:
   if (float_mode_p)
*total = tune_param->fp_mul[mode == DFmode];
-  else if (!TARGET_MUL)
+  else if (!(TARGET_MUL || TARGET_ZMMUL))
/* Estimate the cost of a library call.  */
*total = COSTS_N_INSNS (speed ? 32 : 6);
   else if (GET_MODE_SIZE (mode).to_constant () > UNITS_PER_WORD)
diff --git a/gcc/testsuite/gcc.target/riscv/zmmul-3.c 
b/gcc/testsuite/gcc.target/riscv/zmmul-3.c
new file mode 100644
index 000..f8377f86180
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zmmul-3.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64iafdc_zmmul -mabi=lp64d" } */
+int foo1(int a)
+{
+return a * 99;
+}
+
+/* { dg-final { scan-assembler-times "mulw\t" 1 } } */