[gcc r15-4530] RISC-V: Add testcases for form 7 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:f138806811968a99bd81d7a60746279877df7ee8

commit r15-4530-gf138806811968a99bd81d7a60746279877df7ee8
Author: Pan Li 
Date:   Mon Oct 14 15:10:46 2024 +0800

RISC-V: Add testcases for form 7 of vector signed SAT_TRUNC

Form 7:
  #define DEF_VEC_SAT_S_TRUNC_FMT_7(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_7 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN > x || x >= (WT)NT_MAX  \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The tests below passed with this patch.
* The rv64gcv fully regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-7-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-7-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-7-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-7-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-7-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-7-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-7-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-7-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-7-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-7-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c
new file mode 100644
index ..a6eb2d5b0b2f
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_7(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c
new file mode 100644
index ..fd01c74d2df9
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-7-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_7(int16_t, int32_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times 

[gcc r15-4531] RISC-V: Add testcases for form 8 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:cb131a401b7489cc17e2d70420cf9a916515b3f6

commit r15-4531-gcb131a401b7489cc17e2d70420cf9a916515b3f6
Author: Pan Li 
Date:   Mon Oct 14 15:23:57 2024 +0800

RISC-V: Add testcases for form 8 of vector signed SAT_TRUNC

Form 8:
  #define DEF_VEC_SAT_S_TRUNC_FMT_8(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_8 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN >= x || x >= (WT)NT_MAX \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The tests below passed with this patch.
* The rv64gcv fully regression test.

This is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-8-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-8-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-8-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-8-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-8-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-8-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-8-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-8-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-8-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-8-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c
new file mode 100644
index ..64f140f764e6
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_8(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c
new file mode 100644
index ..9bd95a52a012
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-8-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_8(int16_t, int32_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times 

[gcc(refs/users/meissner/heads/work181-sha)] Move xxeval case to be first.

2024-10-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:cc231f8c1f7b0085c9c5b56435801ae6066ab815

commit cc231f8c1f7b0085c9c5b56435801ae6066ab815
Author: Michael Meissner 
Date:   Mon Oct 21 12:23:03 2024 -0400

Move xxeval case to be first.

2024-10-21  Michael Meissner  

gcc/

* config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case
to be first.
* config/rs6000/fusion.md: Regenerate.

Diff:
---
 gcc/config/rs6000/fusion.md| 352 -
 gcc/config/rs6000/genfusion.pl |   4 +-
 2 files changed, 178 insertions(+), 178 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 215a3aae074f..6f9081ab3372 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1872,16 +1872,16 @@
 ;; vector vand -> vand
 (define_insn "*fuse_vand_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
-  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"%wa,v,v,v,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
+   xxeval %x3,%x2,%x1,%x0,1
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
-   xxeval %x3,%x2,%x1,%x0,1
vand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1893,16 +1893,16 @@
 ;; vector vandc -> vand
 (define_insn "*fuse_vandc_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
-  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
+   xxeval %x3,%x2,%x1,%x0,2
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
-   xxeval %x3,%x2,%x1,%x0,2
vandc %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1914,16 +1914,16 @@
 ;; vector veqv -> vand
 (define_insn "*fuse_veqv_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
-  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
+   xxeval %x3,%x2,%x1,%x0,9
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
-   xxeval %x3,%x2,%x1,%x0,9
veqv %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1935,16 +1935,16 @@
 ;; vector vnand -> vand
 (define_insn "*fuse_vnand_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
-  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
- (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
+   xxeval %x3,%x2,%x1,%x0,14
vnand %3,%1,%0\;vand %3,%3,%2
vnand %3,%1,%0\;vand %3,%3,%2
vnand %3,%1,%0\;vand %3,%3,%2
-   xxeval %x3,%x2,%x1,%x0,14
vnand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1956,16 +1956,16 @@
 ;; vector vnor -> vand
 (define_insn "*fuse_vnor_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (and:VM (not:

[gcc(refs/users/meissner/heads/work181-sha)] Update ChangeLog.*

2024-10-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:b19edef8b8eb4dd28774a5b23feb7cd1703a49e0

commit b19edef8b8eb4dd28774a5b23feb7cd1703a49e0
Author: Michael Meissner 
Date:   Mon Oct 21 12:23:44 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index 2d7f998a8b52..403a46031b20 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,4 +1,16 @@
- Branch work181-sha, patch #403 
+ Branch work181-sha, patch #405 
+
+Move xxeval case to be first.
+
+2024-10-21  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case
+   to be first.
+   * config/rs6000/fusion.md: Regenerate.
+
+ Branch work181-sha, patch #404 
 
 Move xxeval case before alternative that needs a temporary register.


[gcc(refs/users/jmelcr/heads/omp-cp)] omp-cp: add callback flag to some checks

2024-10-21 Thread Josef Melcr via Gcc-cvs
https://gcc.gnu.org/g:cf15a12be9efe68841746d0ab189e3846499498f

commit cf15a12be9efe68841746d0ab189e3846499498f
Author: Josef Melcr 
Date:   Mon Oct 21 18:04:21 2024 +0200

omp-cp: add callback flag to some checks

gcc/ChangeLog:

* cgraph.cc (cgraph_edge::redirect_call_stmt_to_callee): return
  if callback flag is set
(cgraph_node::verify_node): allow some weirdness if callback is
set

Signed-off-by: Josef Melcr 

Diff:
---
 gcc/cgraph.cc | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index c62f5de807da..96d95a87c769 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -1514,9 +1514,7 @@ cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e,
}
 }
 
-
-  if (e->indirect_unknown_callee
-  || decl == e->callee->decl)
+  if (e->indirect_unknown_callee || decl == e->callee->decl || e->callback)
 return e->call_stmt;
 
   if (decl && ipa_saved_clone_sources)
@@ -3682,6 +3680,7 @@ cgraph_node::verify_node (void)
   if (gimple_has_body_p (e->caller->decl)
  && !e->caller->inlined_to
  && !e->speculative
+ && !e->callback
  /* Optimized out calls are redirected to __builtin_unreachable.  */
  && (e->count.nonzero_p ()
  || ! e->callee->decl
@@ -3929,7 +3928,7 @@ cgraph_node::verify_node (void)
 
   for (e = callees; e; e = e->next_callee)
{
- if (!e->aux && !e->speculative)
+ if (!e->aux && !e->speculative && !e->callback)
{
  error ("edge %s->%s has no corresponding call_stmt",
 identifier_to_locale (e->caller->name ()),


[gcc(refs/users/meissner/heads/work181-sha)] Update ChangeLog.*

2024-10-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:82ef5cf3c220dfcd7c3d76d40658a57eb8b94bcc

commit 82ef5cf3c220dfcd7c3d76d40658a57eb8b94bcc
Author: Michael Meissner 
Date:   Mon Oct 21 13:11:21 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.sha | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/gcc/ChangeLog.sha b/gcc/ChangeLog.sha
index 403a46031b20..515e0390089a 100644
--- a/gcc/ChangeLog.sha
+++ b/gcc/ChangeLog.sha
@@ -1,14 +1,4 @@
- Branch work181-sha, patch #405 
-
-Move xxeval case to be first.
-
-2024-10-21  Michael Meissner  
-
-gcc/
-
-   * config/rs6000/genfusion.pl (gen_logical_addsubf): Move xxeval case
-   to be first.
-   * config/rs6000/fusion.md: Regenerate.
+ Branch work181-sha, patch #405 was reverted 

 
  Branch work181-sha, patch #404 


[gcc(refs/users/jmelcr/heads/omp-cp)] omp-cp: resolve segfault through monkey stick debugging

2024-10-21 Thread Josef Melcr via Gcc-cvs
https://gcc.gnu.org/g:397b6d3ea75e4f16c7da848c55f7823480c662c8

commit 397b6d3ea75e4f16c7da848c55f7823480c662c8
Author: Josef Melcr 
Date:   Fri Oct 18 23:41:37 2024 +0200

omp-cp: resolve segfault through monkey stick debugging

gcc/ChangeLog:

* cgraph.h: modify functions regarding speculative call edges
* ipa-fnsummary.cc (analyze_function_body): integrate callback
  edges
(compute_fn_summary): integrate callback edges in similar
fashion to speculative edges

Signed-off-by: Josef Melcr 

Diff:
---
 gcc/cgraph.h | 24 +---
 gcc/ipa-fnsummary.cc |  9 -
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index bf0a22bcf365..5f1faa4c56ad 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1769,6 +1769,10 @@ public:
  target2.  */
   cgraph_edge *next_speculative_call_target ()
   {
+if (callback)
+  {
+   return NULL;
+  }
 cgraph_edge *e = this;
 gcc_checking_assert (speculative && callee);
 
@@ -1783,15 +1787,29 @@ public:
  indirect call edge in the speculative call sequence.  */
   cgraph_edge *speculative_call_indirect_edge ()
   {
-gcc_checking_assert (speculative);
+gcc_checking_assert (speculative || callback);
 if (!callee)
   return this;
-for (cgraph_edge *e2 = caller->indirect_calls;
-true; e2 = e2->next_callee)
+
+cgraph_edge * e2 = NULL;
+for (e2 = caller->indirect_calls;
+e2; e2 = e2->next_callee)
   if (e2->speculative
  && call_stmt == e2->call_stmt
  && lto_stmt_uid == e2->lto_stmt_uid)
return e2;
+
+if (!e2 && callback)
+  {
+   for (e2 = caller->callees; e2; e2 = e2->next_callee)
+ {
+   if (e2->has_callback && call_stmt == e2->call_stmt)
+ {
+   return e2;
+ }
+ }
+  }
+  gcc_unreachable();
   }
 
   /* When called on any edge in speculative call and when given any target
diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc
index b38247834065..7858684aaa25 100644
--- a/gcc/ipa-fnsummary.cc
+++ b/gcc/ipa-fnsummary.cc
@@ -2900,7 +2900,7 @@ analyze_function_body (struct cgraph_node *node, bool 
early)
  es->call_stmt_time = this_time;
  es->loop_depth = bb_loop_depth (bb);
  edge_set_predicate (edge, &bb_predicate);
- if (edge->speculative)
+ if (edge->speculative || edge->callback)
{
  cgraph_edge *indirect
= edge->speculative_call_indirect_edge ();
@@ -3309,6 +3309,13 @@ compute_fn_summary (struct cgraph_node *node, bool early)
   for (e = node->indirect_calls; e; e = e->next_callee)
if (e->speculative)
 break;
+
+  if (!e)
+   {
+ for (e = node->callees; e; e = e->next_callee)
+   if (e->callback)
+ break;
+   }
   gcc_assert (e || size_info->size == size_info->self_size);
 }
 }


[gcc r15-4532] [committed][PR rtl-optimization/116488] Fix SIGN_EXTEND source handling in ext-dce

2024-10-21 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:36e91df7716d34aa5694533837551593ec28f22b

commit r15-4532-g36e91df7716d34aa5694533837551593ec28f22b
Author: Jeff Law 
Date:   Mon Oct 21 13:37:21 2024 -0600

[committed][PR rtl-optimization/116488] Fix SIGN_EXTEND source handling in 
ext-dce

A while back I noticed that the code to call carry_backpropagate was being
called after the optimization step.  Which seemed wrong, but at the time I
didn't have a testcase showing it as a problem.  Now I have 4 :-)

The way things used to work, the extension would be stripped away before
calling carry_backpropagate, meaning carry_backpropagate would never see a
SIGN_EXTEND.  Thus the code trying to account for the sign extended bit
was never reached.

Getting that bit marked live is what's needed to fix these testcases. 
Fallout
is minor with just an adjustment needed to sensibly deal with vector modes 
in a
place where we didn't have them before.

I'm still somewhat concerned about this code.  Specifically whether or not 
we
can get in here with arbitrarily complex RTL, and if so do we need to 
recurse
down and look at those sub-expressions.

So while this patch fixes the most pressing issue, I wouldn't be terribly
surprised if we're back inside this code at some point.

Bootstrapped and regression tested on x86_64, ppc64le, riscv64, s390x, 
mips64,
loongarch, aarch64, m68k, alpha, hppa, sh4, sh4eb, perhaps something else 
that
I've forgotten...  Also tested on all the crosses in my tester.

PR rtl-optimization/116488
PR rtl-optimization/116579
PR rtl-optimization/116915
PR rtl-optimization/117226
gcc/
* ext-dce.cc (carry_backpropagate): Properly handle SIGN_EXTEND, add
ZERO_EXTEND handling as well.
(ext_dce_process_uses): Call carry_backpropagate before the 
optimization
step.

gcc/testsuite/
* gcc.dg/torture/pr116488.c: New test.
* gcc.dg/torture/pr116579.c: New test.
* gcc.dg/torture/pr116915.c: New test.
* gcc.dg/torture/pr117226.c: New test.

Diff:
---
 gcc/ext-dce.cc  | 34 +++--
 gcc/testsuite/gcc.dg/torture/pr116488.c | 20 +++
 gcc/testsuite/gcc.dg/torture/pr116579.c | 18 +
 gcc/testsuite/gcc.dg/torture/pr116915.c | 15 +++
 gcc/testsuite/gcc.dg/torture/pr117226.c | 17 +
 5 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc
index 2f3514ae7976..a449b9f6b49c 100644
--- a/gcc/ext-dce.cc
+++ b/gcc/ext-dce.cc
@@ -478,7 +478,12 @@ binop_implies_op2_fully_live (rtx_code code)
holds true, and bits set in MASK are live in the result.  Compute a
mask of (potentially) live bits in the non-constant inputs.  In case of
binop_implies_op2_fully_live (e.g. shifts), the computed mask may
-   exclusively pertain to the first operand.  */
+   exclusively pertain to the first operand.
+
+   This looks wrong as we may have some important operations embedded as
+   operands of another operation.  For example, we might have an extension
+   wrapping a shift.  It really feels like this needs to be recursing down
+   into operands much more often.  */
 
 unsigned HOST_WIDE_INT
 carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x)
@@ -557,9 +562,26 @@ carry_backpropagate (unsigned HOST_WIDE_INT mask, enum 
rtx_code code, rtx x)
   return mmask;
 
 case SIGN_EXTEND:
-  if (mask & ~GET_MODE_MASK (GET_MODE_INNER (GET_MODE (XEXP (x, 0)))))
+  if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
+ || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
+   return -1;
+
+  /* We want the mode of the inner object.  We need to ensure its
+sign bit is on in MASK.  */
+  mode = GET_MODE (XEXP (x, 0));
+  if (mask & ~GET_MODE_MASK (GET_MODE_INNER (mode)))
mask |= 1ULL << (GET_MODE_BITSIZE (mode).to_constant () - 1);
-  return mask;
+
+  /* Recurse into the operand.  */
+  return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
+
+case ZERO_EXTEND:
+  if (!GET_MODE_BITSIZE (GET_MODE (x)).is_constant ()
+ || !GET_MODE_BITSIZE (GET_MODE (XEXP (x, 0))).is_constant ())
+   return -1;
+
+  /* Recurse into the operand.  */
+  return carry_backpropagate (mask, GET_CODE (XEXP (x, 0)), XEXP (x, 0));
 
 /* We propagate for the shifted operand, but not the shift
count.  The count is handled specially.  */
@@ -670,6 +692,8 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj,
  if (skipped_dest)
dst_mask = -1;
 
+ dst_mask = carry_backpropagate (dst_mask, code, src);
+
  /* ??? Could also handle ZERO_EXTRACT / SIGN_EXTRACT
 of the source specially to im

[gcc r15-4514] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

2024-10-21 Thread jeevitha via Gcc-cvs
https://gcc.gnu.org/g:1a4c5643a5911d130dfab9a064222baeeb7f9be7

commit r15-4514-g1a4c5643a5911d130dfab9a064222baeeb7f9be7
Author: Jeevitha 
Date:   Thu Oct 10 14:42:45 2024 -0500

rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

Corrected the function code for the Atomic Memory Operation "Fetch and 
Decrement
Bounded", changing it from 0x1A to 0x1C.

2024-10-11 Jeevitha Palanisamy 

gcc/

* config/rs6000/amo.h (enum _AMO_LD): Correct the function code for
_AMO_LD_DEC_BOUNDED.

Diff:
---
 gcc/config/rs6000/amo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
index 6b9e4e088b97..1303c9d9dab2 100644
--- a/gcc/config/rs6000/amo.h
+++ b/gcc/config/rs6000/amo.h
@@ -46,7 +46,7 @@ enum _AMO_LD {
   _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal.  
*/
   _AMO_LD_INC_BOUNDED  = 0x18, /* Fetch and Increment Bounded.  */
   _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal.  */
-  _AMO_LD_DEC_BOUNDED  = 0x1A  /* Fetch and Decrement Bounded.  */
+  _AMO_LD_DEC_BOUNDED  = 0x1C  /* Fetch and Decrement Bounded.  */
 };
 
 /* Implementation of the simple LWAT/LDAT operations that take one register and


[gcc r12-10779] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

2024-10-21 Thread jeevitha via Gcc-cvs
https://gcc.gnu.org/g:41377d0f4e791bcdd848e11eac172b8e81ecb6ec

commit r12-10779-g41377d0f4e791bcdd848e11eac172b8e81ecb6ec
Author: Jeevitha 
Date:   Mon Oct 21 04:01:46 2024 -0500

rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

Corrected the function code for the Atomic Memory Operation "Fetch and 
Decrement
Bounded", changing it from 0x1A to 0x1C.

2024-10-11 Jeevitha Palanisamy 

gcc/

* config/rs6000/amo.h (enum _AMO_LD): Correct the function code for
_AMO_LD_DEC_BOUNDED.

(cherry picked from commit 1a4c5643a5911d130dfab9a064222baeeb7f9be7)

Diff:
---
 gcc/config/rs6000/amo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
index ea4668e0547f..47d19ee181c2 100644
--- a/gcc/config/rs6000/amo.h
+++ b/gcc/config/rs6000/amo.h
@@ -46,7 +46,7 @@ enum _AMO_LD {
   _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal.  
*/
   _AMO_LD_INC_BOUNDED  = 0x18, /* Fetch and Increment Bounded.  */
   _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal.  */
-  _AMO_LD_DEC_BOUNDED  = 0x1A  /* Fetch and Decrement Bounded.  */
+  _AMO_LD_DEC_BOUNDED  = 0x1C  /* Fetch and Decrement Bounded.  */
 };
 
 /* Implementation of the simple LWAT/LDAT operations that take one register and


[gcc r13-9140] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

2024-10-21 Thread jeevitha via Gcc-cvs
https://gcc.gnu.org/g:5be7a44c7a7f86dc2fe82dafcb76603a718dedbc

commit r13-9140-g5be7a44c7a7f86dc2fe82dafcb76603a718dedbc
Author: Jeevitha 
Date:   Mon Oct 21 03:58:28 2024 -0500

rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

Corrected the function code for the Atomic Memory Operation "Fetch and 
Decrement
Bounded", changing it from 0x1A to 0x1C.

2024-10-11 Jeevitha Palanisamy 

gcc/

* config/rs6000/amo.h (enum _AMO_LD): Correct the function code for
_AMO_LD_DEC_BOUNDED.

(cherry picked from commit 1a4c5643a5911d130dfab9a064222baeeb7f9be7)

Diff:
---
 gcc/config/rs6000/amo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
index fa31bef9e935..e03fd7c71bb8 100644
--- a/gcc/config/rs6000/amo.h
+++ b/gcc/config/rs6000/amo.h
@@ -46,7 +46,7 @@ enum _AMO_LD {
   _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal.  
*/
   _AMO_LD_INC_BOUNDED  = 0x18, /* Fetch and Increment Bounded.  */
   _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal.  */
-  _AMO_LD_DEC_BOUNDED  = 0x1A  /* Fetch and Decrement Bounded.  */
+  _AMO_LD_DEC_BOUNDED  = 0x1C  /* Fetch and Decrement Bounded.  */
 };
 
 /* Implementation of the simple LWAT/LDAT operations that take one register and


[gcc r14-10808] rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

2024-10-21 Thread jeevitha via Gcc-cvs
https://gcc.gnu.org/g:17f1277d78c51d64a222ade218796837f9153f42

commit r14-10808-g17f1277d78c51d64a222ade218796837f9153f42
Author: Jeevitha 
Date:   Mon Oct 21 03:54:03 2024 -0500

rs6000: Correct the function code for _AMO_LD_DEC_BOUNDED

Corrected the function code for the Atomic Memory Operation "Fetch and 
Decrement
Bounded", changing it from 0x1A to 0x1C.

2024-10-11 Jeevitha Palanisamy 

gcc/

* config/rs6000/amo.h (enum _AMO_LD): Correct the function code for
_AMO_LD_DEC_BOUNDED.

(cherry picked from commit 1a4c5643a5911d130dfab9a064222baeeb7f9be7)

Diff:
---
 gcc/config/rs6000/amo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
index 6b9e4e088b97..1303c9d9dab2 100644
--- a/gcc/config/rs6000/amo.h
+++ b/gcc/config/rs6000/amo.h
@@ -46,7 +46,7 @@ enum _AMO_LD {
   _AMO_LD_CS_NE= 0x10, /* Compare and Swap Not Equal.  
*/
   _AMO_LD_INC_BOUNDED  = 0x18, /* Fetch and Increment Bounded.  */
   _AMO_LD_INC_EQUAL= 0x19, /* Fetch and Increment Equal.  */
-  _AMO_LD_DEC_BOUNDED  = 0x1A  /* Fetch and Decrement Bounded.  */
+  _AMO_LD_DEC_BOUNDED  = 0x1C  /* Fetch and Decrement Bounded.  */
 };
 
 /* Implementation of the simple LWAT/LDAT operations that take one register and


[gcc r14-10809] middle-end/115110 - Fix view_converted_memref_p

2024-10-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:2ac6159f8b5119e75a19f70f3c4578895f59cb53

commit r14-10809-g2ac6159f8b5119e75a19f70f3c4578895f59cb53
Author: Richard Biener 
Date:   Fri May 17 11:02:29 2024 +0200

middle-end/115110 - Fix view_converted_memref_p

view_converted_memref_p was checking the reference type against the
pointer type of the offset operand rather than its pointed-to type
which leads to all refs being subject to view-convert treatment
in get_alias_set causing numerous testsuite fails but with its
new uses from r15-512-g9b7cad5884f21c is also a wrong-code issue.

PR middle-end/115110
* tree-ssa-alias.cc (view_converted_memref_p): Fix.

(cherry picked from commit a5b3721c06646bf5b9b50a22964e8e2bd4d03f5f)

Diff:
---
 gcc/tree-ssa-alias.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index e7c1c1aa6243..72af21c02131 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -2049,8 +2049,9 @@ view_converted_memref_p (tree base)
 {
   if (TREE_CODE (base) != MEM_REF && TREE_CODE (base) != TARGET_MEM_REF)
 return false;
-  return same_type_for_tbaa (TREE_TYPE (base),
-TREE_TYPE (TREE_OPERAND (base, 1))) != 1;
+  return (same_type_for_tbaa (TREE_TYPE (base),
+ TREE_TYPE (TREE_TYPE (TREE_OPERAND (base, 1))))
+ != 1);
 }
 
 /* Return true if an indirect reference based on *PTR1 constrained


[gcc r14-10812] tree-optimization/116907 - stale BLOCK reference from DECL_VALUE_EXPR

2024-10-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:a4744558b6a1d0a1c203acc827b6ad0cfe039212

commit r14-10812-ga4744558b6a1d0a1c203acc827b6ad0cfe039212
Author: Richard Biener 
Date:   Sun Oct 13 12:44:04 2024 +0200

tree-optimization/116907 - stale BLOCK reference from DECL_VALUE_EXPR

When we remove unused BLOCKs we fail to clean references to them
from DECL_VALUE_EXPRs of variables in other BLOCKs which in the
PR causes LTO streaming to walk into pointers to GGC freed blocks.

There's the question of whether such DECL_VALUE_EXPRs should keep
variables and blocks referenced live (it doesn't seem to do that)
and whether such DECL_VALUE_EXPRs should have survived in the
first place.

PR tree-optimization/116907
* tree-ssa-live.cc (clear_unused_block_pointer_in_block): New
helper.
(clear_unused_block_pointer): Call it.

(cherry picked from commit 7d15248d41dc45a4ba2d38ff532b672a5c0651d0)

Diff:
---
 gcc/tree-ssa-live.cc | 20 
 1 file changed, 20 insertions(+)

diff --git a/gcc/tree-ssa-live.cc b/gcc/tree-ssa-live.cc
index 122d8e245dd0..8b559f2dbd85 100644
--- a/gcc/tree-ssa-live.cc
+++ b/gcc/tree-ssa-live.cc
@@ -609,6 +609,22 @@ clear_unused_block_pointer_1 (tree *tp, int *, void *)
   return NULL_TREE;
 }
 
+/* Clear references to unused BLOCKs from DECL_VALUE_EXPRs of variables
+   in BLOCK.  */
+
+static void
+clear_unused_block_pointer_in_block (tree block)
+{
+  for (tree t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
+if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
+  {
+   tree val = DECL_VALUE_EXPR (t);
+   walk_tree (&val, clear_unused_block_pointer_1, NULL, NULL);
+  }
+  for (tree t = BLOCK_SUBBLOCKS (block); t; t = BLOCK_CHAIN (t))
+clear_unused_block_pointer_in_block (t);
+}
+
 /* Set all block pointer in debug or clobber stmt to NULL if the block
is unused, so that they will not be streamed out.  */
 
@@ -664,6 +680,10 @@ clear_unused_block_pointer (void)
  walk_tree (gimple_op_ptr (stmt, i), clear_unused_block_pointer_1,
 NULL, NULL);
   }
+
+  /* Walk all variables mentioned in the functions BLOCK tree and clear
+ DECL_VALUE_EXPR from unused blocks where present.  */
+  clear_unused_block_pointer_in_block (DECL_INITIAL (current_function_decl));
 }
 
 /* Dump scope blocks starting at SCOPE to FILE.  INDENT is the


[gcc r14-10811] tree-optimization/116481 - avoid building function_type[]

2024-10-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:8d8b8ed7835a1a03932a8c90c7c725f9903450d5

commit r14-10811-g8d8b8ed7835a1a03932a8c90c7c725f9903450d5
Author: Richard Biener 
Date:   Sun Oct 13 11:42:27 2024 +0200

tree-optimization/116481 - avoid building function_type[]

The following avoids building an array type with function or method
element type during diagnosing an array bound violation as this
will result in an error, rejecting a program with a not too useful
error message.  Instead build such array type manually.

PR tree-optimization/116481
* pointer-query.cc (build_printable_array_type):
Build an array type with function or method element type
manually to avoid bogus diagnostic.

* gcc.dg/pr116481.c: New testcase.

(cherry picked from commit 1506027347776a2f6ec5b92d56ef192e85944e2e)

Diff:
---
 gcc/pointer-query.cc| 11 +++
 gcc/testsuite/gcc.dg/pr116481.c | 13 +
 2 files changed, 24 insertions(+)

diff --git a/gcc/pointer-query.cc b/gcc/pointer-query.cc
index ccf9d823870a..002c8ed2162c 100644
--- a/gcc/pointer-query.cc
+++ b/gcc/pointer-query.cc
@@ -2587,6 +2587,17 @@ array_elt_at_offset (tree artype, HOST_WIDE_INT off,
 tree
 build_printable_array_type (tree eltype, unsigned HOST_WIDE_INT nelts)
 {
+  /* Cannot build an array type of functions or methods without
+ an error diagnostic.  */
+  if (FUNC_OR_METHOD_TYPE_P (eltype))
+{
+  tree arrtype = make_node (ARRAY_TYPE);
+  TREE_TYPE (arrtype) = eltype;
+  TYPE_SIZE (arrtype) = bitsize_zero_node;
+  TYPE_SIZE_UNIT (arrtype) = size_zero_node;
+  return arrtype;
+}
+
   if (TYPE_SIZE_UNIT (eltype)
   && TREE_CODE (TYPE_SIZE_UNIT (eltype)) == INTEGER_CST
   && !integer_zerop (TYPE_SIZE_UNIT (eltype))
diff --git a/gcc/testsuite/gcc.dg/pr116481.c b/gcc/testsuite/gcc.dg/pr116481.c
new file mode 100644
index ..3ee6d7470876
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116481.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Warray-bounds" } */
+
+extern void tramp ();
+
+int is_trampoline (void* function) /* { dg-bogus "arrays of functions are not 
meaningful" } */
+{
+  void* tramp_address = tramp;
+  if (!(((unsigned long)function & 3) == 2))
+return 0;
+  return (((long *) ((char*)function - 2))[0]
+ == ((long *) ((char*)tramp_address-2))[0]); /* { dg-warning "outside 
array bounds" } */
+}


[gcc r14-10810] tree-optimization/116290 - fix compare-debug issue in ldist

2024-10-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:69934cb171fdd9d58dd64bb1811afaf43f6f7e44

commit r14-10810-g69934cb171fdd9d58dd64bb1811afaf43f6f7e44
Author: Richard Biener 
Date:   Sun Oct 13 15:12:44 2024 +0200

tree-optimization/116290 - fix compare-debug issue in ldist

Loop distribution does different analysis with -g0/-g due to counting
a debug stmt starting a BB against a limit which will eventually
lead to different IVOPTs choices.  I've fixed a possible IVOPTs
issue on the way even though it doesn't make a difference here.

PR tree-optimization/116290
* tree-loop-distribution.cc (determine_reduction_stmt_1): PHIs
have no debug variants.  Start with first non-debug real stmt.
* tree-ssa-loop-ivopts.cc (find_givs_in_bb): Do not analyze
debug stmts.

* gcc.dg/pr116290.c: New testcase.

(cherry picked from commit 566740013b3445162b8c4bc2205e4e568d014968)

Diff:
---
 gcc/testsuite/gcc.dg/pr116290.c | 18 ++
 gcc/tree-loop-distribution.cc   |  6 +++---
 gcc/tree-ssa-loop-ivopts.cc |  3 ++-
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/pr116290.c b/gcc/testsuite/gcc.dg/pr116290.c
new file mode 100644
index ..97b946bda893
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116290.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-g -O2 -fcompare-debug" } */
+
+char *camel_message_info_class_intern_init_part;
+void g_once_init_enter();
+void camel_message_info_class_intern_init() {
+  int ii;
+  char *label;
+  for (; camel_message_info_class_intern_init_part[ii]; ii++)
+if (camel_message_info_class_intern_init_part) {
+  if (label && *label)
+g_once_init_enter();
+  label = &camel_message_info_class_intern_init_part[ii + 1];
+  camel_message_info_class_intern_init_part[ii] = ' ';
+}
+  if (label)
+g_once_init_enter();
+}
diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
index cb804ba48ffe..2bbd8da33e86 100644
--- a/gcc/tree-loop-distribution.cc
+++ b/gcc/tree-loop-distribution.cc
@@ -3551,7 +3551,7 @@ determine_reduction_stmt_1 (const loop_p loop, const 
basic_block *bbs)
   basic_block bb = bbs[i];
 
   for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
-  gsi_next_nondebug (&bsi))
+  gsi_next (&bsi))
{
  gphi *phi = bsi.phi ();
  if (virtual_operand_p (gimple_phi_result (phi)))
@@ -3564,8 +3564,8 @@ determine_reduction_stmt_1 (const loop_p loop, const 
basic_block *bbs)
}
}
 
-  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
-  gsi_next_nondebug (&bsi), ++ninsns)
+  for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
+  !gsi_end_p (bsi); gsi_next_nondebug (&bsi), ++ninsns)
{
  /* Bail out early for loops which are unlikely to match.  */
  if (ninsns > 16)
diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index 7cae5bdefea3..a904910999f5 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -1460,7 +1460,8 @@ find_givs_in_bb (struct ivopts_data *data, basic_block bb)
   gimple_stmt_iterator bsi;
 
   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
-find_givs_in_stmt (data, gsi_stmt (bsi));
+if (!is_gimple_debug (gsi_stmt (bsi)))
+  find_givs_in_stmt (data, gsi_stmt (bsi));
 }
 
 /* Finds general ivs.  */


[gcc r14-10813] tree-optimization/116982 - analyze scalar loop exit early

2024-10-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:1d11536881e60f36a2b8ad9919169ac7a8bc0e3e

commit r14-10813-g1d11536881e60f36a2b8ad9919169ac7a8bc0e3e
Author: Richard Biener 
Date:   Mon Oct 7 11:05:17 2024 +0200

tree-optimization/116982 - analyze scalar loop exit early

The following makes sure to discover the scalar loop IV exit during
analysis as failure to do so (if DCE and friends are disabled this
can happen due to if-conversion doing DCE and FRE on the if-converted
loop) would ICE later.

I refrained from larger refactoring to be able to eventually backport.

PR tree-optimization/116982
* tree-vectorizer.h (vect_analyze_loop): Pass in .LOOP_VECTORIZED
call.
(vect_analyze_loop_form): Likewise.
* tree-vect-loop.cc (vect_analyze_loop_form): Reject loops where we
cannot determine an IV exit for the scalar loop.
(vect_analyze_loop): Adjust.
* tree-vectorizer.cc (try_vectorize_loop_1): Likewise.
* tree-parloops.cc (gather_scalar_reductions): Likewise.

(cherry picked from commit 9b86efd5210101954bd187c3aa8bb909610a5746)

Diff:
---
 gcc/tree-parloops.cc   |  4 ++--
 gcc/tree-vect-loop.cc  | 23 +++
 gcc/tree-vectorizer.cc |  3 ++-
 gcc/tree-vectorizer.h  |  6 --
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-parloops.cc b/gcc/tree-parloops.cc
index 888a834faf91..4d7a4ec94378 100644
--- a/gcc/tree-parloops.cc
+++ b/gcc/tree-parloops.cc
@@ -3304,7 +3304,7 @@ gather_scalar_reductions (loop_p loop, 
reduction_info_table_type *reduction_list
 
   vec_info_shared shared;
   vect_loop_form_info info;
-  if (!vect_analyze_loop_form (loop, &info))
+  if (!vect_analyze_loop_form (loop, NULL, &info))
 goto gather_done;
 
   simple_loop_info = vect_create_loop_vinfo (loop, &shared, &info);
@@ -3346,7 +3346,7 @@ gather_scalar_reductions (loop_p loop, 
reduction_info_table_type *reduction_list
 {
   vec_info_shared shared;
   vect_loop_form_info info;
-  if (vect_analyze_loop_form (loop->inner, &info))
+  if (vect_analyze_loop_form (loop->inner, NULL, &info))
{
  simple_loop_info
= vect_create_loop_vinfo (loop->inner, &shared, &info);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 744044735d39..dfb9d1be6670 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1734,7 +1734,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info 
loop_vinfo)
  niter could be analyzed under some assumptions.  */
 
 opt_result
-vect_analyze_loop_form (class loop *loop, vect_loop_form_info *info)
+vect_analyze_loop_form (class loop *loop, gimple *loop_vectorized_call,
+   vect_loop_form_info *info)
 {
   DUMP_VECT_SCOPE ("vect_analyze_loop_form");
 
@@ -1744,6 +1745,18 @@ vect_analyze_loop_form (class loop *loop, 
vect_loop_form_info *info)
   "not vectorized:"
   " could not determine main exit from"
   " loop with multiple exits.\n");
+  if (loop_vectorized_call)
+{
+  tree arg = gimple_call_arg (loop_vectorized_call, 1);
+  class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
+  edge scalar_exit_e = vec_init_loop_exit_info (scalar_loop);
+  if (!scalar_exit_e)
+   return opt_result::failure_at (vect_location,
+  "not vectorized:"
+  " could not determine main exit from"
+  " loop with multiple exits.\n");
+}
+
   info->loop_exit = exit_e;
   if (dump_enabled_p ())
   dump_printf_loc (MSG_NOTE, vect_location,
@@ -1815,7 +1828,7 @@ vect_analyze_loop_form (class loop *loop, 
vect_loop_form_info *info)
 
   /* Analyze the inner-loop.  */
   vect_loop_form_info inner;
-  opt_result res = vect_analyze_loop_form (loop->inner, &inner);
+  opt_result res = vect_analyze_loop_form (loop->inner, NULL, &inner);
   if (!res)
{
  if (dump_enabled_p ())
@@ -3570,7 +3583,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 
*shared,
for it.  The different analyses will record information in the
loop_vec_info struct.  */
 opt_loop_vec_info
-vect_analyze_loop (class loop *loop, vec_info_shared *shared)
+vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
+  vec_info_shared *shared)
 {
   DUMP_VECT_SCOPE ("analyze_loop_nest");
 
@@ -3588,7 +3602,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
*shared)
 
   /* Analyze the loop form.  */
   vect_loop_form_info loop_form_info;
-  opt_result res = vect_analyze_loop_form (loop, &loop_form_info);
+  opt_result res = vect_analyze_loop_form (loop, loop_vectorized_call,
+  &loop_form_info);
   if (!res)
 {
   if (dump_enabled_p ())
diff --git a/gcc/tree-v

[gcc r14-10814] tree-optimization/117104 - add missed guards to max(a, b) != a simplification

2024-10-21 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:44c3eba2dfa71cb7cd9f8c3e7f33ef2b08132a51

commit r14-10814-g44c3eba2dfa71cb7cd9f8c3e7f33ef2b08132a51
Author: Richard Biener 
Date:   Sat Oct 12 14:51:37 2024 +0200

tree-optimization/117104 - add missed guards to max(a,b) != a simplification

For vector types we have to make sure the comparison result is a vector
type and the resulting compare operation is supported.  As the resulting
compare is never an equality compare I didn't bother to check for the
cbranch case.

PR tree-optimization/117104
* match.pd ((cmp:c (minmax:c @0 @1) @0) -> (out @0 @1)): Properly
guard the vector case.

* gcc.dg/pr117104.c: New testcase.

(cherry picked from commit f54d42e7e7a558b273d87f95b3e5b1938f5a)

Diff:
---
 gcc/match.pd|  6 +-
 gcc/testsuite/gcc.dg/pr117104.c | 12 
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 41afdfbe59de..62edaf1267e3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4167,7 +4167,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  out(le  gt  gt  le  ge  lt  lt  ge )
  (simplify
   (cmp:c (minmax:c @0 @1) @0)
-  (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+  (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && (!VECTOR_TYPE_P (TREE_TYPE (@0))
+  || (VECTOR_TYPE_P (type)
+  && (!expand_vec_cmp_expr_p (TREE_TYPE (@0), type, cmp)
+  || expand_vec_cmp_expr_p (TREE_TYPE (@0), type, out)
(out @0 @1
 /* MIN (X, 5) == 0 -> X == 0
MIN (X, 5) == 7 -> false  */
diff --git a/gcc/testsuite/gcc.dg/pr117104.c b/gcc/testsuite/gcc.dg/pr117104.c
new file mode 100644
index ..9aa5734f7927
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr117104.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-vect-cost-model" } */
+/* { dg-additional-options "-mavx" { target { x86_64-*-* i?86-*-* } } } */
+
+void g();
+void f(long *a)
+{
+  long b0 = a[0] > 0 ? a[0] : 0;
+  long b1 = a[1] > 0 ? a[1] : 0;
+  if ((b0|b1) == 0)
+g();
+}


[gcc r15-4515] libstdc++: Fix order of [[...]] and __attribute__((...)) attrs [PR117220]

2024-10-21 Thread Jonathan Wakely via Gcc-cvs
https://gcc.gnu.org/g:cba80691251efccf44ab9aecb26558319605c9ea

commit r15-4515-gcba80691251efccf44ab9aecb26558319605c9ea
Author: Jonathan Wakely 
Date:   Mon Oct 21 12:09:36 2024 +0100

libstdc++: Fix order of [[...]] and __attribute__((...)) attrs [PR117220]

GCC allows these in either order, but Clang doesn't like the C++11-style
[[__nodiscard__]] coming after __attribute__((__always_inline__)).

libstdc++-v3/ChangeLog:

PR libstdc++/117220
* include/bits/stl_iterator.h: Move _GLIBCXX_NODISCARD
annotations before __attribute__((__always_inline__)).

Diff:
---
 libstdc++-v3/include/bits/stl_iterator.h | 46 
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index 26c5eab4b4e8..1fbc115b1163 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -1077,13 +1077,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Forward iterator requirements
 
-  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
   _GLIBCXX_CONSTEXPR
   reference
   operator*() const _GLIBCXX_NOEXCEPT
   { return *_M_current; }
 
-  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
   _GLIBCXX_CONSTEXPR
   pointer
   operator->() const _GLIBCXX_NOEXCEPT
@@ -1123,7 +1123,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Random access iterator requirements
 
-  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
   _GLIBCXX_CONSTEXPR
   reference
   operator[](difference_type __n) const _GLIBCXX_NOEXCEPT
@@ -1135,7 +1135,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   operator+=(difference_type __n) _GLIBCXX_NOEXCEPT
   { _M_current += __n; return *this; }
 
-  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
   _GLIBCXX_CONSTEXPR
   __normal_iterator
   operator+(difference_type __n) const _GLIBCXX_NOEXCEPT
@@ -1147,13 +1147,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   operator-=(difference_type __n) _GLIBCXX_NOEXCEPT
   { _M_current -= __n; return *this; }
 
-  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
   _GLIBCXX_CONSTEXPR
   __normal_iterator
   operator-(difference_type __n) const _GLIBCXX_NOEXCEPT
   { return __normal_iterator(_M_current - __n); }
 
-  __attribute__((__always_inline__)) _GLIBCXX_NODISCARD
+  _GLIBCXX_NODISCARD __attribute__((__always_inline__))
   _GLIBCXX_CONSTEXPR
   const _Iterator&
   base() const _GLIBCXX_NOEXCEPT
@@ -1209,7 +1209,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #else
// Forward iterator requirements
   template
-__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR
+_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR
 inline bool
 operator==(const __normal_iterator<_IteratorL, _Container>& __lhs,
   const __normal_iterator<_IteratorR, _Container>& __rhs)
@@ -1217,7 +1217,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { return __lhs.base() == __rhs.base(); }
 
   template
-__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR
+_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR
 inline bool
 operator==(const __normal_iterator<_Iterator, _Container>& __lhs,
   const __normal_iterator<_Iterator, _Container>& __rhs)
@@ -1225,7 +1225,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { return __lhs.base() == __rhs.base(); }
 
   template
-__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR
+_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR
 inline bool
 operator!=(const __normal_iterator<_IteratorL, _Container>& __lhs,
   const __normal_iterator<_IteratorR, _Container>& __rhs)
@@ -1233,7 +1233,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { return __lhs.base() != __rhs.base(); }
 
   template
-__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR
+_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR
 inline bool
 operator!=(const __normal_iterator<_Iterator, _Container>& __lhs,
   const __normal_iterator<_Iterator, _Container>& __rhs)
@@ -1242,7 +1242,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Random access iterator requirements
   template
-__attribute__((__always_inline__)) _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR
+_GLIBCXX_NODISCARD __attribute__((__always_inline__)) _GLIBCXX_CONSTEXPR
 inline bool
 operator<(const __normal_iterator<_IteratorL, _Container>& __

[gcc r15-4517] libstdc++: Improve 26_numerics/headers/cmath/types_std_c++0x_neg.cc

2024-10-21 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:d0d99fc6b6c4f1c3fa8a9427f461103c78ab457b

commit r15-4517-gd0d99fc6b6c4f1c3fa8a9427f461103c78ab457b
Author: Jonathan Wakely 
Date:   Fri Oct 18 12:02:45 2024 +0100

libstdc++: Improve 26_numerics/headers/cmath/types_std_c++0x_neg.cc

This test checks that the special functions in <cmath> are not declared
prior to C++17. But we can remove the target selector and allow it to be
tested for C++17 and later, and add target selectors to the individual
dg-error directives instead.

Also rename the test to match what it actually tests.

libstdc++-v3/ChangeLog:

* testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc:
Move to ...
* testsuite/26_numerics/headers/cmath/specfun_c++17.cc: here and
adjust test to be valid for all -std dialects.

Diff:
---
 .../{types_std_c++0x_neg.cc => specfun_c++17.cc}   | 47 +++---
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/specfun_c++17.cc
similarity index 57%
rename from 
libstdc++-v3/testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc
rename to libstdc++-v3/testsuite/26_numerics/headers/cmath/specfun_c++17.cc
index 977f800a4b07..efb60ea1fbbe 100644
--- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/types_std_c++0x_neg.cc
+++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/specfun_c++17.cc
@@ -1,4 +1,4 @@
-// { dg-do compile { target { ! c++17 } } }
+// { dg-do compile }
 
 // Copyright (C) 2007-2024 Free Software Foundation, Inc.
 //
@@ -21,28 +21,29 @@
 
 namespace gnu
 {
-  // C++11 changes from TR1.
-  using std::assoc_laguerre;   // { dg-error "has not been declared" }
-  using std::assoc_legendre;   // { dg-error "has not been declared" }
-  using std::beta; // { dg-error "has not been declared" }
-  using std::comp_ellint_1;// { dg-error "has not been declared" }
-  using std::comp_ellint_2;// { dg-error "has not been declared" }
-  using std::comp_ellint_3;// { dg-error "has not been declared" }
+  // C++17 additions from TR1.
+  using std::assoc_laguerre;   // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::assoc_legendre;   // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::beta; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::comp_ellint_1;// { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::comp_ellint_2;// { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::comp_ellint_3;// { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::cyl_bessel_i; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::cyl_bessel_j; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::cyl_bessel_k; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::cyl_neumann;  // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::ellint_1; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::ellint_2; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::ellint_3; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::expint;   // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::hermite;  // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::laguerre; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::legendre; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::riemann_zeta; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::sph_bessel;   // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::sph_legendre; // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  using std::sph_neumann;  // { dg-error "has not been declared" "" { 
target { ! c++17 } } }
+  // These two were in TR1 but not added to C++17.
   using std::conf_hyperg;  // { dg-error "has not been declared" }
-  using std::cyl_bessel_i; // { dg-error "has not been declared" }
-  using std::cyl_bessel_j; // { dg-error "has not been declared" }
-  using std::cyl_bessel_k; // { dg-error "has not been declared" }
-  using std::cyl_neumann;  // { dg-error "has not been declared" }
-  using std::ellint_1; // { dg-error "has not been declared" }
-  using std::ellint_2; // { dg-error "has not been declared" }
-  using std::ellint_3; // { dg-error "has not been declared" }
-  using std::expint;   // { dg-error "has not been declared" }
-  using std::h

[gcc r15-4516] libstdc++: Simplify C++98 std::vector::_M_data_ptr overload set

2024-10-21 Thread Jonathan Wakely via Libstdc++-cvs
https://gcc.gnu.org/g:1003a428154cd2e556c1fba994d4f3ea2442fc95

commit r15-4516-g1003a428154cd2e556c1fba994d4f3ea2442fc95
Author: Jonathan Wakely 
Date:   Fri Oct 18 11:55:08 2024 +0100

libstdc++: Simplify C++98 std::vector::_M_data_ptr overload set

We don't need separate overloads for returning a const or non-const
pointer. We can make the member function const and return a non-const
pointer, and let vector::data() const convert it to const as needed.

libstdc++-v3/ChangeLog:

* include/bits/stl_vector.h (vector::_M_data_ptr): Remove
non-const overloads. Always return non-const pointer.

Diff:
---
 libstdc++-v3/include/bits/stl_vector.h | 12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_vector.h 
b/libstdc++-v3/include/bits/stl_vector.h
index e284536ad31e..8982ca2b9eee 100644
--- a/libstdc++-v3/include/bits/stl_vector.h
+++ b/libstdc++-v3/include/bits/stl_vector.h
@@ -2034,20 +2034,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
_M_data_ptr(_Ptr __ptr) const
{ return empty() ? nullptr : std::__to_address(__ptr); }
 #else
-  template
-   _Up*
-   _M_data_ptr(_Up* __ptr) _GLIBCXX_NOEXCEPT
-   { return __ptr; }
-
   template
value_type*
-   _M_data_ptr(_Ptr __ptr)
-   { return empty() ? (value_type*)0 : __ptr.operator->(); }
-
-  template
-   const value_type*
_M_data_ptr(_Ptr __ptr) const
-   { return empty() ? (const value_type*)0 : __ptr.operator->(); }
+   { return empty() ? (value_type*)0 : __ptr.operator->(); }
 #endif
 };


[gcc r15-4518] pair-fusion: Assume alias conflict if common address reg changes [PR116783]

2024-10-21 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:c0e54ce1999ccf2241f74c5188b11b92e5aedc1f

commit r15-4518-gc0e54ce1999ccf2241f74c5188b11b92e5aedc1f
Author: Alex Coplan 
Date:   Fri Sep 20 17:39:39 2024 +0100

pair-fusion: Assume alias conflict if common address reg changes [PR116783]

As the PR shows, pair-fusion was tricking memory_modified_in_insn_p into
returning false when a common base register (in this case, x1) was
modified between the mem and the store insn.  This led to wrong code as
the accesses really did alias.

To avoid this sort of problem, this patch avoids invoking RTL alias
analysis altogether (and assume an alias conflict) if the two insns to
be compared share a common address register R, and the insns see different
definitions of R (i.e. it was modified in between).

gcc/ChangeLog:

PR rtl-optimization/116783
* pair-fusion.cc (def_walker::cand_addr_uses): New.
(def_walker::def_walker): Add parameter for candidate address
uses.
(def_walker::alias_conflict_p): Declare.
(def_walker::addr_reg_conflict_p): New.
(def_walker::conflict_p): New.
(store_walker::store_walker): Add parameter for candidate
address uses and pass to base ctor.
(store_walker::conflict_p): Rename to ...
(store_walker::alias_conflict_p): ... this.
(load_walker::load_walker): Add parameter for candidate
address uses and pass to base ctor.
(load_walker::conflict_p): Rename to ...
(load_walker::alias_conflict_p): ... this.
(pair_fusion_bb_info::try_fuse_pair): Collect address register
uses for candidate insns and pass down to alias walkers.

gcc/testsuite/ChangeLog:

PR rtl-optimization/116783
* g++.dg/torture/pr116783.C: New test.

Diff:
---
 gcc/pair-fusion.cc  | 127 +---
 gcc/testsuite/g++.dg/torture/pr116783.C |  98 
 2 files changed, 213 insertions(+), 12 deletions(-)

diff --git a/gcc/pair-fusion.cc b/gcc/pair-fusion.cc
index 653055fdcf67..ccbb5511e9d1 100644
--- a/gcc/pair-fusion.cc
+++ b/gcc/pair-fusion.cc
@@ -2089,11 +2089,80 @@ protected:
 
   def_iter_t def_iter;
   insn_info *limit;
-  def_walker (def_info *def, insn_info *limit) :
-def_iter (def), limit (limit) {}
+
+  // Array of register uses from the candidate insn which occur in MEMs.
+  use_array cand_addr_uses;
+
+  def_walker (def_info *def, insn_info *limit, use_array addr_uses) :
+def_iter (def), limit (limit), cand_addr_uses (addr_uses) {}
 
   virtual bool iter_valid () const { return *def_iter; }
 
+  // Implemented in {load,store}_walker.
+  virtual bool alias_conflict_p (int &budget) const = 0;
+
+  // Return true if the current (walking) INSN () uses a register R inside a
+  // MEM, where R is also used inside a MEM by the (static) candidate insn, and
+  // those uses see different definitions of that register.  In this case we
+  // can't rely on RTL alias analysis, and for now we conservatively assume 
that
+  // there is an alias conflict.  See PR116783.
+  bool addr_reg_conflict_p () const
+  {
+use_array curr_insn_uses = insn ()->uses ();
+auto cand_use_iter = cand_addr_uses.begin ();
+auto insn_use_iter = curr_insn_uses.begin ();
+while (cand_use_iter != cand_addr_uses.end ()
+  && insn_use_iter != curr_insn_uses.end ())
+  {
+   auto insn_use = *insn_use_iter;
+   auto cand_use = *cand_use_iter;
+   if (insn_use->regno () > cand_use->regno ())
+ cand_use_iter++;
+   else if (insn_use->regno () < cand_use->regno ())
+ insn_use_iter++;
+   else
+ {
+   // As it stands I believe the alias code (memory_modified_in_insn_p)
+   // doesn't look at insn notes such as REG_EQU{IV,AL}, so it should
+   // be safe to skip over uses that only occur in notes.
+   if (insn_use->includes_address_uses ()
+   && !insn_use->only_occurs_in_notes ()
+   && insn_use->def () != cand_use->def ())
+ {
+   if (dump_file)
+ {
+   fprintf (dump_file,
+"assuming aliasing of cand i%d and i%d:\n"
+"-> insns see different defs of common addr reg 
r%u\n"
+"-> ",
+cand_use->insn ()->uid (), insn_use->insn ()->uid 
(),
+insn_use->regno ());
+
+   // Note that while the following sequence could be made more
+   // concise by eliding pp_string calls into the pp_printf
+   // calls, doing so triggers -Wformat-diag.
+   pretty_printer pp;
+   pp_string (&pp, "[");
+   pp_access (&pp, cand_use, 0);
+   pp_st

[gcc r15-4519] amdgcn: silence warning

2024-10-21 Thread Andrew Stubbs via Gcc-cvs
https://gcc.gnu.org/g:0b6d94ce72b2f35dbee7c42774d6972671c86f97

commit r15-4519-g0b6d94ce72b2f35dbee7c42774d6972671c86f97
Author: Andrew Stubbs 
Date:   Mon Sep 16 12:31:59 2024 +

amdgcn: silence warning

FIRST_SGPR_REG is register zero so the compiler always claims this comparison
is redundant.  It's right, of course, but I'd have preferred to keep the
comparison for completeness.  Probably the "correct" solution is to use an enum
for these values.

gcc/ChangeLog:

* config/gcn/gcn.h (SGPR_REGNO_P): Silence warning.

Diff:
---
 gcc/config/gcn/gcn.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 1a4631dd39f6..faefe68cdfa9 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -191,7 +191,7 @@ STATIC_ASSERT (LAST_AVGPR_REG + 1 - FIRST_AVGPR_REG == 256);
 #define HARD_FRAME_POINTER_IS_ARG_POINTER   0
 #define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
 
-#define SGPR_REGNO_P(N)((N) >= FIRST_SGPR_REG && (N) <= 
LAST_SGPR_REG)
+#define SGPR_REGNO_P(N)(/*(N) >= FIRST_SGPR_REG &&*/ (N) <= 
LAST_SGPR_REG)
 #define VGPR_REGNO_P(N)((N) >= FIRST_VGPR_REG && (N) <= 
LAST_VGPR_REG)
 #define AVGPR_REGNO_P(N)((N) >= FIRST_AVGPR_REG && (N) <= 
LAST_AVGPR_REG)
 #define SSRC_REGNO_P(N)((N) <= SCC_REG && (N) != VCCZ_REG)


[gcc(refs/users/meissner/heads/work181-sha)] Revert changes

2024-10-21 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:8cce3a176a4edbcc4e7dfb4f0459a217abbdcd31

commit 8cce3a176a4edbcc4e7dfb4f0459a217abbdcd31
Author: Michael Meissner 
Date:   Mon Oct 21 13:10:54 2024 -0400

Revert changes

Diff:
---
 gcc/config/rs6000/fusion.md| 352 -
 gcc/config/rs6000/genfusion.pl |   4 +-
 2 files changed, 178 insertions(+), 178 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 6f9081ab3372..215a3aae074f 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1872,16 +1872,16 @@
 ;; vector vand -> vand
 (define_insn "*fuse_vand_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v")
-  (match_operand:VM 1 "vector_fusion_operand" 
"%wa,v,v,v,v"))
- (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
+(and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
-   xxeval %x3,%x2,%x1,%x0,1
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
vand %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,1
vand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1893,16 +1893,16 @@
 ;; vector vandc -> vand
 (define_insn "*fuse_vandc_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v"))
-  (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v"))
- (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
+(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v"))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
-   xxeval %x3,%x2,%x1,%x0,2
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
vandc %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,2
vandc %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1914,16 +1914,16 @@
 ;; vector veqv -> vand
 (define_insn "*fuse_veqv_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v")
-  (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v")))
- (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
+(and:VM (not:VM (xor:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+  (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
-   xxeval %x3,%x2,%x1,%x0,9
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
veqv %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,9
veqv %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1935,16 +1935,16 @@
 ;; vector vnand -> vand
 (define_insn "*fuse_vnand_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v"))
-  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v")))
- (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,v")))
+(and:VM (ior:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v"))
+  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"v,v,v,wa,v")))
+ (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
(clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
-   xxeval %x3,%x2,%x1,%x0,14
vnand %3,%1,%0\;vand %3,%3,%2
vnand %3,%1,%0\;vand %3,%3,%2
vnand %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,14
vnand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
(set_attr "cost" "6")
@@ -1956,16 +1956,16 @@
 ;; vector vnor -> vand
 (define_insn "*fuse_vnor_vand"
   [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
-(and:VM (and:VM (not:VM (match_operand:VM 0 "vector_fusion_operand" 
"wa,v,v,v,v"))
-  (not:VM (match_operand:VM 1 "vector_fusion_operand" 
"wa,v,v,v,v")))
- (match_operand:VM 2 "vector_fusion_operand" "wa,v,v,v,

[gcc r15-4529] RISC-V: Add testcases for form 6 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:f411abe7935e01b7e61f966d12a7a0850ca8f1c0

commit r15-4529-gf411abe7935e01b7e61f966d12a7a0850ca8f1c0
Author: Pan Li 
Date:   Mon Oct 14 14:55:56 2024 +0800

RISC-V: Add testcases for form 6 of vector signed SAT_TRUNC

Form 6:
  #define DEF_VEC_SAT_S_TRUNC_FMT_6(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_6 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN >= x || x > (WT)NT_MAX  \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-6-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-6-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-6-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-6-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-6-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-6-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-6-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-6-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-6-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-6-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c
new file mode 100644
index ..c97057355c40
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_6(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c
new file mode 100644
index ..629c07347bb9
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-6-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_6(int16_t, int32_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times

[gcc r15-4527] RISC-V: Add testcases for form 4 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:f30ca9867a77c78f3a48bc124ab3bc4ce32283fa

commit r15-4527-gf30ca9867a77c78f3a48bc124ab3bc4ce32283fa
Author: Pan Li 
Date:   Mon Oct 14 11:41:02 2024 +0800

RISC-V: Add testcases for form 4 of vector signed SAT_TRUNC

Form 4:
  #define DEF_VEC_SAT_S_TRUNC_FMT_4(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_4 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN <= x && x < (WT)NT_MAX  \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-4-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-4-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-4-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-4-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-4-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-4-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-4-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-4-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-4-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-4-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c
new file mode 100644
index ..2ac96aa1a35b
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_4(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c
new file mode 100644
index ..7fe8f2774767
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-4-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_4(int16_t, int32_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times 

[gcc r15-4524] RISC-V: Add testcases for form 1 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:1f3a9c08aff9aac53d6c12b658efc222cf91de9c

commit r15-4524-g1f3a9c08aff9aac53d6c12b658efc222cf91de9c
Author: Pan Li 
Date:   Mon Oct 14 10:21:39 2024 +0800

RISC-V: Add testcases for form 1 of vector signed SAT_TRUNC

Form 1:
  #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h: Add test data 
for
signed SAT_TRUNC.
* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-1-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/unop/vec_sat_data.h  | 291 +
 .../rvv/autovec/unop/vec_sat_s_trunc-1-i16-to-i8.c |   9 +
 .../autovec/unop/vec_sat_s_trunc-1-i32-to-i16.c|   9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-1-i32-to-i8.c |   9 +
 .../autovec/unop/vec_sat_s_trunc-1-i64-to-i16.c|   9 +
 .../autovec/unop/vec_sat_s_trunc-1-i64-to-i32.c|   9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-1-i64-to-i8.c |   9 +
 .../autovec/unop/vec_sat_s_trunc-run-1-i16-to-i8.c |  16 ++
 .../unop/vec_sat_s_trunc-run-1-i32-to-i16.c|  16 ++
 .../autovec/unop/vec_sat_s_trunc-run-1-i32-to-i8.c |  16 ++
 .../unop/vec_sat_s_trunc-run-1-i64-to-i16.c|  16 ++
 .../unop/vec_sat_s_trunc-run-1-i64-to-i32.c|  16 ++
 .../autovec/unop/vec_sat_s_trunc-run-1-i64-to-i8.c |  16 ++
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   |  22 ++
 14 files changed, 463 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h
index 6b23ec809f6c..a3643c5e1218 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_data.h
@@ -25,6 +25,15 @@ TEST_UNARY_STRUCT(uint16_t, uint64_t)
 
 TEST_UNARY_STRUCT(uint32_t, uint64_t)
 
+TEST_UNARY_STRUCT(int8_t, int16_t)
+TEST_UNARY_STRUCT(int8_t, int32_t)
+TEST_UNARY_STRUCT(int8_t, int64_t)
+
+TEST_UNARY_STRUCT(int16_t, int32_t)
+TEST_UNARY_STRUCT(int16_t, int64_t)
+
+TEST_UNARY_STRUCT(int32_t, int64_t)
+
 TEST_UNARY_STRUCT_DECL(uint8_t, uint16_t) \
   TEST_UNARY_DATA(uint8_t, uint16_t)[] =
 {
@@ -391,4 +400,286 @@ TEST_UNARY_STRUCT_DECL(uint32_t, uint64_t) \
   },
 };
 
+TEST_UNARY_STRUCT_DECL(int8_t, int16_t) \
+  TEST_UNARY_DATA(int8_t, int16_t)[] =
+{
+  {
+{
+  0,  0,  0,  0,
+ -1, -1, -1, -1,
+  1,  1,  1,  1,
+  2,  2,  2,  2,
+},
+{
+  0,  0,  0,  0,
+ -1, -1, -1, -1,
+  1,  1,  1,  1,
+  2,  2,  2,  2,
+},
+  },
+  {
+{
+   127,  127,  127,  127,
+   128,  128,  128,  128,
+  -128, -128, -128, -

[gcc r15-4526] RISC-V: Add testcases for form 3 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:efa1617bfc095e0667df31a6f3a2c0319afbc8d0

commit r15-4526-gefa1617bfc095e0667df31a6f3a2c0319afbc8d0
Author: Pan Li 
Date:   Mon Oct 14 11:26:06 2024 +0800

RISC-V: Add testcases for form 3 of vector signed SAT_TRUNC

Form 3:
  #define DEF_VEC_SAT_S_TRUNC_FMT_3(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_3 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN < x && x < (WT)NT_MAX   \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-3-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-3-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-3-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-3-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-3-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-3-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-3-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-3-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-3-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-3-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c
new file mode 100644
index ..392366def060
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_3(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c
new file mode 100644
index ..2b16049994a5
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-3-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_3(int16_t, int32_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times 

[gcc r15-4525] RISC-V: Add testcases for form 2 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:033900fc175bbd67fd1a8c8f7410a21f8b04eda2

commit r15-4525-g033900fc175bbd67fd1a8c8f7410a21f8b04eda2
Author: Pan Li 
Date:   Mon Oct 14 11:09:55 2024 +0800

RISC-V: Add testcases for form 2 of vector signed SAT_TRUNC

Form 2:
  #define DEF_VEC_SAT_S_TRUNC_FMT_2(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_2 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN < x && x < (WT)NT_MAX   \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-2-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-2-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-2-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-2-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-2-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-2-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-2-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-2-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-2-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-2-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c
new file mode 100644
index ..3e26e788c083
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_2(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c
new file mode 100644
index ..63797705a04a
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-2-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_2(int16_t, int32_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times 

[gcc r15-4528] RISC-V: Add testcases for form 5 of vector signed SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:108c8ef03dd5dff96fd3a4aa31088e42d98a0624

commit r15-4528-g108c8ef03dd5dff96fd3a4aa31088e42d98a0624
Author: Pan Li 
Date:   Mon Oct 14 14:41:22 2024 +0800

RISC-V: Add testcases for form 5 of vector signed SAT_TRUNC

Form 5:
  #define DEF_VEC_SAT_S_TRUNC_FMT_5(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_5 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN > x || x > (WT)NT_MAX   \
  ? x < 0 ? NT_MIN : NT_MAX \
  : trunc;  \
  } \
  }

The below test are passed for this patch.
* The rv64gcv fully regression test.

It is test only patch and obvious up to a point, will commit it
directly if no comments in next 48H.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/vec_sat_arith.h: Add test helper 
macros.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i8.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i16.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i32.c: 
New test.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i8.c: 
New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i16-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i32-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i32-to-i8.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i16.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i32.c: New test.
* 
gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-run-5-i64-to-i8.c: New test.

Signed-off-by: Pan Li 

Diff:
---
 .../rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-5-i64-to-i16.c|  9 +
 .../autovec/unop/vec_sat_s_trunc-5-i64-to-i32.c|  9 +
 .../rvv/autovec/unop/vec_sat_s_trunc-5-i64-to-i8.c |  9 +
 .../autovec/unop/vec_sat_s_trunc-run-5-i16-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-5-i32-to-i16.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-5-i32-to-i8.c | 16 
 .../unop/vec_sat_s_trunc-run-5-i64-to-i16.c| 16 
 .../unop/vec_sat_s_trunc-run-5-i64-to-i32.c| 16 
 .../autovec/unop/vec_sat_s_trunc-run-5-i64-to-i8.c | 16 
 .../gcc.target/riscv/rvv/autovec/vec_sat_arith.h   | 22 ++
 13 files changed, 172 insertions(+)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c
new file mode 100644
index ..49c076ad2779
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i16-to-i8.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_5(int8_t, int16_t, INT8_MIN, INT8_MAX)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_TRUNC " 2 "expand" } } */
+/* { dg-final { scan-assembler-times {vnclip\.wi} 1 } } */
diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c
 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c
new file mode 100644
index ..a2a1aa40e017
--- /dev/null
+++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_s_trunc-5-i32-to-i16.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fdump-rtl-expand-details" } */
+
+#include "../vec_sat_arith.h"
+
+DEF_VEC_SAT_S_TRUNC_FMT_5(int16_t, int32_t, INT16_MIN, INT16_MAX)
+
+/* { dg-final { scan-rtl-dump-times 

[gcc r15-4523] RISC-V: Implement vector SAT_TRUNC for signed integer

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:b5a058154179ab16fe5f9e6aa331624363410aad

commit r15-4523-gb5a058154179ab16fe5f9e6aa331624363410aad
Author: Pan Li 
Date:   Mon Oct 14 10:14:31 2024 +0800

RISC-V: Implement vector SAT_TRUNC for signed integer

This patch would like to implement the sstrunc for vector signed integer.

Form 1:
  #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX)

Before this patch:
  27   │ vsetvli a5,a2,e64,m1,ta,ma
  28   │ vle64.v v1,0(a1)
  29   │ slli a3,a5,3
  30   │ slli a4,a5,2
  31   │ sub a2,a2,a5
  32   │ add a1,a1,a3
  33   │ vadd.vv v0,v1,v5
  34   │ vsetvli zero,zero,e32,mf2,ta,ma
  35   │ vnsrl.wx v2,v1,a6
  36   │ vncvt.x.x.w v1,v1
  37   │ vsetvli zero,zero,e64,m1,ta,ma
  38   │ vmsgtu.vv   v0,v0,v4
  39   │ vsetvli zero,zero,e32,mf2,ta,mu
  40   │ vneg.v  v2,v2
  41   │ vxor.vv v1,v2,v3,v0.t
  42   │ vse32.v v1,0(a0)
  43   │ add a0,a0,a4
  44   │ bne a2,zero,.L3

After this patch:
  16   │ vsetvli a5,a2,e32,mf2,ta,ma
  17   │ vle64.v v1,0(a1)
  18   │ slli a3,a5,3
  19   │ slli a4,a5,2
  20   │ sub a2,a2,a5
  21   │ add a1,a1,a3
  22   │ vnclip.wi   v1,v1,0
  23   │ vse32.v v1,0(a0)
  24   │ add a0,a0,a4
  25   │ bne a2,zero,.L3

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

gcc/ChangeLog:

* config/riscv/autovec.md (sstrunc<mode><v_double_trunc>2): Add
new pattern sstrunc for double trunc.
(sstrunc<mode><v_quad_trunc>2): Ditto but for quad trunc.
(sstrunc<mode><v_oct_trunc>2): Ditto but for oct trunc.
* config/riscv/riscv-protos.h (expand_vec_double_sstrunc): Add
new func decl to expand double trunc.
(expand_vec_quad_sstrunc): Ditto but for quad trunc.
(expand_vec_oct_sstrunc): Ditto but for oct trunc.
* config/riscv/riscv-v.cc (expand_vec_double_sstrunc): Add new
func to expand double trunc.
(expand_vec_quad_sstrunc): Ditto but for quad trunc.
(expand_vec_oct_sstrunc): Ditto but for oct trunc.

Signed-off-by: Pan Li 

Diff:
---
 gcc/config/riscv/autovec.md | 34 ++
 gcc/config/riscv/riscv-protos.h |  4 
 gcc/config/riscv/riscv-v.cc | 46 +
 3 files changed, 84 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a34f63c96516..774a3d337231 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2779,6 +2779,40 @@
   }
 )
 
+(define_expand "sstrunc<mode><v_double_trunc>2"
+  [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+   (match_operand:VWEXTI   1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_double_sstrunc (operands[0], operands[1],
+ <MODE>mode);
+DONE;
+  }
+)
+
+(define_expand "sstrunc<mode><v_quad_trunc>2"
+  [(match_operand:<V_QUAD_TRUNC> 0 "register_operand")
+   (match_operand:VQEXTI 1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_quad_sstrunc (operands[0], operands[1], 
<MODE>mode,
+  <V_DOUBLE_TRUNC>mode);
+DONE;
+  }
+)
+
+(define_expand "sstrunc<mode><v_oct_trunc>2"
+  [(match_operand:<V_OCT_TRUNC> 0 "register_operand")
+   (match_operand:VOEXTI1 "register_operand")]
+  "TARGET_VECTOR"
+  {
+riscv_vector::expand_vec_oct_sstrunc (operands[0], operands[1], <MODE>mode,
+ <V_DOUBLE_TRUNC>mode,
+ <V_QUAD_TRUNC>mode);
+DONE;
+  }
+)
+
 ;; =
 ;; == Early break auto-vectorization patterns
 ;; =
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/risc

[gcc r15-4520] aarch64: Fix costing of move to/from MOVEABLE_SYSREGS

2024-10-21 Thread Andrew Carlotti via Gcc-cvs
https://gcc.gnu.org/g:8193e71a07de010c041175e7a8acf62eeae5b336

commit r15-4520-g8193e71a07de010c041175e7a8acf62eeae5b336
Author: Andrew Carlotti 
Date:   Thu Aug 22 11:59:33 2024 +0100

aarch64: Fix costing of move to/from MOVEABLE_SYSREGS

This is necessary to prevent reload assuming that a direct FP->FPMR move
is valid.

gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_register_move_cost):
Increase costs involving MOVEABLE_SYSREGS.

Diff:
---
 gcc/config/aarch64/aarch64.cc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 0dbc2aaa99ff..21d9a6b5a20e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -15565,6 +15565,12 @@ aarch64_register_move_cost (machine_mode mode,
reg_class_contents[FFR_REGS]))
 return 80;
 
+  /* Moves to/from sysregs are expensive, and must go via GPR.  */
+  if (from == MOVEABLE_SYSREGS)
+return 80 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
+  if (to == MOVEABLE_SYSREGS)
+return 80 + aarch64_register_move_cost (mode, from, GENERAL_REGS);
+
   /* Moving between GPR and stack cost is the same as GP2GP.  */
   if ((from == GENERAL_REGS && to == STACK_REG)
   || (to == GENERAL_REGS && from == STACK_REG))


[gcc r15-4522] Vect: Try the pattern of vector signed integer SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:2987ca61003ee7d55b8b005ab4c9c679efc9558b

commit r15-4522-g2987ca61003ee7d55b8b005ab4c9c679efc9558b
Author: Pan Li 
Date:   Mon Oct 14 10:09:31 2024 +0800

Vect: Try the pattern of vector signed integer SAT_TRUNC

Almost the same as vector unsigned integer SAT_TRUNC, try to match
the signed version during the vector pattern matching.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* tree-vect-patterns.cc (gimple_signed_integer_sat_trunc): Add
new func decl for signed SAT_TRUNC.
(vect_recog_sat_trunc_pattern): Try signed match pattern for
the SAT_TRUNC.

Signed-off-by: Pan Li 

Diff:
---
 gcc/tree-vect-patterns.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 746f100a0842..ce5a528141f7 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -4539,6 +4539,7 @@ extern bool gimple_unsigned_integer_sat_trunc (tree, 
tree*, tree (*)(tree));
 
 extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
 extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
+extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
 
 static gimple *
 vect_recog_build_binary_gimple_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
@@ -4770,7 +4771,8 @@ vect_recog_sat_trunc_pattern (vec_info *vinfo, 
stmt_vec_info stmt_vinfo,
   tree lhs = gimple_assign_lhs (last_stmt);
   tree otype = TREE_TYPE (lhs);
 
-  if (gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
+  if ((gimple_unsigned_integer_sat_trunc (lhs, ops, NULL)
+   || gimple_signed_integer_sat_trunc (lhs, ops, NULL))
   && type_has_mode_precision_p (otype))
 {
   tree itype = TREE_TYPE (ops[0]);


[gcc r15-4521] Match: Support form 1 for vector signed integer SAT_TRUNC

2024-10-21 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:bdbb74e38f30827568ba1224d52f5c86edb5d48c

commit r15-4521-gbdbb74e38f30827568ba1224d52f5c86edb5d48c
Author: Pan Li 
Date:   Mon Oct 14 10:03:25 2024 +0800

Match: Support form 1 for vector signed integer SAT_TRUNC

This patch adds support for form 1 of the vector signed
integer SAT_TRUNC, as in the example below:

Form 1:
  #define DEF_VEC_SAT_S_TRUNC_FMT_1(NT, WT, NT_MIN, NT_MAX) \
  void __attribute__((noinline))\
  vec_sat_s_trunc_##NT##_##WT##_fmt_1 (NT *out, WT *in, unsigned limit) \
  { \
unsigned i; \
for (i = 0; i < limit; i++) \
  { \
WT x = in[i];   \
NT trunc = (NT)x;   \
out[i] = (WT)NT_MIN <= x && x <= (WT)NT_MAX \
  ? trunc   \
  : x < 0 ? NT_MIN : NT_MAX;\
  } \
  }

DEF_VEC_SAT_S_TRUNC_FMT_1(int32_t, int64_t, INT32_MIN, INT32_MAX)

Before this patch:
  48   │   _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [2, 2]);
  49   │   ivtmp_64 = _87 * 8;
  50   │   vect_x_14.10_67 = .MASK_LEN_LOAD (vectp_in.8_65, 64B, { -1, ... 
}, _87, 0);
  51   │   vect_trunc_15.21_78 = (vector([2,2]) int) vect_x_14.10_67;
  52   │   _61 = VIEW_CONVERT_EXPR(vect_x_14.10_67);
  53   │   _32 = _61 >> 63;
  54   │   vect_patt_52.16_73 = (vector([2,2]) int) _32;
  55   │   vect__46.17_74 = VIEW_CONVERT_EXPR(vect_patt_52.16_73);
  56   │   vect__47.18_75 = -vect__46.17_74;
  57   │   vect__21.19_76 = VIEW_CONVERT_EXPR(vect__47.18_75);
  58   │   vect_x.11_68 = VIEW_CONVERT_EXPR(vect_x_14.10_67);
  59   │   vect__5.12_69 = vect_x.11_68 + { 2147483648, ... };
  60   │   mask__34.13_70 = vect__5.12_69 > { 4294967295, ... };
  61   │   _25 = .COND_XOR (mask__34.13_70, vect__21.19_76, { 2147483647, 
... }, vect_trunc_15.21_78);
  62   │   ivtmp_80 = _87 * 4;
  63   │   .MASK_LEN_STORE (vectp_out.23_81, 32B, { -1, ... }, _87, 0, _25);
  64   │   vectp_in.8_66 = vectp_in.8_65 + ivtmp_64;
  65   │   vectp_out.23_82 = vectp_out.23_81 + ivtmp_80;
  66   │   ivtmp_86 = ivtmp_85 - _87;

After this patch:
  38   │   _77 = .SELECT_VL (ivtmp_75, POLY_INT_CST [2, 2]);
  39   │   ivtmp_65 = _77 * 8;
  40   │   vect_x_14.10_68 = .MASK_LEN_LOAD (vectp_in.8_66, 64B, { -1, ... 
}, _77, 0);
  41   │   vect_patt_53.11_69 = .SAT_TRUNC (vect_x_14.10_68);
  42   │   ivtmp_70 = _77 * 4;
  43   │   .MASK_LEN_STORE (vectp_out.12_71, 32B, { -1, ... }, _77, 0, 
vect_patt_53.11_69);
  44   │   vectp_in.8_67 = vectp_in.8_66 + ivtmp_65;
  45   │   vectp_out.12_72 = vectp_out.12_71 + ivtmp_70;
  46   │   ivtmp_76 = ivtmp_75 - _77;

The below test suites are passed for this patch.
* The rv64gcv fully regression test.
* The x86 bootstrap test.
* The x86 fully regression test.

gcc/ChangeLog:

* match.pd: Refine matching for vector signed SAT_TRUNC form 1.

Signed-off-by: Pan Li 

Diff:
---
 gcc/match.pd | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 12d81fcac0de..ec2038d48dc4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3482,7 +3482,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
SAT_S_TRUNC(X) = (unsigned)X + NT_MAX + 1  > Unsigned_MAX ? (NT)X.  */
 (match (signed_integer_sat_trunc @0)
  (cond^ (gt (plus:c (convert@4 @0) INTEGER_CST@1) INTEGER_CST@2)
-   (bit_xor:c (negate (convert (lt @0 integer_zerop))) INTEGER_CST@3)
+   (bit_xor:c (nop_convert?
+   (negate (nop_convert? (convert (lt @0 integer_zerop)
+  INTEGER_CST@3)
(convert @0))
  (if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type)
   && !TYPE_UNSIGNED (TREE_TYPE (@0)) && TYPE_UNSIGNED (TREE_TYPE (@4)))


[gcc(refs/users/jmelcr/heads/omp-cp)] omp-cp: fix flags when cloning edges, add lto input and output

2024-10-21 Thread Josef Melcr via Gcc-cvs
https://gcc.gnu.org/g:f2c71e4d68dce5a51aedd0f71a18eec4ad76ff17

commit f2c71e4d68dce5a51aedd0f71a18eec4ad76ff17
Author: Josef Melcr 
Date:   Mon Oct 21 16:31:32 2024 +0200

omp-cp: fix flags when cloning edges, add lto input and output

gcc/ChangeLog:

* cgraph.cc (cgraph_edge::dump_edge_flags): add callback and
  has_callback printing
* cgraphclones.cc (cgraph_edge::clone): copy over callback and
  has_callback flags
* ipa-inline.cc (can_inline_edge_p): move callback condition to
  the beginning
* lto-cgraph.cc (lto_output_edge): add outputting for callback flags
(input_edge): add inputting of callback flags

Signed-off-by: Josef Melcr 

Diff:
---
 gcc/cgraph.cc   |  4 
 gcc/cgraphclones.cc |  2 ++
 gcc/ipa-inline.cc   | 10 +-
 gcc/lto-cgraph.cc   |  4 
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 58813e8cc2d0..c62f5de807da 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -2113,6 +2113,10 @@ cgraph_edge::dump_edge_flags (FILE *f)
 {
   if (speculative)
 fprintf (f, "(speculative) ");
+  if (callback)
+fprintf (f, "(callback) ");
+  if (has_callback)
+fprintf (f, "(has_callback) ");
   if (!inline_failed)
 fprintf (f, "(inlined) ");
   if (call_stmt_cannot_inline_p)
diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc
index 4fff6873a369..d52b72364d4d 100644
--- a/gcc/cgraphclones.cc
+++ b/gcc/cgraphclones.cc
@@ -144,6 +144,8 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, 
unsigned stmt_uid,
   new_edge->can_throw_external = can_throw_external;
   new_edge->call_stmt_cannot_inline_p = call_stmt_cannot_inline_p;
   new_edge->speculative = speculative;
+  new_edge->callback = callback;
+  new_edge->has_callback = has_callback;
   new_edge->in_polymorphic_cdtor = in_polymorphic_cdtor;
 
   /* Update IPA profile.  Local profiles need no updating in original.  */
diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
index 0d77b89fa301..dacf1fd2691b 100644
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -371,6 +371,11 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
 {
   gcc_checking_assert (e->inline_failed);
 
+  if(e->callback) {
+printf("skipping inline - callback\n");
+return false;
+  }
+
   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 {
   if (report)
@@ -453,11 +458,6 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
   if (!inlinable && report)
 report_inline_failed_reason (e);
 
-  if(e->callback) {
-printf("skipping inline - tried to inline: %d\n", inlinable);
-inlinable = false;
-  }
-
   return inlinable;
 }
 
diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
index 1d4311a8832b..b7a7def31b79 100644
--- a/gcc/lto-cgraph.cc
+++ b/gcc/lto-cgraph.cc
@@ -272,6 +272,8 @@ lto_output_edge (struct lto_simple_output_block *ob, struct 
cgraph_edge *edge,
   bp_pack_value (&bp, edge->speculative_id, 16);
   bp_pack_value (&bp, edge->indirect_inlining_edge, 1);
   bp_pack_value (&bp, edge->speculative, 1);
+  bp_pack_value (&bp, edge->callback, 1);
+  bp_pack_value (&bp, edge->has_callback, 1);
   bp_pack_value (&bp, edge->call_stmt_cannot_inline_p, 1);
   gcc_assert (!edge->call_stmt_cannot_inline_p
  || edge->inline_failed != CIF_BODY_NOT_AVAILABLE);
@@ -1524,6 +1526,8 @@ input_edge (class lto_input_block *ib, vec 
nodes,
 
   edge->indirect_inlining_edge = bp_unpack_value (&bp, 1);
   edge->speculative = bp_unpack_value (&bp, 1);
+  edge->callback = bp_unpack_value(&bp, 1);
+  edge->has_callback = bp_unpack_value(&bp, 1);
   edge->lto_stmt_uid = stmt_id;
   edge->speculative_id = speculative_id;
   edge->inline_failed = inline_failed;


[gcc(refs/users/omachota/heads/rtl-ssa-dce)] rtl-ssa: dce fix uid

2024-10-21 Thread Ondrej Machota via Gcc-cvs
https://gcc.gnu.org/g:cde5332b496943e584748870e65265549102077f

commit cde5332b496943e584748870e65265549102077f
Author: Ondřej Machota 
Date:   Mon Oct 21 16:54:52 2024 +0200

rtl-ssa: dce fix uid

Diff:
---
 gcc/dce.cc | 44 +++-
 1 file changed, 35 insertions(+), 9 deletions(-)

diff --git a/gcc/dce.cc b/gcc/dce.cc
index cde7d7f3c83d..716236d79c1b 100644
--- a/gcc/dce.cc
+++ b/gcc/dce.cc
@@ -1239,6 +1239,7 @@ namespace
 
 bool is_inherently_live(insn_info *insn)
 {
+  return insn->num_uses() > 0;
 }
 
 static void
@@ -1262,6 +1263,17 @@ rtl_ssa_dce_done()
 fprintf(dump_file, "\nFinished running rtl_ssa_dce\n\n");
 }
 
+static void
+rtl_ssa_dce_mark_live(insn_info *info, auto_vec worklist, sbitmap 
marked) {
+  int info_uid = info->uid();
+  bitmap_set_bit(marked, info_uid);
+  if (dump_file) {
+fprintf(dump_file, "  Adding insn %d to worklist\n", info_uid);
+  }
+
+  worklist.safe_push(info);
+}
+
 static void
 rtl_ssa_dce_mark(sbitmap marked)
 {
@@ -1279,12 +1291,19 @@ rtl_ssa_dce_mark(sbitmap marked)
 */
 // insn.defs() // UD chain - this is what I want - reach the ancestors\
  // insn.uses() // DU chain
+
+/*
+* For marking phi nodes, which don't have uid (insn->rtl() is null) by 
definition, use a dictionary and store their addresses
+* It seems that insn->uid() is unique enough
+*/
+
 if (is_inherently_live(insn))
 {
   if (dump_file)
-fprintf(dump_file, "  Adding insn %d to worklist\n", 
INSN_UID(insn->rtl()));
+fprintf(dump_file, "  Adding insn %d to worklist\n", insn->uid());
+  rtl_ssa_dce_mark_live(insn, marked);
   worklist.safe_push(insn);
-  bitmap_set_bit(marked, INSN_UID(insn->rtl()));
+  bitmap_set_bit(marked, insn->uid());
 }
 
 // if (insn->can_be_optimized () || insn->is_debug_insn ())
@@ -1301,12 +1320,13 @@ rtl_ssa_dce_mark(sbitmap marked)
 
   insn_info *parent_insn = defs[i]->insn();
 
-  if (!bitmap_bit_p(marked, INSN_UID(parent_insn->rtl(
+  int parent_insn_uid = parent_insn->uid();
+  if (!bitmap_bit_p(marked, parent_insn_uid))
   {
 if (dump_file)
-  fprintf(dump_file, "  Adding insn %d to worklist\n", 
INSN_UID(parent_insn->rtl()));
+  fprintf(dump_file, "  Adding insn %d to worklist\n", 
parent_insn_uid);
 worklist.safe_push(parent_insn);
-bitmap_set_bit(marked, INSN_UID(parent_insn->rtl()));
+bitmap_set_bit(marked, parent_insn_uid);
   }
 }
   }
@@ -1318,10 +1338,16 @@ rtl_ssa_dce_sweep(sbitmap marked)
   insn_info *next;
   for (insn_info *insn = crtl->ssa->first_insn(); insn; insn = next)
   {
-  if (!bitmap_bit_p(marked, INSN_UID(insn->rtl( {
-insn->rtl()->set_deleted();
-// delete
-  }
+if (!bitmap_bit_p(marked, insn->uid())) {
+  // rtx_insn* rtl = insn->rtl();
+  // How to delete phis?
+  // if (rtl != nullptr) {
+  //   delete_insn(rtl);
+  // }
+  // insn_change::delete_insn(insn);
+  crtl->ssa->possibly_queue_changes(insn_change::delete_insn(insn))
+  // insn->rtl()->set_deleted();
+}
   }
 }


[gcc r15-4535] RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = max -1.

2024-10-21 Thread Li Xu via Gcc-cvs
https://gcc.gnu.org/g:93b6f287814bca3d10bcf53bb64db40d77eff5d7

commit r15-4535-g93b6f287814bca3d10bcf53bb64db40d77eff5d7
Author: xuli 
Date:   Mon Oct 21 04:01:01 2024 +

RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = max -1.

form 1:
T __attribute__((noinline)) \
sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
{   \
  return (T)IMM >= y ? (T)IMM - y : 0;  \
}

Passed the rv64gcv regression test.

Change-Id: Idaa1ab41f2a5785112279ea8ee2c93236457b740
Signed-off-by: Li Xu 
gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub_imm-1_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_3.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4_1.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c | 21 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c | 23 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c | 25 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c | 20 +
 4 files changed, 89 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c
new file mode 100644
index ..6f2a493eebbe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_3.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm254_uint8_t_fmt_1:
+** li\s+[atx][0-9]+,\s*254
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 254)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c
new file mode 100644
index ..ed03c186046a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm65534_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*65536
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-2
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint16_t, 65534)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c
new file mode 100644
index ..17d8e5f0b9fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_3.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm4294967294_uint32_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*32
+** addi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*-2
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** sext\.w\s+a0,\s*a0
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint32_t, 4294967294)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c
new file mode 100644
index ..e6492190d171
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm18446744073709551614u_uint64_t_fmt_1:
+** li\s+[atx][0-9]+,\s*-2
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 18446744073709551614u)

[gcc r15-4534] Match: Support IMM=max-1 for unsigned scalar .SAT_SUB IMM form 1

2024-10-21 Thread Li Xu via Gcc-cvs
https://gcc.gnu.org/g:1dccec47ab679926521fd4c9963b63b319b56eb9

commit r15-4534-g1dccec47ab679926521fd4c9963b63b319b56eb9
Author: xuli 
Date:   Tue Oct 22 01:08:56 2024 +

Match: Support IMM=max-1 for unsigned scalar .SAT_SUB IMM form 1

This patch adds support for .SAT_SUB form 1 when one of the
operands is the immediate IMM = max - 1.

Form 1:
 #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \
 T __attribute__((noinline)) \
 sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
 {   \
   return IMM >= y ? IMM - y : 0;\
 }

Take below form 1 as example:
DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 254)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_sub_imm254_uint8_t_fmt_1 (uint8_t y)
{
  uint8_t _1;
  uint8_t _3;

  <bb 2> [local count: 1073741824]:
  if (y_2(D) != 255)
    goto <bb 3>; [66.00%]
  else
    goto <bb 4>; [34.00%]

  <bb 3> [local count: 708669600]:
  _3 = 254 - y_2(D);

  <bb 4> [local count: 1073741824]:
  # _1 = PHI <0(2), _3(3)>
  return _1;

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_sub_imm254_uint8_t_fmt_1 (uint8_t y)
{
  uint8_t _1;

  <bb 2> [local count: 1073741824]:
  _1 = .SAT_SUB (254, y_2(D)); [tail call]
  return _1;

}

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

Signed-off-by: Li Xu 

gcc/ChangeLog:

* match.pd: Support IMM=max-1.

Diff:
---
 gcc/match.pd | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index ec2038d48dc4..362bcac291fd 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3325,7 +3325,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
   && types_match (type, @0, @1
 
-/* Unsigned saturation sub with op_0 imm, case 9 (branch with gt):
+/* Unsigned saturation sub with op_0 imm, case 9 (branch with le):
SAT_U_SUB = IMM > Y  ? (IMM - Y) : 0.
  = IMM >= Y ? (IMM - Y) : 0.  */
 (match (unsigned_integer_sat_sub @0 @1)
@@ -3344,6 +3344,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (equal_p || less_than_1_p)
 
+/* The boundary condition for case 9: IMM = max -1  (branch with ne):
+   SAT_U_SUB = IMM >= Y ? (IMM - Y) : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond^ (ne @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @1))
+(with
+  {
+   unsigned precision = TYPE_PRECISION (type);
+   wide_int max = wi::mask (precision, false, precision);
+   wide_int c0 = wi::to_wide (@0);
+   wide_int c2 = wi::to_wide (@2);
+   wide_int c0_add_1 = wi::add (c0, wi::uhwi (1, precision));
+  }
+  (if (wi::eq_p (c2, max) && wi::eq_p (c0_add_1, max))
+
 /* Unsigned saturation sub with op_1 imm, case 10:
SAT_U_SUB = X > IMM  ? (X - IMM) : 0.
  = X >= IMM ? (X - IMM) : 0.  */


[gcc r15-4536] Match: Support IMM=1 for unsigned scalar .SAT_SUB IMM form 1

2024-10-21 Thread Li Xu via Gcc-cvs
https://gcc.gnu.org/g:4e65e12a9a34d76f9a43fbc7ae32875a909ac708

commit r15-4536-g4e65e12a9a34d76f9a43fbc7ae32875a909ac708
Author: xuli 
Date:   Mon Oct 21 04:08:46 2024 +

Match: Support IMM=1 for unsigned scalar .SAT_SUB IMM form 1

This patch adds support for .SAT_SUB form 1 when one of the
operands is the immediate IMM = 1.

Form 1:
 #define DEF_SAT_U_SUB_IMM_FMT_1(T, IMM) \
 T __attribute__((noinline)) \
 sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
 {   \
   return IMM >= y ? IMM - y : 0;\
 }

Take below form 1 as example:
DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 1)

Before this patch:
__attribute__((noinline))
uint8_t sat_u_sub_imm1_uint8_t_fmt_1 (uint8_t y)
{
  uint8_t _1;
  uint8_t _3;

  <bb 2> [local count: 1073741824]:
  if (y_2(D) <= 1)
    goto <bb 3>; [41.00%]
  else
    goto <bb 4>; [59.00%]

  <bb 3> [local count: 440234144]:
  _3 = y_2(D) ^ 1;

  <bb 4> [local count: 1073741824]:
  # _1 = PHI <0(2), _3(3)>
  return _1;

}

After this patch:
__attribute__((noinline))
uint8_t sat_u_sub_imm1_uint8_t_fmt_1 (uint8_t y)
{
  uint8_t _1;

;;   basic block 2, loop depth 0
;;pred:   ENTRY
  _1 = .SAT_SUB (1, y_2(D)); [tail call]
  return _1;
;;succ:   EXIT

}

The below test suites are passed for this patch:
1. The rv64gcv fully regression tests.
2. The x86 bootstrap tests.
3. The x86 fully regression tests.

Signed-off-by: Li Xu 
gcc/ChangeLog:

* match.pd: Support IMM=1.

Diff:
---
 gcc/match.pd | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index 362bcac291fd..0455dfa69937 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3360,6 +3360,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   }
   (if (wi::eq_p (c2, max) && wi::eq_p (c0_add_1, max))
 
+/* The boundary condition for case 9: IMM = 1  (branch with le):
+   SAT_U_SUB = IMM >= Y ? (IMM - Y) : 0.  */
+(match (unsigned_integer_sat_sub @0 @1)
+ (cond^ (le @1 integer_onep@0) (bit_xor @1 integer_onep@0) integer_zerop)
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type)
+ && types_match (type, @1
+
 /* Unsigned saturation sub with op_1 imm, case 10:
SAT_U_SUB = X > IMM  ? (X - IMM) : 0.
  = X >= IMM ? (X - IMM) : 0.  */


[gcc r15-4537] RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = 1.

2024-10-21 Thread Li Xu via Gcc-cvs
https://gcc.gnu.org/g:adf4ece4dc48deb1d1790efe104fa0cbcc22c0b6

commit r15-4537-gadf4ece4dc48deb1d1790efe104fa0cbcc22c0b6
Author: xuli 
Date:   Mon Oct 21 04:10:14 2024 +

RISC-V: Add testcases for unsigned .SAT_SUB form 1 with IMM = 1.

form 1:
T __attribute__((noinline)) \
sat_u_sub_imm##IMM##_##T##_fmt_1 (T y)  \
{   \
  return (T)IMM >= y ? (T)IMM - y : 0;  \
}

Passed the rv64gcv regression test.

Change-Id: I8805225b445cdbbc685f4f54a4d66c7ee8f748e1
Signed-off-by: Li Xu 
gcc/testsuite/ChangeLog:

* gcc.target/riscv/sat_u_sub_imm-1_4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-2_4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-3_4.c: New test.
* gcc.target/riscv/sat_u_sub_imm-4_2.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c | 21 
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c | 22 +
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c | 23 ++
 gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c | 20 +++
 4 files changed, 86 insertions(+)

diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c
new file mode 100644
index ..9229f3110848
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-1_4.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint8_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint8_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c
new file mode 100644
index ..db3294838901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-2_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint16_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint16_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c
new file mode 100644
index ..8073ee927fc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-3_4.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint32_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** slli\s+a0,\s*a0,\s*32
+** srli\s+a0,\s*a0,\s*32
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** sext\.w\s+a0,\s*a0
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint32_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c 
b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c
new file mode 100644
index ..9a1ec6edf657
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_sub_imm-4_2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details 
-fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_sub_imm1_uint64_t_fmt_1:
+** li\s+[atx][0-9]+,\s*1
+** sub\s+[atx][0-9]+,\s*[atx][0-9]+,\s*a0
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** addi\s+a0,\s*a0,\s*-1
+** and\s+a0,\s*a0,\s*[atx][0-9]+
+** ret
+*/
+
+DEF_SAT_U_SUB_IMM_FMT_1(uint64_t, 1)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_SUB " 2 "expand" } } */


[gcc r14-10819] libstdc++/ranges: Implement various small LWG issues

2024-10-21 Thread Patrick Palka via Libstdc++-cvs
https://gcc.gnu.org/g:07ee6874963d2f8a787ba48341a5392ee8b6ba56

commit r14-10819-g07ee6874963d2f8a787ba48341a5392ee8b6ba56
Author: Patrick Palka 
Date:   Fri Oct 4 10:01:39 2024 -0400

libstdc++/ranges: Implement various small LWG issues

This implements the following small LWG issues:

  3848. adjacent_view, adjacent_transform_view and slide_view missing base 
accessor
  3851. chunk_view::inner-iterator missing custom iter_move and iter_swap
  3947. Unexpected constraints on adjacent_transform_view::base()
  4001. iota_view should provide empty
  4012. common_view::begin/end are missing the simple-view check
  4013. lazy_split_view::outer-iterator::value_type should not provide 
default constructor
  4035. single_view should provide empty
  4053. Unary call to std::views::repeat does not decay the argument
  4054. Repeating a repeat_view should repeat the view

libstdc++-v3/ChangeLog:

* include/std/ranges (single_view::empty): Define as per LWG 4035.
(iota_view::empty): Define as per LWG 4001.
(lazy_split_view::_OuterIter::value_type): Remove default
constructor and make other constructor private as per LWG 4013.
(common_view::begin): Disable non-const overload for simple
views as per LWG 4012.
(common_view::end): Likewise.
(adjacent_view::base): Define as per LWG 3848.
(adjacent_transform_view::base): Likewise.
(chunk_view::_InnerIter::iter_move): Define as per LWG 3851.
(chunk_view::_InnerIter::iter_swap): Likewise.
(slide_view::base): Define as per LWG 3848.
(repeat_view): Adjust deduction guide as per LWG 4053.
(_Repeat::operator()): Adjust single-parameter overload as per
LWG 4054.
* testsuite/std/ranges/adaptors/adjacent/1.cc: Verify existence
of base member function.
* testsuite/std/ranges/adaptors/adjacent_transform/1.cc: Likewise.
* testsuite/std/ranges/adaptors/chunk/1.cc: Test LWG 3851 example.
* testsuite/std/ranges/adaptors/slide/1.cc: Verify existence of
base member function.
* testsuite/std/ranges/iota/iota_view.cc: Test LWG 4001 example.
* testsuite/std/ranges/repeat/1.cc: Test LWG 4053/4054 examples.

Reviewed-by: Jonathan Wakely 
(cherry picked from commit 20165d0107abd0f839f2519818b904f029f4ae55)

Diff:
---
 libstdc++-v3/include/std/ranges| 84 +++---
 .../testsuite/std/ranges/adaptors/adjacent/1.cc|  3 +
 .../std/ranges/adaptors/adjacent_transform/1.cc|  3 +
 .../testsuite/std/ranges/adaptors/chunk/1.cc   | 15 
 .../testsuite/std/ranges/adaptors/slide/1.cc   |  3 +
 .../testsuite/std/ranges/iota/iota_view.cc | 12 
 libstdc++-v3/testsuite/std/ranges/repeat/1.cc  | 23 ++
 7 files changed, 135 insertions(+), 8 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 2c8a8535d396..c94463c83e53 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -331,6 +331,12 @@ namespace ranges
   end() const noexcept
   { return data() + 1; }
 
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 4035. single_view should provide empty
+  static constexpr bool
+  empty() noexcept
+  { return false; }
+
   static constexpr size_t
   size() noexcept
   { return 1; }
@@ -691,6 +697,12 @@ namespace ranges
   end() const requires same_as<_Winc, _Bound>
   { return _Iterator{_M_bound}; }
 
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 4001. iota_view should provide empty
+  constexpr bool
+  empty() const
+  { return _M_value == _M_bound; }
+
   constexpr auto
   size() const
   requires (same_as<_Winc, _Bound> && __detail::__advanceable<_Winc>)
@@ -3350,14 +3362,17 @@ namespace views::__adaptor
  private:
_OuterIter _M_i = _OuterIter();
 
- public:
-   value_type() = default;
-
+   // _GLIBCXX_RESOLVE_LIB_DEFECTS
+   // 4013. lazy_split_view::outer-iterator::value_type should not
+   // provide default constructor
constexpr explicit
value_type(_OuterIter __i)
  : _M_i(std::move(__i))
{ }
 
+   friend _OuterIter;
+
+ public:
constexpr _InnerIter<_Const>
begin() const
{ return _InnerIter<_Const>{_M_i}; }
@@ -3949,8 +3964,10 @@ namespace views::__adaptor
   base() &&
   { return std::move(_M_base); }
 
+  // _GLIBCXX_RESOLVE_LIB_DEFECTS
+  // 4012. common_view::begin/end are missing the simple-view check
   constexpr auto
-  begin()
+  begin() requires (!__detail::__simple_view<_Vp>)
   {
if constexpr (random_access_range<_Vp> && sized_range<_Vp>)
  ret

[gcc r14-10816] libstdc++: Implement P2609R3 changes to the indirect invocability concepts

2024-10-21 Thread Patrick Palka via Libstdc++-cvs
https://gcc.gnu.org/g:3795ac860bc6f24d0ef222045dff7b2a6350a8c4

commit r14-10816-g3795ac860bc6f24d0ef222045dff7b2a6350a8c4
Author: Patrick Palka 
Date:   Thu Aug 22 09:24:11 2024 -0400

libstdc++: Implement P2609R3 changes to the indirect invocability concepts

This implements the changes of this C++23 paper as a DR against C++20.

Note that after the later P2538R1 "ADL-proof std::projected" (which we
already implement), we can't use a simple partial specialization to match
specializations of the 'projected' alias template.  So instead we identify
such specializations using a pair of distinguishing member aliases.

libstdc++-v3/ChangeLog:

* include/bits/iterator_concepts.h (__detail::__indirect_value):
Define.
(__indirect_value_t): Define as per P2609R3.
(iter_common_reference_t): Adjust as per P2609R3.
(indirectly_unary_invocable): Likewise.
(indirectly_regular_unary_invocable): Likewise.
(indirect_unary_predicate): Likewise.
(indirect_binary_predicate): Likewise.
(indirect_equivalence_relation): Likewise.
(indirect_strict_weak_order): Likewise.
(__detail::__projected::__type): Define member aliases
__projected_Iter and __projected_Proj providing the
template arguments of the current specialization.
* include/bits/version.def (ranges): Update value.
* include/bits/version.h: Regenerate.
* testsuite/24_iterators/indirect_callable/p2609r3.cc: New test.
* testsuite/std/ranges/version_c++23.cc: Update expected value
of __cpp_lib_ranges macro.

Reviewed-by: Jonathan Wakely 
(cherry picked from commit b552730faf36f1eae1dc6e73ccc93a016dec5401)

Diff:
---
 libstdc++-v3/include/bits/iterator_concepts.h  | 61 --
 libstdc++-v3/include/bits/version.def  |  2 +-
 libstdc++-v3/include/bits/version.h|  4 +-
 .../24_iterators/indirect_callable/p2609r3.cc  | 27 ++
 libstdc++-v3/testsuite/std/ranges/version_c++23.cc |  2 +-
 5 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/libstdc++-v3/include/bits/iterator_concepts.h 
b/libstdc++-v3/include/bits/iterator_concepts.h
index ce0b8a10f88f..9306b7bd194c 100644
--- a/libstdc++-v3/include/bits/iterator_concepts.h
+++ b/libstdc++-v3/include/bits/iterator_concepts.h
@@ -552,9 +552,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 concept indirectly_readable
   = __detail::__indirectly_readable_impl>;
 
+  namespace __detail
+  {
+template
+  struct __indirect_value
+  { using type = iter_value_t<_Tp>&; };
+
+// __indirect_value> is defined later.
+  } // namespace __detail
+
+  template
+using __indirect_value_t = typename __detail::__indirect_value<_Tp>::type;
+
   template
 using iter_common_reference_t
-  = common_reference_t, iter_value_t<_Tp>&>;
+  = common_reference_t, __indirect_value_t<_Tp>>;
 
   /// Requirements for writing a value into an iterator's referenced object.
   template
@@ -710,24 +722,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
 concept indirectly_unary_invocable = indirectly_readable<_Iter>
-  && copy_constructible<_Fn> && invocable<_Fn&, iter_value_t<_Iter>&>
+  && copy_constructible<_Fn> && invocable<_Fn&, __indirect_value_t<_Iter>>
   && invocable<_Fn&, iter_reference_t<_Iter>>
   && invocable<_Fn&, iter_common_reference_t<_Iter>>
-  && common_reference_with&>,
+  && common_reference_with>,
   invoke_result_t<_Fn&, iter_reference_t<_Iter>>>;
 
   template
 concept indirectly_regular_unary_invocable = indirectly_readable<_Iter>
   && copy_constructible<_Fn>
-  && regular_invocable<_Fn&, iter_value_t<_Iter>&>
+  && regular_invocable<_Fn&, __indirect_value_t<_Iter>>
   && regular_invocable<_Fn&, iter_reference_t<_Iter>>
   && regular_invocable<_Fn&, iter_common_reference_t<_Iter>>
-  && common_reference_with&>,
+  && common_reference_with>,
   invoke_result_t<_Fn&, iter_reference_t<_Iter>>>;
 
   template
 concept indirect_unary_predicate = indirectly_readable<_Iter>
-  && copy_constructible<_Fn> && predicate<_Fn&, iter_value_t<_Iter>&>
+  && copy_constructible<_Fn> && predicate<_Fn&, __indirect_value_t<_Iter>>
   && predicate<_Fn&, iter_reference_t<_Iter>>
   && predicate<_Fn&, iter_common_reference_t<_Iter>>;
 
@@ -735,9 +747,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 concept indirect_binary_predicate
   = indirectly_readable<_I1> && indirectly_readable<_I2>
   && copy_constructible<_Fn>
-  && predicate<_Fn&, iter_value_t<_I1>&, iter_value_t<_I2>&>
-  && predicate<_Fn&, iter_value_t<_I1>&, iter_reference_t<_I2>>
-  && predicate<_Fn&, iter_reference_t<_I1>, iter_value_t<_I2>&>
+  && predicate<_Fn&, __indirect_value_t

[gcc r14-10817] libstdc++: Implement P2997R1 changes to the indirect invocability concepts

2024-10-21 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:0b2f2a7e126cf8017626793446110aac892b00f6

commit r14-10817-g0b2f2a7e126cf8017626793446110aac892b00f6
Author: Patrick Palka 
Date:   Thu Aug 22 09:24:20 2024 -0400

libstdc++: Implement P2997R1 changes to the indirect invocability concepts

This implements the changes of this C++26 paper as a DR against C++20.

In passing, this patch removes the std/ranges/version_c++23.cc test, which
is now mostly obsolete after the version.def FTM refactoring, and instead
expands the __cpp_lib_ranges checks in another test so that it verifies
the exact value of the FTM on a per-language-version basis.

libstdc++-v3/ChangeLog:

* include/bits/iterator_concepts.h (indirectly_unary_invocable):
Relax as per P2997R1.
(indirectly_regular_unary_invocable): Likewise.
(indirect_unary_predicate): Likewise.
(indirect_binary_predicate): Likewise.
(indirect_equivalence_relation): Likewise.
(indirect_strict_weak_order): Likewise.
* include/bits/version.def (ranges): Update value for C++26.
* include/bits/version.h: Regenerate.
* testsuite/24_iterators/indirect_callable/p2997r1.cc: New test.
* testsuite/std/ranges/version_c++23.cc: Remove.
* testsuite/std/ranges/headers/ranges/synopsis.cc: Refine the
__cpp_lib_ranges checks.

Reviewed-by: Jonathan Wakely 
(cherry picked from commit 620232426bd83a79c81cd2be6f485834c618e920)

Diff:
---
 libstdc++-v3/include/bits/iterator_concepts.h  | 17 ++
 libstdc++-v3/include/bits/version.def  |  5 ++
 libstdc++-v3/include/bits/version.h|  7 ++-
 .../24_iterators/indirect_callable/p2997r1.cc  | 37 
 .../std/ranges/headers/ranges/synopsis.cc  |  6 +-
 libstdc++-v3/testsuite/std/ranges/version_c++23.cc | 70 --
 6 files changed, 57 insertions(+), 85 deletions(-)

diff --git a/libstdc++-v3/include/bits/iterator_concepts.h 
b/libstdc++-v3/include/bits/iterator_concepts.h
index 9306b7bd194c..d849ddc32fc2 100644
--- a/libstdc++-v3/include/bits/iterator_concepts.h
+++ b/libstdc++-v3/include/bits/iterator_concepts.h
@@ -724,7 +724,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 concept indirectly_unary_invocable = indirectly_readable<_Iter>
   && copy_constructible<_Fn> && invocable<_Fn&, __indirect_value_t<_Iter>>
   && invocable<_Fn&, iter_reference_t<_Iter>>
-  && invocable<_Fn&, iter_common_reference_t<_Iter>>
   && common_reference_with>,
   invoke_result_t<_Fn&, iter_reference_t<_Iter>>>;
 
@@ -733,15 +732,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   && copy_constructible<_Fn>
   && regular_invocable<_Fn&, __indirect_value_t<_Iter>>
   && regular_invocable<_Fn&, iter_reference_t<_Iter>>
-  && regular_invocable<_Fn&, iter_common_reference_t<_Iter>>
   && common_reference_with>,
   invoke_result_t<_Fn&, iter_reference_t<_Iter>>>;
 
   template
 concept indirect_unary_predicate = indirectly_readable<_Iter>
   && copy_constructible<_Fn> && predicate<_Fn&, __indirect_value_t<_Iter>>
-  && predicate<_Fn&, iter_reference_t<_Iter>>
-  && predicate<_Fn&, iter_common_reference_t<_Iter>>;
+  && predicate<_Fn&, iter_reference_t<_Iter>>;
 
   template
 concept indirect_binary_predicate
@@ -750,9 +747,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   && predicate<_Fn&, __indirect_value_t<_I1>, __indirect_value_t<_I2>>
   && predicate<_Fn&, __indirect_value_t<_I1>, iter_reference_t<_I2>>
   && predicate<_Fn&, iter_reference_t<_I1>, __indirect_value_t<_I2>>
-  && predicate<_Fn&, iter_reference_t<_I1>, iter_reference_t<_I2>>
-  && predicate<_Fn&, iter_common_reference_t<_I1>,
-  iter_common_reference_t<_I2>>;
+  && predicate<_Fn&, iter_reference_t<_I1>, iter_reference_t<_I2>>;
 
   template
 concept indirect_equivalence_relation
@@ -762,9 +757,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   && equivalence_relation<_Fn&, __indirect_value_t<_I1>, 
iter_reference_t<_I2>>
   && equivalence_relation<_Fn&, iter_reference_t<_I1>, 
__indirect_value_t<_I2>>
   && equivalence_relation<_Fn&, iter_reference_t<_I1>,
- iter_reference_t<_I2>>
-  && equivalence_relation<_Fn&, iter_common_reference_t<_I1>,
- iter_common_reference_t<_I2>>;
+ iter_reference_t<_I2>>;
 
   template
 concept indirect_strict_weak_order
@@ -773,9 +766,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   && strict_weak_order<_Fn&, __indirect_value_t<_I1>, 
__indirect_value_t<_I2>>
   && strict_weak_order<_Fn&, __indirect_value_t<_I1>, 
iter_reference_t<_I2>>
   && strict_weak_order<_Fn&, iter_reference_t<_I1>, 
__indirect_value_t<_I2>>
-  && strict_weak_order<_Fn&, iter_reference_t<_I1>, iter_reference_t<_I2>>
-  

[gcc r14-10818] libstdc++: Add some missing ranges feature-test macro tests

2024-10-21 Thread Patrick Palka via Libstdc++-cvs
https://gcc.gnu.org/g:be56fee60a62014709605af19a84a48b7aa0835a

commit r14-10818-gbe56fee60a62014709605af19a84a48b7aa0835a
Author: Patrick Palka 
Date:   Thu Aug 22 11:25:10 2024 -0400

libstdc++: Add some missing ranges feature-test macro tests

libstdc++-v3/ChangeLog:

* testsuite/25_algorithms/contains/1.cc: Verify value of
__cpp_lib_ranges_contains.
* testsuite/25_algorithms/find_last/1.cc: Verify value of
__cpp_lib_ranges_find_last.
* testsuite/25_algorithms/iota/1.cc: Verify value of
__cpp_lib_ranges_iota.

Reviewed-by: Jonathan Wakely 
(cherry picked from commit 8e0da56f18b3678beee9d2bae27e08a0e122573a)

Diff:
---
 libstdc++-v3/testsuite/25_algorithms/contains/1.cc  | 4 
 libstdc++-v3/testsuite/25_algorithms/find_last/1.cc | 4 
 libstdc++-v3/testsuite/25_algorithms/iota/1.cc  | 5 +
 3 files changed, 13 insertions(+)

diff --git a/libstdc++-v3/testsuite/25_algorithms/contains/1.cc 
b/libstdc++-v3/testsuite/25_algorithms/contains/1.cc
index 7d3fa048ef61..b44c06032e8a 100644
--- a/libstdc++-v3/testsuite/25_algorithms/contains/1.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/contains/1.cc
@@ -4,6 +4,10 @@
 #include 
 #include 
 
+#if __cpp_lib_ranges_contains != 202207L
+# error "Feature-test macro __cpp_lib_ranges_contains has wrong value in 
"
+#endif
+
 namespace ranges = std::ranges;
 
 void
diff --git a/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc 
b/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc
index 911e22887d1d..8a40bb1a6b36 100644
--- a/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/find_last/1.cc
@@ -4,6 +4,10 @@
 #include 
 #include 
 
+#if __cpp_lib_ranges_find_last != 202207L
+# error "Feature-test macro __cpp_lib_ranges_find_last has wrong value in 
"
+#endif
+
 namespace ranges = std::ranges;
 
 constexpr bool
diff --git a/libstdc++-v3/testsuite/25_algorithms/iota/1.cc 
b/libstdc++-v3/testsuite/25_algorithms/iota/1.cc
index 61bf418b4dae..ebadeee79a13 100644
--- a/libstdc++-v3/testsuite/25_algorithms/iota/1.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/iota/1.cc
@@ -1,9 +1,14 @@
 // { dg-do run { target c++23 } }
 
 #include 
+#include 
 #include 
 #include 
 
+#if __cpp_lib_ranges_iota != 202202L
+# error "Feature-test macro __cpp_lib_ranges_iota has wrong value in "
+#endif
+
 namespace ranges = std::ranges;
 
 void


[gcc r14-10820] libstdc++: Implement LWG 3664 changes to ranges::distance

2024-10-21 Thread Patrick Palka via Gcc-cvs
https://gcc.gnu.org/g:f381a217e9b6c8276bb580a22d12445ed7a7dc8c

commit r14-10820-gf381a217e9b6c8276bb580a22d12445ed7a7dc8c
Author: Patrick Palka 
Date:   Sat Oct 5 13:48:06 2024 -0400

libstdc++: Implement LWG 3664 changes to ranges::distance

libstdc++-v3/ChangeLog:

* include/bits/ranges_base.h (__distance_fn::operator()):
Adjust iterator/sentinel overloads as per LWG 3664.
* testsuite/24_iterators/range_operations/distance.cc:
Test LWG 3664 example.

Reviewed-by: Jonathan Wakely 
(cherry picked from commit 7c0d1e9f2a2f1d41d9eb755c36c871d92638c4b7)

Diff:
---
 libstdc++-v3/include/bits/ranges_base.h| 14 +++---
 .../testsuite/24_iterators/range_operations/distance.cc| 11 +++
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_base.h 
b/libstdc++-v3/include/bits/ranges_base.h
index 23c0b56ff225..67ac8db8b469 100644
--- a/libstdc++-v3/include/bits/ranges_base.h
+++ b/libstdc++-v3/include/bits/ranges_base.h
@@ -930,7 +930,9 @@ namespace ranges
 
   struct __distance_fn final
   {
-template _Sent>
+// _GLIBCXX_RESOLVE_LIB_DEFECTS
+// 3664. LWG 3392 broke std::ranges::distance(a, a+3)
+template _Sent>
   requires (!sized_sentinel_for<_Sent, _It>)
   constexpr iter_difference_t<_It>
   operator()[[nodiscard]](_It __first, _Sent __last) const
@@ -944,13 +946,11 @@ namespace ranges
return __n;
   }
 
-template _Sent>
+template> _Sent>
   [[nodiscard]]
-  constexpr iter_difference_t<_It>
-  operator()(const _It& __first, const _Sent& __last) const
-  {
-   return __last - __first;
-  }
+  constexpr iter_difference_t>
+  operator()(_It&& __first, _Sent __last) const
+  { return __last - static_cast&>(__first); }
 
 template
   [[nodiscard]]
diff --git a/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc 
b/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc
index 9a1d0c3efe83..336956936c22 100644
--- a/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc
+++ b/libstdc++-v3/testsuite/24_iterators/range_operations/distance.cc
@@ -144,6 +144,16 @@ test05()
   VERIFY( std::ranges::distance(c4) == 5 );
 }
 
+void
+test06()
+{
+  // LWG 3664 - LWG 3392 broke std::ranges::distance(a, a+3)
+  int a[] = {1, 2, 3};
+  VERIFY( std::ranges::distance(a, a+3) == 3 );
+  VERIFY( std::ranges::distance(a, a) == 0 );
+  VERIFY( std::ranges::distance(a+3, a) == -3 );
+}
+
 int
 main()
 {
@@ -152,4 +162,5 @@ main()
   test03();
   test04();
   test05();
+  test06();
 }


[gcc r13-9142] [GCC13/GCC12] Fix testcase.

2024-10-21 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:8b43518a01cbbbafe042b85a48fa09a32948380a

commit r13-9142-g8b43518a01cbbbafe042b85a48fa09a32948380a
Author: liuhongt 
Date:   Tue Oct 22 11:24:23 2024 +0800

[GCC13/GCC12] Fix testcase.

The optimization relies on other patterns which are only available in
GCC14 and above, so restore the xfail for the GCC13/12 branches.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512bw-pr103750-2.c: Add xfail for ia32.

Diff:
---
 gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c 
b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
index 3392e193222a..7303f5403ba8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
@@ -1,7 +1,8 @@
 /* PR target/103750 */
 /* { dg-do compile }  */
 /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */
-/* { dg-final { scan-assembler-not "kmov" } } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* xfail need to be fixed.  */
 
 #include 
 extern __m128i* pi128;


[gcc r12-10781] [GCC13/GCC12] Fix testcase.

2024-10-21 Thread hongtao Liu via Gcc-cvs
https://gcc.gnu.org/g:45bde60836d04cce4637b74ecadbb0aff90b832f

commit r12-10781-g45bde60836d04cce4637b74ecadbb0aff90b832f
Author: liuhongt 
Date:   Tue Oct 22 11:24:23 2024 +0800

[GCC13/GCC12] Fix testcase.

The optimization relies on other patterns which are only available in
GCC14 and above, so restore the xfail for the GCC13/12 branches.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx512bw-pr103750-2.c: Add xfail for ia32.

(cherry picked from commit 8b43518a01cbbbafe042b85a48fa09a32948380a)

Diff:
---
 gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c 
b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
index 3392e193222a..7303f5403ba8 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
@@ -1,7 +1,8 @@
 /* PR target/103750 */
 /* { dg-do compile }  */
 /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */
-/* { dg-final { scan-assembler-not "kmov" } } */
+/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
+/* xfail need to be fixed.  */
 
 #include 
 extern __m128i* pi128;


[gcc(refs/users/omachota/heads/rtl-ssa-dce)] rtl-ssa: dce fix working with sbitmap

2024-10-21 Thread Ondrej Machota via Gcc-cvs
https://gcc.gnu.org/g:d0095cfa468ae39a6b0c2e44951b2772f734a33a

commit d0095cfa468ae39a6b0c2e44951b2772f734a33a
Author: Ondřej Machota 
Date:   Tue Oct 22 08:40:34 2024 +0200

rtl-ssa: dce fix working with sbitmap

Diff:
---
 gcc/dce.cc | 107 -
 1 file changed, 77 insertions(+), 30 deletions(-)

diff --git a/gcc/dce.cc b/gcc/dce.cc
index 716236d79c1b..929cb259e6d6 100644
--- a/gcc/dce.cc
+++ b/gcc/dce.cc
@@ -1243,15 +1243,24 @@ bool is_inherently_live(insn_info *insn)
 }
 
 static void
-rtl_ssa_dce_init()
+rtl_ssa_dce_init(sbitmap &marked_rtx)
 {
   calculate_dominance_info(CDI_DOMINATORS);
   crtl->ssa = new rtl_ssa::function_info(cfun);
+
+  marked_rtx = sbitmap_alloc(get_max_uid() + 1);
+  bitmap_clear(marked_rtx);
+  if (dump_file)
+fprintf(dump_file, "Allocated `marked_rtx` with size: %d\n", get_max_uid() 
+ 1);
 }
 
 static void
-rtl_ssa_dce_done()
+rtl_ssa_dce_done(sbitmap marked_rtx)
 {
+  sbitmap_free(marked_rtx);
+  if (dump_file)
+fprintf(dump_file, "Freed `marked_rtx`\n");
+
   free_dominance_info(CDI_DOMINATORS);
   if (crtl->ssa->perform_pending_updates())
 cleanup_cfg(0);
@@ -1264,23 +1273,33 @@ rtl_ssa_dce_done()
 }
 
 static void
-rtl_ssa_dce_mark_live(insn_info *info, auto_vec worklist, sbitmap 
marked) {
+rtl_ssa_dce_mark_live(insn_info *info, vec &worklist, sbitmap 
marked_rtx)
+{
   int info_uid = info->uid();
-  bitmap_set_bit(marked, info_uid);
-  if (dump_file) {
+  if (dump_file)
+  {
 fprintf(dump_file, "  Adding insn %d to worklist\n", info_uid);
   }
+  if (info_uid < 0)
+  {
+  return;
+  }
+  bitmap_set_bit(marked_rtx, info_uid);
 
   worklist.safe_push(info);
 }
 
 static void
-rtl_ssa_dce_mark(sbitmap marked)
+rtl_ssa_dce_mark(sbitmap marked_rtx)
 {
   insn_info *next;
   auto_vec worklist;
   for (insn_info *insn = crtl->ssa->first_insn(); insn; insn = next)
   {
+if (dump_file)
+{
+  fprintf(dump_file, "Insn: %d\n", insn->uid());
+}
 next = insn->next_any_insn();
 /*
 I would like to mark visited instruction with something like plf (Pass 
local flags) as in gimple
@@ -1288,22 +1307,18 @@ rtl_ssa_dce_mark(sbitmap marked)
 This file contains some useful functions: e.g. marked_insn_p, mark_insn
 mark_insn does much more than I want now...
 It does quite a useful job. If rtl_insn is a call and it is obsolete, it 
will find call arguments.
-*/
-// insn.defs() // UD chain - this is what I want - reach the ancestors\
- // insn.uses() // DU chain
 
-/*
+insn.defs() // UD chain - this is what I want - reach the ancestors\
+insn.uses() // DU chain
+
+
 * For marking phi nodes, which don't have uid (insn->rtl() is null) by 
definition, use a dictionary and store their addresses
 * Is seems, that insn->uid() is uniq enough
 */
 
 if (is_inherently_live(insn))
 {
-  if (dump_file)
-fprintf(dump_file, "  Adding insn %d to worklist\n", insn->uid());
-  rtl_ssa_dce_mark_live(insn, marked);
-  worklist.safe_push(insn);
-  bitmap_set_bit(marked, insn->uid());
+  rtl_ssa_dce_mark_live(insn, worklist, marked_rtx);
 }
 
 // if (insn->can_be_optimized () || insn->is_debug_insn ())
@@ -1311,56 +1326,88 @@ rtl_ssa_dce_mark(sbitmap marked)
 //  worklist.safe_push (insn);
   }
 
+  if (dump_file)
+fprintf(dump_file, "Finished inherently live, marking parents\n");
   while (!worklist.is_empty())
   {
+if (dump_file)
+  fprintf(dump_file, "Brruuh; ");
 insn_info *insn = worklist.pop();
 def_array defs = insn->defs(); // array - because of phi?
+if (dump_file)
+  fprintf(dump_file, "Looking at: %d, defs: %d\n", insn->uid(), 
defs.size());
 for (size_t i = 0; i < defs.size(); i++)
 {
-
   insn_info *parent_insn = defs[i]->insn();
-
   int parent_insn_uid = parent_insn->uid();
-  if (!bitmap_bit_p(marked, parent_insn_uid))
+  if (parent_insn_uid < 0)
+  {
+continue;
+  }
+  if (dump_file)
+fprintf(dump_file, "Trying to add: %d\n", parent_insn_uid);
+  if (!bitmap_bit_p(marked_rtx, parent_insn_uid))
   {
 if (dump_file)
-  fprintf(dump_file, "  Adding insn %d to worklist\n", 
parent_insn_uid);
+  fprintf(dump_file, "  Adding insn %d to worklist - mark\n", 
parent_insn_uid);
 worklist.safe_push(parent_insn);
-bitmap_set_bit(marked, parent_insn_uid);
+if (parent_insn_uid >= 0)
+  bitmap_set_bit(marked_rtx, parent_insn_uid);
   }
 }
   }
 }
 
 static void
-rtl_ssa_dce_sweep(sbitmap marked)
+rtl_ssa_dce_sweep(sbitmap marked_rtx)
 {
   insn_info *next;
+  auto_vec to_delete;
   for (insn_info *insn = crtl->ssa->first_insn(); insn; insn = next)
   {
-if (!bitmap_bit_p(marked, insn->uid())) {
+if (dump_file)
+{
+  fprintf(dump_file, "Insn: %d\n", insn->uid());
+}
+next = insn->next_any_insn();
+if (dump_file)
+{
+  fprintf

[gcc r15-4538] testsuite: Fix typo in ext-floating19.C

2024-10-21 Thread Stefan Schulze Frielinghaus via Gcc-cvs
https://gcc.gnu.org/g:9263523b7e522e5b8c9ac70df5efc73632c19380

commit r15-4538-g9263523b7e522e5b8c9ac70df5efc73632c19380
Author: Stefan Schulze Frielinghaus 
Date:   Tue Oct 22 08:58:14 2024 +0200

testsuite: Fix typo in ext-floating19.C

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/ext-floating19.C: Fix typo for bfloat16 guard.

Diff:
---
 gcc/testsuite/g++.dg/cpp23/ext-floating19.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/cpp23/ext-floating19.C 
b/gcc/testsuite/g++.dg/cpp23/ext-floating19.C
index dfbedb986990..a79f7d6e202f 100644
--- a/gcc/testsuite/g++.dg/cpp23/ext-floating19.C
+++ b/gcc/testsuite/g++.dg/cpp23/ext-floating19.C
@@ -15,6 +15,6 @@ auto x64 = 3.14f64;
 #ifdef __STDCPP_FLOAT128_T__
 auto x128 = 3.14f128;
 #endif
-#ifdef __STDCPP_FLOAT16_T__
+#ifdef __STDCPP_BFLOAT16_T__
 auto xbf = 1.2bf16;
 #endif