[gcc(refs/users/meissner/heads/work178-tar)] PR 89213: Address review comments.

2024-09-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:37ebd552056613aa6dac190fcc6e6d2b6963b3b6

commit 37ebd552056613aa6dac190fcc6e6d2b6963b3b6
Author: Michael Meissner 
Date:   Tue Sep 24 22:16:22 2024 -0400

PR 89213: Address review comments.

PR 99293: Optimize splat of a V2DF/V2DI extract with constant element

We had optimizations for splat of a vector extract for the other vector
types, but we missed having one for V2DI and V2DF.  This patch adds a
combiner insn to do this optimization.

In looking at the source, we had similar optimizations for V4SI and V4SF
extract and splats, but we missed doing V2DI/V2DF.

Without the patch for the code:

vector long long splat_dup_l_0 (vector long long v)
{
  return __builtin_vec_splats (__builtin_vec_extract (v, 0));
}

the compiler generates (on a little endian power9):

splat_dup_l_0:
mfvsrld 9,34
mtvsrdd 34,9,9
blr

Now it generates:

splat_dup_l_0:
xxpermdi 34,34,34,3
blr

PR 89213: Add better support for shifting vectors with 64-bit elements

This patch fixes PR target/89213 to allow better code to be generated to do
constant shifts of V2DI/V2DF vectors.  Previously GCC would do constant shifts
of vectors with 64-bit elements by using:

XXSPLTIB 32,4
VEXTSB2D 0,0
VSRAD 2,2,0

I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for the
64-bit shift count in one instruction.  Instead, it would need to load a byte
and then convert it to 64-bit.

With this patch, GCC now realizes that the vector shift instructions will look
at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or
XXSPLTIB instruction to load the shift count.

2024-09-17  Michael Meissner  

gcc/

PR target/89213
* config/rs6000/altivec.md (altivec__shift_const): Remove extra
()'s.

gcc/testsuite/

PR target/89213
* gcc.target/powerpc/pr89213.c: Allow running test on 32-bit.

2024-09-12  Michael Meissner  

gcc/

* config/rs6000/vsx.md (vsx_splat_extract_): New insn.

gcc/testsuite/

* gcc.target/powerpc/builtins-1.c: Adjust insn count.
* gcc.target/powerpc/pr99293.c: New test.

2024-09-12  Michael Meissner  

gcc/

PR target/89213
* config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec.
(VSHIFT_MODE): New mode iterator.
(vshift_code): New code iterator.
(vshift_attr): New code attribute.
(altivec___const): New pattern to optimize
vector long long/int shifts by a constant.
(altivec__shift_const): New helper insn to load up a
constant used by the shift operation.
* config/rs6000/predicates.md (vector_shift_constant): New
predicate.

gcc/testsuite/

PR target/89213
* gcc.target/powerpc/pr89213.c: New test.
* gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count.

Diff:
---
 gcc/config/rs6000/altivec.md |  51 +++
 gcc/config/rs6000/predicates.md  |  63 ++
 gcc/config/rs6000/vsx.md |  18 
 gcc/testsuite/gcc.target/powerpc/builtins-1.c|   2 +-
 gcc/testsuite/gcc.target/powerpc/pr89213.c   | 106 +++
 gcc/testsuite/gcc.target/powerpc/pr99293.c   |  22 +
 gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c |   4 +-
 7 files changed, 263 insertions(+), 3 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b974f6..e4576c6d0967 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -170,6 +170,7 @@
UNSPEC_VSTRIL
UNSPEC_SLDB
UNSPEC_SRDB
+   UNSPEC_VECTOR_SHIFT
 ])
 
 (define_c_enum "unspecv"
@@ -2176,6 +2177,56 @@
   "vsro %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
+;; Optimize V2DI shifts by constants.  This relies on the shift instructions
+;; only looking at the bits needed to do the shift.  This means we can use
+;; VSPLTISW or XXSPLTIB to load up the constant, and not worry about the bits
+;; that the vector shift instructions will not use.
+(define_mode_iterator VSHIFT_MODE  [(V4SI "TARGET_P9_VECTOR")
+(V2DI "TARGET_P8_VECTOR")])
+
+(define_code_iterator vshift_code  [ashift ashiftrt lshiftrt])
+(define_code_attr vshift_attr  [(ashift   "ashift")
+(ashiftrt "ashiftrt")
+(lshiftrt "lshiftrt")])
+
+(define_insn_and_split "*altivec___const"
+  [(set (mat

[gcc(refs/users/meissner/heads/work178-tar)] Update ChangeLog.*

2024-09-24 Thread Michael Meissner via Gcc-cvs
https://gcc.gnu.org/g:5655ef3b1ce3a86a3b3e6670301c4e542bbcdaa5

commit 5655ef3b1ce3a86a3b3e6670301c4e542bbcdaa5
Author: Michael Meissner 
Date:   Tue Sep 24 22:20:10 2024 -0400

Update ChangeLog.*

Diff:
---
 gcc/ChangeLog.tar | 101 ++
 1 file changed, 101 insertions(+)

diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar
index b091c795c423..dbfd18351841 100644
--- a/gcc/ChangeLog.tar
+++ b/gcc/ChangeLog.tar
@@ -1,3 +1,104 @@
+ Branch work178-tar, patch #202 from work178-bugs 

+
+PR 89213: Address review comments.
+
+2024-09-17  Michael Meissner  
+
+gcc/
+
+   PR target/89213
+   * config/rs6000/altivec.md (altivec__shift_const): Remove extra
+   ()'s.
+
+gcc/testsuite/
+
+   PR target/89213
+   * gcc.target/powerpc/pr89213.c: Allow running test on 32-bit.
+
+ Branch work178-tar, patch #201 from work178-bugs 

+
+PR 99293: Optimize splat of a V2DF/V2DI extract with constant element
+
+We had optimizations for splat of a vector extract for the other vector
+types, but we missed having one for V2DI and V2DF.  This patch adds a
+combiner insn to do this optimization.
+
+In looking at the source, we had similar optimizations for V4SI and V4SF
+extract and splats, but we missed doing V2DI/V2DF.
+
+Without the patch for the code:
+
+   vector long long splat_dup_l_0 (vector long long v)
+   {
+ return __builtin_vec_splats (__builtin_vec_extract (v, 0));
+   }
+
+the compiler generates (on a little endian power9):
+
+   splat_dup_l_0:
+   mfvsrld 9,34
+   mtvsrdd 34,9,9
+   blr
+
+Now it generates:
+
+   splat_dup_l_0:
+   xxpermdi 34,34,34,3
+   blr
+
+2024-09-12  Michael Meissner  
+
+gcc/
+
+   * config/rs6000/vsx.md (vsx_splat_extract_): New insn.
+
+gcc/testsuite/
+
+   * gcc.target/powerpc/builtins-1.c: Adjust insn count.
+   * gcc.target/powerpc/pr99293.c: New test.
+
+ Branch work178-tar, patch #200 from work178-bugs 

+
+PR 89213: Add better support for shifting vectors with 64-bit elements
+
+This patch fixes PR target/89213 to allow better code to be generated to do
+constant shifts of V2DI/V2DF vectors.  Previously GCC would do constant shifts
+of vectors with 64-bit elements by using:
+
+   XXSPLTIB 32,4
+   VEXTSB2D 0,0
+   VSRAD 2,2,0
+
+I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for the
+64-bit shift count in one instruction.  Instead, it would need to load a byte
+and then convert it to 64-bit.
+
+With this patch, GCC now realizes that the vector shift instructions will look
+at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or
+XXSPLTIB instruction to load the shift count.
+
+2024-09-12  Michael Meissner  
+
+gcc/
+
+   PR target/89213
+   * config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec.
+   (VSHIFT_MODE): New mode iterator.
+   (vshift_code): New code iterator.
+   (vshift_attr): New code attribute.
+   (altivec___const): New pattern to optimize
+   vector long long/int shifts by a constant.
+   (altivec__shift_const): New helper insn to load up a
+   constant used by the shift operation.
+   * config/rs6000/predicates.md (vector_shift_constant): New
+   predicate.
+
+gcc/testsuite/
+
+   PR target/89213
+   * gcc.target/powerpc/pr89213.c: New test.
+   * gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count.
+
  Branch work178-tar, patch #301 
 
 Remove SPR alternatives for move insns.


[gcc r15-3840] c++: Implement C++23 P2718R0 - Wording for P2644R1 Fix for Range-based for Loop [PR107637]

2024-09-24 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:650e91566561870f3d1c8d5b92e6613296ee1a8d

commit r15-3840-g650e91566561870f3d1c8d5b92e6613296ee1a8d
Author: Jakub Jelinek 
Date:   Tue Sep 24 20:19:50 2024 +0200

c++: Implement C++23 P2718R0 - Wording for P2644R1 Fix for Range-based for Loop [PR107637]

The following patch implements the C++23 P2718R0 paper
- Wording for P2644R1 Fix for Range-based for Loop.
The patch introduces a new option, -f{,no-}range-for-ext-temps so that
user can control the behavior even in older C++ versions.
The option is on by default in C++23 and later (-fno-range-for-ext-temps
is an error in that case) and in the -std=gnu++11 ... -std=gnu++20 modes
(one can use -fno-range-for-ext-temps to request previous behavior in that
case), and is not enabled by default in -std=c++11 ... -std=c++20 modes
but one can explicitly enable it with -frange-for-ext-temps.
As all the temporaries from __for_range initialization should have life
extended until the end of __for_range scope, this patch disables (for
-frange-for-ext-temps and if !processing_template_decl) CLEANUP_POINT_EXPR
wrapping of the __for_range declaration, also disables -Wdangling-reference
warning as well as the rest of extend_ref_init_temps (we know the __for_range
temporary is not TREE_STATIC and as all the temporaries from the initializer
will be life extended, we shouldn't try to handle temporaries referenced by
references any differently) and adds an extra push_stmt_list/pop_stmt_list
before cp_finish_decl of __for_range and after end of the for body and wraps
all that into CLEANUP_POINT_EXPR.
I had to repeat that also for OpenMP range loops because those are handled
differently.

2024-09-24  Jakub Jelinek  

PR c++/107637
gcc/
* omp-general.cc (find_combined_omp_for, find_nested_loop_xform):
Handle CLEANUP_POINT_EXPR like TRY_FINALLY_EXPR.
* doc/invoke.texi (frange-for-ext-temps): Document.  Add
-fconcepts to the C++ option list.
gcc/c-family/
* c.opt (frange-for-ext-temps): New option.
* c-opts.cc (c_common_post_options): Set flag_range_for_ext_temps
for C++23 or later or for C++11 or later in !flag_iso mode if
the option wasn't set by user.
* c-cppbuiltin.cc (c_cpp_builtins): Change __cpp_range_based_for
value for flag_range_for_ext_temps from 201603L to 202212L in C++17
or later.
* c-omp.cc (c_find_nested_loop_xform_r): Handle CLEANUP_POINT_EXPR
like TRY_FINALLY_EXPR.
gcc/cp/
* cp-tree.h: Implement C++23 P2718R0 - Wording for P2644R1 Fix for
Range-based for Loop.
(cp_convert_omp_range_for): Add bool tmpl_p argument.
(find_range_for_decls): Declare.
* parser.cc (cp_convert_range_for): For flag_range_for_ext_temps call
push_stmt_list () before cp_finish_decl for range_temp and save it
temporarily to FOR_INIT_STMT.
(cp_convert_omp_range_for): Add tmpl_p argument.  If set, remember
DECL_NAME of range_temp and for cp_finish_decl call restore it before
clearing it again, if unset, don't adjust DECL_NAME of range_temp at
all.
(cp_parser_omp_loop_nest): For flag_range_for_ext_temps range for add
CLEANUP_POINT_EXPR around sl.  Call find_range_for_decls and adjust
DECL_NAMEs for range fors if not processing_template_decl.  Adjust
cp_convert_omp_range_for caller.  Remove superfluous backslash at the
end of line.
* decl.cc (initialize_local_var): For flag_range_for_ext_temps
temporarily clear stmts_are_full_exprs_p rather than set for
for_range__identifier decls.
* call.cc (extend_ref_init_temps): For flag_range_for_ext_temps return
init early for for_range__identifier decls.
* semantics.cc (find_range_for_decls): New function.
(finish_for_stmt): Use it.  For flag_range_for_ext_temps if
cp_convert_range_for set FOR_INIT_STMT, pop_stmt_list it and wrap
into CLEANUP_POINT_EXPR.
* pt.cc (tsubst_omp_for_iterator): Adjust tsubst_omp_for_iterator
caller.
(tsubst_stmt) : For flag_range_for_ext_temps if there
are any range fors in the loop nest, add push_stmt_list starting
before the initializations, pop_stmt_list it after the body and wrap
into CLEANUP_POINT_EXPR.  Change DECL_NAME of range for temps from
NULL to for_range_identifier.
gcc/testsuite/
* g++.dg/cpp23/range-for1.C: New test.
* g++.dg/cpp23/range-for2.C: New test.
* g++.dg/cpp23/range-for3.C: New test.
* g++.dg/cpp23/range-for4.C: New test.
* g++.dg/cpp23/range-for5.C: New t

[gcc r15-3839] libgcc, Darwin: Drop the legacy library build for macOS >= 15 [PR116809].

2024-09-24 Thread Iain D Sandoe via Gcc-cvs
https://gcc.gnu.org/g:d9cafa0c4f0a81304d9b95a78ccc8e9003c6d7a3

commit r15-3839-gd9cafa0c4f0a81304d9b95a78ccc8e9003c6d7a3
Author: Iain Sandoe 
Date:   Sun Sep 22 11:43:32 2024 +0100

libgcc, Darwin: Drop the legacy library build for macOS >= 15 [PR116809].

We have been building a legacy libgcc_s.1 DSO to support code that
was built with older compilers.

From macOS 15,  the unwinder no longer exports some of the symbols used
in that library which (a) causes bootstrap to fail and (b) means that the
legacy library is no longer useful.

No open branch of GCC emits references to this library - and any
already-built code that depends on the symbols would need rework anyway.

PR target/116809

libgcc/ChangeLog:

* config.host: Build legacy libgcc_s.1 on hosts before macOS 15.
* config/i386/t-darwin: Remove reference to legacy libgcc_s.1
* config/rs6000/t-darwin: Likewise.
* config/t-darwin-libgccs1: New file.

Signed-off-by: Iain Sandoe 

Diff:
---
 libgcc/config.host  | 11 +++
 libgcc/config/i386/t-darwin |  3 ---
 libgcc/config/rs6000/t-darwin   |  3 ---
 libgcc/config/t-darwin-libgccs1 |  3 +++
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index 5c6b656531ff..00bd6384c0f9 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -239,22 +239,25 @@ case ${host} in
   esac
   tmake_file="$tmake_file t-slibgcc-darwin"
   case ${host} in
+x86_64-*-darwin2[0-3]*)
+  tmake_file="t-darwin-min-11 t-darwin-libgccs1 $tmake_file"
+  ;;
 *-*-darwin2*)
   tmake_file="t-darwin-min-11 $tmake_file"
   ;;
 *-*-darwin1[89]*)
-  tmake_file="t-darwin-min-8 $tmake_file"
+  tmake_file="t-darwin-min-8 t-darwin-libgccs1 $tmake_file"
   ;;
 *-*-darwin9* | *-*-darwin1[0-7]*)
-  tmake_file="t-darwin-min-5 $tmake_file"
+  tmake_file="t-darwin-min-5 t-darwin-libgccs1 $tmake_file"
   ;;
 *-*-darwin[4-8]*)
-  tmake_file="t-darwin-min-1 $tmake_file"
+  tmake_file="t-darwin-min-1 t-darwin-libgccs1 $tmake_file"
   ;;
 *)
   # Fall back to configuring for the oldest system known to work with
   # all archs and the current sources.
-  tmake_file="t-darwin-min-5 $tmake_file"
+  tmake_file="t-darwin-min-5 t-darwin-libgccs1 $tmake_file"
   echo "Warning: libgcc configured to support macOS 10.5" 1>&2
   ;;
   esac
diff --git a/libgcc/config/i386/t-darwin b/libgcc/config/i386/t-darwin
index 4c18da1efbfd..c6b3acaaca28 100644
--- a/libgcc/config/i386/t-darwin
+++ b/libgcc/config/i386/t-darwin
@@ -4,6 +4,3 @@ LIB2FUNCS_EXCLUDE = _fixtfdi _fixunstfdi _floatditf _floatunditf
 
 # Extra symbols for this port.
 SHLIB_MAPFILES += $(srcdir)/config/i386/libgcc-darwin.ver
-
-# Build a legacy libgcc_s.1
-BUILD_LIBGCCS1 = YES
diff --git a/libgcc/config/rs6000/t-darwin b/libgcc/config/rs6000/t-darwin
index 183d0df92ce9..8b513bdb1d78 100644
--- a/libgcc/config/rs6000/t-darwin
+++ b/libgcc/config/rs6000/t-darwin
@@ -56,6 +56,3 @@ unwind-dw2_s.o: HOST_LIBGCC2_CFLAGS += -maltivec
 unwind-dw2.o: HOST_LIBGCC2_CFLAGS += -maltivec
 
 LIB2ADDEH += $(srcdir)/config/rs6000/darwin-fallback.c
-
-# Build a legacy libgcc_s.1
-BUILD_LIBGCCS1 = YES
diff --git a/libgcc/config/t-darwin-libgccs1 b/libgcc/config/t-darwin-libgccs1
new file mode 100644
index ..b88b1a5bba8a
--- /dev/null
+++ b/libgcc/config/t-darwin-libgccs1
@@ -0,0 +1,3 @@
+
+# Build a legacy libgcc_s.1
+BUILD_LIBGCCS1 = YES


[gcc r15-3838] i386: Fix comment typo

2024-09-24 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:dab450021569811402e02917d7dc1f05fb4626c7

commit r15-3838-gdab450021569811402e02917d7dc1f05fb4626c7
Author: Jakub Jelinek 
Date:   Tue Sep 24 19:00:38 2024 +0200

i386: Fix comment typo

Found a comment typo, fixed as obvious.

2024-09-24  Jakub Jelinek  

* config/i386/i386-expand.cc (ix86_expand_round_builtin): Fix comment
typo, insead -> instead.

Diff:
---
 gcc/config/i386/i386-expand.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 124cb976ec87..39ee9b8662ad 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12748,7 +12748,7 @@ ix86_expand_round_builtin (const struct 
builtin_description *d,
  /* Skip erasing embedded rounding for below expanders who
 generates multiple insns.  In ix86_erase_embedded_rounding
 the pattern will be transformed to a single set, and emit_insn
-appends the set insead of insert it to chain.  So the insns
+appends the set instead of insert it to chain.  So the insns
 emitted inside define_expander would be ignored.  */
  switch (icode)
{


[gcc r15-3843] options: Regenerate c.opt.urls

2024-09-24 Thread Jakub Jelinek via Gcc-cvs
https://gcc.gnu.org/g:1762b7f89eb9d8a1f150ab294344e945c0870399

commit r15-3843-g1762b7f89eb9d8a1f150ab294344e945c0870399
Author: Jakub Jelinek 
Date:   Tue Sep 24 22:21:26 2024 +0200

options: Regenerate c.opt.urls

Forgot to regenerate URLs for the C++23 P2718R0 patch.

2024-09-24  Jakub Jelinek  

* c.opt.urls: Regenerate.

Diff:
---
 gcc/c-family/c.opt.urls | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls
index 2f1e9f95271b..084dfd297c5a 100644
--- a/gcc/c-family/c.opt.urls
+++ b/gcc/c-family/c.opt.urls
@@ -1268,6 +1268,9 @@ 
UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-fno-pretty-templates)
 fprintf-return-value
 UrlSuffix(gcc/Optimize-Options.html#index-fno-printf-return-value)
 
+frange-for-ext-temps
+UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-frange-for-ext-temps)
+
 freplace-objc-classes
 
UrlSuffix(gcc/Objective-C-and-Objective-C_002b_002b-Dialect-Options.html#index-freplace-objc-classes)


[gcc r15-3847] OpenMP: Fix testsuite failure on x86 with -m32

2024-09-24 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:6935bddd8f90dde6009a1b8dea9745788ceeefb1

commit r15-3847-g6935bddd8f90dde6009a1b8dea9745788ceeefb1
Author: Sandra Loosemore 
Date:   Wed Sep 25 02:59:53 2024 +

OpenMP: Fix testsuite failure on x86 with -m32

The testcase declare-variant-duplicates.c added in commit
96246bff0bcd9e5cdec9e6cf811ee3db4997f6d4 failed on 32-bit x86
because on that target "i386" is defined as a preprocessor macro
and cannot be used as an identifier.  Fixed by rewriting that test
not to do that.

gcc/testsuite/ChangeLog
* c-c++-common/gomp/declare-variant-duplicates.c: Avoid using
"i386" as an identifier.

Diff:
---
 gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c 
b/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c
index 47d34fc52e2f..9f319c724492 100644
--- a/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c
+++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c
@@ -8,6 +8,6 @@ extern int f4 (int);
 
 #pragma omp declare variant (f1) match (device={kind(cpu,gpu,"cpu")})  /* { 
dg-error "trait-property .cpu. specified more than once" } */
 #pragma omp declare variant (f2) match (device={isa(sse4,"avx",avx)})  /* { 
dg-error "trait-property .avx. specified more than once" } */
-#pragma omp declare variant (f3) match 
(device={arch(x86_64,i386,aarch64,"i386")})  /* { dg-error "trait-property 
.i386. specified more than once" } */
+#pragma omp declare variant (f3) match 
(device={arch(x86_64,"i386",aarch64,"x86_64")})  /* { dg-error "trait-property 
.x86_64. specified more than once" } */
 #pragma omp declare variant (f4) match 
(implementation={vendor(llvm,gnu,"arm",gnu)})  /* { dg-error "trait-property 
.gnu. specified more than once" } */
 int f (int);


[gcc r15-3848] i386: Update the comment for mapxf option

2024-09-24 Thread Kong Lingling via Gcc-cvs
https://gcc.gnu.org/g:742d242fad997142f32a8ec5a40d78d8af4871ca

commit r15-3848-g742d242fad997142f32a8ec5a40d78d8af4871ca
Author: Lingling Kong 
Date:   Wed Sep 25 11:18:44 2024 +0800

i386: Update the comment for mapxf option

gcc/ChangeLog:

* config/i386/i386.opt: Update the features included in apxf.

Diff:
---
 gcc/config/i386/i386.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index fe16e44a4eab..64c295d344cf 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1313,7 +1313,7 @@ Enable vectorization for scatter instruction.
 mapxf
 Target Mask(ISA2_APX_F) Var(ix86_isa_flags2) Save
 Support code generation for APX features, including EGPR, PUSH2POP2,
-NDD and PPX.
+NDD, PPX, NF, CCMP and ZU.
 
 mapx-features=
 Target Undocumented Joined Enum(apx_features) EnumSet Var(ix86_apx_features) 
Init(apx_none) Save


[gcc/devel/sh-lra] SH: Try to reduce R0 live ranges

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:8ee643e50957904d75affece056a6dd84de343d6

commit 8ee643e50957904d75affece056a6dd84de343d6
Author: Kaz Kojima 
Date:   Fri Sep 20 18:15:30 2024 +0900

SH: Try to reduce R0 live ranges

Some move or extend patterns will make long R0 live ranges and could
confuse LRA.

gcc/ChangeLog:
* config/sh/sh-protos.h
(sh_satisfies_constraint_Sid_subreg_index): Declare.
* config/sh/sh.cc (sh_satisfies_constraint_Sid_subreg_index):
New function.
* config/sh/sh.md (extendsi2_short_mem_disp_z,
*mov_store_mem_index, mov_store_mem_index):
New insn and insn_and_split patterns.
(extendsi2, mov): Use them for LRA.

Diff:
---
 gcc/config/sh/sh-protos.h |  1 +
 gcc/config/sh/sh.cc   | 12 +++
 gcc/config/sh/sh.md   | 90 ++-
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index b151a7c8fccc..5e5bd0aff7e7 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -61,6 +61,7 @@ extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
 extern bool nonpic_symbol_mentioned_p (rtx);
 extern void output_pic_addr_const (FILE *, rtx);
 extern bool expand_block_move (rtx *);
+extern bool sh_satisfies_constraint_Sid_subreg_index (rtx);
 extern void prepare_move_operands (rtx[], machine_mode mode);
 extern bool sh_expand_cmpstr (rtx *);
 extern bool sh_expand_cmpnstr (rtx *);
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 7391b8df5830..c9222c3e6ac0 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -1577,6 +1577,18 @@ sh_encode_section_info (tree decl, rtx rtl, int first)
 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
 }
 
+/* Test Sid constraint with subreg index.  See also the comment in
+   prepare_move_operands.  */
+bool
+sh_satisfies_constraint_Sid_subreg_index (rtx op)
+{
+  return ((GET_CODE (op) == MEM)
+ && ((GET_CODE (XEXP (op, 0)) == PLUS)
+ && ((GET_CODE (XEXP (XEXP (op, 0), 0)) == REG)
+ && ((GET_CODE (XEXP (XEXP (op, 0), 1)) == SUBREG)
+ && (GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == 
REG);
+}
+
 /* Prepare operands for a move define_expand; specifically, one of the
operands must be in a register.  */
 void
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 7eee12ca6b8a..6d93f5cb816b 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -4801,7 +4801,38 @@
 
 (define_expand "extendsi2"
   [(set (match_operand:SI 0 "arith_reg_dest")
-   (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))])
+   (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]
+  ""
+{
+  /* When the displacement addressing is used, RA will assign r0 to
+   the pseudo register operand for the QI/HImode load.  See
+   the comment in sh.cc:prepare_move_operand and PR target/55212.  */
+  if (! lra_in_progress && ! reload_completed
+  && sh_lra_p ()
+  && ! TARGET_SH2A
+  && arith_reg_dest (operands[0], mode)
+  && short_displacement_mem_operand (operands[1], mode))
+{
+  emit_insn (gen_extendsi2_short_mem_disp_z (operands[0],
+   
 operands[1]));
+  DONE;
+}
+})
+
+(define_insn_and_split "extendsi2_short_mem_disp_z"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+   (sign_extend:SI
+   (match_operand:QIHI 1 "short_displacement_mem_operand" "m")))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 2) (sign_extend:SI (match_dup  1)))
+   (set (match_dup 0) (match_dup 2))]
+{
+  operands[2] = gen_rtx_REG (SImode, R0_REG);
+}
+  [(set_attr "type" "load")])
 
 (define_insn_and_split "*extendsi2_compact_reg"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
@@ -5343,9 +5374,50 @@
 operands[1] = gen_lowpart (mode, reg);
 }
 
+  if (! lra_in_progress && ! reload_completed
+  && sh_lra_p ()
+  && ! TARGET_SH2A
+  && arith_reg_operand (operands[1], mode)
+  && (satisfies_constraint_Sid (operands[0])
+  || sh_satisfies_constraint_Sid_subreg_index (operands[0])))
+{
+  rtx adr = XEXP (operands[0], 0);
+  rtx base = XEXP (adr, 0);
+  rtx idx = XEXP (adr, 1);
+  emit_insn (gen_mov_store_mem_index (base, idx,
+   
  operands[1]));
+  DONE;
+}
+
   prepare_move_operands (operands, mode);
 })
 
+(define_insn "*mov_store_mem_index"
+  [(set (mem:QIHI
+   (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+  (match_operand:SI 1 "arith_reg_operand" "z")))
+  (match_operand:QIHI 2 "arith_reg_operan

[gcc] Created branch 'devel/sh-lra'

2024-09-24 Thread Oleg Endo via Gcc-cvs
The branch 'devel/sh-lra' was created pointing to:

 09210f927265... SH: enable LRA by default


[gcc/devel/sh-lra] SH: pin input args to hard-regs via predicates for sfuncs

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:4d28c5b1eb15cff97f07982c73770485e6eaa986

commit 4d28c5b1eb15cff97f07982c73770485e6eaa986
Author: Kaz Kojima 
Date:   Tue Sep 24 18:26:42 2024 +0900

SH: pin input args to hard-regs via predicates for sfuncs

Some sfuncs use a hard reg as input and clobber it with a raw reg pattern.
It seems that LRA doesn't process this clobber pattern.  Rewrite these
patterns so as to work with LRA.

gcc/ChangeLog:
* config/sh/predicates.md (hard_reg_r4, hard_reg_r5,
hard_reg_r6): New predicates.
* config/sh/sh.md (udivsi3_i4, udivsi3_i4_single,
udivsi3_i1): Rewrite with match_operand and match_dup.
(block_lump_real, block_lump_real_i4): Ditto.
(udivsi3): Adjust for it.
* config/sh/sh-mem.cc (expand_block_move): Ditto.

Diff:
---
 gcc/config/sh/predicates.md |  19 +
 gcc/config/sh/sh-mem.cc |   4 +-
 gcc/config/sh/sh.md | 101 +++-
 3 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index 3732cec9608b..b10af71c280e 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -818,3 +818,22 @@
 
   return false;
 })
+
+;; Predicates for the arguments of sfunc R4, R5 and R6.
+(define_predicate "hard_reg_r4"
+  (match_code "reg")
+{
+  return REGNO (op) == R4_REG;
+})
+
+(define_predicate "hard_reg_r5"
+  (match_code "reg")
+{
+  return REGNO (op) == R5_REG;
+})
+
+(define_predicate "hard_reg_r6"
+  (match_code "reg")
+{
+  return REGNO (op) == R6_REG;
+})
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index e22419912d6f..751c826e84fb 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -134,7 +134,7 @@ expand_block_move (rtx *operands)
 
  int dwords = bytes >> 3;
  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
- emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab, r4, r5, r6));
  return true;
}
   else
@@ -178,7 +178,7 @@ expand_block_move (rtx *operands)
   final_switch = 16 - ((bytes / 4) % 16);
   while_loop = ((bytes / 4) / 16 - 1) * 16;
   emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
-  emit_insn (gen_block_lump_real (func_addr_rtx, lab));
+  emit_insn (gen_block_lump_real (func_addr_rtx, lab, r4, r5, r6));
   return true;
 }
 
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index c69eda36885f..451ae0b76891 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -2194,13 +2194,24 @@
 ;; there is nothing to prevent reload from using r0 to reload the address.
 ;; This reload would clobber the value in r0 we are trying to store.
 ;; If we let reload allocate r0, then this problem can never happen.
+;;
+;; In addition to that, we also must pin the input regs to hard-regs via the
+;; predicates.  When these insns are instantiated it also emits the
+;; accompanying mov insns to load the hard-regs.  However, subsequent RTL
+;; passes might move things around and reassign the operands to pseudo regs
+;; which might get allocated to different (wrong) hard-regs eventually.  To
+;; avoid that, only allow matching these insns if the operands are the
+;; expected hard-regs.
 (define_insn "udivsi3_i1"
   [(set (match_operand:SI 0 "register_operand" "=z,z")
-   (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r")
+(match_operand:SI 4 "hard_reg_r5" "=r,r")))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R1_REG))
-   (clobber (reg:SI R4_REG))
+   (clobber (match_dup 3))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
(use (match_operand:SI 1 "arith_reg_operand" "r,r"))
(use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
@@ -2212,7 +2223,8 @@
 
 (define_insn "udivsi3_i4"
   [(set (match_operand:SI 0 "register_operand" "=y,y")
-   (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r")
+(match_operand:SI 4 "hard_reg_r5" "=r,r")))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
@@ -2220,9 +2232,11 @@
(clobber (reg:DF DR4_REG))
(clobber (reg:SI R0_REG))
(clobber (reg:SI R1_REG))
-   (clobber (reg:SI R4_REG))
-   (clobber (reg:SI R5_REG))
+   (clobber (match_dup 3))
+   (clobber (match_dup 4))
(clobber (reg:SI FPSCR_STAT_REG))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
(use (match_operand:SI 1 "arith_reg_operand" "r,r"))
(use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
@@ -2236,7 +2250,8 @@
 
 (define_insn "udivsi3_i4_single"
   [(set (match_operand:SI 0 "register_operand" "=y,y")
-   (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (udiv:SI (match_o

[gcc/devel/sh-lra] SH: Fix the condition to use movsf_ie_y pattern.

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:eee3b80ab6217ab792f01280bc6240c55468600a

commit eee3b80ab6217ab792f01280bc6240c55468600a
Author: Kaz Kojima 
Date:   Tue Sep 24 18:35:46 2024 +0900

SH: Fix the condition to use movsf_ie_y pattern.

gcc/ChangeLog:
* config/sh/sh.cc (sh_movsf_ie_y_split_p): Take the subreg
of DImode into account.

Diff:
---
 gcc/config/sh/sh.cc | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index b2ba7488c5de..d9e7e67cba65 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -11462,11 +11462,15 @@ sh_movsf_ie_y_split_p (rtx op0, rtx op1)
 {
   /* f, r */
   if (REG_P (op0)
-  && (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == SImode))
+  && (SUBREG_P (op1)
+ && (GET_MODE (SUBREG_REG (op1)) == SImode
+ || GET_MODE (SUBREG_REG (op1)) == DImode)))
 return true;
   /* r, f */
   if (REG_P (op1)
-  && (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == SImode))
+  && (SUBREG_P (op0)
+ && (GET_MODE (SUBREG_REG (op0)) == SImode
+ || GET_MODE (SUBREG_REG (op0)) == DImode)))
 return true;
 
   return false;


[gcc/devel/sh-lra] SH: A test case for the wrong-code with -mlra PR55212 c#248.

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:c5a639c84c15adf402d5b664184628ff809a5bb0

commit c5a639c84c15adf402d5b664184628ff809a5bb0
Author: Kaz Kojima 
Date:   Fri Sep 20 18:36:22 2024 +0900

SH: A test case for the wrong-code with -mlra PR55212 c#248.

gcc/testsuite/ChangeLog:
* gcc.target/sh/pr55212-c248.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/sh/pr55212-c248.c | 31 ++
 1 file changed, 31 insertions(+)

diff --git a/gcc/testsuite/gcc.target/sh/pr55212-c248.c 
b/gcc/testsuite/gcc.target/sh/pr55212-c248.c
new file mode 100644
index ..94fd6afaab3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sh/pr55212-c248.c
@@ -0,0 +1,31 @@
+/* { dg-do run }  */
+/* { dg-options "-O2 -m4 -mlra -ffixed-r7 -ffixed-r8 -ffixed-r9 -ffixed-r10 
-ffixed-r11 -ffixed-r12 -ffixed-r13" } */
+#include 
+#include 
+
+typedef struct { int c[64]; } obj;
+obj obj0;
+obj obj1;
+
+void __attribute__ ((noinline))
+bar (int a, int b, int c, int d, obj *q)
+{
+  if (q->c[0] != 0x12345678 || q->c[1] != 0xdeadbeef) 
+abort ();
+}
+
+void foo (obj *p)
+{
+  obj bobj;
+  bobj = *p;
+  bar (0, 0, 0, 0, &bobj);
+}
+
+int
+main ()
+{
+  obj0.c[0] = 0x12345678;
+  obj0.c[1] = 0xdeadbeef;
+  foo (&obj0);
+  exit (0);
+}


[gcc/devel/sh-lra] LRA: Take scratch as implicit unused output reloads

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:dbd192caf14916d0869c7123e03ad21ef7bfd65f

commit dbd192caf14916d0869c7123e03ad21ef7bfd65f
Author: Alexandre Oliva 
Date:   Fri Sep 20 18:22:12 2024 +0900

LRA: Take scratch as implicit unused output reloads

gcc/ChangeLog:
* lra-constraints.cc (match_reload, process_alt_operands,
curr_insn_transform): Take scratch as implicit unused
output reloads.

Diff:
---
 gcc/lra-constraints.cc | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index fdcc07764a2e..4bc40ef532ba 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -1213,7 +1213,9 @@ match_reload (signed char out, signed char *ins, signed 
char *outs,
 return;
   /* See a comment for the input operand above.  */
   narrow_reload_pseudo_class (out_rtx, goal_class);
-  if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX)
+  if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX
+  && !ira_former_scratch_p (REGNO (SUBREG_P (out_rtx)
+  ? SUBREG_REG (out_rtx) : out_rtx)))
 {
   reg = SUBREG_P (out_rtx) ? SUBREG_REG (out_rtx) : out_rtx;
   start_sequence ();
@@ -2946,7 +2948,8 @@ process_alt_operands (int only_alternative)
 objects with a REG_UNUSED note.  */
  if ((curr_static_id->operand[nop].type != OP_IN
   && no_output_reloads_p
-  && ! find_reg_note (curr_insn, REG_UNUSED, op))
+  && ! find_reg_note (curr_insn, REG_UNUSED, op)
+  && ! ira_former_scratch_p (REGNO (operand_reg[nop])))
  || (curr_static_id->operand[nop].type != OP_OUT
  && no_input_reloads_p && ! const_to_mem)
  || (this_alternative_matches >= 0
@@ -2956,7 +2959,9 @@ process_alt_operands (int only_alternative)
  [this_alternative_matches].type != OP_IN)
  && ! find_reg_note (curr_insn, REG_UNUSED,
  no_subreg_reg_operand
- 
[this_alternative_matches])
+ [this_alternative_matches])
+ && ! (ira_former_scratch_p
+   (REGNO (operand_reg[nop])))
{
  if (lra_dump_file != NULL)
fprintf
@@ -4744,7 +4749,8 @@ curr_insn_transform (bool check_only_p)
  if (type != OP_IN
  && find_reg_note (curr_insn, REG_UNUSED, old) == NULL_RTX
  /* OLD can be an equivalent constant here.  */
- && !CONSTANT_P (old))
+ && !CONSTANT_P (old)
+ && !ira_former_scratch_p (REGNO (old)))
{
  start_sequence ();
  lra_emit_move (type == OP_INOUT ? copy_rtx (old) : old, new_reg);


[gcc/devel/sh-lra] SH: try to workaround fp-reg related move insns

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:b924a79f5d3a8d32e4e65e62b63dc4432076af1d

commit b924a79f5d3a8d32e4e65e62b63dc4432076af1d
Author: Kaz Kojima 
Date:   Fri Sep 20 18:17:31 2024 +0900

SH: try to workaround fp-reg related move insns

LRA will try to satisfy the constraints in match_scratch for the memory
displacements, which causes problems on this target.  To mitigate the
issue, split movsf_ie_ra into several new patterns to remove
match_scratch.  Also define a new sub-pattern of movdf for constant
loads.

gcc/ChangeLog:
* gcc/config/sh/predicates.md (pc_relative_load_operand):
New predicate.
* gcc/config/sh/sh-protos.h (sh_movsf_ie_ra_split_p): Remove.
(sh_movsf_ie_y_split_p): New proto.
* gcc/config/sh/sh.cc (sh_movsf_ie_ra_split_p): Remove.
(sh_movsf_ie_y_split_p): New function.
(broken_move): Take movsf_ie_ra into account for fldi cases.
* gcc/config/sh/sh.md (movdf_i4_F_z): New insn pattern.
(movdf): Use it.
(movsf_ie_ra): Use define_insn instead of define_insn_and_split.
(movsf_ie_F_z, movsf_ie_Q_z, movsf_ie_y): New insn pattern.
(movsf): Use new patterns.
(movsf-1):  Don't split when operands[0] or operands[1]
is fpul.
(movdf_i4_F_z+7): New splitter.

Diff:
---
 gcc/config/sh/predicates.md |  11 +
 gcc/config/sh/sh-protos.h   |   2 +-
 gcc/config/sh/sh.cc |  30 -
 gcc/config/sh/sh.md | 103 +++-
 4 files changed, 95 insertions(+), 51 deletions(-)

diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index da32329b4b54..3732cec9608b 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -485,6 +485,17 @@
 && sh_legitimate_index_p (mode, XEXP (plus0_rtx, 1), TARGET_SH2A, 
true);
 })
 
+;; Returns true if OP is a pc relative load operand.
+(define_predicate "pc_relative_load_operand"
+  (match_code "mem")
+{
+  if (GET_MODE (op) != QImode
+  && IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0)))
+return true;
+
+  return false;
+})
+
 ;; Returns true if OP is a valid source operand for a logical operation.
 (define_predicate "logical_operand"
   (and (match_code "subreg,reg,const_int")
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index 5e5bd0aff7e7..ffbe5164f08c 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -103,7 +103,7 @@ extern rtx sh_find_equiv_gbr_addr (rtx_insn* cur_insn, rtx 
mem);
 extern int sh_eval_treg_value (rtx op);
 extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op);
 extern int sh_max_mov_insn_displacement (machine_mode mode, bool 
consider_sh2a);
-extern bool sh_movsf_ie_ra_split_p (rtx, rtx, rtx);
+extern bool sh_movsf_ie_y_split_p (rtx, rtx);
 extern void sh_expand_sym_label2reg (rtx, rtx, rtx, bool);
 
 /* Result value of sh_find_set_of_reg.  */
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index c9222c3e6ac0..b2ba7488c5de 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -4832,6 +4832,7 @@ broken_move (rtx_insn *insn)
   we changed this to do a constant load.  In that case
   we don't have an r0 clobber, hence we must use fldi.  */
&& (TARGET_FMOVD
+   || sh_lra_p ()
|| (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
== SCRATCH))
&& REG_P (SET_DEST (pat))
@@ -11455,30 +11456,17 @@ sh_legitimize_address_displacement (rtx *offset1, rtx 
*offset2,
   return false;
 }
 
-/* Return true if movsf insn should be splited with an additional
-   register.  */
+/* Return true if movsf insn should be splited with fpul register.  */
 bool
-sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
+sh_movsf_ie_y_split_p (rtx op0, rtx op1)
 {
-  /* op0 == op1 */
-  if (rtx_equal_p (op0, op1))
+  /* f, r */
+  if (REG_P (op0)
+  && (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == SImode))
 return true;
-  /* fy, FQ, reg */
-  if (GET_CODE (op1) == CONST_DOUBLE
-  && ! satisfies_constraint_G (op1)
-  && ! satisfies_constraint_H (op1)
-  && REG_P (op0)
-  && REG_P (op2))
-return true;
-  /* f, r, y */
-  if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
-  && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
-  && REG_P (op2) && (REGNO (op2) == FPUL_REG))
-return true;
-  /* r, f, y */
-  if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
-  && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
-  && REG_P (op2) && (REGNO (op2) == FPUL_REG))
+  /* r, f */
+  if (REG_P (op1)
+  && (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == SImode))
 return true;
 
   return false;
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 6d93f5cb816b..c69eda36885f 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -5858,6 +5858,15 @@
   

[gcc/devel/sh-lra] SH: enable LRA by default

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:09210f927265fb4f198e904ba11297ac1e370554

commit 09210f927265fb4f198e904ba11297ac1e370554
Author: Oleg Endo 
Date:   Wed Sep 25 09:47:25 2024 +0900

SH: enable LRA by default

gcc/ChangeLog:

PR target/55212
* config/sh/sh.opt (sh_lra_flag): Init to 1.

Diff:
---
 gcc/config/sh/sh.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
index c44cfe70cb11..718dfb744ff5 100644
--- a/gcc/config/sh/sh.opt
+++ b/gcc/config/sh/sh.opt
@@ -299,5 +299,5 @@ Target Var(TARGET_FSRRA)
 Enable the use of the fsrra instruction.
 
 mlra
-Target Var(sh_lra_flag) Init(0) Save
+Target Var(sh_lra_flag) Init(1) Save
 Use LRA instead of reload (transitional).


[gcc r15-3831] tree-optimization/116819 - SLP with !STMT_VINFO_RELEVANT representative

2024-09-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:cef29936c6b6773bff1939f94fb629760725bd82

commit r15-3831-gcef29936c6b6773bff1939f94fb629760725bd82
Author: Richard Biener 
Date:   Tue Sep 24 13:47:04 2024 +0200

tree-optimization/116819 - SLP with !STMT_VINFO_RELEVANT representative

Under some circumstances we can end up picking a not relevant stmt
as representative of a SLP node.  Instead of skipping stmt analysis
and declaring success we have to either ignore relevancy throughout
the code base or fail SLP operation verification.  The following
does the latter.

PR tree-optimization/116819
* tree-vect-stmts.cc (vect_analyze_stmt): When the SLP
representative isn't relevant signal failure instead of
success.

Diff:
---
 gcc/tree-vect-stmts.cc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index c654e01a540f..1b351c5c66ec 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -13295,6 +13295,12 @@ vect_analyze_stmt (vec_info *vinfo,
   if (dump_enabled_p ())
 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
 
+ if (node)
+   return opt_result::failure_at (stmt_info->stmt,
+  "not vectorized:"
+  " irrelevant stmt as SLP node %p "
+  "representative.\n",
+  (void *)node);
   return opt_result::success ();
 }
 }


[gcc r15-3832] Widening-Mul: Fix one ICE for SAT_SUB matching operand checking

2024-09-24 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:de6fe690db32689ba5e5c6f551672a19e6cae5d4

commit r15-3832-gde6fe690db32689ba5e5c6f551672a19e6cae5d4
Author: Pan Li 
Date:   Mon Sep 23 22:37:58 2024 +0800

Widening-Mul: Fix one ICE for SAT_SUB matching operand checking

This patch fixes the following ICE seen with -O2 -m32 on x86_64.

during RTL pass: expand
JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned
int)':
JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in
expand_fn_using_insn, at internal-fn.cc:263
3 | void DequeueEvent(unsigned frame) {
  |  ^~~~
0x27b580d diagnostic_context::diagnostic_impl(rich_location*,
diagnostic_metadata const*, diagnostic_option_id, char const*,
__va_list_tag (*) [1], diagnostic_t)
???:0
0x27c4a3f internal_error(char const*, ...)
???:0
0x27b3994 fancy_abort(char const*, int, char const*)
???:0
0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int)
???:0
0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned 
int)
???:0
0xf2c87c expand_SAT_SUB(internal_fn, gcall*)
???:0

We allow operand conversions when matching SAT_SUB in match.pd, to support
the zip benchmark SAT_SUB pattern, i.e.

(convert? (minus (convert1? @0) (convert1? @1))) for the sample code below.

void test (uint16_t *x, unsigned b, unsigned n)
{
  unsigned a = 0;
  register uint16_t *p = x;

  do {
a = *--p;
*p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB
  } while (--n);
}

The SAT_SUB pattern match may also apply to the scalar sample code
below.

unsigned long long GetTimeFromFrames(int);
unsigned long long GetMicroSeconds();

void DequeueEvent(unsigned frame) {
  long long frame_time = GetTimeFromFrames(frame);
  unsigned long long current_time = GetMicroSeconds();
  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
}

Aka:

uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t);

This is a problem when compiling for ia32 or with -m32, because we only
check that the lhs type (uint32_t here) is supported by the ifn, not the
operand type (uint64_t here).  DImode is mostly disabled for 32-bit targets
like ia32 or rv32gcv, which then triggers an ICE when expanding.

The following test suites pass with this patch:
* The rv64gcv full regression test.
* The x86 bootstrap test.
* The x86 full regression test.

PR middle-end/116814

gcc/ChangeLog:

* tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Make
ifn is_supported type check based on operand instead of lhs.

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr116814-1.C: New test.

Signed-off-by: Pan Li 

Diff:
---
 gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 
 gcc/tree-ssa-math-opts.cc |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C 
b/gcc/testsuite/g++.dg/torture/pr116814-1.C
new file mode 100644
index 000000000000..dd6f29daa7c3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2" } */
+
+unsigned long long GetTimeFromFrames(int);
+unsigned long long GetMicroSeconds();
+
+void DequeueEvent(unsigned frame) {
+  long long frame_time = GetTimeFromFrames(frame);
+  unsigned long long current_time = GetMicroSeconds();
+
+  DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time);
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index d61668aacfc6..8c622514dbd9 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -4042,7 +4042,7 @@ build_saturation_binary_arith_call (gimple_stmt_iterator 
*gsi, gphi *phi,
internal_fn fn, tree lhs, tree op_0,
tree op_1)
 {
-  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
+  if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
 {
   gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
   gimple_call_set_lhs (call, lhs);


[gcc r15-3821] Testsuite, darwin: account for macOS 15

2024-09-24 Thread François-Xavier Coudert via Gcc-cvs
https://gcc.gnu.org/g:7e560ffd7562cbd1a51ae6298c515b89ebed1363

commit r15-3821-g7e560ffd7562cbd1a51ae6298c515b89ebed1363
Author: Francois-Xavier Coudert 
Date:   Tue Sep 24 09:59:56 2024 +0200

Testsuite, darwin: account for macOS 15

gcc/testsuite/ChangeLog:

* gcc.dg/darwin-minversion-link.c: Account for macOS 15.

Diff:
---
 gcc/testsuite/gcc.dg/darwin-minversion-link.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/gcc.dg/darwin-minversion-link.c 
b/gcc/testsuite/gcc.dg/darwin-minversion-link.c
index a835e9d4648a..af712a1b8963 100644
--- a/gcc/testsuite/gcc.dg/darwin-minversion-link.c
+++ b/gcc/testsuite/gcc.dg/darwin-minversion-link.c
@@ -19,6 +19,7 @@
 /* { dg-additional-options "-mmacosx-version-min=012.000.00 -DCHECK=12" { 
target *-*-darwin21* } } */
 /* { dg-additional-options "-mmacosx-version-min=013.000.00 -DCHECK=13" { 
target *-*-darwin22* } } */
 /* { dg-additional-options "-mmacosx-version-min=014.000.00 -DCHECK=14" { 
target *-*-darwin23* } } */
+/* { dg-additional-options "-mmacosx-version-min=015.000.00 -DCHECK=15" { 
target *-*-darwin24* } } */
 
 int
 main ()


[gcc r15-3824] tree-optimization/114855 - more update_ssa speedup

2024-09-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:9a795b3a5b6a0d8b4b4f38a66ab9782aabead92e

commit r15-3824-g9a795b3a5b6a0d8b4b4f38a66ab9782aabead92e
Author: Richard Biener 
Date:   Tue Sep 24 12:53:11 2024 +0200

tree-optimization/114855 - more update_ssa speedup

The following tackles another source of slow bitmap operations,
namely populating blocks_to_update.  We already have that in
tree view around PHI insertion, but the initial population is
also slow.  Unfortunately there is a conditional list-view
requirement in between, and the bitmap API doesn't allow opportunistic
switching but rejects tree -> tree or list -> list transitions.
So the following patch wraps the early population in a tree view
section with possibly one redundant tree -> list -> tree view
transition.

This cuts tree SSA incremental from 228.25s (21%) to 65.05s (7%).

PR tree-optimization/114855
* tree-into-ssa.cc (update_ssa): Use tree view for the
initial population of blocks_to_update.

Diff:
---
 gcc/tree-into-ssa.cc | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/tree-into-ssa.cc b/gcc/tree-into-ssa.cc
index 1cce9d628090..fc61d47ca777 100644
--- a/gcc/tree-into-ssa.cc
+++ b/gcc/tree-into-ssa.cc
@@ -3445,6 +3445,7 @@ update_ssa (unsigned update_flags)
   blocks_with_phis_to_rewrite = BITMAP_ALLOC (NULL);
   bitmap_tree_view (blocks_with_phis_to_rewrite);
   blocks_to_update = BITMAP_ALLOC (NULL);
+  bitmap_tree_view (blocks_to_update);
 
   insert_phi_p = (update_flags != TODO_update_ssa_no_phi);
 
@@ -3492,6 +3493,8 @@ update_ssa (unsigned update_flags)
 placement heuristics.  */
   prepare_block_for_update (start_bb, insert_phi_p);
 
+  bitmap_list_view (blocks_to_update);
+
   tree name;
 
   if (flag_checking)
@@ -3517,6 +3520,8 @@ update_ssa (unsigned update_flags)
 }
   else
 {
+  bitmap_list_view (blocks_to_update);
+
   /* Otherwise, the entry block to the region is the nearest
 common dominator for the blocks in BLOCKS.  */
   start_bb = nearest_common_dominator_for_set (CDI_DOMINATORS,


[gcc r15-3826] build: enable C++11 narrowing warnings

2024-09-24 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:2249c3b459510f307b4f241ea4b14f6557035152

commit r15-3826-g2249c3b459510f307b4f241ea4b14f6557035152
Author: Jason Merrill 
Date:   Thu Sep 19 15:50:19 2024 -0400

build: enable C++11 narrowing warnings

We've been using -Wno-narrowing since gcc 4.7, but at this point narrowing
diagnostics seem like a stable part of C++ and we should adjust.

This patch changes -Wno-narrowing to -Wno-error=narrowing so that narrowing
issues will still not break bootstrap, but we can see them.

The rest of the patch fixes the narrowing warnings I see in an
x86_64-pc-linux-gnu bootstrap.  In most cases this is done by adjusting
the types of various declarations so that we store the values in the same
types we compute them in, which seems worthwhile anyway.  This also allowed
us to remove a few -Wsign-compare casts.

gcc/ChangeLog:

* configure.ac (CXX_WARNING_OPTS): Change -Wno-narrowing
to -Wno-error=narrowing.
* configure: Regenerate.
* config/i386/i386.h (debugger_register_map)
(debugger64_register_map)
(svr4_debugger_register_map): Make unsigned.
* config/i386/i386.cc: Likewise.
* diagnostic-event-id.h (diagnostic_thread_id_t): Make int.
* vec.h (vec::size): Make unsigned int.
* ipa-modref.cc (escape_point::arg): Make unsigned.
(modref_lattice::add_escape_point): Use eaf_flags_t.
(update_escape_summary_1): Use eaf_flags_t, && for bool.
* pair-fusion.cc (pair_fusion_bb_info::track_access):
Make mem_size unsigned int.
* pretty-print.cc (format_phase_2): Cast va_arg to char.
* tree-ssa-loop-ch.cc (ch_base::copy_headers): Make nheaders
unsigned, remove cast.
* tree-ssa-structalias.cc (bitpos_of_field): Return unsigned.
(push_fields_onto_fieldstack): Make offset unsigned, remove cast.
* tree-vect-slp.cc (vect_prologue_cost_for_slp): Use nelt_limit.
* tree-vect-stmts.cc (vect_truncate_gather_scatter_offset):
Make scale unsigned.
(vectorizable_operation): Make ncopies unsigned.
* rtl-ssa/member-fns.inl: Make num_accesses unsigned int.

Diff:
---
 gcc/config/i386/i386.h  |  6 +++---
 gcc/diagnostic-event-id.h   |  2 +-
 gcc/vec.h   |  2 +-
 gcc/config/i386/i386.cc |  6 +++---
 gcc/ipa-modref.cc   | 13 +++--
 gcc/pair-fusion.cc  |  2 +-
 gcc/pretty-print.cc |  2 +-
 gcc/tree-ssa-loop-ch.cc |  6 +++---
 gcc/tree-ssa-structalias.cc | 16 
 gcc/tree-vect-slp.cc|  4 ++--
 gcc/tree-vect-stmts.cc  |  7 ---
 gcc/configure.ac|  3 +--
 gcc/rtl-ssa/member-fns.inl  |  3 ++-
 gcc/configure   |  7 +++
 14 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index c1ec92ffb150..751c250ddb31 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2091,9 +2091,9 @@ do {  
\
 #define DEBUGGER_REGNO(N) \
   (TARGET_64BIT ? debugger64_register_map[(N)] : debugger_register_map[(N)])
 
-extern int const debugger_register_map[FIRST_PSEUDO_REGISTER];
-extern int const debugger64_register_map[FIRST_PSEUDO_REGISTER];
-extern int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER];
+extern unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER];
+extern unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER];
+extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER];
 
 /* Before the prologue, RA is at 0(%esp).  */
 #define INCOMING_RETURN_ADDR_RTX \
diff --git a/gcc/diagnostic-event-id.h b/gcc/diagnostic-event-id.h
index 8237ba34df33..06985d23c124 100644
--- a/gcc/diagnostic-event-id.h
+++ b/gcc/diagnostic-event-id.h
@@ -67,6 +67,6 @@ typedef diagnostic_event_id_t *diagnostic_event_id_ptr;
 /* A type for compactly referring to a particular thread within a
diagnostic_path.  Typically there is just one thread per path,
with id 0.  */
-typedef unsigned diagnostic_thread_id_t;
+typedef int diagnostic_thread_id_t;
 
 #endif /* ! GCC_DIAGNOSTIC_EVENT_ID_H */
diff --git a/gcc/vec.h b/gcc/vec.h
index bc83827f644e..b13c4716428e 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -2409,7 +2409,7 @@ public:
   const value_type &back () const;
   const value_type &operator[] (unsigned int i) const;
 
-  size_t size () const { return m_size; }
+  unsigned size () const { return m_size; }
   size_t size_bytes () const { return m_size * sizeof (T); }
   bool empty () const { return m_size == 0; }
 
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 7dbae1d72e35..2f736a3b346e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -181,7 +181,7 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER]

[gcc r15-3827] Fortran: Allow to nullify caf token when not in ultimate component. [PR101100]

2024-09-24 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:0c0d79c783f5c289651d76aa697b48d4505e169d

commit r15-3827-g0c0d79c783f5c289651d76aa697b48d4505e169d
Author: Andre Vehreschild 
Date:   Wed Sep 18 15:55:28 2024 +0200

Fortran: Allow to nullify caf token when not in ultimate component. 
[PR101100]

gcc/fortran/ChangeLog:

PR fortran/101100

* trans-expr.cc (trans_caf_token_assign): Take caf-token from
decl for non ultimate coarray components.

gcc/testsuite/ChangeLog:

* gfortran.dg/coarray/proc_pointer_assign_1.f90: New test.

Diff:
---
 gcc/fortran/trans-expr.cc  |  8 +-
 .../gfortran.dg/coarray/proc_pointer_assign_1.f90  | 29 ++
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index 01cf3f0ff148..d0c7dfea903d 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -10359,7 +10359,13 @@ trans_caf_token_assign (gfc_se *lse, gfc_se *rse, 
gfc_expr *expr1,
   else if (lhs_attr.codimension)
 {
   lhs_tok = gfc_get_ultimate_alloc_ptr_comps_caf_token (lse, expr1);
-  lhs_tok = build_fold_indirect_ref (lhs_tok);
+  if (!lhs_tok)
+   {
+ lhs_tok = gfc_get_tree_for_caf_expr (expr1);
+ lhs_tok = GFC_TYPE_ARRAY_CAF_TOKEN (TREE_TYPE (lhs_tok));
+   }
+  else
+   lhs_tok = build_fold_indirect_ref (lhs_tok);
   tmp = build2_loc (input_location, MODIFY_EXPR, void_type_node,
lhs_tok, null_pointer_node);
   gfc_prepend_expr_to_block (&lse->post, tmp);
diff --git a/gcc/testsuite/gfortran.dg/coarray/proc_pointer_assign_1.f90 
b/gcc/testsuite/gfortran.dg/coarray/proc_pointer_assign_1.f90
new file mode 100644
index 000000000000..81f0c3b19cf1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/proc_pointer_assign_1.f90
@@ -0,0 +1,29 @@
+!{ dg-do run }
+
+! Check that PR101100 is fixed.
+
+! Contributed by G. Steinmetz  
+
+program p
+  type t
+procedure(), pointer, nopass :: f
+  end type
+
+  integer :: i = 0
+  type(t) :: x[*]
+
+  x%f => null()
+  if ( associated(x%f) ) stop 1
+
+  x%f => g
+  if (.not. associated(x%f) ) stop 2
+
+  call x%f()
+  if ( i /= 1 ) stop 3
+
+contains
+  subroutine g()
+i = 1
+  end subroutine
+end
+


[gcc r15-3829] RISC-V: Add more vector-vector extract cases.

2024-09-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:be50c763a07893416419b82538f259f43e0773d4

commit r15-3829-gbe50c763a07893416419b82538f259f43e0773d4
Author: Robin Dapp 
Date:   Tue Sep 3 17:53:34 2024 +0200

RISC-V: Add more vector-vector extract cases.

This adds a V16SI -> V4SI and related i.e. "quartering" vector-vector
extract expander for VLS modes.  It helps with spills in x264 that may
cause a load-hit-store.

gcc/ChangeLog:

* config/riscv/autovec.md (vec_extract):
Add quarter vec-vec extract.
* config/riscv/vector-iterators.md: New iterators.

Diff:
---
 gcc/config/riscv/autovec.md  |  28 ++
 gcc/config/riscv/vector-iterators.md | 184 +++
 2 files changed, 212 insertions(+)

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index a53c44659f0f..836cdd4491f6 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1488,6 +1488,34 @@
   DONE;
 })
 
+(define_expand "vec_extract"
+  [(set (match_operand:0 "nonimmediate_operand")
+ (vec_select:
+   (match_operand:VLS_HAS_QUARTER   1 "register_operand")
+   (parallel
+[(match_operand 2 "immediate_operand")])))]
+  "TARGET_VECTOR"
+{
+  int sz = GET_MODE_NUNITS (mode).to_constant ();
+  int part = INTVAL (operands[2]);
+
+  rtx start = GEN_INT (part * sz);
+  rtx tmp = operands[1];
+
+  if (part != 0)
+{
+  tmp = gen_reg_rtx (mode);
+
+  rtx ops[] = {tmp, operands[1], start};
+  riscv_vector::emit_vlmax_insn
+   (code_for_pred_slide (UNSPEC_VSLIDEDOWN, mode),
+riscv_vector::BINARY_OP, ops);
+}
+
+  emit_move_insn (operands[0], gen_lowpart (mode, tmp));
+  DONE;
+})
+
 ;; -
 ;;  [FP] Binary operations
 ;; -
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index a00b5c3feddd..43325d1ba87a 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4328,3 +4328,187 @@
   (V256DF "v128df")
   (V512DF "v256df")
 ])
+
+(define_mode_iterator VLS_HAS_QUARTER [
+  (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)")
+  (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)")
+  (V16QI "riscv_vector::vls_mode_valid_p (V16QImode)")
+  (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)")
+  (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)")
+  (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)")
+  (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)")
+  (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)")
+  (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64")
+  (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64")
+  (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && 
TARGET_MIN_VLEN >= 64")
+  (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 
&& TARGET_MIN_VLEN >= 128")
+  (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && 
TARGET_VECTOR_ELEN_FP_32")
+  (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && 
TARGET_VECTOR_ELEN_FP_32")
+  (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && 
TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
+  (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && 
TARGET_VECTOR_ELEN_FP_64")
+  (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 
&& TARGET_MIN_VLEN >= 64")
+  (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && 
TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+  (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)")
+  (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64")
+  (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 
128")
+  (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 
256")
+  (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 
512")
+  (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 
1024")
+  (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 
2048")
+  (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 
4096")
+  (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64")
+  (V64HI "riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 
128")
+  (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 
256")
+  (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 
512")
+  (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 
1024")
+  (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 
2048")
+  (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 
4096")
+  (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 
128")
+  (V64SI "riscv

[gcc r15-3828] RISC-V: Fix effective target check.

2024-09-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:e45537f56250f19cdf2ec09a744c6b11170c1001

commit r15-3828-ge45537f56250f19cdf2ec09a744c6b11170c1001
Author: Robin Dapp 
Date:   Fri Aug 30 14:35:08 2024 +0200

RISC-V: Fix effective target check.

The return value is inverted in check_effective_target_rvv_zvl256b_ok
and check_effective_target_rvv_zvl512b_ok.  Fix this and also just use
the current march.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp: Fix effective target check.

Diff:
---
 gcc/testsuite/lib/target-supports.exp | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 8f2afe866c7c..05a63c4e9a55 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -1978,15 +1978,15 @@ proc check_effective_target_riscv_v { } {
 
 proc check_effective_target_rvv_zvl256b_ok { } {
 # Check if the target has a VLENB of 32.
-set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
+set gcc_march [riscv_get_arch]
 return [check_runtime ${gcc_march}_exec {
int main()
{
  int vlenb = 0;
  asm ("csrr %0,vlenb" : "=r" (vlenb) : : );
  if (vlenb == 32)
-   return 1;
- return 0;
+   return 0;
+ return 1;
}
   } "-march=${gcc_march}"]
 }
@@ -1996,15 +1996,15 @@ proc check_effective_target_rvv_zvl256b_ok { } {
 
 proc check_effective_target_rvv_zvl512b_ok { } {
 # Check if the target has a VLENB of 64.
-set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v]
+set gcc_march [riscv_get_arch]
 return [check_runtime ${gcc_march}_exec {
int main()
{
  int vlenb = 0;
  asm ("csrr %0,vlenb" : "=r" (vlenb) : : );
  if (vlenb == 64)
-   return 1;
- return 0;
+   return 0;
+ return 1;
}
   } "-march=${gcc_march}"]
 }


[gcc r15-3830] RISC-V: testsuite: Fix SELECT_VL SLP fallout.

2024-09-24 Thread Robin Dapp via Gcc-cvs
https://gcc.gnu.org/g:4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0

commit r15-3830-g4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0
Author: Robin Dapp 
Date:   Thu Sep 19 05:08:47 2024 -0700

RISC-V: testsuite: Fix SELECT_VL SLP fallout.

This fixes asm-scan fallout from r15-3712-g5e3a4a01785e2d where we allow
SLP with SELECT_VL.

Assisted by sed and regtested on rv64gcv_zvfh_zvbb.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Expect
length-controlled loop.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_

[gcc r15-3823] Alphabetize my entry in MAINTAINER's DCO list.

2024-09-24 Thread Aldy Hernandez via Gcc-cvs
https://gcc.gnu.org/g:34366176046351250e1beb578664d926fbdd50c9

commit r15-3823-g34366176046351250e1beb578664d926fbdd50c9
Author: Aldy Hernandez 
Date:   Tue Sep 24 11:40:52 2024 +0200

Alphabetize my entry in MAINTAINER's DCO list.

ChangeLog:

* MAINTAINERS: Move my entry in DCO list into alphabetical order.

Diff:
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 3b4cf9d20d80..47b5915e9f8f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -917,6 +917,7 @@ information.
 Juergen Christ  
 Robin Dapp  
 Robin Dapp  
+Aldy Hernandez  
 Michal Jires
 Matthias Kretz  
 Prathamesh Kulkarni 
@@ -949,4 +950,3 @@ Jonathan Wakely 

 Alexander Westbrooks
 Chung-Ju Wu 
 Pengxuan Zheng  
-Aldy Hernandez  


[gcc r15-3825] Fortran: Assign allocated caf-memory to scalar members [PR84870]

2024-09-24 Thread Andre Vehreschild via Gcc-cvs
https://gcc.gnu.org/g:f5035d7d015ebd4a7f5df5831cfc1269f9567e06

commit r15-3825-gf5035d7d015ebd4a7f5df5831cfc1269f9567e06
Author: Andre Vehreschild 
Date:   Thu Sep 19 15:09:52 2024 +0200

Fortran: Assign allocated caf-memory to scalar members [PR84870]

Allocating a coarray required an array-descriptor.  For scalars a
temporary descriptor was created.  Assigning the allocated memory from
the temporary descriptor back to the scalar is now added.

gcc/fortran/ChangeLog:

PR fortran/84870

* trans-array.cc (duplicate_allocatable_coarray): For scalar
allocatable components the memory allocated is now assigned to
the component's pointer.

gcc/testsuite/ChangeLog:

* gfortran.dg/coarray/alloc_comp_10.f90: New test.

Diff:
---
 gcc/fortran/trans-array.cc |  2 ++
 .../gfortran.dg/coarray/alloc_comp_10.f90  | 24 ++
 2 files changed, 26 insertions(+)

diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc
index 7d8274ab5718..0b8ef0b5e018 100644
--- a/gcc/fortran/trans-array.cc
+++ b/gcc/fortran/trans-array.cc
@@ -9505,6 +9505,7 @@ duplicate_allocatable_coarray (tree dest, tree dest_tok, 
tree src, tree type,
  gfc_build_addr_expr (NULL_TREE, dest_tok),
  NULL_TREE, NULL_TREE, NULL_TREE,
  GFC_CAF_COARRAY_ALLOC_REGISTER_ONLY);
+  gfc_add_modify (&block, dest, gfc_conv_descriptor_data_get (dummy_desc));
   null_data = gfc_finish_block (&block);
 
   gfc_init_block (&block);
@@ -9514,6 +9515,7 @@ duplicate_allocatable_coarray (tree dest, tree dest_tok, 
tree src, tree type,
  gfc_build_addr_expr (NULL_TREE, dest_tok),
  NULL_TREE, NULL_TREE, NULL_TREE,
  GFC_CAF_COARRAY_ALLOC);
+  gfc_add_modify (&block, dest, gfc_conv_descriptor_data_get (dummy_desc));
 
   tmp = builtin_decl_explicit (BUILT_IN_MEMCPY);
   tmp = build_call_expr_loc (input_location, tmp, 3, dest, src,
diff --git a/gcc/testsuite/gfortran.dg/coarray/alloc_comp_10.f90 
b/gcc/testsuite/gfortran.dg/coarray/alloc_comp_10.f90
new file mode 100644
index 000000000000..a31d005498c1
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/coarray/alloc_comp_10.f90
@@ -0,0 +1,24 @@
+!{ dg-do run }
+
+! Check that copying of memory for allocated scalar is assigned
+! to coarray object.
+
+! Contributed by G. Steinmetz  
+
+program p
+  type t
+integer, allocatable :: a
+  end type
+  type t2
+type(t), allocatable :: b
+  end type
+  type(t2) :: x, y[*]
+
+  x%b = t(1)
+  y = x
+  y%b%a = 2
+
+  if (x%b%a /= 1) stop 1
+  if (y%b%a /= 2) stop 2
+end
+


[gcc r15-3822] OpenMP: Add support for 'self_maps' to the 'require' directive

2024-09-24 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:b752eed3e3f2f27570ea89b7c2339468698472a8

commit r15-3822-gb752eed3e3f2f27570ea89b7c2339468698472a8
Author: Tobias Burnus 
Date:   Tue Sep 24 10:53:59 2024 +0200

OpenMP: Add support for 'self_maps' to the 'require' directive

'self_maps' implies 'unified_shared_memory', except that the latter
also permits that explicit maps copy data to device memory while
self_maps does not. In GCC, currently, both are handled identically.

gcc/c/ChangeLog:

* c-parser.cc (c_parser_omp_requires): Handle self_maps clause.

gcc/cp/ChangeLog:

* parser.cc (cp_parser_omp_requires): Handle self_maps clause.

gcc/fortran/ChangeLog:

* gfortran.h (enum gfc_omp_requires_kind): Add OMP_REQ_SELF_MAPS.
(gfc_namespace): Enlarge omp_requires bitfield.
* module.cc (enum ab_attribute, attr_bits): Add 
AB_OMP_REQ_SELF_MAPS.
(mio_symbol_attribute): Handle it.
* openmp.cc (gfc_check_omp_requires, gfc_match_omp_requires): Handle
self_maps clause.
* parse.cc (gfc_parse_file): Handle self_maps clause.

gcc/ChangeLog:

* lto-cgraph.cc (output_offload_tables, omp_requires_to_name): 
Handle
self_maps clause.
* omp-general.cc (struct omp_ts_info, omp_context_selector_matches):
Likewise for the associated trait.
* omp-general.h (enum omp_requires): Add OMP_REQUIRES_SELF_MAPS.
* omp-selectors.h (enum omp_ts_code): Add
OMP_TRAIT_IMPLEMENTATION_SELF_MAPS.

include/ChangeLog:

* gomp-constants.h (GOMP_REQUIRES_SELF_MAPS): #define.

libgomp/ChangeLog:

* plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices):
Accept self_maps clause.
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices):
Likewise.
* libgomp.texi (TR13 Impl. Status): Set to 'Y'.
* target.c (gomp_requires_to_name, GOMP_offload_register_ver,
gomp_target_init): Handle self_maps clause.
* testsuite/libgomp.fortran/self_maps.f90: New test.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/declare-variant-1.c: Add self_maps test.
* c-c++-common/gomp/requires-4.c: Likewise.
* gfortran.dg/gomp/declare-variant-3.f90:  Likewise.
* c-c++-common/gomp/requires-2.c: Update dg-error msg.
* gfortran.dg/gomp/requires-2.f90: Likewise.
* gfortran.dg/gomp/requires-self-maps-aux.f90: New.
* gfortran.dg/gomp/requires-self-maps.f90: New.

Diff:
---
 gcc/c/c-parser.cc  |  3 ++
 gcc/cp/parser.cc   |  3 ++
 gcc/fortran/gfortran.h | 10 +++--
 gcc/fortran/module.cc  | 11 -
 gcc/fortran/openmp.cc  | 30 -
 gcc/fortran/parse.cc   |  3 ++
 gcc/lto-cgraph.cc  |  4 ++
 gcc/omp-general.cc | 21 ++
 gcc/omp-general.h  |  1 +
 gcc/omp-selectors.h|  1 +
 .../c-c++-common/gomp/declare-variant-1.c  |  6 +++
 gcc/testsuite/c-c++-common/gomp/requires-2.c   |  2 +-
 gcc/testsuite/c-c++-common/gomp/requires-4.c   |  1 +
 .../gfortran.dg/gomp/declare-variant-3.f90 |  3 ++
 gcc/testsuite/gfortran.dg/gomp/requires-2.f90  |  2 +-
 .../gfortran.dg/gomp/requires-self-maps-aux.f90| 18 
 .../gfortran.dg/gomp/requires-self-maps.f90| 17 
 include/gomp-constants.h   |  4 ++
 libgomp/libgomp.texi   |  2 +-
 libgomp/plugin/plugin-gcn.c|  4 +-
 libgomp/plugin/plugin-nvptx.c  |  4 +-
 libgomp/target.c   | 10 +++--
 libgomp/testsuite/libgomp.fortran/self_maps.f90| 49 ++
 23 files changed, 185 insertions(+), 24 deletions(-)

diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index aff5af174303..6a46577f5119 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -26208,6 +26208,8 @@ c_parser_omp_requires (c_parser *parser)
this_req = OMP_REQUIRES_UNIFIED_ADDRESS;
  else if (!strcmp (p, "unified_shared_memory"))
this_req = OMP_REQUIRES_UNIFIED_SHARED_MEMORY;
+ else if (!strcmp (p, "self_maps"))
+   this_req = OMP_REQUIRES_SELF_MAPS;
  else if (!strcmp (p, "dynamic_allocators"))
this_req = OMP_REQUIRES_DYNAMIC_ALLOCATORS;
  else if (!strcmp (p, "reverse_offload"))
@@ -26274,6 +26276,7 @@ c_parser_omp_requires (c_parser *parser)
{
  error_at (cloc, "expected %, "
  "%, "
+ "%,

[gcc r15-3833] Simplify range-op shift mask generation

2024-09-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:2114243c23ec5168cd54633cd4995ab123d92573

commit r15-3833-g2114243c23ec5168cd54633cd4995ab123d92573
Author: Richard Biener 
Date:   Tue Sep 24 14:23:40 2024 +0200

Simplify range-op shift mask generation

The following reduces the number of wide_ints built which show up
in the profile for PR114855 as the largest remaining bit at -O1.

* range-op.cc (operator_rshift::op1_range): Use wi::mask instead
of shift and not.

Diff:
---
 gcc/range-op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index c576f6882219..3f5cf0834402 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -2863,7 +2863,7 @@ operator_rshift::op1_range (irange &r,
   // OP1 is anything from 0011 1000 to 0011 1111.  That is, a
   // range from LHS<<3 plus a mask of the 3 bits we shifted on the
   // right hand side (0x07).
-  wide_int mask = wi::bit_not (wi::lshift (wi::minus_one (prec), shift));
+  wide_int mask = wi::mask (shift.to_uhwi (), false, prec);
   int_range_max mask_range (type,
wi::zero (TYPE_PRECISION (type)),
mask);


[gcc r15-3834] OpenMP: Check additional restrictions on context selector properties

2024-09-24 Thread Sandra Loosemore via Gcc-cvs
https://gcc.gnu.org/g:96246bff0bcd9e5cdec9e6cf811ee3db4997f6d4

commit r15-3834-g96246bff0bcd9e5cdec9e6cf811ee3db4997f6d4
Author: Sandra Loosemore 
Date:   Fri Sep 6 20:58:13 2024 +

OpenMP: Check additional restrictions on context selector properties

TR13 (pre-6.0) of the OpenMP spec says:

"Each trait-property may only be specified once in a trait selector
other than those in the construct selector set."

and

"If trait-property any is specified in the kind trait-selector of the
device selector set or the target_device selector sets, no other
trait-property may be specified in the same selector set."

These restrictions (with slightly different wording) date back to
OpenMP 5.1, but were not in 5.0 which was the basis for GCC's
implementation.

This patch adds a diagnostic, adds new testcases, and fixes some older
testcases that include now-invalid selectors.

gcc/ChangeLog
* omp-general.cc (omp_check_context_selector): Reject other
properties in the same selector set with kind(any).  Also reject
duplicate name-list properties.

gcc/testsuite/ChangeLog
* c-c++-common/gomp/declare-variant-10.c: Fix broken tests.
* c-c++-common/gomp/declare-variant-3.c: Likewise.
* c-c++-common/gomp/declare-variant-9.c: Likewise.
* c-c++-common/gomp/declare-variant-any.c: New.
* c-c++-common/gomp/declare-variant-duplicates.c: New.
* gfortran.dg/gomp/declare-variant-10.f90: Fix broken tests.
* gfortran.dg/gomp/declare-variant-3.f90: Likewise.
* gfortran.dg/gomp/declare-variant-9.f90: Likewise.
* gfortran.dg/gomp/declare-variant-any.f90: New.
* gfortran.dg/gomp/declare-variant-duplicates.f90: New.

Diff:
---
 gcc/omp-general.cc | 64 +-
 .../c-c++-common/gomp/declare-variant-10.c |  4 +-
 .../c-c++-common/gomp/declare-variant-3.c  | 10 +---
 .../c-c++-common/gomp/declare-variant-9.c  |  4 +-
 .../c-c++-common/gomp/declare-variant-any.c| 17 ++
 .../c-c++-common/gomp/declare-variant-duplicates.c | 13 +
 .../gfortran.dg/gomp/declare-variant-10.f90|  4 +-
 .../gfortran.dg/gomp/declare-variant-3.f90 | 12 +---
 .../gfortran.dg/gomp/declare-variant-9.f90 |  2 +-
 .../gfortran.dg/gomp/declare-variant-any.f90   | 40 ++
 .../gomp/declare-variant-duplicates.f90| 30 ++
 11 files changed, 176 insertions(+), 24 deletions(-)

diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc
index c93bf129e4d7..9713e684e830 100644
--- a/gcc/omp-general.cc
+++ b/gcc/omp-general.cc
@@ -1293,6 +1293,8 @@ omp_check_context_selector (location_t loc, tree ctx)
   for (tree tss = ctx; tss; tss = TREE_CHAIN (tss))
 {
   enum omp_tss_code tss_code = OMP_TSS_CODE (tss);
+  bool saw_any_prop = false;
+  bool saw_other_prop = false;
 
   /* We can parse this, but not handle it yet.  */
   if (tss_code == OMP_TRAIT_SET_TARGET_DEVICE)
@@ -1329,9 +1331,61 @@ omp_check_context_selector (location_t loc, tree ctx)
  else
ts_seen[ts_code] = true;
 
+ /* If trait-property "any" is specified in the "kind"
+trait-selector of the "device" selector set or the
+"target_device" selector sets, no other trait-property
+may be specified in the same selector set.  */
+ if (ts_code == OMP_TRAIT_DEVICE_KIND)
+   for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p))
+ {
+   const char *prop = omp_context_name_list_prop (p);
+   if (!prop)
+ continue;
+   else if (strcmp (prop, "any") == 0)
+ saw_any_prop = true;
+   else
+ saw_other_prop = true;
+ }
+ /* It seems slightly suspicious that the spec's language covers
+the device_num selector too, but
+  target_device={device_num(whatever),kind(any)}
+is probably not terribly useful anyway.  */
+ else if (ts_code == OMP_TRAIT_DEVICE_ARCH
+  || ts_code == OMP_TRAIT_DEVICE_ISA
+  || ts_code == OMP_TRAIT_DEVICE_NUM)
+   saw_other_prop = true;
+
+ /* Each trait-property can only be specified once in a trait-selector
+other than the construct selector set.  FIXME: only handles
+name-list properties, not clause-list properties, since the
+"requires" selector is not implemented yet (PR 113067).  */
+ if (tss_code != OMP_TRAIT_SET_CONSTRUCT)
+   for (tree p1 = OMP_TS_PROPERTIES (ts); p1; p1 = TREE_CHAIN (p1))
+ {
+   if (OMP_TP_NAME (p1) != OMP_TP_NAMELIST_NODE)
+ break;
+   const char *n1 = omp_context_name_list_prop (p1);
+

[gcc r15-3842] Implement SUM and PRODUCT for unsigned.

2024-09-24 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:5e918a4db9e4a5bdbeafec6881fa8b22a55d3789

commit r15-3842-g5e918a4db9e4a5bdbeafec6881fa8b22a55d3789
Author: Thomas Koenig 
Date:   Tue Sep 24 21:59:10 2024 +0200

Implement SUM and PRODUCT for unsigned.

gcc/fortran/ChangeLog:

* gfortran.texi: Document SUM and PRODUCT.
* iresolve.cc (resolve_transformational): New argument,
use_integer, to translate calls to unsigned to calls to
integer.
(gfc_resolve_product): Use it.
(gfc_resolve_sum): Use it.
* simplify.cc (init_result_expr): Handle BT_UNSIGNED.

libgfortran/ChangeLog:

* generated/product_c10.c: Regenerated.
* generated/product_c16.c: Regenerated.
* generated/product_c17.c: Regenerated.
* generated/product_c4.c: Regenerated.
* generated/product_c8.c: Regenerated.
* generated/product_i1.c: Regenerated.
* generated/product_i16.c: Regenerated.
* generated/product_i2.c: Regenerated.
* generated/product_i4.c: Regenerated.
* generated/product_i8.c: Regenerated.
* generated/product_r10.c: Regenerated.
* generated/product_r16.c: Regenerated.
* generated/product_r17.c: Regenerated.
* generated/product_r4.c: Regenerated.
* generated/product_r8.c: Regenerated.
* generated/sum_c10.c: Regenerated.
* generated/sum_c16.c: Regenerated.
* generated/sum_c17.c: Regenerated.
* generated/sum_c4.c: Regenerated.
* generated/sum_c8.c: Regenerated.
* generated/sum_i1.c: Regenerated.
* generated/sum_i16.c: Regenerated.
* generated/sum_i2.c: Regenerated.
* generated/sum_i4.c: Regenerated.
* generated/sum_i8.c: Regenerated.
* generated/sum_r10.c: Regenerated.
* generated/sum_r16.c: Regenerated.
* generated/sum_r17.c: Regenerated.
* generated/sum_r4.c: Regenerated.
* generated/sum_r8.c: Regenerated.
* m4/ifunction.m4: Whitespace fix.
* m4/product.m4: If type is integer, change to unsigned.
* m4/sum.m4: Likewise.

Diff:
---
 gcc/fortran/gfortran.texi   |  2 +-
 gcc/fortran/iresolve.cc | 19 ++---
 gcc/fortran/simplify.cc | 11 +++-
 libgfortran/generated/product_c10.c | 22 +++
 libgfortran/generated/product_c16.c | 22 +++
 libgfortran/generated/product_c17.c | 22 +++
 libgfortran/generated/product_c4.c  | 22 +++
 libgfortran/generated/product_c8.c  | 22 +++
 libgfortran/generated/product_i1.c  | 54 ++---
 libgfortran/generated/product_i16.c | 54 ++---
 libgfortran/generated/product_i2.c  | 54 ++---
 libgfortran/generated/product_i4.c  | 54 ++---
 libgfortran/generated/product_i8.c  | 54 ++---
 libgfortran/generated/product_r10.c | 22 +++
 libgfortran/generated/product_r16.c | 22 +++
 libgfortran/generated/product_r17.c | 22 +++
 libgfortran/generated/product_r4.c  | 22 +++
 libgfortran/generated/product_r8.c  | 22 +++
 libgfortran/generated/sum_c10.c | 22 +++
 libgfortran/generated/sum_c16.c | 22 +++
 libgfortran/generated/sum_c17.c | 22 +++
 libgfortran/generated/sum_c4.c  | 22 +++
 libgfortran/generated/sum_c8.c  | 22 +++
 libgfortran/generated/sum_i1.c  | 54 ++---
 libgfortran/generated/sum_i16.c | 54 ++---
 libgfortran/generated/sum_i2.c  | 54 ++---
 libgfortran/generated/sum_i4.c  | 54 ++---
 libgfortran/generated/sum_i8.c  | 54 ++---
 libgfortran/generated/sum_r10.c | 22 +++
 libgfortran/generated/sum_r16.c | 22 +++
 libgfortran/generated/sum_r17.c | 22 +++
 libgfortran/generated/sum_r4.c  | 22 +++
 libgfortran/generated/sum_r8.c  | 22 +++
 libgfortran/m4/ifunction.m4 | 22 +++
 libgfortran/m4/product.m4   |  5 
 libgfortran/m4/sum.m4   |  5 
 36 files changed, 537 insertions(+), 507 deletions(-)

diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi
index 829ab00c6653..e5ffe678 100644
--- a/gcc/fortran/gfortran.texi
+++ b/gcc/fortran/gfortran.texi
@@ -2788,7 +2788,7 @@ As of now, the following intrinsics take unsigned 
arguments:
 @item @code{MVBITS}
 @item @code{RANGE}
 @item @code{TRANSFER}
-@item @code{MATMUL} and @code{DOT_PRODUCT}
+@item @

[gcc r15-3835] [PATCH] RISC-V: Fix FIXED_REGISTERS comment missing return address register

2024-09-24 Thread Jeff Law via Gcc-cvs
https://gcc.gnu.org/g:79a3d3da8c8a5ad56547b7f7991577271ee5d1b2

commit r15-3835-g79a3d3da8c8a5ad56547b7f7991577271ee5d1b2
Author: Yixuan Chen 
Date:   Tue Sep 24 09:15:00 2024 -0600

[PATCH] RISC-V: Fix FIXED_REGISTERS comment missing return address register

gcc/ChangeLog:

* config/riscv/riscv.h: Fix FIXED_REGISTERS comment missing return
address register.

Diff:
---
 gcc/config/riscv/riscv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index ead97867eb8e..3aecb43f8312 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -316,7 +316,7 @@ ASM_MISA_SPEC
 
 #define FIRST_PSEUDO_REGISTER 128
 
-/* x0, sp, gp, and tp are fixed.  */
+/* x0, ra, sp, gp, and tp are fixed.  */
 
 #define FIXED_REGISTERS
\
 { /* General registers.  */\


[gcc r15-3836] libgomp: with USM, init 'link' variables with host address

2024-09-24 Thread Tobias Burnus via Gcc-cvs
https://gcc.gnu.org/g:4cb20dc043cf70b8a1b4846c86599cc1ff9680d9

commit r15-3836-g4cb20dc043cf70b8a1b4846c86599cc1ff9680d9
Author: Tobias Burnus 
Date:   Tue Sep 24 17:41:39 2024 +0200

libgomp: with USM, init 'link' variables with host address

If requires unified_shared_memory or self_maps is set, make
'declare target link' variables to point initially to the host pointer.

libgomp/ChangeLog:

* target.c (gomp_load_image_to_device): For requires
unified_shared_memory, update 'link' vars to point to the host var.
* testsuite/libgomp.c-c++-common/target-link-3.c: New test.
* testsuite/libgomp.c-c++-common/target-link-4.c: New test.

Diff:
---
 libgomp/target.c   |  6 +++
 .../testsuite/libgomp.c-c++-common/target-link-3.c | 52 ++
 .../testsuite/libgomp.c-c++-common/target-link-4.c | 52 ++
 3 files changed, 110 insertions(+)

diff --git a/libgomp/target.c b/libgomp/target.c
index 6918694a843b..cf62af61f3b6 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -2454,6 +2454,12 @@ gomp_load_image_to_device (struct gomp_device_descr 
*devicep, unsigned version,
   array->right = NULL;
   splay_tree_insert (&devicep->mem_map, array);
   array++;
+
+  if (is_link_var
+ && (omp_requires_mask
+ & (GOMP_REQUIRES_UNIFIED_SHARED_MEMORY | 
GOMP_REQUIRES_SELF_MAPS)))
+   gomp_copy_host2dev (devicep, NULL, (void *) target_var->start,
+   &k->host_start, sizeof (void *), false, NULL);
 }
 
   /* Last entry is for the ICV struct variable; if absent, start = end = 0.  */
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c
new file mode 100644
index 000000000000..c707b38b7d46
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c
@@ -0,0 +1,52 @@
+/* { dg-do run }  */
+
+#include <stdint.h>
+#include <omp.h>
+
+#pragma omp requires unified_shared_memory
+
+int A[3] = {-3,-4,-5};
+static int q = -401;
+#pragma omp declare target link(A, q)
+
+#pragma omp begin declare target
+void
+f (uintptr_t *pA, uintptr_t *pq)
+{
+  if (A[0] != 1 || A[1] != 2 || A[2] != 3 || q != 42)
+__builtin_abort ();
+  A[0] = 13;
+  A[1] = 14;
+  A[2] = 15;
+  q = 23;
+  *pA = (uintptr_t) &A[0];
+  *pq = (uintptr_t) &q;
+}
+#pragma omp end declare target
+
+int
+main ()
+{
+  uintptr_t hpA = (uintptr_t) &A[0];
+  uintptr_t hpq = (uintptr_t) &q;
+  uintptr_t dpA, dpq;
+
+  A[0] = 1;
+  A[1] = 2;
+  A[2] = 3;
+  q = 42;
+
+  for (int i = 0; i <= omp_get_num_devices (); ++i)
+{
+  #pragma omp target device(device_num: i) map(dpA, dpq)
+   f (&dpA, &dpq);
+  if (hpA != dpA || hpq != dpq)
+   __builtin_abort ();
+  if (A[0] != 13 || A[1] != 14 || A[2] != 15 || q != 23)
+   __builtin_abort ();
+  A[0] = 1;
+  A[1] = 2;
+  A[2] = 3;
+  q = 42;
+}
+}
diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c 
b/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c
new file mode 100644
index 000000000000..785055e216d7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c
@@ -0,0 +1,52 @@
+/* { dg-do run }  */
+
+#include <stdint.h>
+#include <omp.h>
+
+#pragma omp requires self_maps
+
+int A[3] = {-3,-4,-5};
+static int q = -401;
+#pragma omp declare target link(A, q)
+
+#pragma omp begin declare target
+void
+f (uintptr_t *pA, uintptr_t *pq)
+{
+  if (A[0] != 1 || A[1] != 2 || A[2] != 3 || q != 42)
+__builtin_abort ();
+  A[0] = 13;
+  A[1] = 14;
+  A[2] = 15;
+  q = 23;
+  *pA = (uintptr_t) &A[0];
+  *pq = (uintptr_t) &q;
+}
+#pragma omp end declare target
+
+int
+main ()
+{
+  uintptr_t hpA = (uintptr_t) &A[0];
+  uintptr_t hpq = (uintptr_t) &q;
+  uintptr_t dpA, dpq;
+
+  A[0] = 1;
+  A[1] = 2;
+  A[2] = 3;
+  q = 42;
+
+  for (int i = 0; i <= omp_get_num_devices (); ++i)
+{
+  #pragma omp target device(device_num: i) map(dpA, dpq)
+   f (&dpA, &dpq);
+  if (hpA != dpA || hpq != dpq)
+   __builtin_abort ();
+  if (A[0] != 13 || A[1] != 14 || A[2] != 15 || q != 23)
+   __builtin_abort ();
+  A[0] = 1;
+  A[1] = 2;
+  A[2] = 3;
+  q = 42;
+}
+}


[gcc r15-3837] c++/contracts: ICE in build_contract_condition_function [PR116490]

2024-09-24 Thread Jason Merrill via Gcc-cvs
https://gcc.gnu.org/g:ae57e52754ca6c96145a1b7504c2c7613a9e54d9

commit r15-3837-gae57e52754ca6c96145a1b7504c2c7613a9e54d9
Author: Nina Dinka Ranns 
Date:   Fri Aug 30 13:49:07 2024 +0100

c++/contracts: ICE in build_contract_condition_function [PR116490]

We currently do not expect comdat group of the guarded function to
be set at the time of generating pre and post check function.
However, in the case of an explicit instantiation, the guarded
function has been added to a comdat group before generating contract
check functions, which causes the observed ICE. Current assert
removed and an additional check for comdat group of the guarded
function added. With this change, the pre and post check functions
get added to the same comdat group of the guarded function if the
guarded function is already placed in a comdat group.

PR c++/116490

gcc/cp/ChangeLog:

* contracts.cc (build_contract_condition_function): added
a check for comdat group of the guarded function. If set,
the condition check function is added to the same comdat
group.

gcc/testsuite/ChangeLog:

* g++.dg/contracts/pr116490.C: New test.

Signed-off-by: Nina Ranns 

Diff:
---
 gcc/cp/contracts.cc   | 12 +--
 gcc/testsuite/g++.dg/contracts/pr116490.C | 56 +++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/gcc/cp/contracts.cc b/gcc/cp/contracts.cc
index 39f0487ea367..4d2849a289a1 100644
--- a/gcc/cp/contracts.cc
+++ b/gcc/cp/contracts.cc
@@ -145,6 +145,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "print-tree.h"
 #include "stor-layout.h"
 #include "intl.h"
+#include "cgraph.h"
 
 const int max_custom_roles = 32;
 static contract_role contract_build_roles[max_custom_roles] = {
@@ -1458,9 +1459,14 @@ build_contract_condition_function (tree fndecl, bool pre)
   DECL_WEAK (fn) = false;
   DECL_COMDAT (fn) = false;
 
-  /* We haven't set the comdat group on the guarded function yet, we'll add
-this to the same group in comdat_linkage later.  */
-  gcc_assert (!DECL_ONE_ONLY (fndecl));
+  /* We may not have set the comdat group on the guarded function yet.
+If we haven't, we'll add this to the same group in comdat_linkage
+later.  Otherwise, add it to the same comdat group now.  */
+  if (DECL_ONE_ONLY (fndecl))
+   {
+ symtab_node *n = symtab_node::get (fndecl);
+ cgraph_node::get_create (fn)->add_to_same_comdat_group (n);
+   }
 
   DECL_INTERFACE_KNOWN (fn) = true;
 }
diff --git a/gcc/testsuite/g++.dg/contracts/pr116490.C 
b/gcc/testsuite/g++.dg/contracts/pr116490.C
new file mode 100644
index 000000000000..e3a5d77bafd3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/contracts/pr116490.C
@@ -0,0 +1,56 @@
+// ICE in explicit instantiation of a function with contracts
+// { dg-do run }
+// { dg-options "-std=c++20 -fcontracts -fcontract-continuation-mode=on" }
+
+template<typename T>
+void foo(T t)
+[[pre : t == 9 ]] {
+}
+
+template void foo(int i);
+
+
+template<typename T>
+struct templateS
+{
+  void fooS(T t)
+  [[pre : t == 9 ]] {
+  }
+};
+
+template struct templateS<int>;
+
+
+struct S {
+
+  template<typename T>
+  void fooS(T t)
+  [[pre : t == 9 ]] {
+  }
+
+  template<typename T>
+  static void fooStatic(T t)
+  [[pre : t == 9 ]] {
+  }
+};
+
+template void S::fooS(int i);
+
+template void S::fooStatic(int i);
+
+int main()
+{
+   foo(3);
+
+   templateS<int> ts;
+   ts.fooS(3);
+
+   S s;
+   s.fooS(3);
+   S::fooStatic(3);
+}
+
+// { dg-output "contract violation in function foo at .* t == 
9.*(\n|\r\n|\r)" }
+// { dg-output "contract violation in function templateS::fooS at .* t == 
9.*(\n|\r\n|\r)" }
+// { dg-output "contract violation in function S::fooS at .* t == 
9.*(\n|\r\n|\r)" }
+// { dg-output "contract violation in function S::fooStatic at .* t == 
9.*(\n|\r\n|\r)" }


[gcc r15-3844] Implement IANY, IALL and IPARITY for unsigned.

2024-09-24 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:fbeb1a965d85492e2f6f3adf913b90d005151b00

commit r15-3844-gfbeb1a965d85492e2f6f3adf913b90d005151b00
Author: Thomas Koenig 
Date:   Tue Sep 24 22:53:59 2024 +0200

Implement IANY, IALL and IPARITY for unsigned.

gcc/fortran/ChangeLog:

* check.cc (gfc_check_transf_bit_intrins): Handle unsigned.
* gfortran.texi: Document IANY, IALL and IPARITY for unsigned.
* iresolve.cc (gfc_resolve_iall): Set flag to use integer
if type is BT_UNSIGNED.
(gfc_resolve_iany): Likewise.
(gfc_resolve_iparity): Likewise.
* simplify.cc (do_bit_and): Adjust asserts for BT_UNSIGNED.
(do_bit_ior): Likewise.
(do_bit_xor): Likewise.

gcc/testsuite/ChangeLog:

* gfortran.dg/unsigned_29.f90: New test.

Diff:
---
 gcc/fortran/check.cc  | 14 -
 gcc/fortran/gfortran.texi |  1 +
 gcc/fortran/iresolve.cc   |  6 ++--
 gcc/fortran/simplify.cc   | 51 +--
 gcc/testsuite/gfortran.dg/unsigned_29.f90 | 40 
 5 files changed, 99 insertions(+), 13 deletions(-)

diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index 7c630dd73f43..533c9d7d3438 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -4430,7 +4430,19 @@ gfc_check_mask (gfc_expr *i, gfc_expr *kind)
 bool
 gfc_check_transf_bit_intrins (gfc_actual_arglist *ap)
 {
-  if (ap->expr->ts.type != BT_INTEGER)
+  bt type = ap->expr->ts.type;
+
+  if (flag_unsigned)
+{
+  if (type != BT_INTEGER && type != BT_UNSIGNED)
+   {
+ gfc_error ("%qs argument of %qs intrinsic at %L must be INTEGER "
+"or UNSIGNED", gfc_current_intrinsic_arg[0]->name,
+gfc_current_intrinsic, &ap->expr->where);
+ return false;
+   }
+}
+  else if (ap->expr->ts.type != BT_INTEGER)
 {
   gfc_error ("%qs argument of %qs intrinsic at %L must be INTEGER",
  gfc_current_intrinsic_arg[0]->name,
diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi
index e5ffe678..3eb8039c09fd 100644
--- a/gcc/fortran/gfortran.texi
+++ b/gcc/fortran/gfortran.texi
@@ -2789,6 +2789,7 @@ As of now, the following intrinsics take unsigned 
arguments:
 @item @code{RANGE}
 @item @code{TRANSFER}
 @item @code{SUM}, @code{PRODUCT}, @code{MATMUL} and @code{DOT_PRODUCT}
+@item @code{IANY}, @code{IALL} and @code{IPARITY}
 @end itemize
 This list will grow in the near future.
 @c -
diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc
index b4c9a636260e..b281ab740b1d 100644
--- a/gcc/fortran/iresolve.cc
+++ b/gcc/fortran/iresolve.cc
@@ -1195,7 +1195,7 @@ gfc_resolve_hypot (gfc_expr *f, gfc_expr *x, gfc_expr *y 
ATTRIBUTE_UNUSED)
 void
 gfc_resolve_iall (gfc_expr *f, gfc_expr *array, gfc_expr *dim, gfc_expr *mask)
 {
-  resolve_transformational ("iall", f, array, dim, mask);
+  resolve_transformational ("iall", f, array, dim, mask, true);
 }
 
 
@@ -1223,7 +1223,7 @@ gfc_resolve_iand (gfc_expr *f, gfc_expr *i, gfc_expr *j)
 void
 gfc_resolve_iany (gfc_expr *f, gfc_expr *array, gfc_expr *dim, gfc_expr *mask)
 {
-  resolve_transformational ("iany", f, array, dim, mask);
+  resolve_transformational ("iany", f, array, dim, mask, true);
 }
 
 
@@ -1429,7 +1429,7 @@ gfc_resolve_long (gfc_expr *f, gfc_expr *a)
 void
 gfc_resolve_iparity (gfc_expr *f, gfc_expr *array, gfc_expr *dim, gfc_expr 
*mask)
 {
-  resolve_transformational ("iparity", f, array, dim, mask);
+  resolve_transformational ("iparity", f, array, dim, mask, true);
 }
 
 
diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc
index e5681c42a48c..bd2f6485c95e 100644
--- a/gcc/fortran/simplify.cc
+++ b/gcc/fortran/simplify.cc
@@ -3401,9 +3401,20 @@ gfc_simplify_iachar (gfc_expr *e, gfc_expr *kind)
 static gfc_expr *
 do_bit_and (gfc_expr *result, gfc_expr *e)
 {
-  gcc_assert (e->ts.type == BT_INTEGER && e->expr_type == EXPR_CONSTANT);
-  gcc_assert (result->ts.type == BT_INTEGER
- && result->expr_type == EXPR_CONSTANT);
+  if (flag_unsigned)
+{
+  gcc_assert ((e->ts.type == BT_INTEGER || e->ts.type == BT_UNSIGNED)
+ && e->expr_type == EXPR_CONSTANT);
+  gcc_assert ((result->ts.type == BT_INTEGER
+  || result->ts.type == BT_UNSIGNED)
+ && result->expr_type == EXPR_CONSTANT);
+}
+  else
+{
+  gcc_assert (e->ts.type == BT_INTEGER && e->expr_type == EXPR_CONSTANT);
+  gcc_assert (result->ts.type == BT_INTEGER
+ && result->expr_type == EXPR_CONSTANT);
+}
 
   mpz_and (result->value.integer, result->value.integer, e->value.integer);
   return result;
@@ -3420,9 +3431,20 @@ gfc_simplify_iall (gfc_expr *array, gfc_expr *dim, 
gfc_expr *mask)
 static gfc_expr *
 do_bit_ior (gfc_expr *result, gfc_expr *e)
 {
-  

[gcc r15-3845] Add random numbers and fix some bugs.

2024-09-24 Thread Thomas König via Gcc-cvs
https://gcc.gnu.org/g:291e20e86090e5940e2bd862ec83c7d5e0715dd5

commit r15-3845-g291e20e86090e5940e2bd862ec83c7d5e0715dd5
Author: Thomas Koenig 
Date:   Tue Sep 24 22:57:42 2024 +0200

Add random numbers and fix some bugs.

This patch adds random number support for UNSIGNED, plus fixes
two bugs, with array I/O where the type used to be set to BT_INTEGER,
and for division with the divisor being a constant.

gcc/fortran/ChangeLog:

* check.cc (gfc_check_random_number): Adjust for unsigned.
* iresolve.cc (gfc_resolve_random_number): Handle unsigned.
* trans-expr.cc (gfc_conv_expr_op): Handle BT_UNSIGNED for divide.
* trans-types.cc (gfc_get_dtype_rank_type): Handle BT_UNSIGNED.
* gfortran.texi: Add RANDOM_NUMBER for UNSIGNED.

libgfortran/ChangeLog:

* gfortran.map: Add _gfortran_random_m1, _gfortran_random_m2,
_gfortran_random_m4, _gfortran_random_m8 and _gfortran_random_m16.
* intrinsics/random.c (random_m1): New function.
(random_m2): New function.
(random_m4): New function.
(random_m8): New function.
(random_m16): New function.
(arandom_m1): New function.
(arandom_m2): New function.
(arandom_m4): New function.
(arandom_m8): New function.
(arandom_m16): New function.

gcc/testsuite/ChangeLog:

* gfortran.dg/unsigned_30.f90: New test.

Diff:
---
 gcc/fortran/check.cc  |  10 +-
 gcc/fortran/gfortran.texi |   1 +
 gcc/fortran/iresolve.cc   |   6 +-
 gcc/fortran/trans-expr.cc |   4 +-
 gcc/fortran/trans-types.cc|   7 +-
 gcc/testsuite/gfortran.dg/unsigned_30.f90 |  63 +
 libgfortran/gfortran.map  |  10 +
 libgfortran/intrinsics/random.c   | 440 ++
 8 files changed, 534 insertions(+), 7 deletions(-)

diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index 533c9d7d3438..1851cfb8d4ad 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -7007,8 +7007,14 @@ gfc_check_random_init (gfc_expr *repeatable, gfc_expr 
*image_distinct)
 bool
 gfc_check_random_number (gfc_expr *harvest)
 {
-  if (!type_check (harvest, 0, BT_REAL))
-return false;
+  if (flag_unsigned)
+{
+  if (!type_check2 (harvest, 0, BT_REAL, BT_UNSIGNED))
+   return false;
+}
+  else
+if (!type_check (harvest, 0, BT_REAL))
+  return false;
 
   if (!variable_check (harvest, 0, false))
 return false;
diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi
index 3eb8039c09fd..a5ebadff3bb8 100644
--- a/gcc/fortran/gfortran.texi
+++ b/gcc/fortran/gfortran.texi
@@ -2790,6 +2790,7 @@ As of now, the following intrinsics take unsigned 
arguments:
 @item @code{TRANSFER}
 @item @code{SUM}, @code{PRODUCT}, @code{MATMUL} and @code{DOT_PRODUCT}
 @item @code{IANY}, @code{IALL} and @code{IPARITY}
+@item @code{RANDOM_NUMBER}.
 @end itemize
 This list will grow in the near future.
 @c -
diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc
index b281ab740b1d..5a1e0a6ed1d3 100644
--- a/gcc/fortran/iresolve.cc
+++ b/gcc/fortran/iresolve.cc
@@ -3452,12 +3452,14 @@ gfc_resolve_random_number (gfc_code *c)
 {
   const char *name;
   int kind;
+  char type;
 
   kind = gfc_type_abi_kind (&c->ext.actual->expr->ts);
+  type = gfc_type_letter (c->ext.actual->expr->ts.type);
   if (c->ext.actual->expr->rank == 0)
-name = gfc_get_string (PREFIX ("random_r%d"), kind);
+name = gfc_get_string (PREFIX ("random_%c%d"), type, kind);
   else
-name = gfc_get_string (PREFIX ("arandom_r%d"), kind);
+name = gfc_get_string (PREFIX ("arandom_%c%d"), type, kind);
 
   c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
 }
diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc
index d0c7dfea903d..e4c491a98486 100644
--- a/gcc/fortran/trans-expr.cc
+++ b/gcc/fortran/trans-expr.cc
@@ -3973,9 +3973,9 @@ gfc_conv_expr_op (gfc_se * se, gfc_expr * expr)
 
 case INTRINSIC_DIVIDE:
   /* If expr is a real or complex expr, use an RDIV_EXPR. If op1 is
- an integer, we must round towards zero, so we use a
+an integer or unsigned, we must round towards zero, so we use a
  TRUNC_DIV_EXPR.  */
-  if (expr->ts.type == BT_INTEGER)
+  if (expr->ts.type == BT_INTEGER || expr->ts.type == BT_UNSIGNED)
code = TRUNC_DIV_EXPR;
   else
code = RDIV_EXPR;
diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc
index 96ef8b49fbef..05e64b3a8e1b 100644
--- a/gcc/fortran/trans-types.cc
+++ b/gcc/fortran/trans-types.cc
@@ -1651,7 +1651,12 @@ gfc_get_dtype_rank_type (int rank, tree etype)
  && TYPE_STRING_FLAG (ptype))
n = BT_CHARACTER;
   else
-   n = BT_INTEGER;

[gcc r15-3820] tree-optimization/115372 - failed store-lanes in some cases

2024-09-24 Thread Richard Biener via Gcc-cvs
https://gcc.gnu.org/g:f594008dcced0ebb86908f3d7602fcf943e05bc7

commit r15-3820-gf594008dcced0ebb86908f3d7602fcf943e05bc7
Author: Richard Biener 
Date:   Fri Sep 20 15:07:24 2024 +0200

tree-optimization/115372 - failed store-lanes in some cases

The gcc.target/riscv/rvv/autovec/struct/struct_vect-4.c testcase shows
that we sometimes fail to use store-lanes even though it should be
profitable.  We're currently relying on vect_slp_prefer_store_lanes_p
at the point we run into the first SLP discovery mismatch with obviously
limited information.  For the case at hand we have 3, 5 or 7 lanes
of VnDImode [2, 2] vectors with the first mismatch at lane 2 so the
new group size is 1.  The heuristic says that might be an OK split
given the rest is a multiple of the vector lanes.  Now we continue
discovery but in the end mismatches result in uniformly single-lane
SLP instances which we can handle via interleaving but of course are
prime candidates for store-lanes.  The following patch re-assesses
with the extra knowledge now just relying on the fact whether the
target supports store-lanes for the given group size.

PR tree-optimization/115372
* tree-vect-slp.cc (vect_build_slp_instance): Compute the
uniform number of lanes, if any, of the RHS sub-graphs feeding
the store and if uniformly one, use store-lanes if the target
supports that.

Diff:
---
 gcc/tree-vect-slp.cc | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ab49bb0e7ee1..f5b47e430e31 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3957,6 +3957,7 @@ vect_build_slp_instance (vec_info *vinfo,
  /* Calculate the unrolling factor based on the smallest type.  */
  poly_uint64 unrolling_factor = 1;
 
+ unsigned int rhs_common_nlanes = 0;
  unsigned int start = 0, end = i;
  while (start < group_size)
{
@@ -3978,6 +3979,10 @@ vect_build_slp_instance (vec_info *vinfo,
 calculate_unrolling_factor
   (max_nunits, end - start));
  rhs_nodes.safe_push (node);
+ if (start == 0)
+   rhs_common_nlanes = SLP_TREE_LANES (node);
+ else if (rhs_common_nlanes != SLP_TREE_LANES (node))
+   rhs_common_nlanes = 0;
  start = end;
  if (want_store_lanes || force_single_lane)
end = start + 1;
@@ -4015,6 +4020,19 @@ vect_build_slp_instance (vec_info *vinfo,
}
}
 
+ /* Now re-assess whether we want store lanes in case the
+discovery ended up producing all single-lane RHSs.  */
+ if (rhs_common_nlanes == 1
+ && ! STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+ && ! STMT_VINFO_STRIDED_P (stmt_info)
+ && compare_step_with_zero (vinfo, stmt_info) > 0
+ && (vect_store_lanes_supported (SLP_TREE_VECTYPE (rhs_nodes[0]),
+ group_size,
+ SLP_TREE_CHILDREN
+   (rhs_nodes[0]).length () != 1)
+ != IFN_LAST))
+   want_store_lanes = true;
+
  /* Now we assume we can build the root SLP node from all stores.  */
  if (want_store_lanes)
{


[gcc r15-3849] RISC-V: Refine the testcase of vector SAT_ADD

2024-09-24 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:043d607cc45a9f45016ab1bf9870429f6d9fbaf5

commit r15-3849-g043d607cc45a9f45016ab1bf9870429f6d9fbaf5
Author: Pan Li 
Date:   Wed Sep 25 11:41:22 2024 +0800

RISC-V: Refine the testcase of vector SAT_ADD

Take scan-assembler-times for vsadd insn check instead of function body,
as we only care about if we can generate the fixed point insn vsadd.

The tests below passed for this patch.
* The rv64gcv full regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Remove
func body check and take scan asm times instead.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c| 13 ++---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c| 13 ++---
 .../gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c| 13 ++---
 .../gcc.target/riscv/rvv/autovec

[gcc r15-3850] RISC-V: Refine the testcase of vector SAT_SUB

2024-09-24 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:32bcca3e58e67c5f49c5b429da85910e03d21bef

commit r15-3850-g32bcca3e58e67c5f49c5b429da85910e03d21bef
Author: Pan Li 
Date:   Wed Sep 25 13:55:22 2024 +0800

RISC-V: Refine the testcase of vector SAT_SUB

Take scan-assembler-times for vssub insn check instead of function body,
as we only care about if we can generate the fixed point insn vssub.

The tests below passed for this patch.
* The rv64gcv full regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Remove
func body check and take scan asm times instead.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-1.c  | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-10.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-11.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-12.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-13.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-14.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-15.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-16.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-17.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-18.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-19.c | 13 ++---
 .../riscv/rvv/autovec/binop/vec_sat_u_sub-2.c  | 13 ++---
 .../riscv/rvv/a

[gcc r15-3851] RISC-V: Refine the testcase of vector SAT_TRUNC

2024-09-24 Thread Pan Li via Gcc-cvs
https://gcc.gnu.org/g:5b652b0132334e509c730311ac625c1dbe287282

commit r15-3851-g5b652b0132334e509c730311ac625c1dbe287282
Author: Pan Li 
Date:   Wed Sep 25 14:37:46 2024 +0800

RISC-V: Refine the testcase of vector SAT_TRUNC

Take scan-assembler-times for vnclip insn check instead of function body,
as we only care about if we can generate the fixed point insn vnclip.

The tests below passed for this patch.
* The rv64gcv full regression test.

It is a test-only patch and obvious up to a point; I will commit it
directly if there are no comments in the next 48 hours.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Remove
func body check and take scan asm times instead.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: Ditto.
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: Ditto.

Signed-off-by: Pan Li 

Diff:
---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c  | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c | 16 +---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c | 12 +---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c | 17 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c | 21 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c | 17 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c  | 17 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c | 17 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c | 21 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c | 17 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c  | 21 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c  | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c  | 17 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c  | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c  | 13 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c  | 17 ++---
 .../riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c  | 21 ++---
 24 files changed, 46 insertions(+), 328 deletions(-)

diff --git 
a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
index 186005733ecd..3d29d26abff1 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c
@@ -1,18 +1,9 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize 
-fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
-/* { dg-skip-if "" { *-*-* } { "-flto" } } */
-/* { dg-