[gcc(refs/users/meissner/heads/work178-tar)] PR 89213: Address review comments.
https://gcc.gnu.org/g:37ebd552056613aa6dac190fcc6e6d2b6963b3b6 commit 37ebd552056613aa6dac190fcc6e6d2b6963b3b6 Author: Michael Meissner Date: Tue Sep 24 22:16:22 2024 -0400 PR 89213: Address review comments. PR 99293: Optimize splat of a V2DF/V2DI extract with constant element We had optimizations for splat of a vector extract for the other vector types, but we missed having one for V2DI and V2DF. This patch adds a combiner insn to do this optimization. In looking at the source, we had similar optimizations for V4SI and V4SF extract and splats, but we missed doing V2DI/V2DF. Without the patch for the code: vector long long splat_dup_l_0 (vector long long v) { return __builtin_vec_splats (__builtin_vec_extract (v, 0)); } the compiler generates (on a little endian power9): splat_dup_l_0: mfvsrld 9,34 mtvsrdd 34,9,9 blr Now it generates: splat_dup_l_0: xxpermdi 34,34,34,3 blr PR 89213: Add better support for shifting vectors with 64-bit elements This patch fixes PR target/89213 to allow better code to be generated to do constant shifts of V2DI/V2DF vectors. Previously GCC would do constant shifts of vectors with 64-bit elements by using: XXSPLTIB 32,4 VEXTSB2D 0,0 VSRAD 2,2,0 I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for the 64-bit shift count in one instruction. Instead, it would need to load a byte and then convert it to 64-bit. With this patch, GCC now realizes that the vector shift instructions will look at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or XXSPLTIB instruction to load the shift count. 2024-09-17 Michael Meissner gcc/ PR target/89213 * config/rs6000/altivec.md (altivec__shift_const): Remove extra ()'s. gcc/testsuite/ PR target/89213 * gcc.target/powerpc/pr89213.c: Allow running test on 32-bit. 2024-09-12 Michael Meissner gcc/ * config/rs6000/vsx.md (vsx_splat_extract_): New insn. gcc/testsuite/ * gcc.target/powerpc/builtins-1.c: Adjust insn count. * gcc.target/powerpc/pr99293.c: New test. 
2024-09-12 Michael Meissner gcc/ PR target/89213 * config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec. (VSHIFT_MODE): New mode iterator. (vshift_code): New code iterator. (vshift_attr): New code attribute. (altivec___const): New pattern to optimize vector long long/int shifts by a constant. (altivec__shift_const): New helper insn to load up a constant used by the shift operation. * config/rs6000/predicates.md (vector_shift_constant): New predicate. gcc/testsuite/ PR target/89213 * gcc.target/powerpc/pr89213.c: New test. * gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count. Diff: --- gcc/config/rs6000/altivec.md | 51 +++ gcc/config/rs6000/predicates.md | 63 ++ gcc/config/rs6000/vsx.md | 18 gcc/testsuite/gcc.target/powerpc/builtins-1.c| 2 +- gcc/testsuite/gcc.target/powerpc/pr89213.c | 106 +++ gcc/testsuite/gcc.target/powerpc/pr99293.c | 22 + gcc/testsuite/gcc.target/powerpc/vec-rlmi-rlnm.c | 4 +- 7 files changed, 263 insertions(+), 3 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 1f5489b974f6..e4576c6d0967 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -170,6 +170,7 @@ UNSPEC_VSTRIL UNSPEC_SLDB UNSPEC_SRDB + UNSPEC_VECTOR_SHIFT ]) (define_c_enum "unspecv" @@ -2176,6 +2177,56 @@ "vsro %0,%1,%2" [(set_attr "type" "vecperm")]) +;; Optimize V2DI shifts by constants. This relies on the shift instructions +;; only looking at the bits needed to do the shift. This means we can use +;; VSPLTISW or XXSPLTIB to load up the constant, and not worry about the bits +;; that the vector shift instructions will not use. +(define_mode_iterator VSHIFT_MODE [(V4SI "TARGET_P9_VECTOR") +(V2DI "TARGET_P8_VECTOR")]) + +(define_code_iterator vshift_code [ashift ashiftrt lshiftrt]) +(define_code_attr vshift_attr [(ashift "ashift") +(ashiftrt "ashiftrt") +(lshiftrt "lshiftrt")]) + +(define_insn_and_split "*altivec___const" + [(set (mat
[gcc(refs/users/meissner/heads/work178-tar)] Update ChangeLog.*
https://gcc.gnu.org/g:5655ef3b1ce3a86a3b3e6670301c4e542bbcdaa5 commit 5655ef3b1ce3a86a3b3e6670301c4e542bbcdaa5 Author: Michael Meissner Date: Tue Sep 24 22:20:10 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.tar | 101 ++ 1 file changed, 101 insertions(+) diff --git a/gcc/ChangeLog.tar b/gcc/ChangeLog.tar index b091c795c423..dbfd18351841 100644 --- a/gcc/ChangeLog.tar +++ b/gcc/ChangeLog.tar @@ -1,3 +1,104 @@ + Branch work178-tar, patch #202 from work178-bugs + +PR 89213: Address review comments. + +2024-09-17 Michael Meissner + +gcc/ + + PR target/89213 + * config/rs6000/altivec.md (altivec__shift_const): Remove extra + ()'s. + +gcc/testsuite/ + + PR target/89213 + * gcc.target/powerpc/pr89213.c: Allow running test on 32-bit. + + Branch work178-tar, patch #201 from work178-bugs + +PR 99293: Optimize splat of a V2DF/V2DI extract with constant element + +We had optimizations for splat of a vector extract for the other vector +types, but we missed having one for V2DI and V2DF. This patch adds a +combiner insn to do this optimization. + +In looking at the source, we had similar optimizations for V4SI and V4SF +extract and splats, but we missed doing V2DI/V2DF. + +Without the patch for the code: + + vector long long splat_dup_l_0 (vector long long v) + { + return __builtin_vec_splats (__builtin_vec_extract (v, 0)); + } + +the compiler generates (on a little endian power9): + + splat_dup_l_0: + mfvsrld 9,34 + mtvsrdd 34,9,9 + blr + +Now it generates: + + splat_dup_l_0: + xxpermdi 34,34,34,3 + blr + +2024-09-12 Michael Meissner + +gcc/ + + * config/rs6000/vsx.md (vsx_splat_extract_): New insn. + +gcc/testsuite/ + + * gcc.target/powerpc/builtins-1.c: Adjust insn count. + * gcc.target/powerpc/pr99293.c: New test. + + Branch work178-tar, patch #200 from work178-bugs + +PR 89213: Add better support for shifting vectors with 64-bit elements + +This patch fixes PR target/89213 to allow better code to be generated to do +constant shifts of V2DI/V2DF vectors. 
Previously GCC would do constant shifts +of vectors with 64-bit elements by using: + + XXSPLTIB 32,4 + VEXTSB2D 0,0 + VSRAD 2,2,0 + +I.e., the PowerPC does not have a VSPLTISD instruction to load -15..14 for the +64-bit shift count in one instruction. Instead, it would need to load a byte +and then convert it to 64-bit. + +With this patch, GCC now realizes that the vector shift instructions will look +at the bottom 6 bits for the shift count, and it can use either a VSPLTISW or +XXSPLTIB instruction to load the shift count. + +2024-09-12 Michael Meissner + +gcc/ + + PR target/89213 + * config/rs6000/altivec.md (UNSPEC_VECTOR_SHIFT): New unspec. + (VSHIFT_MODE): New mode iterator. + (vshift_code): New code iterator. + (vshift_attr): New code attribute. + (altivec___const): New pattern to optimize + vector long long/int shifts by a constant. + (altivec__shift_const): New helper insn to load up a + constant used by the shift operation. + * config/rs6000/predicates.md (vector_shift_constant): New + predicate. + +gcc/testsuite/ + + PR target/89213 + * gcc.target/powerpc/pr89213.c: New test. + * gcc.target/powerpc/vec-rlmi-rlnm.c: Update instruction count. + Branch work178-tar, patch #301 Remove SPR alternatives for move insns.
[gcc r15-3840] c++: Implement C++23 P2718R0 - Wording for P2644R1 Fix for Range-based for Loop [PR107637]
https://gcc.gnu.org/g:650e91566561870f3d1c8d5b92e6613296ee1a8d commit r15-3840-g650e91566561870f3d1c8d5b92e6613296ee1a8d Author: Jakub Jelinek Date: Tue Sep 24 20:19:50 2024 +0200 c++: Implement C++23 P2718R0 - Wording for P2644R1 Fix for Range-based for Loop [PR107637] The following patch implements the C++23 P2718R0 paper - Wording for P2644R1 Fix for Range-based for Loop. The patch introduces a new option, -f{,no-}range-for-ext-temps so that user can control the behavior even in older C++ versions. The option is on by default in C++23 and later (-fno-range-for-ext-temps is an error in that case) and in the -std=gnu++11 ... -std=gnu++20 modes (one can use -fno-range-for-ext-temps to request previous behavior in that case), and is not enabled by default in -std=c++11 ... -std=c++20 modes but one can explicitly enable it with -frange-for-ext-temps. As all the temporaries from __for_range initialization should have life extended until the end of __for_range scope, this patch disables (for -frange-for-ext-temps and if !processing_template_decl) CLEANUP_POINT_EXPR wrapping of the __for_range declaration, also disables -Wdangling-reference warning as well as the rest of extend_ref_init_temps (we know the __for_range temporary is not TREE_STATIC and as all the temporaries from the initializer will be life extended, we shouldn't try to handle temporaries referenced by references any differently) and adds an extra push_stmt_list/pop_stmt_list before cp_finish_decl of __for_range and after end of the for body and wraps all that into CLEANUP_POINT_EXPR. I had to repeat that also for OpenMP range loops because those are handled differently. 2024-09-24 Jakub Jelinek PR c++/107637 gcc/ * omp-general.cc (find_combined_omp_for, find_nested_loop_xform): Handle CLEANUP_POINT_EXPR like TRY_FINALLY_EXPR. * doc/invoke.texi (frange-for-ext-temps): Document. Add -fconcepts to the C++ option list. gcc/c-family/ * c.opt (frange-for-ext-temps): New option. 
* c-opts.cc (c_common_post_options): Set flag_range_for_ext_temps for C++23 or later or for C++11 or later in !flag_iso mode if the option wasn't set by user. * c-cppbuiltin.cc (c_cpp_builtins): Change __cpp_range_based_for value for flag_range_for_ext_temps from 201603L to 202212L in C++17 or later. * c-omp.cc (c_find_nested_loop_xform_r): Handle CLEANUP_POINT_EXPR like TRY_FINALLY_EXPR. gcc/cp/ * cp-tree.h: Implement C++23 P2718R0 - Wording for P2644R1 Fix for Range-based for Loop. (cp_convert_omp_range_for): Add bool tmpl_p argument. (find_range_for_decls): Declare. * parser.cc (cp_convert_range_for): For flag_range_for_ext_temps call push_stmt_list () before cp_finish_decl for range_temp and save it temporarily to FOR_INIT_STMT. (cp_convert_omp_range_for): Add tmpl_p argument. If set, remember DECL_NAME of range_temp and for cp_finish_decl call restore it before clearing it again, if unset, don't adjust DECL_NAME of range_temp at all. (cp_parser_omp_loop_nest): For flag_range_for_ext_temps range for add CLEANUP_POINT_EXPR around sl. Call find_range_for_decls and adjust DECL_NAMEs for range fors if not processing_template_decl. Adjust cp_convert_omp_range_for caller. Remove superfluous backslash at the end of line. * decl.cc (initialize_local_var): For flag_range_for_ext_temps temporarily clear stmts_are_full_exprs_p rather than set for for_range__identifier decls. * call.cc (extend_ref_init_temps): For flag_range_for_ext_temps return init early for for_range__identifier decls. * semantics.cc (find_range_for_decls): New function. (finish_for_stmt): Use it. For flag_range_for_ext_temps if cp_convert_range_for set FOR_INIT_STMT, pop_stmt_list it and wrap into CLEANUP_POINT_EXPR. * pt.cc (tsubst_omp_for_iterator): Adjust tsubst_omp_for_iterator caller. (tsubst_stmt) : For flag_range_for_ext_temps if there are any range fors in the loop nest, add push_stmt_list starting before the initializations, pop_stmt_list it after the body and wrap into CLEANUP_POINT_EXPR. 
Change DECL_NAME of range for temps from NULL to for_range_identifier. gcc/testsuite/ * g++.dg/cpp23/range-for1.C: New test. * g++.dg/cpp23/range-for2.C: New test. * g++.dg/cpp23/range-for3.C: New test. * g++.dg/cpp23/range-for4.C: New test. * g++.dg/cpp23/range-for5.C: New t
[gcc r15-3839] libgcc, Darwin: Drop the legacy library build for macOS >= 15 [PR116809].
https://gcc.gnu.org/g:d9cafa0c4f0a81304d9b95a78ccc8e9003c6d7a3 commit r15-3839-gd9cafa0c4f0a81304d9b95a78ccc8e9003c6d7a3 Author: Iain Sandoe Date: Sun Sep 22 11:43:32 2024 +0100 libgcc, Darwin: Drop the legacy library build for macOS >= 15 [PR116809]. We have been building a legacy libgcc_s.1 DSO to support code that was built with older compilers. From macOS 15, the unwinder no longer exports some of the symbols used in that library which (a) causes bootstrap to fail and (b) means that the legacy library is no longer useful. No open branch of GCC emits references to this library - and any already-built code that depends on the symbols would need rework anyway. PR target/116809 libgcc/ChangeLog: * config.host: Build legacy libgcc_s.1 on hosts before macOS 15. * config/i386/t-darwin: Remove reference to legacy libgcc_s.1 * config/rs6000/t-darwin: Likewise. * config/t-darwin-libgccs1: New file. Signed-off-by: Iain Sandoe Diff: --- libgcc/config.host | 11 +++ libgcc/config/i386/t-darwin | 3 --- libgcc/config/rs6000/t-darwin | 3 --- libgcc/config/t-darwin-libgccs1 | 3 +++ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libgcc/config.host b/libgcc/config.host index 5c6b656531ff..00bd6384c0f9 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -239,22 +239,25 @@ case ${host} in esac tmake_file="$tmake_file t-slibgcc-darwin" case ${host} in +x86_64-*-darwin2[0-3]*) + tmake_file="t-darwin-min-11 t-darwin-libgccs1 $tmake_file" + ;; *-*-darwin2*) tmake_file="t-darwin-min-11 $tmake_file" ;; *-*-darwin1[89]*) - tmake_file="t-darwin-min-8 $tmake_file" + tmake_file="t-darwin-min-8 t-darwin-libgccs1 $tmake_file" ;; *-*-darwin9* | *-*-darwin1[0-7]*) - tmake_file="t-darwin-min-5 $tmake_file" + tmake_file="t-darwin-min-5 t-darwin-libgccs1 $tmake_file" ;; *-*-darwin[4-8]*) - tmake_file="t-darwin-min-1 $tmake_file" + tmake_file="t-darwin-min-1 t-darwin-libgccs1 $tmake_file" ;; *) # Fall back to configuring for the oldest system known to work with # all archs and
the current sources. - tmake_file="t-darwin-min-5 $tmake_file" + tmake_file="t-darwin-min-5 t-darwin-libgccs1 $tmake_file" echo "Warning: libgcc configured to support macOS 10.5" 1>&2 ;; esac diff --git a/libgcc/config/i386/t-darwin b/libgcc/config/i386/t-darwin index 4c18da1efbfd..c6b3acaaca28 100644 --- a/libgcc/config/i386/t-darwin +++ b/libgcc/config/i386/t-darwin @@ -4,6 +4,3 @@ LIB2FUNCS_EXCLUDE = _fixtfdi _fixunstfdi _floatditf _floatunditf # Extra symbols for this port. SHLIB_MAPFILES += $(srcdir)/config/i386/libgcc-darwin.ver - -# Build a legacy libgcc_s.1 -BUILD_LIBGCCS1 = YES diff --git a/libgcc/config/rs6000/t-darwin b/libgcc/config/rs6000/t-darwin index 183d0df92ce9..8b513bdb1d78 100644 --- a/libgcc/config/rs6000/t-darwin +++ b/libgcc/config/rs6000/t-darwin @@ -56,6 +56,3 @@ unwind-dw2_s.o: HOST_LIBGCC2_CFLAGS += -maltivec unwind-dw2.o: HOST_LIBGCC2_CFLAGS += -maltivec LIB2ADDEH += $(srcdir)/config/rs6000/darwin-fallback.c - -# Build a legacy libgcc_s.1 -BUILD_LIBGCCS1 = YES diff --git a/libgcc/config/t-darwin-libgccs1 b/libgcc/config/t-darwin-libgccs1 new file mode 100644 index ..b88b1a5bba8a --- /dev/null +++ b/libgcc/config/t-darwin-libgccs1 @@ -0,0 +1,3 @@ + +# Build a legacy libgcc_s.1 +BUILD_LIBGCCS1 = YES
[gcc r15-3838] i386: Fix comment typo
https://gcc.gnu.org/g:dab450021569811402e02917d7dc1f05fb4626c7 commit r15-3838-gdab450021569811402e02917d7dc1f05fb4626c7 Author: Jakub Jelinek Date: Tue Sep 24 19:00:38 2024 +0200 i386: Fix comment typo Found a comment typo, fixed as obvious. 2024-09-24 Jakub Jelinek * config/i386/i386-expand.cc (ix86_expand_round_builtin): Fix comment typo, insead -> instead. Diff: --- gcc/config/i386/i386-expand.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 124cb976ec87..39ee9b8662ad 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12748,7 +12748,7 @@ ix86_expand_round_builtin (const struct builtin_description *d, /* Skip erasing embedded rounding for below expanders who generates multiple insns. In ix86_erase_embedded_rounding the pattern will be transformed to a single set, and emit_insn -appends the set insead of insert it to chain. So the insns +appends the set instead of insert it to chain. So the insns emitted inside define_expander would be ignored. */ switch (icode) {
[gcc r15-3843] options: Regenerate c.opt.urls
https://gcc.gnu.org/g:1762b7f89eb9d8a1f150ab294344e945c0870399 commit r15-3843-g1762b7f89eb9d8a1f150ab294344e945c0870399 Author: Jakub Jelinek Date: Tue Sep 24 22:21:26 2024 +0200 options: Regenerate c.opt.urls Forgot to regenerate URLs for the C++23 P2718R0 patch. 2024-09-24 Jakub Jelinek * c.opt.urls: Regenerate. Diff: --- gcc/c-family/c.opt.urls | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls index 2f1e9f95271b..084dfd297c5a 100644 --- a/gcc/c-family/c.opt.urls +++ b/gcc/c-family/c.opt.urls @@ -1268,6 +1268,9 @@ UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-fno-pretty-templates) fprintf-return-value UrlSuffix(gcc/Optimize-Options.html#index-fno-printf-return-value) +frange-for-ext-temps +UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-frange-for-ext-temps) + freplace-objc-classes UrlSuffix(gcc/Objective-C-and-Objective-C_002b_002b-Dialect-Options.html#index-freplace-objc-classes)
[gcc r15-3847] OpenMP: Fix testsuite failure on x86 with -m32
https://gcc.gnu.org/g:6935bddd8f90dde6009a1b8dea9745788ceeefb1 commit r15-3847-g6935bddd8f90dde6009a1b8dea9745788ceeefb1 Author: Sandra Loosemore Date: Wed Sep 25 02:59:53 2024 +0000 OpenMP: Fix testsuite failure on x86 with -m32 The testcase declare-variant-duplicates.c added in commit 96246bff0bcd9e5cdec9e6cf811ee3db4997f6d4 failed on 32-bit x86 because on that target "i386" is defined as a preprocessor macro and cannot be used as an identifier. Fixed by rewriting that test not to do that. gcc/testsuite/ChangeLog * c-c++-common/gomp/declare-variant-duplicates.c: Avoid using "i386" as an identifier. Diff: --- gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c b/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c index 47d34fc52e2f..9f319c724492 100644 --- a/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c +++ b/gcc/testsuite/c-c++-common/gomp/declare-variant-duplicates.c @@ -8,6 +8,6 @@ extern int f4 (int); #pragma omp declare variant (f1) match (device={kind(cpu,gpu,"cpu")}) /* { dg-error "trait-property .cpu. specified more than once" } */ #pragma omp declare variant (f2) match (device={isa(sse4,"avx",avx)}) /* { dg-error "trait-property .avx. specified more than once" } */ -#pragma omp declare variant (f3) match (device={arch(x86_64,i386,aarch64,"i386")}) /* { dg-error "trait-property .i386. specified more than once" } */ +#pragma omp declare variant (f3) match (device={arch(x86_64,"i386",aarch64,"x86_64")}) /* { dg-error "trait-property .x86_64. specified more than once" } */ #pragma omp declare variant (f4) match (implementation={vendor(llvm,gnu,"arm",gnu)}) /* { dg-error "trait-property .gnu. specified more than once" } */ int f (int);
[gcc r15-3848] i386: Update the comment for mapxf option
https://gcc.gnu.org/g:742d242fad997142f32a8ec5a40d78d8af4871ca commit r15-3848-g742d242fad997142f32a8ec5a40d78d8af4871ca Author: Lingling Kong Date: Wed Sep 25 11:18:44 2024 +0800 i386: Update the comment for mapxf option gcc/ChangeLog: * config/i386/i386.opt: Update the features included in apxf. Diff: --- gcc/config/i386/i386.opt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index fe16e44a4eab..64c295d344cf 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1313,7 +1313,7 @@ Enable vectorization for scatter instruction. mapxf Target Mask(ISA2_APX_F) Var(ix86_isa_flags2) Save Support code generation for APX features, including EGPR, PUSH2POP2, -NDD and PPX. +NDD, PPX, NF, CCMP and ZU. mapx-features= Target Undocumented Joined Enum(apx_features) EnumSet Var(ix86_apx_features) Init(apx_none) Save
[gcc/devel/sh-lra] SH: Try to reduce R0 live ranges
https://gcc.gnu.org/g:8ee643e50957904d75affece056a6dd84de343d6 commit 8ee643e50957904d75affece056a6dd84de343d6 Author: Kaz Kojima Date: Fri Sep 20 18:15:30 2024 +0900 SH: Try to reduce R0 live ranges Some move or extend patterns will make long R0 live ranges and could confuse LRA. gcc/ChangeLog: * config/sh/sh-protos.h (sh_satisfies_constraint_Sid_subreg_index): Declare. * config/sh/sh.cc (sh_satisfies_constraint_Sid_subreg_index): New function. * config/sh/sh.md (extendsi2_short_mem_disp_z, *mov_store_mem_index, mov_store_mem_index): New insn and insn_and_split patterns. (extendsi2, mov): Use them for LRA. Diff: --- gcc/config/sh/sh-protos.h | 1 + gcc/config/sh/sh.cc | 12 +++ gcc/config/sh/sh.md | 90 ++- 3 files changed, 102 insertions(+), 1 deletion(-) diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index b151a7c8fccc..5e5bd0aff7e7 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -61,6 +61,7 @@ extern rtx legitimize_pic_address (rtx, machine_mode, rtx); extern bool nonpic_symbol_mentioned_p (rtx); extern void output_pic_addr_const (FILE *, rtx); extern bool expand_block_move (rtx *); +extern bool sh_satisfies_constraint_Sid_subreg_index (rtx); extern void prepare_move_operands (rtx[], machine_mode mode); extern bool sh_expand_cmpstr (rtx *); extern bool sh_expand_cmpnstr (rtx *); diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc index 7391b8df5830..c9222c3e6ac0 100644 --- a/gcc/config/sh/sh.cc +++ b/gcc/config/sh/sh.cc @@ -1577,6 +1577,18 @@ sh_encode_section_info (tree decl, rtx rtl, int first) SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION; } +/* Test Sid constraint with subreg index. See also the comment in + prepare_move_operands. 
*/ +bool +sh_satisfies_constraint_Sid_subreg_index (rtx op) +{ + return ((GET_CODE (op) == MEM) + && ((GET_CODE (XEXP (op, 0)) == PLUS) + && ((GET_CODE (XEXP (XEXP (op, 0), 0)) == REG) + && ((GET_CODE (XEXP (XEXP (op, 0), 1)) == SUBREG) + && (GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == REG); +} + /* Prepare operands for a move define_expand; specifically, one of the operands must be in a register. */ void diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 7eee12ca6b8a..6d93f5cb816b 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -4801,7 +4801,38 @@ (define_expand "extendsi2" [(set (match_operand:SI 0 "arith_reg_dest") - (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]) + (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))] + "" +{ + /* When the displacement addressing is used, RA will assign r0 to + the pseudo register operand for the QI/HImode load. See + the comment in sh.cc:prepare_move_operand and PR target/55212. */ + if (! lra_in_progress && ! reload_completed + && sh_lra_p () + && ! TARGET_SH2A + && arith_reg_dest (operands[0], mode) + && short_displacement_mem_operand (operands[1], mode)) +{ + emit_insn (gen_extendsi2_short_mem_disp_z (operands[0], + operands[1])); + DONE; +} +}) + +(define_insn_and_split "extendsi2_short_mem_disp_z" + [(set (match_operand:SI 0 "arith_reg_dest" "=r") + (sign_extend:SI + (match_operand:QIHI 1 "short_displacement_mem_operand" "m"))) + (clobber (reg:SI R0_REG))] + "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()" + "#" + "&& 1" + [(set (match_dup 2) (sign_extend:SI (match_dup 1))) + (set (match_dup 0) (match_dup 2))] +{ + operands[2] = gen_rtx_REG (SImode, R0_REG); +} + [(set_attr "type" "load")]) (define_insn_and_split "*extendsi2_compact_reg" [(set (match_operand:SI 0 "arith_reg_dest" "=r") @@ -5343,9 +5374,50 @@ operands[1] = gen_lowpart (mode, reg); } + if (! lra_in_progress && ! reload_completed + && sh_lra_p () + && ! 
TARGET_SH2A + && arith_reg_operand (operands[1], mode) + && (satisfies_constraint_Sid (operands[0]) + || sh_satisfies_constraint_Sid_subreg_index (operands[0]))) +{ + rtx adr = XEXP (operands[0], 0); + rtx base = XEXP (adr, 0); + rtx idx = XEXP (adr, 1); + emit_insn (gen_mov_store_mem_index (base, idx, + operands[1])); + DONE; +} + prepare_move_operands (operands, mode); }) +(define_insn "*mov_store_mem_index" + [(set (mem:QIHI + (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r") + (match_operand:SI 1 "arith_reg_operand" "z"))) + (match_operand:QIHI 2 "arith_reg_operan
[gcc] Created branch 'devel/sh-lra'
The branch 'devel/sh-lra' was created pointing to: 09210f927265... SH: enable LRA by default
[gcc/devel/sh-lra] SH: pin input args to hard-regs via predicates for sfuncs
https://gcc.gnu.org/g:4d28c5b1eb15cff97f07982c73770485e6eaa986 commit 4d28c5b1eb15cff97f07982c73770485e6eaa986 Author: Kaz Kojima Date: Tue Sep 24 18:26:42 2024 +0900 SH: pin input args to hard-regs via predicates for sfuncs Some sfuncs uses hard reg as input and clobber its raw reg pattern. It seems that LRA doesn't process this clobber pattern. Rewrite these patterns so as to work with LRA. gcc/ChangeLog: * config/sh/predicates.md (hard_reg_r4, hard_reg_r5, hard_reg_r6): New predicates. * config/sh/sh.md (udivsi3_i4, udivsi3_i4_single, udivsi3_i1): Rewrite with match_operand and match_dup. (block_lump_real, block_lump_real_i4): Ditto. (udivsi3): Adjust for it. * config/sh/sh-mem.cc (expand_block_move): Ditto. Diff: --- gcc/config/sh/predicates.md | 19 + gcc/config/sh/sh-mem.cc | 4 +- gcc/config/sh/sh.md | 101 +++- 3 files changed, 84 insertions(+), 40 deletions(-) diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md index 3732cec9608b..b10af71c280e 100644 --- a/gcc/config/sh/predicates.md +++ b/gcc/config/sh/predicates.md @@ -818,3 +818,22 @@ return false; }) + +;; Predicats for the arguments of sfunc R4, R5 and R6. 
+(define_predicate "hard_reg_r4" + (match_code "reg") +{ + return REGNO (op) == R4_REG; +}) + +(define_predicate "hard_reg_r5" + (match_code "reg") +{ + return REGNO (op) == R5_REG; +}) + +(define_predicate "hard_reg_r6" + (match_code "reg") +{ + return REGNO (op) == R6_REG; +}) diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc index e22419912d6f..751c826e84fb 100644 --- a/gcc/config/sh/sh-mem.cc +++ b/gcc/config/sh/sh-mem.cc @@ -134,7 +134,7 @@ expand_block_move (rtx *operands) int dwords = bytes >> 3; emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); - emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab)); + emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab, r4, r5, r6)); return true; } else @@ -178,7 +178,7 @@ expand_block_move (rtx *operands) final_switch = 16 - ((bytes / 4) % 16); while_loop = ((bytes / 4) / 16 - 1) * 16; emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); - emit_insn (gen_block_lump_real (func_addr_rtx, lab)); + emit_insn (gen_block_lump_real (func_addr_rtx, lab, r4, r5, r6)); return true; } diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index c69eda36885f..451ae0b76891 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -2194,13 +2194,24 @@ ;; there is nothing to prevent reload from using r0 to reload the address. ;; This reload would clobber the value in r0 we are trying to store. ;; If we let reload allocate r0, then this problem can never happen. +;; +;; In addition to that, we also must pin the input regs to hard-regs via the +;; predicates. When these insns are instantiated it also emits the +;; accompanying mov insns to load the hard-regs. However, subsequent RTL +;; passes might move things around and reassign the operands to pseudo regs +;; which might get allocated to different (wrong) hard-regs eventually. To +;; avoid that, only allow matching these insns if the operands are the +;; expected hard-regs. 
(define_insn "udivsi3_i1" [(set (match_operand:SI 0 "register_operand" "=z,z") - (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r") +(match_operand:SI 4 "hard_reg_r5" "=r,r"))) (clobber (reg:SI T_REG)) (clobber (reg:SI PR_REG)) (clobber (reg:SI R1_REG)) - (clobber (reg:SI R4_REG)) + (clobber (match_dup 3)) + (use (reg:SI R4_REG)) + (use (reg:SI R5_REG)) (use (match_operand:SI 1 "arith_reg_operand" "r,r")) (use (match_operand 2 "" "Z,Ccl"))] "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1" @@ -2212,7 +2223,8 @@ (define_insn "udivsi3_i4" [(set (match_operand:SI 0 "register_operand" "=y,y") - (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r") +(match_operand:SI 4 "hard_reg_r5" "=r,r"))) (clobber (reg:SI T_REG)) (clobber (reg:SI PR_REG)) (clobber (reg:DF DR0_REG)) @@ -2220,9 +2232,11 @@ (clobber (reg:DF DR4_REG)) (clobber (reg:SI R0_REG)) (clobber (reg:SI R1_REG)) - (clobber (reg:SI R4_REG)) - (clobber (reg:SI R5_REG)) + (clobber (match_dup 3)) + (clobber (match_dup 4)) (clobber (reg:SI FPSCR_STAT_REG)) + (use (reg:SI R4_REG)) + (use (reg:SI R5_REG)) (use (match_operand:SI 1 "arith_reg_operand" "r,r")) (use (match_operand 2 "" "Z,Ccl")) (use (reg:SI FPSCR_MODES_REG))] @@ -2236,7 +2250,8 @@ (define_insn "udivsi3_i4_single" [(set (match_operand:SI 0 "register_operand" "=y,y") - (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG))) + (udiv:SI (match_o
[gcc/devel/sh-lra] SH: Fix the condition to use movsf_ie_y pattern.
https://gcc.gnu.org/g:eee3b80ab6217ab792f01280bc6240c55468600a commit eee3b80ab6217ab792f01280bc6240c55468600a Author: Kaz Kojima Date: Tue Sep 24 18:35:46 2024 +0900 SH: Fix the condition to use movsf_ie_y pattern. gcc/ChangeLog: * config/sh/sh.cc (sh_movsf_ie_y_split_p): Take the subreg of DImode into account. Diff: --- gcc/config/sh/sh.cc | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc index b2ba7488c5de..d9e7e67cba65 100644 --- a/gcc/config/sh/sh.cc +++ b/gcc/config/sh/sh.cc @@ -11462,11 +11462,15 @@ sh_movsf_ie_y_split_p (rtx op0, rtx op1) { /* f, r */ if (REG_P (op0) - && (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == SImode)) + && (SUBREG_P (op1) + && (GET_MODE (SUBREG_REG (op1)) == SImode + || GET_MODE (SUBREG_REG (op1)) == DImode))) return true; /* r, f */ if (REG_P (op1) - && (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == SImode)) + && (SUBREG_P (op0) + && (GET_MODE (SUBREG_REG (op0)) == SImode + || GET_MODE (SUBREG_REG (op0)) == DImode))) return true; return false;
[gcc/devel/sh-lra] SH: A test case for the wrong-code with -mlra PR55212 c#248.
https://gcc.gnu.org/g:c5a639c84c15adf402d5b664184628ff809a5bb0 commit c5a639c84c15adf402d5b664184628ff809a5bb0 Author: Kaz Kojima Date: Fri Sep 20 18:36:22 2024 +0900 SH: A test case for the wrong-code with -mlra PR55212 c#248. gcc/testsuite/ChangeLog: * gcc.target/sh/pr55212-c248.c: New test. Diff: --- gcc/testsuite/gcc.target/sh/pr55212-c248.c | 31 ++ 1 file changed, 31 insertions(+) diff --git a/gcc/testsuite/gcc.target/sh/pr55212-c248.c b/gcc/testsuite/gcc.target/sh/pr55212-c248.c new file mode 100644 index ..94fd6afaab3c --- /dev/null +++ b/gcc/testsuite/gcc.target/sh/pr55212-c248.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -m4 -mlra -ffixed-r7 -ffixed-r8 -ffixed-r9 -ffixed-r10 -ffixed-r11 -ffixed-r12 -ffixed-r13" } */ +#include +#include + +typedef struct { int c[64]; } obj; +obj obj0; +obj obj1; + +void __attribute__ ((noinline)) +bar (int a, int b, int c, int d, obj *q) +{ + if (q->c[0] != 0x12345678 || q->c[1] != 0xdeadbeef) +abort (); +} + +void foo (obj *p) +{ + obj bobj; + bobj = *p; + bar (0, 0, 0, 0, &bobj); +} + +int +main () +{ + obj0.c[0] = 0x12345678; + obj0.c[1] = 0xdeadbeef; + foo (&obj0); + exit (0); +}
[gcc/devel/sh-lra] LRA: Take scratch as implicit unused output reloads
https://gcc.gnu.org/g:dbd192caf14916d0869c7123e03ad21ef7bfd65f commit dbd192caf14916d0869c7123e03ad21ef7bfd65f Author: Alexandre Oliva Date: Fri Sep 20 18:22:12 2024 +0900 LRA: Take scratch as implicit unused output reloads gcc/ChangeLog: * lra-constraints.cc (match_reload, process_alt_operands, curr_insn_transform): Take scratch as implicit unused output reloads. Diff: --- gcc/lra-constraints.cc | 14 ++ 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index fdcc07764a2e..4bc40ef532ba 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -1213,7 +1213,9 @@ match_reload (signed char out, signed char *ins, signed char *outs, return; /* See a comment for the input operand above. */ narrow_reload_pseudo_class (out_rtx, goal_class); - if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX) + if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX + && !ira_former_scratch_p (REGNO (SUBREG_P (out_rtx) + ? SUBREG_REG (out_rtx) : out_rtx))) { reg = SUBREG_P (out_rtx) ? SUBREG_REG (out_rtx) : out_rtx; start_sequence (); @@ -2946,7 +2948,8 @@ process_alt_operands (int only_alternative) objects with a REG_UNUSED note. */ if ((curr_static_id->operand[nop].type != OP_IN && no_output_reloads_p - && ! find_reg_note (curr_insn, REG_UNUSED, op)) + && ! find_reg_note (curr_insn, REG_UNUSED, op) + && ! ira_former_scratch_p (REGNO (operand_reg[nop]))) || (curr_static_id->operand[nop].type != OP_OUT && no_input_reloads_p && ! const_to_mem) || (this_alternative_matches >= 0 @@ -2956,7 +2959,9 @@ process_alt_operands (int only_alternative) [this_alternative_matches].type != OP_IN) && ! find_reg_note (curr_insn, REG_UNUSED, no_subreg_reg_operand - [this_alternative_matches]) + [this_alternative_matches]) + && ! 
(ira_former_scratch_p + (REGNO (operand_reg[nop]))) { if (lra_dump_file != NULL) fprintf @@ -4744,7 +4749,8 @@ curr_insn_transform (bool check_only_p) if (type != OP_IN && find_reg_note (curr_insn, REG_UNUSED, old) == NULL_RTX /* OLD can be an equivalent constant here. */ - && !CONSTANT_P (old)) + && !CONSTANT_P (old) + && !ira_former_scratch_p (REGNO (old))) { start_sequence (); lra_emit_move (type == OP_INOUT ? copy_rtx (old) : old, new_reg);
[gcc/devel/sh-lra] SH: try to workaround fp-reg related move insns
https://gcc.gnu.org/g:b924a79f5d3a8d32e4e65e62b63dc4432076af1d commit b924a79f5d3a8d32e4e65e62b63dc4432076af1d Author: Kaz Kojima Date: Fri Sep 20 18:17:31 2024 +0900 SH: try to workaround fp-reg related move insns LRA will try to satisfy the constraints in match_scratch for the memory displacements and it will make issues on this target. To mitigate the issue, split movsf_ie_ra into several new patterns to remove match_scratch. Also define a new sub-pattern of movdf for constant loads. gcc/ChangeLog: * gcc/config/sh/predicates.md (pc_relative_load_operand): New predicate. * gcc/config/sh/sh-protos.h (sh_movsf_ie_ra_split_p): Remove. (sh_movsf_ie_y_split_p): New proto. * gcc/config/sh/sh.cc: (sh_movsf_ie_ra_split_p): Remove. (sh_movsf_ie_y_split_p): New function. (broken_move): Take movsf_ie_ra into account for fldi cases. * gcc/config/sh/sh.md (movdf_i4_F_z): New insn pattern. (movdf): Use it. (movsf_ie_ra): Use define_insn instead of define_insn_and_split. (movsf_ie_F_z, movsf_ie_Q_z, movsf_ie_y): New insn pattern. (movsf): Use new patterns. (movsf-1): Don't split when operands[0] or operands[1] is fpul. (movdf_i4_F_z+7): New splitter. Diff: --- gcc/config/sh/predicates.md | 11 + gcc/config/sh/sh-protos.h | 2 +- gcc/config/sh/sh.cc | 30 - gcc/config/sh/sh.md | 103 +++- 4 files changed, 95 insertions(+), 51 deletions(-) diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md index da32329b4b54..3732cec9608b 100644 --- a/gcc/config/sh/predicates.md +++ b/gcc/config/sh/predicates.md @@ -485,6 +485,17 @@ && sh_legitimate_index_p (mode, XEXP (plus0_rtx, 1), TARGET_SH2A, true); }) +;; Returns true if OP is a pc relative load operand. +(define_predicate "pc_relative_load_operand" + (match_code "mem") +{ + if (GET_MODE (op) != QImode + && IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0))) +return true; + + return false; +}) + ;; Returns true if OP is a valid source operand for a logical operation. 
(define_predicate "logical_operand" (and (match_code "subreg,reg,const_int") diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h index 5e5bd0aff7e7..ffbe5164f08c 100644 --- a/gcc/config/sh/sh-protos.h +++ b/gcc/config/sh/sh-protos.h @@ -103,7 +103,7 @@ extern rtx sh_find_equiv_gbr_addr (rtx_insn* cur_insn, rtx mem); extern int sh_eval_treg_value (rtx op); extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op); extern int sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a); -extern bool sh_movsf_ie_ra_split_p (rtx, rtx, rtx); +extern bool sh_movsf_ie_y_split_p (rtx, rtx); extern void sh_expand_sym_label2reg (rtx, rtx, rtx, bool); /* Result value of sh_find_set_of_reg. */ diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc index c9222c3e6ac0..b2ba7488c5de 100644 --- a/gcc/config/sh/sh.cc +++ b/gcc/config/sh/sh.cc @@ -4832,6 +4832,7 @@ broken_move (rtx_insn *insn) we changed this to do a constant load. In that case we don't have an r0 clobber, hence we must use fldi. */ && (TARGET_FMOVD + || sh_lra_p () || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) == SCRATCH)) && REG_P (SET_DEST (pat)) @@ -11455,30 +11456,17 @@ sh_legitimize_address_displacement (rtx *offset1, rtx *offset2, return false; } -/* Return true if movsf insn should be splited with an additional - register. */ +/* Return true if movsf insn should be splited with fpul register. */ bool -sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2) +sh_movsf_ie_y_split_p (rtx op0, rtx op1) { - /* op0 == op1 */ - if (rtx_equal_p (op0, op1)) + /* f, r */ + if (REG_P (op0) + && (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == SImode)) return true; - /* fy, FQ, reg */ - if (GET_CODE (op1) == CONST_DOUBLE - && ! satisfies_constraint_G (op1) - && ! 
satisfies_constraint_H (op1) - && REG_P (op0) - && REG_P (op2)) -return true; - /* f, r, y */ - if (REG_P (op0) && FP_REGISTER_P (REGNO (op0)) - && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1)) - && REG_P (op2) && (REGNO (op2) == FPUL_REG)) -return true; - /* r, f, y */ - if (REG_P (op1) && FP_REGISTER_P (REGNO (op1)) - && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0)) - && REG_P (op2) && (REGNO (op2) == FPUL_REG)) + /* r, f */ + if (REG_P (op1) + && (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == SImode)) return true; return false; diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 6d93f5cb816b..c69eda36885f 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -5858,6 +5858,15 @@
[gcc/devel/sh-lra] SH: enable LRA by default
https://gcc.gnu.org/g:09210f927265fb4f198e904ba11297ac1e370554 commit 09210f927265fb4f198e904ba11297ac1e370554 Author: Oleg Endo Date: Wed Sep 25 09:47:25 2024 +0900 SH: enable LRA by default gcc/ChangeLog: PR target/55212 * config/sh/sh.opt (sh_lra_flag): Init to 1. Diff: --- gcc/config/sh/sh.opt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt index c44cfe70cb11..718dfb744ff5 100644 --- a/gcc/config/sh/sh.opt +++ b/gcc/config/sh/sh.opt @@ -299,5 +299,5 @@ Target Var(TARGET_FSRRA) Enable the use of the fsrra instruction. mlra -Target Var(sh_lra_flag) Init(0) Save +Target Var(sh_lra_flag) Init(1) Save Use LRA instead of reload (transitional).
[gcc r15-3831] tree-optimization/116819 - SLP with !STMT_VINFO_RELEVANT representative
https://gcc.gnu.org/g:cef29936c6b6773bff1939f94fb629760725bd82 commit r15-3831-gcef29936c6b6773bff1939f94fb629760725bd82 Author: Richard Biener Date: Tue Sep 24 13:47:04 2024 +0200 tree-optimization/116819 - SLP with !STMT_VINFO_RELEVANT representative Under some circumstances we can end up picking a not relevant stmt as representative of a SLP node. Instead of skipping stmt analysis and declaring success we have to either ignore relevancy throughout the code base or fail SLP operation verification. The following does the latter. PR tree-optimization/116819 * tree-vect-stmts.cc (vect_analyze_stmt): When the SLP representative isn't relevant signal failure instead of success. Diff: --- gcc/tree-vect-stmts.cc | 6 ++ 1 file changed, 6 insertions(+) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index c654e01a540f..1b351c5c66ec 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -13295,6 +13295,12 @@ vect_analyze_stmt (vec_info *vinfo, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n"); + if (node) + return opt_result::failure_at (stmt_info->stmt, + "not vectorized:" + " irrelevant stmt as SLP node %p " + "representative.\n", + (void *)node); return opt_result::success (); } }
[gcc r15-3832] Widening-Mul: Fix one ICE for SAT_SUB matching operand checking
https://gcc.gnu.org/g:de6fe690db32689ba5e5c6f551672a19e6cae5d4 commit r15-3832-gde6fe690db32689ba5e5c6f551672a19e6cae5d4 Author: Pan Li Date: Mon Sep 23 22:37:58 2024 +0800 Widening-Mul: Fix one ICE for SAT_SUB matching operand checking This patch would like to fix the following ICE for -O2 -m32 of x86_64. during RTL pass: expand JackMidiAsyncWaitQueue.cpp.cpp: In function 'void DequeueEvent(unsigned int)': JackMidiAsyncWaitQueue.cpp.cpp:3:6: internal compiler error: in expand_fn_using_insn, at internal-fn.cc:263 3 | void DequeueEvent(unsigned frame) { | ^~~~ 0x27b580d diagnostic_context::diagnostic_impl(rich_location*, diagnostic_metadata const*, diagnostic_option_id, char const*, __va_list_tag (*) [1], diagnostic_t) ???:0 0x27c4a3f internal_error(char const*, ...) ???:0 0x27b3994 fancy_abort(char const*, int, char const*) ???:0 0xf25ae5 expand_fn_using_insn(gcall*, insn_code, unsigned int, unsigned int) ???:0 0xf2a124 expand_direct_optab_fn(internal_fn, gcall*, optab_tag, unsigned int) ???:0 0xf2c87c expand_SAT_SUB(internal_fn, gcall*) ???:0 We allowed the operand convert when matching SAT_SUB in match.pd, to support the zip benchmark SAT_SUB pattern. Aka, (convert? (minus (convert1? @0) (convert1? @1))) for below sample code. void test (uint16_t *x, unsigned b, unsigned n) { unsigned a = 0; register uint16_t *p = x; do { a = *--p; *p = (uint16_t)(a >= b ? a - b : 0); // Truncate after .SAT_SUB } while (--n); } The pattern match for SAT_SUB itself may also act on below scalar sample code too. unsigned long long GetTimeFromFrames(int); unsigned long long GetMicroSeconds(); void DequeueEvent(unsigned frame) { long long frame_time = GetTimeFromFrames(frame); unsigned long long current_time = GetMicroSeconds(); DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time); } Aka: uint32_t a = (uint32_t)SAT_SUB(uint64_t, uint64_t); Then there will be a problem when ia32 or -m32 is given when compiling. 
Because we only check the lhs (aka uint32_t) type is supported by ifn instead of the operand (aka uint64_t). Mostly DImode is disabled for 32 bits target like ia32 or rv32gcv, and then trigger ICE when expanding. The below test suites are passed for this patch. * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. PR middle-end/116814 gcc/ChangeLog: * tree-ssa-math-opts.cc (build_saturation_binary_arith_call): Make ifn is_supported type check based on operand instead of lhs. gcc/testsuite/ChangeLog: * g++.dg/torture/pr116814-1.C: New test. Signed-off-by: Pan Li Diff: --- gcc/testsuite/g++.dg/torture/pr116814-1.C | 12 gcc/tree-ssa-math-opts.cc | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/g++.dg/torture/pr116814-1.C b/gcc/testsuite/g++.dg/torture/pr116814-1.C new file mode 100644 index ..dd6f29daa7c3 --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr116814-1.C @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ia32 } } } */ +/* { dg-options "-O2" } */ + +unsigned long long GetTimeFromFrames(int); +unsigned long long GetMicroSeconds(); + +void DequeueEvent(unsigned frame) { + long long frame_time = GetTimeFromFrames(frame); + unsigned long long current_time = GetMicroSeconds(); + + DequeueEvent(frame_time < current_time ? 0 : frame_time - current_time); +} diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc index d61668aacfc6..8c622514dbd9 100644 --- a/gcc/tree-ssa-math-opts.cc +++ b/gcc/tree-ssa-math-opts.cc @@ -4042,7 +4042,7 @@ build_saturation_binary_arith_call (gimple_stmt_iterator *gsi, gphi *phi, internal_fn fn, tree lhs, tree op_0, tree op_1) { - if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH)) + if (direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH)) { gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1); gimple_call_set_lhs (call, lhs);
[gcc r15-3821] Testsuite, darwin: account for macOS 15
https://gcc.gnu.org/g:7e560ffd7562cbd1a51ae6298c515b89ebed1363 commit r15-3821-g7e560ffd7562cbd1a51ae6298c515b89ebed1363 Author: Francois-Xavier Coudert Date: Tue Sep 24 09:59:56 2024 +0200 Testsuite, darwin: account for macOS 15 gcc/testsuite/ChangeLog: * gcc.dg/darwin-minversion-link.c: Account for macOS 15. Diff: --- gcc/testsuite/gcc.dg/darwin-minversion-link.c | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.dg/darwin-minversion-link.c b/gcc/testsuite/gcc.dg/darwin-minversion-link.c index a835e9d4648a..af712a1b8963 100644 --- a/gcc/testsuite/gcc.dg/darwin-minversion-link.c +++ b/gcc/testsuite/gcc.dg/darwin-minversion-link.c @@ -19,6 +19,7 @@ /* { dg-additional-options "-mmacosx-version-min=012.000.00 -DCHECK=12" { target *-*-darwin21* } } */ /* { dg-additional-options "-mmacosx-version-min=013.000.00 -DCHECK=13" { target *-*-darwin22* } } */ /* { dg-additional-options "-mmacosx-version-min=014.000.00 -DCHECK=14" { target *-*-darwin23* } } */ +/* { dg-additional-options "-mmacosx-version-min=015.000.00 -DCHECK=15" { target *-*-darwin24* } } */ int main ()
[gcc r15-3824] tree-optimization/114855 - more update_ssa speedup
https://gcc.gnu.org/g:9a795b3a5b6a0d8b4b4f38a66ab9782aabead92e commit r15-3824-g9a795b3a5b6a0d8b4b4f38a66ab9782aabead92e Author: Richard Biener Date: Tue Sep 24 12:53:11 2024 +0200 tree-optimization/114855 - more update_ssa speedup The following tackles another source of slow bitmap operations, namely populating blocks_to_update. We already have that in tree view around PHI insertion but also the initial population is slow. There's unfortunately a conditional list view requirement in between and the bitmap API doesn't allow opportunistic switching but rejects tree -> tree or list -> list transitions. So the following patch wraps the early population in a tree view section with possibly one redundant tree -> list -> tree view transition. This cuts tree SSA incremental from 228.25s (21%) to 65.05s (7%). PR tree-optimization/114855 * tree-into-ssa.cc (update_ssa): Use tree view for the initial population of blocks_to_update. Diff: --- gcc/tree-into-ssa.cc | 5 + 1 file changed, 5 insertions(+) diff --git a/gcc/tree-into-ssa.cc b/gcc/tree-into-ssa.cc index 1cce9d628090..fc61d47ca777 100644 --- a/gcc/tree-into-ssa.cc +++ b/gcc/tree-into-ssa.cc @@ -3445,6 +3445,7 @@ update_ssa (unsigned update_flags) blocks_with_phis_to_rewrite = BITMAP_ALLOC (NULL); bitmap_tree_view (blocks_with_phis_to_rewrite); blocks_to_update = BITMAP_ALLOC (NULL); + bitmap_tree_view (blocks_to_update); insert_phi_p = (update_flags != TODO_update_ssa_no_phi); @@ -3492,6 +3493,8 @@ update_ssa (unsigned update_flags) placement heuristics. */ prepare_block_for_update (start_bb, insert_phi_p); + bitmap_list_view (blocks_to_update); + tree name; if (flag_checking) @@ -3517,6 +3520,8 @@ update_ssa (unsigned update_flags) } else { + bitmap_list_view (blocks_to_update); + /* Otherwise, the entry block to the region is the nearest common dominator for the blocks in BLOCKS. */ start_bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
[gcc r15-3826] build: enable C++11 narrowing warnings
https://gcc.gnu.org/g:2249c3b459510f307b4f241ea4b14f6557035152 commit r15-3826-g2249c3b459510f307b4f241ea4b14f6557035152 Author: Jason Merrill Date: Thu Sep 19 15:50:19 2024 -0400 build: enable C++11 narrowing warnings We've been using -Wno-narrowing since gcc 4.7, but at this point narrowing diagnostics seem like a stable part of C++ and we should adjust. This patch changes -Wno-narrowing to -Wno-error=narrowing so that narrowing issues will still not break bootstrap, but we can see them. The rest of the patch fixes the narrowing warnings I see in an x86_64-pc-linux-gnu bootstrap. In most of the cases, by adjusting the types of various declarations so that we store the values in the same types we compute them in, which seems worthwhile anyway. This also allowed us to remove a few -Wsign-compare casts. gcc/ChangeLog: * configure.ac (CXX_WARNING_OPTS): Change -Wno-narrowing to -Wno-error=narrowing. * configure: Regenerate. * config/i386/i386.h (debugger_register_map) (debugger64_register_map) (svr4_debugger_register_map): Make unsigned. * config/i386/i386.cc: Likewise. * diagnostic-event-id.h (diagnostic_thread_id_t): Make int. * vec.h (vec::size): Make unsigned int. * ipa-modref.cc (escape_point::arg): Make unsigned. (modref_lattice::add_escape_point): Use eaf_flags_t. (update_escape_summary_1): Use eaf_flags_t, && for bool. * pair-fusion.cc (pair_fusion_bb_info::track_access): Make mem_size unsigned int. * pretty-print.cc (format_phase_2): Cast va_arg to char. * tree-ssa-loop-ch.cc (ch_base::copy_headers): Make nheaders unsigned, remove cast. * tree-ssa-structalias.cc (bitpos_of_field): Return unsigned. (push_fields_onto_fieldstack):Make offset unsigned, remove cast. * tree-vect-slp.cc (vect_prologue_cost_for_slp): Use nelt_limit. * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Make scale unsigned. (vectorizable_operation): Make ncopies unsigned. * rtl-ssa/member-fns.inl: Make num_accesses unsigned int. 
Diff: --- gcc/config/i386/i386.h | 6 +++--- gcc/diagnostic-event-id.h | 2 +- gcc/vec.h | 2 +- gcc/config/i386/i386.cc | 6 +++--- gcc/ipa-modref.cc | 13 +++-- gcc/pair-fusion.cc | 2 +- gcc/pretty-print.cc | 2 +- gcc/tree-ssa-loop-ch.cc | 6 +++--- gcc/tree-ssa-structalias.cc | 16 gcc/tree-vect-slp.cc| 4 ++-- gcc/tree-vect-stmts.cc | 7 --- gcc/configure.ac| 3 +-- gcc/rtl-ssa/member-fns.inl | 3 ++- gcc/configure | 7 +++ 14 files changed, 40 insertions(+), 39 deletions(-) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index c1ec92ffb150..751c250ddb31 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2091,9 +2091,9 @@ do { \ #define DEBUGGER_REGNO(N) \ (TARGET_64BIT ? debugger64_register_map[(N)] : debugger_register_map[(N)]) -extern int const debugger_register_map[FIRST_PSEUDO_REGISTER]; -extern int const debugger64_register_map[FIRST_PSEUDO_REGISTER]; -extern int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER]; +extern unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER]; +extern unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER]; +extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER]; /* Before the prologue, RA is at 0(%esp). */ #define INCOMING_RETURN_ADDR_RTX \ diff --git a/gcc/diagnostic-event-id.h b/gcc/diagnostic-event-id.h index 8237ba34df33..06985d23c124 100644 --- a/gcc/diagnostic-event-id.h +++ b/gcc/diagnostic-event-id.h @@ -67,6 +67,6 @@ typedef diagnostic_event_id_t *diagnostic_event_id_ptr; /* A type for compactly referring to a particular thread within a diagnostic_path. Typically there is just one thread per path, with id 0. */ -typedef unsigned diagnostic_thread_id_t; +typedef int diagnostic_thread_id_t; #endif /* ! 
GCC_DIAGNOSTIC_EVENT_ID_H */ diff --git a/gcc/vec.h b/gcc/vec.h index bc83827f644e..b13c4716428e 100644 --- a/gcc/vec.h +++ b/gcc/vec.h @@ -2409,7 +2409,7 @@ public: const value_type &back () const; const value_type &operator[] (unsigned int i) const; - size_t size () const { return m_size; } + unsigned size () const { return m_size; } size_t size_bytes () const { return m_size * sizeof (T); } bool empty () const { return m_size == 0; } diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 7dbae1d72e35..2f736a3b346e 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -181,7 +181,7 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER]
[gcc r15-3827] Fortran: Allow to nullify caf token when not in ultimate component. [PR101100]
https://gcc.gnu.org/g:0c0d79c783f5c289651d76aa697b48d4505e169d commit r15-3827-g0c0d79c783f5c289651d76aa697b48d4505e169d Author: Andre Vehreschild Date: Wed Sep 18 15:55:28 2024 +0200 Fortran: Allow to nullify caf token when not in ultimate component. [PR101100] gcc/fortran/ChangeLog: PR fortran/101100 * trans-expr.cc (trans_caf_token_assign): Take caf-token from decl for non ultimate coarray components. gcc/testsuite/ChangeLog: * gfortran.dg/coarray/proc_pointer_assign_1.f90: New test. Diff: --- gcc/fortran/trans-expr.cc | 8 +- .../gfortran.dg/coarray/proc_pointer_assign_1.f90 | 29 ++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 01cf3f0ff148..d0c7dfea903d 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -10359,7 +10359,13 @@ trans_caf_token_assign (gfc_se *lse, gfc_se *rse, gfc_expr *expr1, else if (lhs_attr.codimension) { lhs_tok = gfc_get_ultimate_alloc_ptr_comps_caf_token (lse, expr1); - lhs_tok = build_fold_indirect_ref (lhs_tok); + if (!lhs_tok) + { + lhs_tok = gfc_get_tree_for_caf_expr (expr1); + lhs_tok = GFC_TYPE_ARRAY_CAF_TOKEN (TREE_TYPE (lhs_tok)); + } + else + lhs_tok = build_fold_indirect_ref (lhs_tok); tmp = build2_loc (input_location, MODIFY_EXPR, void_type_node, lhs_tok, null_pointer_node); gfc_prepend_expr_to_block (&lse->post, tmp); diff --git a/gcc/testsuite/gfortran.dg/coarray/proc_pointer_assign_1.f90 b/gcc/testsuite/gfortran.dg/coarray/proc_pointer_assign_1.f90 new file mode 100644 index ..81f0c3b19cf1 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/coarray/proc_pointer_assign_1.f90 @@ -0,0 +1,29 @@ +!{ dg-do run } + +! Check that PR101100 is fixed. + +! Contributed by G. Steinmetz + +program p + type t +procedure(), pointer, nopass :: f + end type + + integer :: i = 0 + type(t) :: x[*] + + x%f => null() + if ( associated(x%f) ) stop 1 + + x%f => g + if (.not. 
associated(x%f) ) stop 2 + + call x%f() + if ( i /= 1 ) stop 3 + +contains + subroutine g() +i = 1 + end subroutine +end +
[gcc r15-3829] RISC-V: Add more vector-vector extract cases.
https://gcc.gnu.org/g:be50c763a07893416419b82538f259f43e0773d4 commit r15-3829-gbe50c763a07893416419b82538f259f43e0773d4 Author: Robin Dapp Date: Tue Sep 3 17:53:34 2024 +0200 RISC-V: Add more vector-vector extract cases. This adds a V16SI -> V4SI and related i.e. "quartering" vector-vector extract expander for VLS modes. It helps with spills in x264 that may cause a load-hit-store. gcc/ChangeLog: * config/riscv/autovec.md (vec_extract): Add quarter vec-vec extract. * config/riscv/vector-iterators.md: New iterators. Diff: --- gcc/config/riscv/autovec.md | 28 ++ gcc/config/riscv/vector-iterators.md | 184 +++ 2 files changed, 212 insertions(+) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index a53c44659f0f..836cdd4491f6 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -1488,6 +1488,34 @@ DONE; }) +(define_expand "vec_extract" + [(set (match_operand:0 "nonimmediate_operand") + (vec_select: + (match_operand:VLS_HAS_QUARTER 1 "register_operand") + (parallel +[(match_operand 2 "immediate_operand")])))] + "TARGET_VECTOR" +{ + int sz = GET_MODE_NUNITS (mode).to_constant (); + int part = INTVAL (operands[2]); + + rtx start = GEN_INT (part * sz); + rtx tmp = operands[1]; + + if (part != 0) +{ + tmp = gen_reg_rtx (mode); + + rtx ops[] = {tmp, operands[1], start}; + riscv_vector::emit_vlmax_insn + (code_for_pred_slide (UNSPEC_VSLIDEDOWN, mode), +riscv_vector::BINARY_OP, ops); +} + + emit_move_insn (operands[0], gen_lowpart (mode, tmp)); + DONE; +}) + ;; - ;; [FP] Binary operations ;; - diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index a00b5c3feddd..43325d1ba87a 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -4328,3 +4328,187 @@ (V256DF "v128df") (V512DF "v256df") ]) + +(define_mode_iterator VLS_HAS_QUARTER [ + (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)") + (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)") + (V16QI 
"riscv_vector::vls_mode_valid_p (V16QImode)") + (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)") + (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)") + (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)") + (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)") + (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)") + (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64") + (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64") + (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64") + (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32") + (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && TARGET_VECTOR_ELEN_FP_32") + (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64") + (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && TARGET_VECTOR_ELEN_FP_64") + (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64") + (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") + (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)") + (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64") + (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128") + (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256") + (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512") + (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024") + (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048") + (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 4096") + (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64") + (V64HI 
"riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 128") + (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 256") + (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 512") + (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 1024") + (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 2048") + (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 4096") + (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 128") + (V64SI "riscv
[gcc r15-3828] RISC-V: Fix effective target check.
https://gcc.gnu.org/g:e45537f56250f19cdf2ec09a744c6b11170c1001 commit r15-3828-ge45537f56250f19cdf2ec09a744c6b11170c1001 Author: Robin Dapp Date: Fri Aug 30 14:35:08 2024 +0200 RISC-V: Fix effective target check. The return value is inverted in check_effective_target_rvv_zvl256b_ok and check_effective_target_rvv_zvl512b_ok. Fix this and also just use the current march. gcc/testsuite/ChangeLog: * lib/target-supports.exp: Fix effective target check. Diff: --- gcc/testsuite/lib/target-supports.exp | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 8f2afe866c7c..05a63c4e9a55 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -1978,15 +1978,15 @@ proc check_effective_target_riscv_v { } { proc check_effective_target_rvv_zvl256b_ok { } { # Check if the target has a VLENB of 32. -set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] +set gcc_march [riscv_get_arch] return [check_runtime ${gcc_march}_exec { int main() { int vlenb = 0; asm ("csrr %0,vlenb" : "=r" (vlenb) : : ); if (vlenb == 32) - return 1; - return 0; + return 0; + return 1; } } "-march=${gcc_march}"] } @@ -1996,15 +1996,15 @@ proc check_effective_target_rvv_zvl256b_ok { } { proc check_effective_target_rvv_zvl512b_ok { } { # Check if the target has a VLENB of 64. -set gcc_march [regsub {[[:alnum:]]*} [riscv_get_arch] &v] +set gcc_march [riscv_get_arch] return [check_runtime ${gcc_march}_exec { int main() { int vlenb = 0; asm ("csrr %0,vlenb" : "=r" (vlenb) : : ); if (vlenb == 64) - return 1; - return 0; + return 0; + return 1; } } "-march=${gcc_march}"] }
[gcc r15-3830] RISC-V: testsuite: Fix SELECT_VL SLP fallout.
https://gcc.gnu.org/g:4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0 commit r15-3830-g4bd3ccae58d40fad6bd99ed08ef4e1e4d70fefd0 Author: Robin Dapp Date: Thu Sep 19 05:08:47 2024 -0700 RISC-V: testsuite: Fix SELECT_VL SLP fallout. This fixes asm-scan fallout from r15-3712-g5e3a4a01785e2d where we allow SLP with SELECT_VL. Assisted by sed and regtested on rv64gcv_zvfh_zvbb. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Expect length-controlled loop. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_
[gcc r15-3823] Alphabetize my entry in MAINTAINER's DCO list.
https://gcc.gnu.org/g:34366176046351250e1beb578664d926fbdd50c9 commit r15-3823-g34366176046351250e1beb578664d926fbdd50c9 Author: Aldy Hernandez Date: Tue Sep 24 11:40:52 2024 +0200 Alphabetize my entry in MAINTAINER's DCO list. ChangeLog: * MAINTAINERS: Move my entry in DCO list into alphabetical order. Diff: --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3b4cf9d20d80..47b5915e9f8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -917,6 +917,7 @@ information. Juergen Christ Robin Dapp Robin Dapp +Aldy Hernandez Michal Jires Matthias Kretz Prathamesh Kulkarni @@ -949,4 +950,3 @@ Jonathan Wakely Alexander Westbrooks Chung-Ju Wu Pengxuan Zheng -Aldy Hernandez
[gcc r15-3825] Fortran: Assign allocated caf-memory to scalar members [PR84870]
https://gcc.gnu.org/g:f5035d7d015ebd4a7f5df5831cfc1269f9567e06 commit r15-3825-gf5035d7d015ebd4a7f5df5831cfc1269f9567e06 Author: Andre Vehreschild Date: Thu Sep 19 15:09:52 2024 +0200 Fortran: Assign allocated caf-memory to scalar members [PR84870] Allocating a coarray required an array-descriptor. For scalars a temporary descriptor was created. Assigning the allocated memory from the temporary descriptor back to the scalar is now added. gcc/fortran/ChangeLog: PR fortran/84870 * trans-array.cc (duplicate_allocatable_coarray): For scalar allocatable components the memory allocated is now assigned to the component's pointer. gcc/testsuite/ChangeLog: * gfortran.dg/coarray/alloc_comp_10.f90: New test. Diff: --- gcc/fortran/trans-array.cc | 2 ++ .../gfortran.dg/coarray/alloc_comp_10.f90 | 24 ++ 2 files changed, 26 insertions(+) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 7d8274ab5718..0b8ef0b5e018 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -9505,6 +9505,7 @@ duplicate_allocatable_coarray (tree dest, tree dest_tok, tree src, tree type, gfc_build_addr_expr (NULL_TREE, dest_tok), NULL_TREE, NULL_TREE, NULL_TREE, GFC_CAF_COARRAY_ALLOC_REGISTER_ONLY); + gfc_add_modify (&block, dest, gfc_conv_descriptor_data_get (dummy_desc)); null_data = gfc_finish_block (&block); gfc_init_block (&block); @@ -9514,6 +9515,7 @@ duplicate_allocatable_coarray (tree dest, tree dest_tok, tree src, tree type, gfc_build_addr_expr (NULL_TREE, dest_tok), NULL_TREE, NULL_TREE, NULL_TREE, GFC_CAF_COARRAY_ALLOC); + gfc_add_modify (&block, dest, gfc_conv_descriptor_data_get (dummy_desc)); tmp = builtin_decl_explicit (BUILT_IN_MEMCPY); tmp = build_call_expr_loc (input_location, tmp, 3, dest, src, diff --git a/gcc/testsuite/gfortran.dg/coarray/alloc_comp_10.f90 b/gcc/testsuite/gfortran.dg/coarray/alloc_comp_10.f90 new file mode 100644 index ..a31d005498c1 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/coarray/alloc_comp_10.f90 @@ -0,0 +1,24 @@ 
+!{ dg-do run } + +! Check that copying of memory for allocated scalar is assigned +! to coarray object. + +! Contributed by G. Steinmetz + +program p + type t +integer, allocatable :: a + end type + type t2 +type(t), allocatable :: b + end type + type(t2) :: x, y[*] + + x%b = t(1) + y = x + y%b%a = 2 + + if (x%b%a /= 1) stop 1 + if (y%b%a /= 2) stop 2 +end +
[gcc r15-3822] OpenMP: Add support for 'self_maps' to the 'require' directive
https://gcc.gnu.org/g:b752eed3e3f2f27570ea89b7c2339468698472a8 commit r15-3822-gb752eed3e3f2f27570ea89b7c2339468698472a8 Author: Tobias Burnus Date: Tue Sep 24 10:53:59 2024 +0200 OpenMP: Add support for 'self_maps' to the 'require' directive 'self_maps' implies 'unified_shared_memory', except that the latter also permits that explicit maps copy data to device memory while self_maps does not. In GCC, currently, both are handled identically. gcc/c/ChangeLog: * c-parser.cc (c_parser_omp_requires): Handle self_maps clause. gcc/cp/ChangeLog: * parser.cc (cp_parser_omp_requires): Handle self_maps clause. gcc/fortran/ChangeLog: * gfortran.h (enum gfc_omp_requires_kind): Add OMP_REQ_SELF_MAPS. (gfc_namespace): Enlarge omp_requires bitfield. * module.cc (enum ab_attribute, attr_bits): Add AB_OMP_REQ_SELF_MAPS. (mio_symbol_attribute): Handle it. * openmp.cc (gfc_check_omp_requires, gfc_match_omp_requires): Handle self_maps clause. * parse.cc (gfc_parse_file): Handle self_maps clause. gcc/ChangeLog: * lto-cgraph.cc (output_offload_tables, omp_requires_to_name): Handle self_maps clause. * omp-general.cc (struct omp_ts_info, omp_context_selector_matches): Likewise for the associated trait. * omp-general.h (enum omp_requires): Add OMP_REQUIRES_SELF_MAPS. * omp-selectors.h (enum omp_ts_code): Add OMP_TRAIT_IMPLEMENTATION_SELF_MAPS. include/ChangeLog: * gomp-constants.h (GOMP_REQUIRES_SELF_MAPS): #define. libgomp/ChangeLog: * plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices): Accept self_maps clause. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices): Likewise. * libgomp.texi (TR13 Impl. Status): Set to 'Y'. * target.c (gomp_requires_to_name, GOMP_offload_register_ver, gomp_target_init): Handle self_maps clause. * testsuite/libgomp.fortran/self_maps.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/declare-variant-1.c: Add self_maps test. * c-c++-common/gomp/requires-4.c: Likewise. * gfortran.dg/gomp/declare-variant-3.f90: Likewise. 
* c-c++-common/gomp/requires-2.c: Update dg-error msg. * gfortran.dg/gomp/requires-2.f90: Likewise. * gfortran.dg/gomp/requires-self-maps-aux.f90: New. * gfortran.dg/gomp/requires-self-maps.f90: New. Diff: --- gcc/c/c-parser.cc | 3 ++ gcc/cp/parser.cc | 3 ++ gcc/fortran/gfortran.h | 10 +++-- gcc/fortran/module.cc | 11 - gcc/fortran/openmp.cc | 30 - gcc/fortran/parse.cc | 3 ++ gcc/lto-cgraph.cc | 4 ++ gcc/omp-general.cc | 21 ++ gcc/omp-general.h | 1 + gcc/omp-selectors.h| 1 + .../c-c++-common/gomp/declare-variant-1.c | 6 +++ gcc/testsuite/c-c++-common/gomp/requires-2.c | 2 +- gcc/testsuite/c-c++-common/gomp/requires-4.c | 1 + .../gfortran.dg/gomp/declare-variant-3.f90 | 3 ++ gcc/testsuite/gfortran.dg/gomp/requires-2.f90 | 2 +- .../gfortran.dg/gomp/requires-self-maps-aux.f90| 18 .../gfortran.dg/gomp/requires-self-maps.f90| 17 include/gomp-constants.h | 4 ++ libgomp/libgomp.texi | 2 +- libgomp/plugin/plugin-gcn.c| 4 +- libgomp/plugin/plugin-nvptx.c | 4 +- libgomp/target.c | 10 +++-- libgomp/testsuite/libgomp.fortran/self_maps.f90| 49 ++ 23 files changed, 185 insertions(+), 24 deletions(-) diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index aff5af174303..6a46577f5119 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -26208,6 +26208,8 @@ c_parser_omp_requires (c_parser *parser) this_req = OMP_REQUIRES_UNIFIED_ADDRESS; else if (!strcmp (p, "unified_shared_memory")) this_req = OMP_REQUIRES_UNIFIED_SHARED_MEMORY; + else if (!strcmp (p, "self_maps")) + this_req = OMP_REQUIRES_SELF_MAPS; else if (!strcmp (p, "dynamic_allocators")) this_req = OMP_REQUIRES_DYNAMIC_ALLOCATORS; else if (!strcmp (p, "reverse_offload")) @@ -26274,6 +26276,7 @@ c_parser_omp_requires (c_parser *parser) { error_at (cloc, "expected %, " "%, " + "%,
[gcc r15-3833] Simplify range-op shift mask generation
https://gcc.gnu.org/g:2114243c23ec5168cd54633cd4995ab123d92573 commit r15-3833-g2114243c23ec5168cd54633cd4995ab123d92573 Author: Richard Biener Date: Tue Sep 24 14:23:40 2024 +0200 Simplify range-op shift mask generation The following reduces the number of wide_ints built which show up in the profile for PR114855 as the largest remaining bit at -O1. * range-op.cc (operator_rshift::op1_range): Use wi::mask instead of shift and not. Diff: --- gcc/range-op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/range-op.cc b/gcc/range-op.cc index c576f6882219..3f5cf0834402 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -2863,7 +2863,7 @@ operator_rshift::op1_range (irange &r, // OP1 is anything from 0011 1000 to 0011 1111. That is, a // range from LHS<<3 plus a mask of the 3 bits we shifted on the // right hand side (0x07). - wide_int mask = wi::bit_not (wi::lshift (wi::minus_one (prec), shift)); + wide_int mask = wi::mask (shift.to_uhwi (), false, prec); int_range_max mask_range (type, wi::zero (TYPE_PRECISION (type)), mask);
[gcc r15-3834] OpenMP: Check additional restrictions on context selector properties
https://gcc.gnu.org/g:96246bff0bcd9e5cdec9e6cf811ee3db4997f6d4 commit r15-3834-g96246bff0bcd9e5cdec9e6cf811ee3db4997f6d4 Author: Sandra Loosemore Date: Fri Sep 6 20:58:13 2024 + OpenMP: Check additional restrictions on context selector properties TR13 (pre-6.0) of the OpenMP spec says: "Each trait-property may only be specified once in a trait selector other than those in the construct selector set." and "If trait-property any is specified in the kind trait-selector of the device selector set or the target_device selector sets, no other trait-property may be specified in the same selector set." These restrictions (with slightly different wording) date back to OpenMP 5.1, but were not in 5.0 which was the basis for GCC's implementation. This patch adds a diagnostic, adds new testcases, and fixes some older testcases that include now-invalid selectors. gcc/ChangeLog * omp-general.cc (omp_check_context_selector): Reject other properties in the same selector set with kind(any). Also reject duplicate name-list properties. gcc/testsuite/ChangeLog * c-c++-common/gomp/declare-variant-10.c: Fix broken tests. * c-c++-common/gomp/declare-variant-3.c: Likewise. * c-c++-common/gomp/declare-variant-9.c: Likewise. * c-c++-common/gomp/declare-variant-any.c: New. * c-c++-common/gomp/declare-variant-duplicates.c: New. * gfortran.dg/gomp/declare-variant-10.f90: Fix broken tests. * gfortran.dg/gomp/declare-variant-3.f90: Likewise. * gfortran.dg/gomp/declare-variant-9.f90: Likewise. * gfortran.dg/gomp/declare-variant-any.f90: New. * gfortran.dg/gomp/declare-variant-duplicates.f90: New. 
Diff: --- gcc/omp-general.cc | 64 +- .../c-c++-common/gomp/declare-variant-10.c | 4 +- .../c-c++-common/gomp/declare-variant-3.c | 10 +--- .../c-c++-common/gomp/declare-variant-9.c | 4 +- .../c-c++-common/gomp/declare-variant-any.c| 17 ++ .../c-c++-common/gomp/declare-variant-duplicates.c | 13 + .../gfortran.dg/gomp/declare-variant-10.f90| 4 +- .../gfortran.dg/gomp/declare-variant-3.f90 | 12 +--- .../gfortran.dg/gomp/declare-variant-9.f90 | 2 +- .../gfortran.dg/gomp/declare-variant-any.f90 | 40 ++ .../gomp/declare-variant-duplicates.f90| 30 ++ 11 files changed, 176 insertions(+), 24 deletions(-) diff --git a/gcc/omp-general.cc b/gcc/omp-general.cc index c93bf129e4d7..9713e684e830 100644 --- a/gcc/omp-general.cc +++ b/gcc/omp-general.cc @@ -1293,6 +1293,8 @@ omp_check_context_selector (location_t loc, tree ctx) for (tree tss = ctx; tss; tss = TREE_CHAIN (tss)) { enum omp_tss_code tss_code = OMP_TSS_CODE (tss); + bool saw_any_prop = false; + bool saw_other_prop = false; /* We can parse this, but not handle it yet. */ if (tss_code == OMP_TRAIT_SET_TARGET_DEVICE) @@ -1329,9 +1331,61 @@ omp_check_context_selector (location_t loc, tree ctx) else ts_seen[ts_code] = true; + /* If trait-property "any" is specified in the "kind" +trait-selector of the "device" selector set or the +"target_device" selector sets, no other trait-property +may be specified in the same selector set. */ + if (ts_code == OMP_TRAIT_DEVICE_KIND) + for (tree p = OMP_TS_PROPERTIES (ts); p; p = TREE_CHAIN (p)) + { + const char *prop = omp_context_name_list_prop (p); + if (!prop) + continue; + else if (strcmp (prop, "any") == 0) + saw_any_prop = true; + else + saw_other_prop = true; + } + /* It seems slightly suspicious that the spec's language covers +the device_num selector too, but + target_device={device_num(whatever),kind(any)} +is probably not terribly useful anyway. 
*/ + else if (ts_code == OMP_TRAIT_DEVICE_ARCH + || ts_code == OMP_TRAIT_DEVICE_ISA + || ts_code == OMP_TRAIT_DEVICE_NUM) + saw_other_prop = true; + + /* Each trait-property can only be specified once in a trait-selector +other than the construct selector set. FIXME: only handles +name-list properties, not clause-list properties, since the +"requires" selector is not implemented yet (PR 113067). */ + if (tss_code != OMP_TRAIT_SET_CONSTRUCT) + for (tree p1 = OMP_TS_PROPERTIES (ts); p1; p1 = TREE_CHAIN (p1)) + { + if (OMP_TP_NAME (p1) != OMP_TP_NAMELIST_NODE) + break; + const char *n1 = omp_context_name_list_prop (p1); +
[gcc r15-3842] Implement SUM and PRODUCT for unsigned.
https://gcc.gnu.org/g:5e918a4db9e4a5bdbeafec6881fa8b22a55d3789 commit r15-3842-g5e918a4db9e4a5bdbeafec6881fa8b22a55d3789 Author: Thomas Koenig Date: Tue Sep 24 21:59:10 2024 +0200 Implement SUM and PRODUCT for unsigned. gcc/fortran/ChangeLog: * gfortran.texi: Document SUM and PRODUCT. * iresolve.cc (resolve_transformational): New argument, use_integer, to translate calls to unsigned to calls to integer. (gfc_resolve_product): Use it. (gfc_resolve_sum): Use it. * simplify.cc (init_result_expr): Handle BT_UNSIGNED. libgfortran/ChangeLog: * generated/product_c10.c: Regenerated. * generated/product_c16.c: Regenerated. * generated/product_c17.c: Regenerated. * generated/product_c4.c: Regenerated. * generated/product_c8.c: Regenerated. * generated/product_i1.c: Regenerated. * generated/product_i16.c: Regenerated. * generated/product_i2.c: Regenerated. * generated/product_i4.c: Regenerated. * generated/product_i8.c: Regenerated. * generated/product_r10.c: Regenerated. * generated/product_r16.c: Regenerated. * generated/product_r17.c: Regenerated. * generated/product_r4.c: Regenerated. * generated/product_r8.c: Regenerated. * generated/sum_c10.c: Regenerated. * generated/sum_c16.c: Regenerated. * generated/sum_c17.c: Regenerated. * generated/sum_c4.c: Regenerated. * generated/sum_c8.c: Regenerated. * generated/sum_i1.c: Regenerated. * generated/sum_i16.c: Regenerated. * generated/sum_i2.c: Regenerated. * generated/sum_i4.c: Regenerated. * generated/sum_i8.c: Regenerated. * generated/sum_r10.c: Regenerated. * generated/sum_r16.c: Regenerated. * generated/sum_r17.c: Regenerated. * generated/sum_r4.c: Regenerated. * generated/sum_r8.c: Regenerated. * m4/ifunction.m4: Whitespace fix. * m4/product.m4: If type is integer, change to unsigned. * m4/sum.m4: Likewise. 
Diff: --- gcc/fortran/gfortran.texi | 2 +- gcc/fortran/iresolve.cc | 19 ++--- gcc/fortran/simplify.cc | 11 +++- libgfortran/generated/product_c10.c | 22 +++ libgfortran/generated/product_c16.c | 22 +++ libgfortran/generated/product_c17.c | 22 +++ libgfortran/generated/product_c4.c | 22 +++ libgfortran/generated/product_c8.c | 22 +++ libgfortran/generated/product_i1.c | 54 ++--- libgfortran/generated/product_i16.c | 54 ++--- libgfortran/generated/product_i2.c | 54 ++--- libgfortran/generated/product_i4.c | 54 ++--- libgfortran/generated/product_i8.c | 54 ++--- libgfortran/generated/product_r10.c | 22 +++ libgfortran/generated/product_r16.c | 22 +++ libgfortran/generated/product_r17.c | 22 +++ libgfortran/generated/product_r4.c | 22 +++ libgfortran/generated/product_r8.c | 22 +++ libgfortran/generated/sum_c10.c | 22 +++ libgfortran/generated/sum_c16.c | 22 +++ libgfortran/generated/sum_c17.c | 22 +++ libgfortran/generated/sum_c4.c | 22 +++ libgfortran/generated/sum_c8.c | 22 +++ libgfortran/generated/sum_i1.c | 54 ++--- libgfortran/generated/sum_i16.c | 54 ++--- libgfortran/generated/sum_i2.c | 54 ++--- libgfortran/generated/sum_i4.c | 54 ++--- libgfortran/generated/sum_i8.c | 54 ++--- libgfortran/generated/sum_r10.c | 22 +++ libgfortran/generated/sum_r16.c | 22 +++ libgfortran/generated/sum_r17.c | 22 +++ libgfortran/generated/sum_r4.c | 22 +++ libgfortran/generated/sum_r8.c | 22 +++ libgfortran/m4/ifunction.m4 | 22 +++ libgfortran/m4/product.m4 | 5 libgfortran/m4/sum.m4 | 5 36 files changed, 537 insertions(+), 507 deletions(-) diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index 829ab00c6653..e5ffe678 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -2788,7 +2788,7 @@ As of now, the following intrinsics take unsigned arguments: @item @code{MVBITS} @item @code{RANGE} @item @code{TRANSFER} -@item @code{MATMUL} and @code{DOT_PRODUCT} +@item @
[gcc r15-3835] [PATCH] RISC-V: Fix FIXED_REGISTERS comment missing return address register
https://gcc.gnu.org/g:79a3d3da8c8a5ad56547b7f7991577271ee5d1b2 commit r15-3835-g79a3d3da8c8a5ad56547b7f7991577271ee5d1b2 Author: Yixuan Chen Date: Tue Sep 24 09:15:00 2024 -0600 [PATCH] RISC-V: Fix FIXED_REGISTERS comment missing return address register gcc/ChangeLog: * config/riscv/riscv.h: Fix FIXED_REGISTERS comment missing return address register. Diff: --- gcc/config/riscv/riscv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index ead97867eb8e..3aecb43f8312 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -316,7 +316,7 @@ ASM_MISA_SPEC #define FIRST_PSEUDO_REGISTER 128 -/* x0, sp, gp, and tp are fixed. */ +/* x0, ra, sp, gp, and tp are fixed. */ #define FIXED_REGISTERS \ { /* General registers. */\
[gcc r15-3836] libgomp: with USM, init 'link' variables with host address
https://gcc.gnu.org/g:4cb20dc043cf70b8a1b4846c86599cc1ff9680d9 commit r15-3836-g4cb20dc043cf70b8a1b4846c86599cc1ff9680d9 Author: Tobias Burnus Date: Tue Sep 24 17:41:39 2024 +0200 libgomp: with USM, init 'link' variables with host address If requires unified_shared_memory or self_maps is set, make 'declare target link' variables to point initially to the host pointer. libgomp/ChangeLog: * target.c (gomp_load_image_to_device): For requires unified_shared_memory, update 'link' vars to point to the host var. * testsuite/libgomp.c-c++-common/target-link-3.c: New test. * testsuite/libgomp.c-c++-common/target-link-4.c: New test. Diff: --- libgomp/target.c | 6 +++ .../testsuite/libgomp.c-c++-common/target-link-3.c | 52 ++ .../testsuite/libgomp.c-c++-common/target-link-4.c | 52 ++ 3 files changed, 110 insertions(+) diff --git a/libgomp/target.c b/libgomp/target.c index 6918694a843b..cf62af61f3b6 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -2454,6 +2454,12 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, array->right = NULL; splay_tree_insert (&devicep->mem_map, array); array++; + + if (is_link_var + && (omp_requires_mask + & (GOMP_REQUIRES_UNIFIED_SHARED_MEMORY | GOMP_REQUIRES_SELF_MAPS))) + gomp_copy_host2dev (devicep, NULL, (void *) target_var->start, + &k->host_start, sizeof (void *), false, NULL); } /* Last entry is for the ICV struct variable; if absent, start = end = 0. 
*/ diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c b/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c new file mode 100644 index ..c707b38b7d46 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-3.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ + +#include +#include + +#pragma omp requires unified_shared_memory + +int A[3] = {-3,-4,-5}; +static int q = -401; +#pragma omp declare target link(A, q) + +#pragma omp begin declare target +void +f (uintptr_t *pA, uintptr_t *pq) +{ + if (A[0] != 1 || A[1] != 2 || A[2] != 3 || q != 42) +__builtin_abort (); + A[0] = 13; + A[1] = 14; + A[2] = 15; + q = 23; + *pA = (uintptr_t) &A[0]; + *pq = (uintptr_t) &q; +} +#pragma omp end declare target + +int +main () +{ + uintptr_t hpA = (uintptr_t) &A[0]; + uintptr_t hpq = (uintptr_t) &q; + uintptr_t dpA, dpq; + + A[0] = 1; + A[1] = 2; + A[2] = 3; + q = 42; + + for (int i = 0; i <= omp_get_num_devices (); ++i) +{ + #pragma omp target device(device_num: i) map(dpA, dpq) + f (&dpA, &dpq); + if (hpA != dpA || hpq != dpq) + __builtin_abort (); + if (A[0] != 13 || A[1] != 14 || A[2] != 15 || q != 23) + __builtin_abort (); + A[0] = 1; + A[1] = 2; + A[2] = 3; + q = 42; +} +} diff --git a/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c b/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c new file mode 100644 index ..785055e216d7 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/target-link-4.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ + +#include +#include + +#pragma omp requires self_maps + +int A[3] = {-3,-4,-5}; +static int q = -401; +#pragma omp declare target link(A, q) + +#pragma omp begin declare target +void +f (uintptr_t *pA, uintptr_t *pq) +{ + if (A[0] != 1 || A[1] != 2 || A[2] != 3 || q != 42) +__builtin_abort (); + A[0] = 13; + A[1] = 14; + A[2] = 15; + q = 23; + *pA = (uintptr_t) &A[0]; + *pq = (uintptr_t) &q; +} +#pragma omp end declare target + +int +main () +{ + uintptr_t hpA = (uintptr_t) &A[0]; + uintptr_t hpq = 
(uintptr_t) &q; + uintptr_t dpA, dpq; + + A[0] = 1; + A[1] = 2; + A[2] = 3; + q = 42; + + for (int i = 0; i <= omp_get_num_devices (); ++i) +{ + #pragma omp target device(device_num: i) map(dpA, dpq) + f (&dpA, &dpq); + if (hpA != dpA || hpq != dpq) + __builtin_abort (); + if (A[0] != 13 || A[1] != 14 || A[2] != 15 || q != 23) + __builtin_abort (); + A[0] = 1; + A[1] = 2; + A[2] = 3; + q = 42; +} +}
[gcc r15-3837] c++/contracts: ICE in build_contract_condition_function [PR116490]
https://gcc.gnu.org/g:ae57e52754ca6c96145a1b7504c2c7613a9e54d9 commit r15-3837-gae57e52754ca6c96145a1b7504c2c7613a9e54d9 Author: Nina Dinka Ranns Date: Fri Aug 30 13:49:07 2024 +0100 c++/contracts: ICE in build_contract_condition_function [PR116490] We currently do not expect comdat group of the guarded function to be set at the time of generating pre and post check function. However, in the case of an explicit instantiation, the guarded function has been added to a comdat group before generating contract check functions, which causes the observed ICE. Current assert removed and an additional check for comdat group of the guarded function added. With this change, the pre and post check functions get added to the same comdat group of the guarded function if the guarded function is already placed in a comdat group. PR c++/116490 gcc/cp/ChangeLog: * contracts.cc (build_contract_condition_function): added a check for comdat group of the guarded function. If set, the condition check function is added to the same comdat group. gcc/testsuite/ChangeLog: * g++.dg/contracts/pr116490.C: New test. Signed-off-by: Nina Ranns Diff: --- gcc/cp/contracts.cc | 12 +-- gcc/testsuite/g++.dg/contracts/pr116490.C | 56 +++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/gcc/cp/contracts.cc b/gcc/cp/contracts.cc index 39f0487ea367..4d2849a289a1 100644 --- a/gcc/cp/contracts.cc +++ b/gcc/cp/contracts.cc @@ -145,6 +145,7 @@ along with GCC; see the file COPYING3. If not see #include "print-tree.h" #include "stor-layout.h" #include "intl.h" +#include "cgraph.h" const int max_custom_roles = 32; static contract_role contract_build_roles[max_custom_roles] = { @@ -1458,9 +1459,14 @@ build_contract_condition_function (tree fndecl, bool pre) DECL_WEAK (fn) = false; DECL_COMDAT (fn) = false; - /* We haven't set the comdat group on the guarded function yet, we'll add -this to the same group in comdat_linkage later. 
*/ - gcc_assert (!DECL_ONE_ONLY (fndecl)); + /* We may not have set the comdat group on the guarded function yet. +If we haven't, we'll add this to the same group in comdat_linkage +later. Otherwise, add it to the same comdat group now. */ + if (DECL_ONE_ONLY (fndecl)) + { + symtab_node *n = symtab_node::get (fndecl); + cgraph_node::get_create (fn)->add_to_same_comdat_group (n); + } DECL_INTERFACE_KNOWN (fn) = true; } diff --git a/gcc/testsuite/g++.dg/contracts/pr116490.C b/gcc/testsuite/g++.dg/contracts/pr116490.C new file mode 100644 index ..e3a5d77bafd3 --- /dev/null +++ b/gcc/testsuite/g++.dg/contracts/pr116490.C @@ -0,0 +1,56 @@ +// ICE in explicit instantiation of a function with contracts +// { dg-do run } +// { dg-options "-std=c++20 -fcontracts -fcontract-continuation-mode=on" } + +template +void foo(T t) +[[pre : t == 9 ]] { +} + +template void foo(int i); + + +template +struct templateS +{ + void fooS(T t) + [[pre : t == 9 ]] { + } +}; + +template struct templateS; + + +struct S { + + template + void fooS(T t) + [[pre : t == 9 ]] { + } + + template + static void fooStatic(T t) + [[pre : t == 9 ]] { + } +}; + +template void S::fooS(int i); + +template void S::fooStatic(int i); + +int main() +{ + foo(3); + + templateS ts; + ts.fooS(3); + + S s; + s.fooS(3); + S::fooStatic(3); +} + +// { dg-output "contract violation in function foo at .* t == 9.*(\n|\r\n|\r)" } +// { dg-output "contract violation in function templateS::fooS at .* t == 9.*(\n|\r\n|\r)" } +// { dg-output "contract violation in function S::fooS at .* t == 9.*(\n|\r\n|\r)" } +// { dg-output "contract violation in function S::fooStatic at .* t == 9.*(\n|\r\n|\r)" }
[gcc r15-3844] Implement IANY, IALL and IPARITY for unsigned.
https://gcc.gnu.org/g:fbeb1a965d85492e2f6f3adf913b90d005151b00 commit r15-3844-gfbeb1a965d85492e2f6f3adf913b90d005151b00 Author: Thomas Koenig Date: Tue Sep 24 22:53:59 2024 +0200 Implement IANY, IALL and IPARITY for unsigned. gcc/fortran/ChangeLog: * check.cc (gfc_check_transf_bit_intrins): Handle unsigned. * gfortran.texi: Document IANY, IALL and IPARITY for unsigned. * iresolve.cc (gfc_resolve_iall): Set flag to use integer if type is BT_UNSIGNED. (gfc_resolve_iany): Likewise. (gfc_resolve_iparity): Likewise. * simplify.cc (do_bit_and): Adjust asserts for BT_UNSIGNED. (do_bit_ior): Likewise. (do_bit_xor): Likewise. gcc/testsuite/ChangeLog: * gfortran.dg/unsigned_29.f90: New test. Diff: --- gcc/fortran/check.cc | 14 - gcc/fortran/gfortran.texi | 1 + gcc/fortran/iresolve.cc | 6 ++-- gcc/fortran/simplify.cc | 51 +-- gcc/testsuite/gfortran.dg/unsigned_29.f90 | 40 5 files changed, 99 insertions(+), 13 deletions(-) diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index 7c630dd73f43..533c9d7d3438 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -4430,7 +4430,19 @@ gfc_check_mask (gfc_expr *i, gfc_expr *kind) bool gfc_check_transf_bit_intrins (gfc_actual_arglist *ap) { - if (ap->expr->ts.type != BT_INTEGER) + bt type = ap->expr->ts.type; + + if (flag_unsigned) +{ + if (type != BT_INTEGER && type != BT_UNSIGNED) + { + gfc_error ("%qs argument of %qs intrinsic at %L must be INTEGER " +"or UNSIGNED", gfc_current_intrinsic_arg[0]->name, +gfc_current_intrinsic, &ap->expr->where); + return false; + } +} + else if (ap->expr->ts.type != BT_INTEGER) { gfc_error ("%qs argument of %qs intrinsic at %L must be INTEGER", gfc_current_intrinsic_arg[0]->name, diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index e5ffe678..3eb8039c09fd 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -2789,6 +2789,7 @@ As of now, the following intrinsics take unsigned arguments: @item @code{RANGE} @item @code{TRANSFER} @item @code{SUM}, 
@code{PRODUCT}, @code{MATMUL} and @code{DOT_PRODUCT} +@item @code{IANY}, @code{IALL} and @code{IPARITY} @end itemize This list will grow in the near future. @c - diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc index b4c9a636260e..b281ab740b1d 100644 --- a/gcc/fortran/iresolve.cc +++ b/gcc/fortran/iresolve.cc @@ -1195,7 +1195,7 @@ gfc_resolve_hypot (gfc_expr *f, gfc_expr *x, gfc_expr *y ATTRIBUTE_UNUSED) void gfc_resolve_iall (gfc_expr *f, gfc_expr *array, gfc_expr *dim, gfc_expr *mask) { - resolve_transformational ("iall", f, array, dim, mask); + resolve_transformational ("iall", f, array, dim, mask, true); } @@ -1223,7 +1223,7 @@ gfc_resolve_iand (gfc_expr *f, gfc_expr *i, gfc_expr *j) void gfc_resolve_iany (gfc_expr *f, gfc_expr *array, gfc_expr *dim, gfc_expr *mask) { - resolve_transformational ("iany", f, array, dim, mask); + resolve_transformational ("iany", f, array, dim, mask, true); } @@ -1429,7 +1429,7 @@ gfc_resolve_long (gfc_expr *f, gfc_expr *a) void gfc_resolve_iparity (gfc_expr *f, gfc_expr *array, gfc_expr *dim, gfc_expr *mask) { - resolve_transformational ("iparity", f, array, dim, mask); + resolve_transformational ("iparity", f, array, dim, mask, true); } diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc index e5681c42a48c..bd2f6485c95e 100644 --- a/gcc/fortran/simplify.cc +++ b/gcc/fortran/simplify.cc @@ -3401,9 +3401,20 @@ gfc_simplify_iachar (gfc_expr *e, gfc_expr *kind) static gfc_expr * do_bit_and (gfc_expr *result, gfc_expr *e) { - gcc_assert (e->ts.type == BT_INTEGER && e->expr_type == EXPR_CONSTANT); - gcc_assert (result->ts.type == BT_INTEGER - && result->expr_type == EXPR_CONSTANT); + if (flag_unsigned) +{ + gcc_assert ((e->ts.type == BT_INTEGER || e->ts.type == BT_UNSIGNED) + && e->expr_type == EXPR_CONSTANT); + gcc_assert ((result->ts.type == BT_INTEGER + || result->ts.type == BT_UNSIGNED) + && result->expr_type == EXPR_CONSTANT); +} + else +{ + gcc_assert (e->ts.type == BT_INTEGER && e->expr_type == 
EXPR_CONSTANT); + gcc_assert (result->ts.type == BT_INTEGER + && result->expr_type == EXPR_CONSTANT); +} mpz_and (result->value.integer, result->value.integer, e->value.integer); return result; @@ -3420,9 +3431,20 @@ gfc_simplify_iall (gfc_expr *array, gfc_expr *dim, gfc_expr *mask) static gfc_expr * do_bit_ior (gfc_expr *result, gfc_expr *e) { -
[gcc r15-3845] Add random numbers and fix some bugs.
https://gcc.gnu.org/g:291e20e86090e5940e2bd862ec83c7d5e0715dd5 commit r15-3845-g291e20e86090e5940e2bd862ec83c7d5e0715dd5 Author: Thomas Koenig Date: Tue Sep 24 22:57:42 2024 +0200 Add random numbers and fix some bugs. This patch adds random number support for UNSIGNED, plus fixes two bugs, with array I/O where the type used to be set to BT_INTEGER, and for division with the divisor being a constant. gcc/fortran/ChangeLog: * check.cc (gfc_check_random_number): Adjust for unsigned. * iresolve.cc (gfc_resolve_random_number): Handle unsigned. * trans-expr.cc (gfc_conv_expr_op): Handle BT_UNSIGNED for divide. * trans-types.cc (gfc_get_dtype_rank_type): Handle BT_UNSIGNED. * gfortran.texi: Add RANDOM_NUMBER for UNSIGNED. libgfortran/ChangeLog: * gfortran.map: Add _gfortran_random_m1, _gfortran_random_m2, _gfortran_random_m4, _gfortran_random_m8 and _gfortran_random_m16. * intrinsics/random.c (random_m1): New function. (random_m2): New function. (random_m4): New function. (random_m8): New function. (random_m16): New function. (arandom_m1): New function. (arandom_m2): New function. (arandom_m4): New function. (arandom_m8): New function. (arandom_m16): New function. gcc/testsuite/ChangeLog: * gfortran.dg/unsigned_30.f90: New test. 
Diff: --- gcc/fortran/check.cc | 10 +- gcc/fortran/gfortran.texi | 1 + gcc/fortran/iresolve.cc | 6 +- gcc/fortran/trans-expr.cc | 4 +- gcc/fortran/trans-types.cc| 7 +- gcc/testsuite/gfortran.dg/unsigned_30.f90 | 63 + libgfortran/gfortran.map | 10 + libgfortran/intrinsics/random.c | 440 ++ 8 files changed, 534 insertions(+), 7 deletions(-) diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index 533c9d7d3438..1851cfb8d4ad 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -7007,8 +7007,14 @@ gfc_check_random_init (gfc_expr *repeatable, gfc_expr *image_distinct) bool gfc_check_random_number (gfc_expr *harvest) { - if (!type_check (harvest, 0, BT_REAL)) -return false; + if (flag_unsigned) +{ + if (!type_check2 (harvest, 0, BT_REAL, BT_UNSIGNED)) + return false; +} + else +if (!type_check (harvest, 0, BT_REAL)) + return false; if (!variable_check (harvest, 0, false)) return false; diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index 3eb8039c09fd..a5ebadff3bb8 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -2790,6 +2790,7 @@ As of now, the following intrinsics take unsigned arguments: @item @code{TRANSFER} @item @code{SUM}, @code{PRODUCT}, @code{MATMUL} and @code{DOT_PRODUCT} @item @code{IANY}, @code{IALL} and @code{IPARITY} +@item @code{RANDOM_NUMBER}. @end itemize This list will grow in the near future. 
@c - diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc index b281ab740b1d..5a1e0a6ed1d3 100644 --- a/gcc/fortran/iresolve.cc +++ b/gcc/fortran/iresolve.cc @@ -3452,12 +3452,14 @@ gfc_resolve_random_number (gfc_code *c) { const char *name; int kind; + char type; kind = gfc_type_abi_kind (&c->ext.actual->expr->ts); + type = gfc_type_letter (c->ext.actual->expr->ts.type); if (c->ext.actual->expr->rank == 0) -name = gfc_get_string (PREFIX ("random_r%d"), kind); +name = gfc_get_string (PREFIX ("random_%c%d"), type, kind); else -name = gfc_get_string (PREFIX ("arandom_r%d"), kind); +name = gfc_get_string (PREFIX ("arandom_%c%d"), type, kind); c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); } diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index d0c7dfea903d..e4c491a98486 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -3973,9 +3973,9 @@ gfc_conv_expr_op (gfc_se * se, gfc_expr * expr) case INTRINSIC_DIVIDE: /* If expr is a real or complex expr, use an RDIV_EXPR. If op1 is - an integer, we must round towards zero, so we use a +an integer or unsigned, we must round towards zero, so we use a TRUNC_DIV_EXPR. */ - if (expr->ts.type == BT_INTEGER) + if (expr->ts.type == BT_INTEGER || expr->ts.type == BT_UNSIGNED) code = TRUNC_DIV_EXPR; else code = RDIV_EXPR; diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc index 96ef8b49fbef..05e64b3a8e1b 100644 --- a/gcc/fortran/trans-types.cc +++ b/gcc/fortran/trans-types.cc @@ -1651,7 +1651,12 @@ gfc_get_dtype_rank_type (int rank, tree etype) && TYPE_STRING_FLAG (ptype)) n = BT_CHARACTER; else - n = BT_INTEGER;
[gcc r15-3820] tree-optimization/115372 - failed store-lanes in some cases
https://gcc.gnu.org/g:f594008dcced0ebb86908f3d7602fcf943e05bc7 commit r15-3820-gf594008dcced0ebb86908f3d7602fcf943e05bc7 Author: Richard Biener Date: Fri Sep 20 15:07:24 2024 +0200 tree-optimization/115372 - failed store-lanes in some cases The gcc.target/riscv/rvv/autovec/struct/struct_vect-4.c testcase shows that we sometimes fail to use store-lanes even though it should be profitable. We're currently relying on vect_slp_prefer_store_lanes_p at the point we run into the first SLP discovery mismatch with obviously limited information. For the case at hand we have 3, 5 or 7 lanes of VnDImode [2, 2] vectors with the first mismatch at lane 2 so the new group size is 1. The heuristic says that might be an OK split given the rest is a multiple of the vector lanes. Now we continue discovery but in the end mismatches result in uniformly single-lane SLP instances which we can handle via interleaving but of course are prime candidates for store-lanes. The following patch re-assesses with the extra knowledge, now just relying on whether the target supports store-lanes for the given group size. PR tree-optimization/115372 * tree-vect-slp.cc (vect_build_slp_instance): Compute the uniform, if any, number of lanes of the RHS sub-graphs feeding the store and if uniformly one, use store-lanes if the target supports that. Diff: --- gcc/tree-vect-slp.cc | 18 ++ 1 file changed, 18 insertions(+) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index ab49bb0e7ee1..f5b47e430e31 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -3957,6 +3957,7 @@ vect_build_slp_instance (vec_info *vinfo, /* Calculate the unrolling factor based on the smallest type. 
*/ poly_uint64 unrolling_factor = 1; + unsigned int rhs_common_nlanes = 0; unsigned int start = 0, end = i; while (start < group_size) { @@ -3978,6 +3979,10 @@ vect_build_slp_instance (vec_info *vinfo, calculate_unrolling_factor (max_nunits, end - start)); rhs_nodes.safe_push (node); + if (start == 0) + rhs_common_nlanes = SLP_TREE_LANES (node); + else if (rhs_common_nlanes != SLP_TREE_LANES (node)) + rhs_common_nlanes = 0; start = end; if (want_store_lanes || force_single_lane) end = start + 1; @@ -4015,6 +4020,19 @@ vect_build_slp_instance (vec_info *vinfo, } } + /* Now re-assess whether we want store lanes in case the +discovery ended up producing all single-lane RHSs. */ + if (rhs_common_nlanes == 1 + && ! STMT_VINFO_GATHER_SCATTER_P (stmt_info) + && ! STMT_VINFO_STRIDED_P (stmt_info) + && compare_step_with_zero (vinfo, stmt_info) > 0 + && (vect_store_lanes_supported (SLP_TREE_VECTYPE (rhs_nodes[0]), + group_size, + SLP_TREE_CHILDREN + (rhs_nodes[0]).length () != 1) + != IFN_LAST)) + want_store_lanes = true; + /* Now we assume we can build the root SLP node from all stores. */ if (want_store_lanes) {
[gcc r15-3849] RISC-V: Refine the testcase of vector SAT_ADD
https://gcc.gnu.org/g:043d607cc45a9f45016ab1bf9870429f6d9fbaf5 commit r15-3849-g043d607cc45a9f45016ab1bf9870429f6d9fbaf5 Author: Pan Li Date: Wed Sep 25 11:41:22 2024 +0800 RISC-V: Refine the testcase of vector SAT_ADD Use scan-assembler-times to check for the vsadd insn instead of checking the function body, as we only care about whether we can generate the fixed point insn vsadd. The tests below passed for this patch. * The rv64gcv fully regression test. It is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48 hours. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c: Remove func body check and take scan asm times instead. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-8.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add_imm-9.c: Ditto. Signed-off-by: Pan Li Diff: --- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-1.c| 13 ++--- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-2.c| 13 ++--- .../gcc.target/riscv/rvv/autovec/binop/vec_sat_s_add-3.c| 13 ++--- .../gcc.target/riscv/rvv/autovec
[gcc r15-3850] RISC-V: Refine the testcase of vector SAT_SUB
https://gcc.gnu.org/g:32bcca3e58e67c5f49c5b429da85910e03d21bef commit r15-3850-g32bcca3e58e67c5f49c5b429da85910e03d21bef Author: Pan Li Date: Wed Sep 25 13:55:22 2024 +0800 RISC-V: Refine the testcase of vector SAT_SUB Use scan-assembler-times to check for the vssub insn instead of checking the function body, as we only care about whether we can generate the fixed point insn vssub. The tests below passed for this patch. * The rv64gcv fully regression test. It is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48 hours. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-1.c: Remove func body check and take scan asm times instead. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-28.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-33.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-34.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-35.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-36.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-37.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-38.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-39.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-40.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_trunc-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_sub_zip.c: Ditto. 
Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/binop/vec_sat_u_sub-1.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-10.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-11.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-12.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-13.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-14.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-15.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-16.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-17.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-18.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-19.c | 13 ++--- .../riscv/rvv/autovec/binop/vec_sat_u_sub-2.c | 13 ++--- .../riscv/rvv/a
[gcc r15-3851] RISC-V: Refine the testcase of vector SAT_TRUNC
https://gcc.gnu.org/g:5b652b0132334e509c730311ac625c1dbe287282 commit r15-3851-g5b652b0132334e509c730311ac625c1dbe287282 Author: Pan Li Date: Wed Sep 25 14:37:46 2024 +0800 RISC-V: Refine the testcase of vector SAT_TRUNC Use scan-assembler-times to check for the vnclip insn instead of checking the function body, as we only care about whether we can generate the fixed point insn vnclip. The tests below passed for this patch. * The rv64gcv fully regression test. It is a test-only patch and obvious up to a point; I will commit it directly if there are no comments in the next 48 hours. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c: Remove func body check and take scan asm times instead. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c: Ditto. 
* gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c: Ditto. * gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c: Ditto. Signed-off-by: Pan Li Diff: --- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-10.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-11.c | 16 +--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-12.c | 12 +--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-13.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-14.c | 17 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-15.c | 21 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-16.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-17.c | 17 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-18.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-19.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-2.c | 17 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-20.c | 17 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-21.c | 21 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-22.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-23.c | 17 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-24.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-3.c | 21 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-4.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-5.c | 17 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-6.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-7.c | 13 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-8.c | 17 ++--- .../riscv/rvv/autovec/unop/vec_sat_u_trunc-9.c | 21 ++--- 24 files changed, 46 insertions(+), 328 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c index 186005733ecd..3d29d26abff1 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c +++ 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/vec_sat_u_trunc-1.c @@ -1,18 +1,9 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */ -/* { dg-skip-if "" { *-*-* } { "-flto" } } */ -/* { dg-