[gcc/devel/omp/gcc-14] OpenACC 2.7: Connect readonly modifier to points-to analysis
https://gcc.gnu.org/g:87dcdc3deb6ab6b1bc09aacbfb828182afe79f74 commit 87dcdc3deb6ab6b1bc09aacbfb828182afe79f74 Author: Chung-Lin Tang Date: Mon Nov 11 17:16:26 2024 + OpenACC 2.7: Connect readonly modifier to points-to analysis This patch links the readonly modifier to points-to analysis. In front-ends, firstprivate pointer clauses are marked with OMP_CLAUSE_MAP_POINTS_TO_READONLY set true, and later during lowering the receiver side read of pointer has VAR_POINTS_TO_READONLY set true, which later directs SSA_NAME_POINTS_TO_READONLY_MEMORY set to true during SSA conversion. SSA_NAME_POINTS_TO_READONLY_MEMORY is an already existing flag connected with alias oracle routines in tree-ssa-alias.cc, thus making the readonly-modifier effective in hinting points-to analysis. Currently have one testcase c-c++-common/goacc/readonly-2.c where we can demonstrate 'readonly' can avoid a clobber by function call. This patch is ported from upstream submission: https://gcc.gnu.org/pipermail/gcc-patches/2024-April/648728.html gcc/c-family/ChangeLog: * c-omp.cc (c_omp_address_inspector::expand_array_base): Set OMP_CLAUSE_MAP_POINTS_TO_READONLY on pointer clause. (c_omp_address_inspector::expand_component_selector): Likewise. gcc/fortran/ChangeLog: * trans-openmp.cc (gfc_trans_omp_array_section): Set OMP_CLAUSE_MAP_POINTS_TO_READONLY on pointer clause. gcc/ChangeLog: * gimple-expr.cc (copy_var_decl): Copy VAR_POINTS_TO_READONLY for VAR_DECLs. * omp-low.cc (lower_omp_target): Set VAR_POINTS_TO_READONLY for variables of receiver refs. * tree-pretty-print.cc (dump_omp_clause): Print OMP_CLAUSE_MAP_POINTS_TO_READONLY. (dump_generic_node): Print SSA_NAME_POINTS_TO_READONLY_MEMORY. * tree-ssanames.cc (make_ssa_name_fn): Set SSA_NAME_POINTS_TO_READONLY_MEMORY if DECL_POINTS_TO_READONLY is set. * tree.h (OMP_CLAUSE_MAP_POINTS_TO_READONLY): New macro. (VAR_POINTS_TO_READONLY): New macro. gcc/testsuite/ChangeLog: * c-c++-common/goacc/readonly-1.c: Adjust testcase. 
* c-c++-common/goacc/readonly-2.c: New testcase. * gfortran.dg/goacc/readonly-1.f90: Adjust testcase. Diff: --- gcc/c-family/c-omp.cc | 4 gcc/fortran/trans-openmp.cc| 2 ++ gcc/gimple-expr.cc | 2 ++ gcc/omp-low.cc | 2 ++ gcc/testsuite/c-c++-common/goacc/readonly-1.c | 20 ++-- gcc/testsuite/c-c++-common/goacc/readonly-2.c | 16 gcc/testsuite/gfortran.dg/goacc/readonly-1.f90 | 20 ++-- gcc/testsuite/gfortran.dg/pr67170.f90 | 2 +- gcc/tree-pretty-print.cc | 4 gcc/tree-ssanames.cc | 3 +++ gcc/tree.h | 11 +++ 11 files changed, 65 insertions(+), 21 deletions(-) diff --git a/gcc/c-family/c-omp.cc b/gcc/c-family/c-omp.cc index 6b10082cc939..479b69741a54 100644 --- a/gcc/c-family/c-omp.cc +++ b/gcc/c-family/c-omp.cc @@ -4135,6 +4135,8 @@ c_omp_address_inspector::expand_array_base (tree *pc, } else if (c2) { + if (OMP_CLAUSE_MAP_READONLY (c)) + OMP_CLAUSE_MAP_POINTS_TO_READONLY (c2) = 1; OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); OMP_CLAUSE_CHAIN (c) = c2; if (implicit_p) @@ -4324,6 +4326,8 @@ c_omp_address_inspector::expand_component_selector (tree *pc, } else if (c2) { + if (OMP_CLAUSE_MAP_READONLY (c)) + OMP_CLAUSE_MAP_POINTS_TO_READONLY (c2) = 1; OMP_CLAUSE_CHAIN (c2) = OMP_CLAUSE_CHAIN (c); OMP_CLAUSE_CHAIN (c) = c2; pc = &OMP_CLAUSE_CHAIN (c); diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index ff7a8d19c003..e6ba7b5839e3 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -4032,6 +4032,8 @@ gfc_trans_omp_array_section (stmtblock_t *block, toc_directive cd, ptr2 = fold_convert (ptrdiff_type_node, ptr2); OMP_CLAUSE_SIZE (node3) = fold_build2 (MINUS_EXPR, ptrdiff_type_node, ptr, ptr2); + if (n->u.map.readonly) +OMP_CLAUSE_MAP_POINTS_TO_READONLY (node3) = 1; } /* CLAUSES is a list of clauses resulting from an "omp declare mapper" diff --git a/gcc/gimple-expr.cc b/gcc/gimple-expr.cc index f8d7185530c6..22722249a19e 100644 --- a/gcc/gimple-expr.cc +++ b/gcc/gimple-expr.cc @@ -385,6 +385,8 @@ copy_var_decl (tree var, tree 
name, tree type) DECL_CONTEXT (copy) = DECL_CONTEXT (var); TREE_USED (copy) = 1; DECL_SEEN_IN_BIND_EXPR_P (copy) = 1; + if (VAR_P (var)) +VAR_POINTS_TO_READONLY (copy) = VAR_POINTS_TO_READONLY (var); DECL_A
[gcc r15-5111] opts: fix narrowing warning
https://gcc.gnu.org/g:e3de2962cbac3fc7f1d7c6d0205a62e68f3f4b3a commit r15-5111-ge3de2962cbac3fc7f1d7c6d0205a62e68f3f4b3a Author: Jason Merrill Date: Sun Nov 10 21:42:48 2024 -0500 opts: fix narrowing warning The init-list initialization of cl_deferred_option p had a couple of narrowing warnings: first of opt_index from int to size_t and then of value from HOST_WIDE_INT to int. Fixed by making the types more consistent. gcc/ChangeLog: * opts.h (cl_deferred_option::value): Change to HOST_WIDE_INT. (set_option): Change opt_index parm to size_t. * opts-common.cc (set_option): Likewise. Diff: --- gcc/opts.h | 4 ++-- gcc/opts-common.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/opts.h b/gcc/opts.h index 3fc57773f4ee..25dd42b0796e 100644 --- a/gcc/opts.h +++ b/gcc/opts.h @@ -300,7 +300,7 @@ struct cl_deferred_option options. */ size_t opt_index; const char *arg; - int value; + HOST_WIDE_INT value; }; /* Structure describing a single option-handling callback. */ @@ -398,7 +398,7 @@ extern bool get_option_state (struct gcc_options *, int, struct cl_option_state *); extern void set_option (struct gcc_options *opts, struct gcc_options *opts_set, - int opt_index, HOST_WIDE_INT value, const char *arg, + size_t opt_index, HOST_WIDE_INT value, const char *arg, int kind, location_t loc, diagnostic_context *dc, HOST_WIDE_INT = 0); extern void *option_flag_var (int opt_index, struct gcc_options *opts); diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc index ac2e77b16590..6ab26051297b 100644 --- a/gcc/opts-common.cc +++ b/gcc/opts-common.cc @@ -1656,8 +1656,8 @@ read_cmdline_option (struct gcc_options *opts, void set_option (struct gcc_options *opts, struct gcc_options *opts_set, - int opt_index, HOST_WIDE_INT value, const char *arg, int kind, - location_t loc, diagnostic_context *dc, + size_t opt_index, HOST_WIDE_INT value, const char *arg, + int kind, location_t loc, diagnostic_context *dc, HOST_WIDE_INT mask /* = 0 */) { const struct cl_option 
*option = &cl_options[opt_index];
[gcc r15-5112] c++: rename -fmodules-ts to -fmodules
https://gcc.gnu.org/g:d9c3c3c85665b248c8a5e87898f4716ff646ed03 commit r15-5112-gd9c3c3c85665b248c8a5e87898f4716ff646ed03 Author: Jason Merrill Date: Mon Oct 14 16:22:34 2024 -0400 c++: rename -fmodules-ts to -fmodules The C++ modules support is not targeting the Modules TS, so it doesn't make much sense to refer to the TS in the option name. But keep the old spelling as an undocumented alias for now. gcc/ChangeLog: * doc/invoke.texi: Rename -fmodules-ts to -fmodules. gcc/c-family/ChangeLog: * c.opt: Add -fmodules with same effect as -fmodules-ts. gcc/cp/ChangeLog: * lang-specs.h: Check fmodules* instead of fmodules-ts. Diff: --- gcc/doc/invoke.texi | 24 gcc/c-family/c.opt | 10 +- gcc/cp/lang-specs.h | 30 +++--- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 93e9af3791c8..6c2c64410d4b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -223,7 +223,7 @@ in the following sections. -fno-implicit-templates -fno-implicit-inline-templates -fno-implement-inlines --fmodule-header@r{[}=@var{kind}@r{]} -fmodule-only -fmodules-ts +-fmodule-header@r{[}=@var{kind}@r{]} -fmodule-only -fmodules -fmodule-implicit-inline -fno-module-lazy -fmodule-mapper=@var{specification} @@ -3504,12 +3504,12 @@ To save space, do not emit out-of-line copies of inline functions controlled by @code{#pragma implementation}. This causes linker errors if these functions are not inlined everywhere they are called. -@opindex fmodules-ts -@opindex fno-modules-ts -@item -fmodules-ts -@itemx -fno-modules-ts +@opindex fmodules +@opindex fno-modules +@item -fmodules +@itemx -fno-modules Enable support for C++20 modules (@pxref{C++ Modules}). The -@option{-fno-modules-ts} is usually not needed, as that is the +@option{-fno-modules} is usually not needed, as that is the default. Even though this is a C++20 feature, it is not currently implicitly enabled by selecting that standard version. @@ -37926,7 +37926,7 @@ Acyclic Graph (DAG). 
You must build imports before the importer. Header files may themselves be compiled to header units, which are a transitional ability aiming at faster compilation. The @option{-fmodule-header} option is used to enable this, and implies -the @option{-fmodules-ts} option. These CMIs are named by the fully +the @option{-fmodules} option. These CMIs are named by the fully resolved underlying header file, and thus may be a complete pathname containing subdirectories. If the header file is found at an absolute pathname, the CMI location is still relative to a CMI root directory. @@ -37935,7 +37935,7 @@ As header files often have no suffix, you commonly have to specify a @option{-x} option to tell the compiler the source is a header file. You may use @option{-x c++-header}, @option{-x c++-user-header} or @option{-x c++-system-header}. When used in conjunction with -@option{-fmodules-ts}, these all imply an appropriate +@option{-fmodules}, these all imply an appropriate @option{-fmodule-header} option. The latter two variants use the user or system include path to search for the file specified. This allows you to, for instance, compile standard library header files as @@ -37953,8 +37953,8 @@ the bits/stdc++.h header used for libstdc++ precompiled headers you can @smallexample -g++ -fmodules-ts -x c++-system-header -c bits/stdc++.h -g++ -fmodules-ts -include bits/stdc++.h mycode.C +g++ -fmodules -x c++-system-header -c bits/stdc++.h +g++ -fmodules -include bits/stdc++.h mycode.C @end smallexample and any standard library #includes in mycode.C will be skipped, @@ -38138,8 +38138,8 @@ output. 
Usually you also want to use this option when explicitly preprocessing a header-unit, or consuming such preprocessed output: @smallexample -g++ -fmodules-ts -E -fdirectives-only my-header.hh -o my-header.ii -g++ -x c++-header -fmodules-ts -fpreprocessed -fdirectives-only my-header.ii +g++ -fmodules -E -fdirectives-only my-header.hh -o my-header.ii +g++ -x c++-header -fmodules -fpreprocessed -fdirectives-only my-header.ii @end smallexample @node C++ Compiled Module Interface diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 9b9f5e744f66..47d6f083ffeb 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -2033,12 +2033,12 @@ flax-vector-conversions C ObjC C++ ObjC++ Var(flag_lax_vector_conversions) Allow implicit conversions between vectors with differing numbers of subparts and/or differing element types. -fmodules-ts -C++ ObjC++ Var(flag_modules) Integer Init(0) -Enable C++ modules-ts (experimental). +fmodules +C++ ObjC++ Var(flag_modules) Integer +Enable C++20 Modules (experimental). -fno-modules -C++ ObjC++ Undocumented RejectNegative Var(flag_modules,0) Integer +fmodules-ts +C++ ObjC++ Alias(fmodules) Undocumented ;; undocu
[gcc r15-5113] c++: include libcody in TAGS
https://gcc.gnu.org/g:858912fa675d4c42529bf5f02cbc0dc8438d92b7 commit r15-5113-g858912fa675d4c42529bf5f02cbc0dc8438d92b7 Author: Jason Merrill Date: Sun Nov 10 20:38:40 2024 -0500 c++: include libcody in TAGS The C++ front-end uses symbols from these directories, so they should also be in TAGS. gcc/cp/ChangeLog: * Make-lang.in: Also collect tags from libcody and c++tools. Diff: --- gcc/cp/Make-lang.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in index fb7ae59d9d4c..9ec86aac9649 100644 --- a/gcc/cp/Make-lang.in +++ b/gcc/cp/Make-lang.in @@ -232,8 +232,9 @@ c++.srcinfo: c++.srcextra: c++.tags: force - cd $(srcdir)/cp; $(ETAGS) -o TAGS.sub *.cc *.h --language=none \ - --regex='/DEFTREECODE [(]\([A-Z_]+\)/\1/' cp-tree.def; \ + cd $(srcdir)/cp; $(ETAGS) -o TAGS.sub *.cc *.h \ + ../../libcody/*.cc ../../c++tools/*.cc \ + --language=none --regex='/DEFTREECODE [(]\([A-Z_]+\)/\1/' cp-tree.def; \ $(ETAGS) --include TAGS.sub --include ../TAGS.sub c++.man: doc/g++.1
[gcc r15-5115] c++: regenerate opt urls
https://gcc.gnu.org/g:4b9bb1d687e77469b5926e89db3a34b32ed3194a commit r15-5115-g4b9bb1d687e77469b5926e89db3a34b32ed3194a Author: Jason Merrill Date: Mon Nov 11 19:36:20 2024 -0500 c++: regenerate opt urls Forgot this in the -fmodules patch (r15-5112). gcc/c-family/ChangeLog: * c.opt.urls: Regenerate. Diff: --- gcc/c-family/c.opt.urls | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls index 1fe939856df6..3bfe2b86b240 100644 --- a/gcc/c-family/c.opt.urls +++ b/gcc/c-family/c.opt.urls @@ -1154,8 +1154,8 @@ UrlSuffix(gcc/Optimize-Options.html#index-fkeep-inline-dllexport) flax-vector-conversions UrlSuffix(gcc/C-Dialect-Options.html#index-flax-vector-conversions) -fmodules-ts -UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-fmodules-ts) +fmodules +UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-fmodules) fmodule-header UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-fmodule-header)
[gcc r15-5101] libgomp.c-c++-common/pr109062.c: Fix expected spin count for hybrid x86
https://gcc.gnu.org/g:51147890383b89947c673ffd8d7c2cbd675261bd commit r15-5101-g51147890383b89947c673ffd8d7c2cbd675261bd Author: Tobias Burnus Date: Mon Nov 11 17:22:20 2024 +0100 libgomp.c-c++-common/pr109062.c: Fix expected spin count for hybrid x86 On my system with E and P cores (hybrid) x86, the spincount is by default 1 and not 30, cf. PR109812 and r14-4571-ge1e127de18dbee. Hence, this commit updates the expected value of the testcase to also accept omp_display_env showing "GOMP_SPINCOUNT = '1'" - but only for x86-64, which might be hybrid. libgomp/ChangeLog: * testsuite/libgomp.c-c++-common/pr109062.c: Update dg-output to also accept GOMP_SPINCOUNT = 1 for x86-64. Diff: --- libgomp/testsuite/libgomp.c-c++-common/pr109062.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libgomp/testsuite/libgomp.c-c++-common/pr109062.c b/libgomp/testsuite/libgomp.c-c++-common/pr109062.c index 5c7c287dafd7..cb05c333e0e9 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/pr109062.c +++ b/libgomp/testsuite/libgomp.c-c++-common/pr109062.c @@ -11,4 +11,8 @@ main () return 0; } -/* { dg-output ".*\\\[host] GOMP_SPINCOUNT = '30'.*" { target native } } */ +/* On hybrid x86-64, i.e. with P and E cores, the default is GOMP_SPINCOUNT=1; + hence, handle either value; see PR109812. */ +/* { dg-output ".*\\\[host] GOMP_SPINCOUNT = '(?:30|1)'.*" { target { native && { x86_64-*-* i?86-*-* } } } } */ + +/* { dg-output ".*\\\[host] GOMP_SPINCOUNT = '30'.*" { target { native && { ! { x86_64-*-* i?86-*-* } } } } } */
[gcc r15-5102] c++: reduce unnecessary tree_common
https://gcc.gnu.org/g:416a8b375589d4c2891b437d0991296ef32bde98 commit r15-5102-g416a8b375589d4c2891b437d0991296ef32bde98 Author: Jason Merrill Date: Tue Nov 5 10:27:39 2024 -0500 c++: reduce unnecessary tree_common Lewis' r15-5067 fixing the marking of TRAIT_EXPR led me to compare some other front-end type definitions to their marking in cp_common_init_ts; it seems we can change tree_common to something smaller in several cases, to match how they are marked. gcc/cp/ChangeLog: * cp-tree.h (struct ptrmem_cst): Change tree_common to tree_typed. (struct tree_trait_expr): Likewise. (struct tree_static_assert): Change tree_common to tree_base. (struct tree_argument_pack_select): Likewise. Diff: --- gcc/cp/cp-tree.h | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 92d1dba6a5c9..1a0d5349749d 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -715,7 +715,7 @@ struct GTY(()) template_parm_index { }; struct GTY(()) ptrmem_cst { - struct tree_common common; + struct tree_typed typed; tree member; location_t locus; }; @@ -1378,14 +1378,14 @@ struct GTY (()) tree_deferred_noexcept { (((struct tree_static_assert *)STATIC_ASSERT_CHECK (NODE))->location) struct GTY (()) tree_static_assert { - struct tree_common common; + struct tree_base base; tree condition; tree message; location_t location; }; struct GTY (()) tree_argument_pack_select { - struct tree_common common; + struct tree_base base; tree argument_pack; int index; }; @@ -1425,7 +1425,7 @@ extern const struct cp_trait cp_traits[]; (((struct tree_trait_expr *)TRAIT_EXPR_CHECK (NODE))->locus) struct GTY (()) tree_trait_expr { - struct tree_common common; + struct tree_typed typed; tree type1; tree type2; location_t locus;
[gcc r15-5080] libgomp/plugin/plugin-gcn.c: Show device number in ISA error message
https://gcc.gnu.org/g:8473010807a264af35fb7cecad6f9406feab929f commit r15-5080-g8473010807a264af35fb7cecad6f9406feab929f Author: Tobias Burnus Date: Mon Nov 11 12:17:42 2024 +0100 libgomp/plugin/plugin-gcn.c: Show device number in ISA error message libgomp/ChangeLog: * plugin/plugin-gcn.c (isa_matches_agent): Mention the device number and ROCR_VISIBLE_DEVICES when reporting an ISA mismatch error. Diff: --- libgomp/plugin/plugin-gcn.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 592a7b6daba4..f2f2940de9db 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -2414,14 +2414,17 @@ isa_matches_agent (struct agent_info *agent, Elf64_Ehdr *image) if (isa_field != agent->device_isa) { - char msg[120]; + char msg[204]; const char *agent_isa_s = isa_name (agent->device_isa); assert (agent_isa_s); snprintf (msg, sizeof msg, - "GCN code object ISA '%s' does not match GPU ISA '%s'.\n" - "Try to recompile with '-foffload-options=-march=%s'.\n", - isa_s, agent_isa_s, agent_isa_s); + "GCN code object ISA '%s' does not match GPU ISA '%s' " + "(device %d).\n" + "Try to recompile with '-foffload-options=-march=%s',\n" + "or use ROCR_VISIBLE_DEVICES to disable incompatible " + "devices.\n", + isa_s, agent_isa_s, agent->device_id, agent_isa_s); hsa_error (msg, HSA_STATUS_ERROR); return false;
[gcc r15-5082] tree-optimization/117510 - fix guard hoisting validity check
https://gcc.gnu.org/g:da64698159fe69b68f5264b54cebcb67c501b3cf commit r15-5082-gda64698159fe69b68f5264b54cebcb67c501b3cf Author: Richard Biener Date: Mon Nov 11 09:40:20 2024 +0100 tree-optimization/117510 - fix guard hoisting validity check For the loop in the testcase we currently fail to hoist the guard check of the inner loop (m > 0) out of the outer loop because find_loop_guard checks all blocks of the outer loop for side-effects, including those that are skipped by the guard. This usually is harmless as the guard does not skip any blocks in the outer loop but in this case store-motion was applied to the inner loop and thus there's now a skipped store in the outer loop. The following properly skips blocks that are dominated by the entry to the skipped region. PR tree-optimization/117510 * tree-ssa-loop-unswitch.cc (find_loop_guard): Only check not skipped blocks for side-effects. * gcc.dg/vect/vect-outer-pr117510.c: New testcase. Diff: --- gcc/testsuite/gcc.dg/vect/vect-outer-pr117510.c | 13 + gcc/tree-ssa-loop-unswitch.cc | 6 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-pr117510.c b/gcc/testsuite/gcc.dg/vect/vect-outer-pr117510.c new file mode 100644 index 000000000000..e50b67ce0405 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-outer-pr117510.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-additional-options "-O3" } */ + +void f(int n, int m, double *a) +{ + a = __builtin_assume_aligned (a, __BIGGEST_ALIGNMENT__); + for (int i = 0; i < n; i++) +for (int j = 0; j < m; j++) + a[i] += 2*a[i] + j; +} + +/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/tree-ssa-loop-unswitch.cc b/gcc/tree-ssa-loop-unswitch.cc index 847f7ac739f7..88516fdb0a1f 100644 --- a/gcc/tree-ssa-loop-unswitch.cc +++ b/gcc/tree-ssa-loop-unswitch.cc @@ -1256,7 +1256,11 @@ find_loop_guard (class loop *loop, vec &dbg_to_reset) guard_edge = NULL; goto end; 
} - if (!empty_bb_without_guard_p (loop, bb, dbg_to_reset)) + /* If any of the not skipped blocks has side-effects or defs with +uses outside of the loop we cannot hoist the guard. */ + if (!dominated_by_p (CDI_DOMINATORS, + bb, guard_edge == te ? fe->dest : te->dest) + && !empty_bb_without_guard_p (loop, bb, dbg_to_reset)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
[gcc r15-5100] Add missing SLP discovery for CFN[_MASK][_LEN]_SCATTER_STORE
https://gcc.gnu.org/g:3d498cfe022f6e035ff24e0d78ff744da83ebf42 commit r15-5100-g3d498cfe022f6e035ff24e0d78ff744da83ebf42 Author: Richard Biener Date: Fri Nov 8 13:59:05 2024 +0100 Add missing SLP discovery for CFN[_MASK][_LEN]_SCATTER_STORE This was responsible for a bunch of SVE FAILs with --param vect-force-slp=1 * tree-vect-slp.cc (arg1_arg3_map): New. (arg1_arg3_arg4_map): Likewise. (vect_get_operand_map): Handle IFN_SCATTER_STORE, IFN_MASK_SCATTER_STORE and IFN_MASK_LEN_SCATTER_STORE. (vect_build_slp_tree_1): Likewise. * tree-vect-stmts.cc (vectorizable_store): For SLP masked gather/scatter record the mask with proper number of copies. * tree-vect-loop.cc (vectorizable_recurr): Avoid costing the initial value construction in the prologue twice with SLP. Diff: --- gcc/tree-vect-loop.cc | 9 ++--- gcc/tree-vect-slp.cc | 17 - gcc/tree-vect-stmts.cc | 6 -- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 3f2095da4490..6cfce5aa7e1e 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -9623,9 +9623,12 @@ vectorizable_recurr (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, return false; /* The recurrence costs the initialization vector and one permute -for each copy. */ - unsigned prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec, -stmt_info, 0, vect_prologue); +for each copy. With SLP the prologue value is explicitly +represented and costed separately. 
*/ + unsigned prologue_cost = 0; + if (!slp_node) + prologue_cost = record_stmt_cost (cost_vec, 1, scalar_to_vec, + stmt_info, 0, vect_prologue); unsigned inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, stmt_info, 0, vect_body); if (dump_enabled_p ()) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index ffe9e718575b..d3efd53b00cb 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -512,7 +512,9 @@ static const int no_arg_map[] = { 0 }; static const int arg0_map[] = { 1, 0 }; static const int arg1_map[] = { 1, 1 }; static const int arg2_map[] = { 1, 2 }; +static const int arg1_arg3_map[] = { 2, 1, 3 }; static const int arg1_arg4_map[] = { 2, 1, 4 }; +static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 }; static const int arg3_arg2_map[] = { 2, 3, 2 }; static const int op1_op0_map[] = { 2, 1, 0 }; static const int off_map[] = { 1, -3 }; @@ -573,6 +575,13 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false, case IFN_MASK_LEN_GATHER_LOAD: return arg1_arg4_map; + case IFN_SCATTER_STORE: + return arg1_arg3_map; + + case IFN_MASK_SCATTER_STORE: + case IFN_MASK_LEN_SCATTER_STORE: + return arg1_arg3_arg4_map; + case IFN_MASK_STORE: return gather_scatter_p ? 
off_arg3_arg2_map : arg3_arg2_map; @@ -1187,7 +1196,10 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (cfn == CFN_MASK_LOAD || cfn == CFN_GATHER_LOAD || cfn == CFN_MASK_GATHER_LOAD - || cfn == CFN_MASK_LEN_GATHER_LOAD) + || cfn == CFN_MASK_LEN_GATHER_LOAD + || cfn == CFN_SCATTER_STORE + || cfn == CFN_MASK_SCATTER_STORE + || cfn == CFN_MASK_LEN_SCATTER_STORE) ldst_p = true; else if (cfn == CFN_MASK_STORE) { @@ -1473,6 +1485,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, && rhs_code != CFN_GATHER_LOAD && rhs_code != CFN_MASK_GATHER_LOAD && rhs_code != CFN_MASK_LEN_GATHER_LOAD + && rhs_code != CFN_SCATTER_STORE + && rhs_code != CFN_MASK_SCATTER_STORE + && rhs_code != CFN_MASK_LEN_SCATTER_STORE && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) /* Not grouped loads are handled as externals for BB vectorization. For loop vectorization we can handle diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 9a2c2ea753e6..666e0491a9e8 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9162,7 +9162,8 @@ vectorizable_store (vec_info *vinfo, { if (loop_masks) final_mask = vect_get_loop_mask (loop_vinfo, gsi, -loop_masks, ncopies, +loop_masks, +ncopies * vec_num, vectype, j); if (vec_mask) final_ma
[gcc r15-5077] Reject UNSIGNED for Complex, some documentation fixes.
https://gcc.gnu.org/g:f5851a5b36b7dce02553d419d90f54e321f417a4 commit r15-5077-gf5851a5b36b7dce02553d419d90f54e321f417a4 Author: Thomas Koenig Date: Sat Nov 9 19:24:43 2024 +0100 Reject UNSIGNED for Complex, some documentation fixes. gcc/fortran/ChangeLog: * check.cc (gfc_check_complex): Reject UNSIGNED. * gfortran.texi: Update example program. Note that CMPLX, INT and REAL also take unsigned arguments. * intrinsic.texi (CMPLX): Document UNSIGNED. (INT): Likewise. (REAL): Likewise. gcc/testsuite/ChangeLog: * gfortran.dg/unsigned_41.f90: New test. Diff: --- gcc/fortran/check.cc | 17 + gcc/fortran/gfortran.texi | 8 ++-- gcc/fortran/intrinsic.texi| 26 +++--- gcc/testsuite/gfortran.dg/unsigned_41.f90 | 8 4 files changed, 46 insertions(+), 13 deletions(-) diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index 2d4af8e7df33..5c7fe3422628 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -2606,6 +2606,23 @@ gfc_check_complex (gfc_expr *x, gfc_expr *y) if (!boz_args_check (x, y)) return false; + /* COMPLEX is an extension, we do not want UNSIGNED there. */ + if (x->ts.type == BT_UNSIGNED) +{ + gfc_error ("%qs argument of %qs intrinsic at %L shall not be " +"UNSIGNED", gfc_current_intrinsic_arg[0]->name, +gfc_current_intrinsic, &x->where); + return false; +} + + if (y->ts.type == BT_UNSIGNED) +{ + gfc_error ("%qs argument of %qs intrinsic at %L shall not be " +"UNSIGNED", gfc_current_intrinsic_arg[1]->name, +gfc_current_intrinsic, &y->where); + return false; +} + if (x->ts.type == BT_BOZ) { if (gfc_invalid_boz (G_("BOZ constant at %L cannot appear in the COMPLEX" diff --git a/gcc/fortran/gfortran.texi b/gcc/fortran/gfortran.texi index 429d8461f8f7..a6617aa15711 100644 --- a/gcc/fortran/gfortran.texi +++ b/gcc/fortran/gfortran.texi @@ -2750,8 +2750,9 @@ and @code{Z} descriptors, plus unformatted I/O. 
Here is a small, somewhat contrived example of their use: @smallexample program main - unsigned(kind=8) :: v - v = huge(v) - 32u_8 + use iso_fortran_env, only : uint64 + unsigned(kind=uint64) :: v + v = huge(v) - 32u_uint64 print *,v end program main @end smallexample @@ -2779,6 +2780,7 @@ The following intrinsics take unsigned arguments: @item @code{BIT_SIZE}, @pxref{BIT_SIZE} @item @code{BLE}, @pxref{BLE} @item @code{BLT}, @pxref{BLT} +@item @code{CMPLX}, @pxref{CMPLX} @item @code{CSHIFT}, @pxref{CSHIFT} @item @code{DIGITS}, @pxref{DIGITS} @item @code{DOT_PRODUCT}, @pxref{DOT_PRODUCT} @@ -2794,6 +2796,7 @@ The following intrinsics take unsigned arguments: @item @code{IBITS}, @pxref{IBITS} @item @code{IBSET}, @pxref{IBSET} @item @code{IEOR}, @pxref{IEOR} +@item @code{INT}, @pxref{INT} @item @code{IOR}, @pxref{IOR} @item @code{IPARITY}, @pxref{IPARITY} @item @code{ISHFT}, @pxref{ISHFT} @@ -2814,6 +2817,7 @@ The following intrinsics take unsigned arguments: @item @code{PRODUCT}, @pxref{PRODUCT} @item @code{RANDOM_NUMBER}, @pxref{RANDOM_NUMBER} @item @code{RANGE}, @pxref{RANGE} +@item @code{REAL}, @pxref{REAL} @item @code{SHIFTA}, @pxref{SHIFTA} @item @code{SHIFTL}, @pxref{SHIFTL} @item @code{SHIFTR}, @pxref{SHIFTR} diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 9d0b752670b4..d11d37761d92 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -3626,7 +3626,8 @@ component. If @var{Y} is not present then the imaginary component is set to 0.0. If @var{X} is complex then @var{Y} must not be present. @item @emph{Standard}: -Fortran 77 and later +Fortran 77 and later, extension for @code{UNSIGNED} (@pxref{Unsigned +integers}) @item @emph{Class}: Elemental function @@ -3637,9 +3638,9 @@ Elemental function @item @emph{Arguments}: @multitable @columnfractions .15 .70 @item @var{X} @tab The type may be @code{INTEGER}, @code{REAL}, -or @code{COMPLEX}. +@code{COMPLEX} or @code{UNSIGNED}. 
@item @var{Y} @tab (Optional; only allowed if @var{X} is not -@code{COMPLEX}.) May be @code{INTEGER} or @code{REAL}. +@code{COMPLEX}.) May be @code{INTEGER}, @code{REAL} or @code{UNSIGNED}. @item @var{KIND} @tab (Optional) A scalar @code{INTEGER} constant expression indicating the kind parameter of the result. @end multitable @@ -8355,7 +8356,8 @@ Convert to integer type Fortran 77 and later, with boz-literal-constant Fortran 2008 and later. @item @emph{Class}: -Elemental function +Elemental function, extension for @code{UNSIGNED} (@pxref{Unsigned +integers}). @item @emph{Syntax}: @code{RESULT = INT(A [, KIND))} @@ -8386,7 +8388,6 @@ If @var{A} is of type @code{COMPLEX}, rule B is applied to the real part of @var If @var{A} is of type @code{UNSIGNED} and @math{0 \leq
[gcc r15-5078] Fortran: Suppress invalid finalization of artificial variable [PR116388]
https://gcc.gnu.org/g:42a2df0b7985b2a4732ba1c29726ac7aabd5eeae commit r15-5078-g42a2df0b7985b2a4732ba1c29726ac7aabd5eeae Author: Paul Thomas Date: Mon Nov 11 09:01:11 2024 +0000 Fortran: Suppress invalid finalization of artificial variable [PR116388] 2024-11-11 Tomas Trnka Paul Thomas gcc/fortran PR fortran/116388 * class.cc (finalize_component): Leading underscore in the name of 'byte_stride' to suppress invalid finalization. gcc/testsuite/ PR fortran/116388 * gfortran.dg/finalize_58.f90: New test. Diff: --- gcc/fortran/class.cc | 5 +- gcc/testsuite/gfortran.dg/finalize_58.f90 | 77 +++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/gcc/fortran/class.cc b/gcc/fortran/class.cc index 4b2234a958fc..fc709fec322c 100644 --- a/gcc/fortran/class.cc +++ b/gcc/fortran/class.cc @@ -1152,8 +1152,9 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp, gcc_assert (c); - /* Set scalar argument for storage_size. */ - gfc_get_symbol ("comp_byte_stride", sub_ns, &byte_stride); + /* Set scalar argument for storage_size. A leading underscore in +the name prevents an unwanted finalization. */ + gfc_get_symbol ("_comp_byte_stride", sub_ns, &byte_stride); byte_stride->ts = e->ts; byte_stride->attr.flavor = FL_VARIABLE; byte_stride->attr.value = 1; diff --git a/gcc/testsuite/gfortran.dg/finalize_58.f90 b/gcc/testsuite/gfortran.dg/finalize_58.f90 new file mode 100644 index 000000000000..54960e6b0305 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/finalize_58.f90 @@ -0,0 +1,77 @@ +! { dg-do run } +! +! Test fix for PR116388 in which an artificial variable in the finalization +! wrapper was generating an invalid finalization. +! +! Contributed by Tomas Trnka +! +module FinalizerTestModule + + use, intrinsic :: ISO_C_BINDING + + implicit none + + type, public :: AType + type(C_ptr) :: cptr = C_null_ptr + logical :: cptr_invalid = .true. 
+ integer, allocatable :: x(:) + contains + final :: FinalizerA + end type + + type, public :: BType + type(C_ptr) :: cptr = C_null_ptr + type(AType) :: a + contains + procedure, public :: New => NewB + final :: FinalizerB + end type + + type, public :: CType + type(BType) :: b + contains + procedure, public :: New => NewC + end type + + integer :: final_A = 0 + integer :: final_B = 0 +contains + + impure elemental subroutine FinalizerA(self) + type(AType), intent(inout) :: self + final_A = final_A + 1 + if (.not. self%cptr_invalid) stop 1 + end subroutine + + subroutine NewB(self) + class(BType), intent(out) :: self + + end subroutine + + impure elemental subroutine FinalizerB(self) + type(BType), intent(inout) :: self + final_B = final_B + 1 + if (transfer (self%cptr, C_LONG_LONG) /= 0) stop 2 + end subroutine + + subroutine NewC(self, b) + class(CType), intent(out) :: self + type(BType), intent(in) :: b + + self%b = b + end subroutine + +end module + +program finalizing_uninitialized + use FinalizerTestModule + implicit none + + type(BType) :: b + type(CType) :: c + + call b%New() + call c%New(b) + if (final_A /= 3) stop 3 + if (final_B /= 3) stop 4 +end program
[gcc r15-5083] Fortran: Fix elemental array refs in SELECT TYPE [PR109345]
https://gcc.gnu.org/g:e22d80d4f0f8d33f538c1a4bad07b2c819a6d55c commit r15-5083-ge22d80d4f0f8d33f538c1a4bad07b2c819a6d55c Author: Paul Thomas Date: Mon Nov 11 12:21:57 2024 + Fortran: Fix elemental array refs in SELECT TYPE [PR109345] 2024-11-10 Paul Thomas gcc/fortran PR fortran/109345 * trans-array.cc (gfc_get_array_span): Unlimited polymorphic expressions are now treated separately since the span need not be the same as the element size. gcc/testsuite/ PR fortran/109345 * gfortran.dg/character_workout_1.f90: Cut trailing whitespace. * gfortran.dg/pr109345.f90: New test. Diff: --- gcc/fortran/trans-array.cc| 44 + gcc/testsuite/gfortran.dg/character_workout_1.f90 | 8 +-- gcc/testsuite/gfortran.dg/pr109345.f90| 77 +++ 3 files changed, 113 insertions(+), 16 deletions(-) diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index a52bde90bd2c..e888b737bec3 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -962,6 +962,8 @@ tree gfc_get_array_span (tree desc, gfc_expr *expr) { tree tmp; + gfc_symbol *sym = expr->expr_type == EXPR_VARIABLE + ? expr->symtree->n.sym : NULL; if (is_pointer_array (desc) || (get_CFI_desc (NULL, expr, &desc, NULL) @@ -983,25 +985,43 @@ gfc_get_array_span (tree desc, gfc_expr *expr) desc = build_fold_indirect_ref_loc (input_location, desc); tmp = gfc_conv_descriptor_span_get (desc); } + else if (UNLIMITED_POLY (expr) + || (sym && UNLIMITED_POLY (sym))) +{ + /* Treat unlimited polymorphic expressions separately because +the element size need not be the same as the span. Obtain +the class container, which is simplified here by their being +no component references. 
*/ + if (sym && sym->attr.dummy) + { + tmp = gfc_get_symbol_decl (sym); + tmp = GFC_DECL_SAVED_DESCRIPTOR (tmp); + if (INDIRECT_REF_P (tmp)) + tmp = TREE_OPERAND (tmp, 0); + } + else + { + gcc_assert (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (desc))); + tmp = TREE_OPERAND (desc, 0); + } + tmp = gfc_class_data_get (tmp); + tmp = gfc_conv_descriptor_span_get (tmp); +} else if (TREE_CODE (desc) == COMPONENT_REF && GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (desc)) && GFC_CLASS_TYPE_P (TREE_TYPE (TREE_OPERAND (desc, 0 { - /* The descriptor is a class _data field and so use the vtable -size for the receiving span field. */ - tmp = gfc_get_vptr_from_expr (desc); + /* The descriptor is a class _data field. Use the vtable size +since it is guaranteed to have been set and is always OK for +class array descriptors that are not unlimited. */ + tmp = gfc_class_vptr_get (TREE_OPERAND (desc, 0)); tmp = gfc_vptr_size_get (tmp); } - else if (expr && expr->expr_type == EXPR_VARIABLE - && expr->symtree->n.sym->ts.type == BT_CLASS - && expr->ref->type == REF_COMPONENT - && expr->ref->next->type == REF_ARRAY - && expr->ref->next->next == NULL - && CLASS_DATA (expr->symtree->n.sym)->attr.dimension) + else if (sym && sym->ts.type == BT_CLASS && sym->attr.dummy) { - /* Dummys come in sometimes with the descriptor detached from -the class field or declaration. */ - tmp = gfc_class_vptr_get (expr->symtree->n.sym->backend_decl); + /* Class dummys usually requires extraction from the saved +descriptor, which gfc_class_vptr_get does for us. */ + tmp = gfc_class_vptr_get (sym->backend_decl); tmp = gfc_vptr_size_get (tmp); } else diff --git a/gcc/testsuite/gfortran.dg/character_workout_1.f90 b/gcc/testsuite/gfortran.dg/character_workout_1.f90 index 98133b48960a..8f8bdbf00690 100644 --- a/gcc/testsuite/gfortran.dg/character_workout_1.f90 +++ b/gcc/testsuite/gfortran.dg/character_workout_1.f90 @@ -1,7 +1,7 @@ ! { dg-do run } ! ! Tests fix for PR100120/100816/100818/100819/100821 -! +! 
program main_p @@ -27,10 +27,10 @@ program main_p character(len=m, kind=k), pointer :: pm(:) character(len=e, kind=k), pointer :: pe(:) character(len=:, kind=k), pointer :: pd(:) - + class(*), pointer :: su class(*), pointer :: pu(:) - + integer :: i, j nullify(s1, sm, se, sd, su) @@ -41,7 +41,7 @@ program main_p cm(i)(j:j) = char(i*m+j+c-m, kind=k) end do end do - + s1 => c1(n) if(.not.associated(s1)) stop 1 if(.not.associated(s1, c1(n))) stop 2 diff --git a/gcc/testsuite/gfortran.dg/pr109345.f90 b/gcc/testsuite/gfortran.dg/pr109345.f90 new file mode 100644 index ..cff9aaa987a0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr109345.f90 @@ -0,0
[gcc r15-5079] RISC-V: Fix one nit indent issue of ustrunc pattern [NFC]
https://gcc.gnu.org/g:2a4ce8d0c2fd253bbf018f254156bb1fb3ffc47d commit r15-5079-g2a4ce8d0c2fd253bbf018f254156bb1fb3ffc47d Author: Pan Li Date: Mon Nov 11 15:39:40 2024 +0800 RISC-V: Fix one nit indent issue of ustrunc pattern [NFC] Just noticed that the indentation of the ustrunc pattern in the md file is not quite right, so correct it. The fix is obvious; it will be committed after the next 48 hours if there are no further comments. gcc/ChangeLog: * config/riscv/autovec.md: Fix indent format issue. Signed-off-by: Pan Li Diff: --- gcc/config/riscv/autovec.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index 1f1849d52372..b5fbe98b5fc5 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -2769,7 +2769,7 @@ (define_expand "ustrunc2" [(match_operand: 0 "register_operand") - (match_operand:VOEXTI 1 "register_operand")] + (match_operand:VOEXTI 1 "register_operand")] "TARGET_VECTOR" { riscv_vector::expand_vec_oct_ustrunc (operands[0], operands[1], mode,
[gcc r15-5084] Add push/pop_function_decl
https://gcc.gnu.org/g:9d14f677a0da80bc6355955469c69709b1d3c67e commit r15-5084-g9d14f677a0da80bc6355955469c69709b1d3c67e Author: Richard Sandiford Date: Mon Nov 11 12:32:13 2024 + Add push/pop_function_decl For the aarch64 simd clones patches, it would be useful to be able to push a function declaration onto the cfun stack, even though it has no function body associated with it. That is, we want cfun to be null, current_function_decl to be the decl itself, and the target and optimisation flags to reflect the declaration. This patch adds a push/pop_function_decl pair to do that. I think the more direct way of doing what I want to do under the existing interface would have been: push_cfun (nullptr); invoke_set_current_function_hook (fndecl); pop_cfun (); where invoke_set_current_function_hook would need to become public. But it seemed safer to use the higher-level routines, since it makes sure that the target/optimisation changes are synchronised with the function changes. In particular, if cfun was null before the sequence above, the pop_cfun would leave the flags unchanged, rather than restore them to the state before the push_cfun. gcc/ * function.h (push_function_decl, pop_function_decl): Declare. * function.cc (set_function_decl): New function, extracted from... (set_cfun): ...here. (push_function_decl): New function, extracted from... (push_cfun): ...here. (pop_cfun_1): New function, extracted from... (pop_cfun): ...here. (pop_function_decl): New function. Diff: --- gcc/function.cc | 80 +++-- gcc/function.h | 2 ++ 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/gcc/function.cc b/gcc/function.cc index 73490f0da10a..bf74e1ea208d 100644 --- a/gcc/function.cc +++ b/gcc/function.cc @@ -4707,40 +4707,74 @@ invoke_set_current_function_hook (tree fndecl) } } -/* cfun should never be set directly; use this function. */ +/* Set cfun to NEW_CFUN and switch to the optimization and target options + associated with NEW_FNDECL. 
-void -set_cfun (struct function *new_cfun, bool force) + FORCE says whether we should do the switch even if NEW_CFUN is the current + function, e.g. because there has been a change in optimization or target + options. */ + +static void +set_function_decl (function *new_cfun, tree new_fndecl, bool force) { if (cfun != new_cfun || force) { cfun = new_cfun; - invoke_set_current_function_hook (new_cfun ? new_cfun->decl : NULL_TREE); + invoke_set_current_function_hook (new_fndecl); redirect_edge_var_map_empty (); } } +/* cfun should never be set directly; use this function. */ + +void +set_cfun (struct function *new_cfun, bool force) +{ + set_function_decl (new_cfun, new_cfun ? new_cfun->decl : NULL_TREE, force); +} + /* Initialized with NOGC, making this poisonous to the garbage collector. */ static vec cfun_stack; -/* Push the current cfun onto the stack, and set cfun to new_cfun. Also set - current_function_decl accordingly. */ +/* Push the current cfun onto the stack, then switch to function NEW_CFUN + and FUNCTION_DECL NEW_FNDECL. FORCE is as for set_function_decl. */ -void -push_cfun (struct function *new_cfun) +static void +push_function_decl (function *new_cfun, tree new_fndecl, bool force) { gcc_assert ((!cfun && !current_function_decl) || (cfun && current_function_decl == cfun->decl)); cfun_stack.safe_push (cfun); - current_function_decl = new_cfun ? new_cfun->decl : NULL_TREE; - set_cfun (new_cfun); + current_function_decl = new_fndecl; + set_function_decl (new_cfun, new_fndecl, force); } -/* Pop cfun from the stack. Also set current_function_decl accordingly. */ +/* Push the current cfun onto the stack and switch to function declaration + NEW_FNDECL, which might or might not have a function body. FORCE is as for + set_function_decl. 
*/ void -pop_cfun (void) +push_function_decl (tree new_fndecl, bool force) +{ + force |= current_function_decl != new_fndecl; + push_function_decl (DECL_STRUCT_FUNCTION (new_fndecl), new_fndecl, force); +} + +/* Push the current cfun onto the stack, and set cfun to new_cfun. Also set + current_function_decl accordingly. */ + +void +push_cfun (struct function *new_cfun) +{ + push_function_decl (new_cfun, new_cfun ? new_cfun->decl : NULL_TREE, false); +} + +/* A common subroutine for pop_cfun and pop_function_decl. FORCE is as + for set_function_decl. */ + +static void +pop_cfun_1 (bool force) { struct function *new_cfun = cfun_stack.pop (); /* When in_dummy_function, we do have a cfun but current_function_decl is @@ -4750,10 +4784,30 @@ pop_cfun (void) gcc_checking_assert (in_dummy_function || !cfun
[gcc r15-5088] aarch64: Use braces in SVE TBL instructions
https://gcc.gnu.org/g:a7e0e80630abf82b16be1d1faf95bc4b5f6f807a commit r15-5088-ga7e0e80630abf82b16be1d1faf95bc4b5f6f807a Author: Richard Sandiford Date: Mon Nov 11 12:32:15 2024 + aarch64: Use braces in SVE TBL instructions GCC previously used the older assembly syntax for SVE TBL, with no braces around the second operand. This patch switches to the newer, official syntax, with braces around the operand. The initial SVE binutils submission supported both syntaxes, so there should be no issues with backwards compatibility. gcc/ * config/aarch64/aarch64-sve.md (@aarch64_sve_tbl): Wrap the second operand in braces. gcc/testsuite/ * gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c: Wrap the second TBL operand in braces * gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c: Likewise. * gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_bf16.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_f16.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_f32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_f64.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_s16.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_s32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_s64.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_s8.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_u16.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_u32.c: Likewise. * gcc.target/aarch64/sve/acle/asm/tbl_u64.c: Likewise. 
* gcc.target/aarch64/sve/acle/asm/tbl_u8.c: Likewise. * gcc.target/aarch64/sve/slp_perm_6.c: Likewise. * gcc.target/aarch64/sve/slp_perm_7.c: Likewise. * gcc.target/aarch64/sve/vec_perm_1.c: Likewise. * gcc.target/aarch64/sve/vec_perm_const_1.c: Likewise. * gcc.target/aarch64/sve/vec_perm_const_1_overrun.c: Likewise. * gcc.target/aarch64/sve/vec_perm_const_single_1.c: Likewise. * gcc.target/aarch64/sve/vec_perm_single_1.c: Likewise. * gcc.target/aarch64/sve/uzp1_1.c: Shorten the scan-assembler-nots to just "\ttbl\". * gcc.target/aarch64/sve/uzp2_1.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-sve.md| 2 +- .../gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c | 12 ++-- .../gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c | 12 ++-- .../gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c | 16 .../gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c | 18 +- .../gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c | 12 ++-- .../gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c | 16 .../gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c | 20 ++-- .../gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c| 8 .../gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c | 12 ++-- .../gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c | 16 .../gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c | 20 ++-- .../gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c| 8 .../gcc.target/aarch64/sve/acle/asm/tbl_bf16.c | 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_f16.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_f32.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_f64.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_s16.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_s32.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_s64.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_s8.c | 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_u16.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_u32.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_u64.c| 6 +++--- .../gcc.target/aarch64/sve/acle/asm/tbl_u8.c | 6 +++--- 
gcc/testsuite/gcc.target/aarch64/sve/slp_perm_6.c| 2 +- gcc/testsuite/gcc.target/aarch64/sve/slp_perm_7.c| 2 +- gcc/testsuite/gcc.target/aarch64/sve/uzp1_1.c| 8 +
[gcc r15-5093] aarch64: Sort some SVE2 lists alphabetically
https://gcc.gnu.org/g:fe26ce62650b0c046941e255732172f09b6b5155 commit r15-5093-gfe26ce62650b0c046941e255732172f09b6b5155 Author: Richard Sandiford Date: Mon Nov 11 12:32:18 2024 + aarch64: Sort some SVE2 lists alphabetically gcc/ * config/aarch64/aarch64-sve-builtins-sve2.def: Sort entries alphabetically. * config/aarch64/aarch64-sve-builtins-sve2.h: Likewise. * config/aarch64/aarch64-sve-builtins-sve2.cc: Likewise. Diff: --- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 24 +- gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 32 gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 14 +-- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc index f0ab7400ef50..24e95afd6ebe 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc @@ -589,20 +589,20 @@ FUNCTION (svabalb, unspec_based_add_function, (UNSPEC_SABDLB, UNSPEC_UABDLB, -1)) FUNCTION (svabalt, unspec_based_add_function, (UNSPEC_SABDLT, UNSPEC_UABDLT, -1)) +FUNCTION (svabdlb, unspec_based_function, (UNSPEC_SABDLB, UNSPEC_UABDLB, -1)) +FUNCTION (svabdlt, unspec_based_function, (UNSPEC_SABDLT, UNSPEC_UABDLT, -1)) +FUNCTION (svadalp, unspec_based_function, (UNSPEC_SADALP, UNSPEC_UADALP, -1)) FUNCTION (svadclb, unspec_based_function, (-1, UNSPEC_ADCLB, -1)) FUNCTION (svadclt, unspec_based_function, (-1, UNSPEC_ADCLT, -1)) FUNCTION (svaddhnb, unspec_based_function, (UNSPEC_ADDHNB, UNSPEC_ADDHNB, -1)) FUNCTION (svaddhnt, unspec_based_function, (UNSPEC_ADDHNT, UNSPEC_ADDHNT, -1)) -FUNCTION (svabdlb, unspec_based_function, (UNSPEC_SABDLB, UNSPEC_UABDLB, -1)) -FUNCTION (svabdlt, unspec_based_function, (UNSPEC_SABDLT, UNSPEC_UABDLT, -1)) -FUNCTION (svadalp, unspec_based_function, (UNSPEC_SADALP, UNSPEC_UADALP, -1)) FUNCTION (svaddlb, unspec_based_function, (UNSPEC_SADDLB, UNSPEC_UADDLB, -1)) FUNCTION (svaddlbt, unspec_based_function, (UNSPEC_SADDLBT, -1, 
-1)) FUNCTION (svaddlt, unspec_based_function, (UNSPEC_SADDLT, UNSPEC_UADDLT, -1)) -FUNCTION (svaddwb, unspec_based_function, (UNSPEC_SADDWB, UNSPEC_UADDWB, -1)) -FUNCTION (svaddwt, unspec_based_function, (UNSPEC_SADDWT, UNSPEC_UADDWT, -1)) FUNCTION (svaddp, unspec_based_pred_function, (UNSPEC_ADDP, UNSPEC_ADDP, UNSPEC_FADDP)) +FUNCTION (svaddwb, unspec_based_function, (UNSPEC_SADDWB, UNSPEC_UADDWB, -1)) +FUNCTION (svaddwt, unspec_based_function, (UNSPEC_SADDWT, UNSPEC_UADDWT, -1)) FUNCTION (svaesd, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesd)) FUNCTION (svaese, fixed_insn_function, (CODE_FOR_aarch64_sve2_aese)) FUNCTION (svaesimc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesimc)) @@ -649,12 +649,12 @@ FUNCTION (svldnt1uh_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u16)) FUNCTION (svldnt1uw_gather, svldnt1_gather_extend_impl, (TYPE_SUFFIX_u32)) FUNCTION (svlogb, unspec_based_function, (-1, -1, UNSPEC_COND_FLOGB)) FUNCTION (svmatch, svmatch_svnmatch_impl, (UNSPEC_MATCH)) +FUNCTION (svmaxnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMAXNMP)) FUNCTION (svmaxp, unspec_based_pred_function, (UNSPEC_SMAXP, UNSPEC_UMAXP, UNSPEC_FMAXP)) -FUNCTION (svmaxnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMAXNMP)) +FUNCTION (svminnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMINNMP)) FUNCTION (svminp, unspec_based_pred_function, (UNSPEC_SMINP, UNSPEC_UMINP, UNSPEC_FMINP)) -FUNCTION (svminnmp, unspec_based_pred_function, (-1, -1, UNSPEC_FMINNMP)) FUNCTION (svmlalb, unspec_based_mla_function, (UNSPEC_SMULLB, UNSPEC_UMULLB, UNSPEC_FMLALB)) FUNCTION (svmlalb_lane, unspec_based_mla_lane_function, (UNSPEC_SMULLB, @@ -723,15 +723,15 @@ FUNCTION (svqdmullt_lane, unspec_based_lane_function, (UNSPEC_SQDMULLT, FUNCTION (svqneg, rtx_code_function, (SS_NEG, UNKNOWN, UNKNOWN)) FUNCTION (svqrdcmlah, svqrdcmlah_impl,) FUNCTION (svqrdcmlah_lane, svqrdcmlah_lane_impl,) -FUNCTION (svqrdmulh, unspec_based_function, (UNSPEC_SQRDMULH, -1, -1)) -FUNCTION (svqrdmulh_lane, 
unspec_based_lane_function, (UNSPEC_SQRDMULH, - -1, -1)) FUNCTION (svqrdmlah, unspec_based_function, (UNSPEC_SQRDMLAH, -1, -1)) FUNCTION (svqrdmlah_lane, unspec_based_lane_function, (UNSPEC_SQRDMLAH, -1, -1)) FUNCTION (svqrdmlsh, unspec_based_function, (UNSPEC_SQRDMLSH, -1, -1)) FUNCTION (svqrdmlsh_lane, unspec_based_lane_function, (UNSPEC_SQRDMLSH, -1, -1)) +FUNCTION (svqrdmulh, unspec_based_function, (
[gcc r15-5094] aarch64: Add svboolx4_t
https://gcc.gnu.org/g:fdbe94f7c88f53f1d65e9891e6eab2fe803a6e77 commit r15-5094-gfdbe94f7c88f53f1d65e9891e6eab2fe803a6e77 Author: Richard Sandiford Date: Mon Nov 11 12:32:18 2024 + aarch64: Add svboolx4_t This patch adds an svboolx4_t type, to go alongside the existing svboolx2_t type. It doesn't require any special ISA support beyond SVE itself and it currently has no associated instructions. gcc/ * config/aarch64/aarch64-modes.def (VNx64BI): New mode. * config/aarch64/aarch64-protos.h (aarch64_split_double_move): Generalize to... (aarch64_split_move): ...this. * config/aarch64/aarch64-sve-builtins-base.def (svcreate4, svget4) (svset4, svundef4): Add bool variants. * config/aarch64/aarch64-sve-builtins.cc (handle_arm_sve_h): Add svboolx4_t. * config/aarch64/iterators.md (SVE_STRUCT_BI): New mode iterator. * config/aarch64/aarch64-sve.md (movvnx32bi): Generalize to... (mov): ...this. * config/aarch64/aarch64.cc (pure_scalable_type_info::piece::get_rtx): Allow num_prs to be 4. (aarch64_classify_vector_mode): Handle VNx64BI. (aarch64_hard_regno_nregs): Likewise. (aarch64_class_max_nregs): Likewise. (aarch64_array_mode): Use VNx64BI for arrays of 4 svbool_ts. (aarch64_split_double_move): Generalize to... (aarch64_split_move): ...this. (aarch64_split_128bit_move): Update call accordingly. gcc/testsuite/ * gcc.target/aarch64/sve/acle/general-c/create_5.c: Expect svcreate4 to succeed for svbool_ts. * gcc.target/aarch64/sve/acle/asm/test_sve_acle.h (TEST_UNDEF_B): New macro. * gcc.target/aarch64/sve/acle/asm/create4_1.c: Test _b form. * gcc.target/aarch64/sve/acle/asm/undef2_1.c: Likewise. * gcc.target/aarch64/sve/acle/asm/undef4_1.c: Likewise. * gcc.target/aarch64/sve/acle/asm/get4_b.c: New test. * gcc.target/aarch64/sve/acle/asm/set4_b.c: Likewise. * gcc.target/aarch64/sve/acle/general-c/svboolx4_1.c: Likewise. 
Diff: --- gcc/config/aarch64/aarch64-modes.def | 3 + gcc/config/aarch64/aarch64-protos.h| 2 +- gcc/config/aarch64/aarch64-sve-builtins-base.def | 4 + gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- gcc/config/aarch64/aarch64-sve.md | 8 +- gcc/config/aarch64/aarch64.cc | 50 - gcc/config/aarch64/iterators.md| 2 + .../gcc.target/aarch64/sve/acle/asm/create4_1.c| 10 ++ .../gcc.target/aarch64/sve/acle/asm/get4_b.c | 73 + .../gcc.target/aarch64/sve/acle/asm/set4_b.c | 87 +++ .../aarch64/sve/acle/asm/test_sve_acle.h | 8 ++ .../gcc.target/aarch64/sve/acle/asm/undef2_1.c | 7 ++ .../gcc.target/aarch64/sve/acle/asm/undef4_1.c | 7 ++ .../aarch64/sve/acle/general-c/create_5.c | 2 +- .../aarch64/sve/acle/general-c/svboolx4_1.c| 117 + 15 files changed, 351 insertions(+), 31 deletions(-) diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 25a22c1195e1..813421e1e39e 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -48,18 +48,21 @@ ADJUST_FLOAT_FORMAT (HF, &ieee_half_format); /* Vector modes. 
*/ +VECTOR_BOOL_MODE (VNx64BI, 64, BI, 8); VECTOR_BOOL_MODE (VNx32BI, 32, BI, 4); VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2); VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2); VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2); VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2); +ADJUST_NUNITS (VNx64BI, aarch64_sve_vg * 32); ADJUST_NUNITS (VNx32BI, aarch64_sve_vg * 16); ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8); ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4); ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2); ADJUST_NUNITS (VNx2BI, aarch64_sve_vg); +ADJUST_ALIGNMENT (VNx64BI, 2); ADJUST_ALIGNMENT (VNx32BI, 2); ADJUST_ALIGNMENT (VNx16BI, 2); ADJUST_ALIGNMENT (VNx8BI, 2); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 6ab41a21c75d..05d3258abf7b 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1045,7 +1045,7 @@ rtx aarch64_simd_expand_builtin (int, tree, rtx); void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); rtx aarch64_endian_lane_rtx (machine_mode, unsigned int); -void aarch64_split_double_move (rtx, rtx, machine_mode); +void aarch64_split_move (rtx, rtx, machine_mode); void aarch64_split_128bit_move (rtx, rtx); bool aarch64_split_128bit_move_p (rtx, rtx); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def index da2a0e41aa5d..0353f56e7057 100644 --
[gcc r15-5090] aarch64: Add an abstraction for scatter store type inference
https://gcc.gnu.org/g:bd45d4f9955022e688e75756b0cd8b8e54b33d4b commit r15-5090-gbd45d4f9955022e688e75756b0cd8b8e54b33d4b Author: Richard Sandiford Date: Mon Nov 11 12:32:16 2024 +0000 aarch64: Add an abstraction for scatter store type inference Until now, all data arguments to a scatter store needed to have 32-bit or 64-bit elements. This isn't true for the upcoming SVE2.1 svst1q scatter intrinsic, so this patch adds an abstraction around the restriction. gcc/ * config/aarch64/aarch64-sve-builtins-shapes.cc (store_scatter_base::infer_vector_type): New virtual member function. (store_scatter_base::resolve): Use it. Diff: --- gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index f190770250fe..e1204c283b6d 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -994,12 +994,18 @@ struct store_scatter_base : public overloaded_base<0> mode_suffix_index mode; type_suffix_index type; if (!r.check_gp_argument (has_displacement_p ? 3 : 2, i, nargs) - || (type = r.infer_sd_vector_type (nargs - 1)) == NUM_TYPE_SUFFIXES + || (type = infer_vector_type (r, nargs - 1)) == NUM_TYPE_SUFFIXES || (mode = r.resolve_gather_address (i, type, false)) == MODE_none) return error_mark_node; return r.resolve_to (mode, type); } + + virtual type_suffix_index + infer_vector_type (function_resolver &r, unsigned int argno) const + { +return r.infer_sd_vector_type (argno); + } }; /* Base class for ternary operations in which the final argument is an
[gcc r15-5087] aarch64: Tweak definition of all_data & co
https://gcc.gnu.org/g:23b7fe39d716c73a8cbae0f0585bc918041e3f74 commit r15-5087-g23b7fe39d716c73a8cbae0f0585bc918041e3f74 Author: Richard Sandiford Date: Mon Nov 11 12:32:15 2024 + aarch64: Tweak definition of all_data & co Past extensions to SVE have required new subsets of all_data; the SVE2.1 patches will add another. This patch tries to make this more scalable by defining the multi-size *_data macros to be unions of single-size *_data macros. gcc/ * config/aarch64/aarch64-sve-builtins.cc (TYPES_all_data): Redefine in terms of single-size *_data definitions. (TYPES_bhs_data, TYPES_hs_data, TYPES_sd_data): Likewise. (TYPES_b_data, TYPES_h_data, TYPES_s_data): New macros. Diff: --- gcc/config/aarch64/aarch64-sve-builtins.cc | 51 ++ 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 44b7f6edae58..c0b5115fdebe 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -231,12 +231,11 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_all_arith(S, D) \ TYPES_all_float (S, D), TYPES_all_integer (S, D) -/* _bf16 - _f16 _f32 _f64 - _s8 _s16 _s32 _s64 - _u8 _u16 _u32 _u64. */ #define TYPES_all_data(S, D) \ - S (bf16), TYPES_all_arith (S, D) + TYPES_b_data (S, D), \ + TYPES_h_data (S, D), \ + TYPES_s_data (S, D), \ + TYPES_d_data (S, D) /* _b only. */ #define TYPES_b(S, D) \ @@ -255,6 +254,11 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_b_integer(S, D) \ S (s8), TYPES_b_unsigned (S, D) +/* _s8 + _u8. */ +#define TYPES_b_data(S, D) \ + TYPES_b_integer (S, D) + /* _s8 _s16 _u8 _u16. */ #define TYPES_bh_integer(S, D) \ @@ -277,12 +281,10 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_bhs_integer(S, D) \ TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D) -/* _bf16 -_f16 _f32 -_s8 _s16 _s32 -_u8 _u16 _u32. 
*/ #define TYPES_bhs_data(S, D) \ - S (bf16), S (f16), S (f32), TYPES_bhs_integer (S, D) + TYPES_b_data (S, D), \ + TYPES_h_data (S, D), \ + TYPES_s_data (S, D) /* _s16_s8 _s32_s16 _s64_s32 _u16_u8 _u32_u16 _u64_u32. */ @@ -295,6 +297,13 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_h_integer(S, D) \ S (s16), S (u16) +/* _bf16 + _f16 + _s16 + _u16. */ +#define TYPES_h_data(S, D) \ + S (bf16), S (f16), TYPES_h_integer (S, D) + /* _s16 _s32. */ #define TYPES_hs_signed(S, D) \ S (s16), S (s32) @@ -308,12 +317,9 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_hs_float(S, D) \ S (f16), S (f32) -/* _bf16 -_f16 _f32 -_s16 _s32 -_u16 _u32. */ #define TYPES_hs_data(S, D) \ - S (bf16), S (f16), S (f32), TYPES_hs_integer (S, D) + TYPES_h_data (S, D), \ + TYPES_s_data (S, D) /* _u16 _u64. */ #define TYPES_hd_unsigned(S, D) \ @@ -352,10 +358,17 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_s_unsigned(S, D) \ S (u32) -/* _s32 _u32. */ +/* _s32 + _u32. */ #define TYPES_s_integer(S, D) \ TYPES_s_signed (S, D), TYPES_s_unsigned (S, D) +/* _f32 + _s32 + _u32. */ +#define TYPES_s_data(S, D) \ + TYPES_s_float (S, D), TYPES_s_integer (S, D) + /* _s32 _s64. */ #define TYPES_sd_signed(S, D) \ S (s32), S (s64) @@ -369,11 +382,9 @@ CONSTEXPR const group_suffix_info group_suffixes[] = { #define TYPES_sd_integer(S, D) \ TYPES_sd_signed (S, D), TYPES_sd_unsigned (S, D) -/* _f32 _f64 - _s32 _s64 - _u32 _u64. */ #define TYPES_sd_data(S, D) \ - S (f32), S (f64), TYPES_sd_integer (S, D) + TYPES_s_data (S, D), \ + TYPES_d_data (S, D) /* _f16 _f32 _f64 _s32 _s64
[gcc r15-5086] aarch64: Test TARGET_STREAMING instead of TARGET_STREAMING_SME
https://gcc.gnu.org/g:22d25b1fee6656df3203dc8ffee6d1d55fc1a7fd commit r15-5086-g22d25b1fee6656df3203dc8ffee6d1d55fc1a7fd Author: Richard Sandiford Date: Mon Nov 11 12:32:14 2024 + aarch64: Test TARGET_STREAMING instead of TARGET_STREAMING_SME g:ede97598e2c recorded separate ISA requirements for streaming and non-streaming mode. The premise there was that AARCH64_FL_SME should not be included in the streaming mode requirements, since: (a) an __arm_streaming_compatible function wouldn't be in streaming mode if SME wasn't available. (b) __arm_streaming_compatible functions only allow things that are possible in non-streaming mode, so the non-streaming architecture is enough to assemble the code, even if +sme isn't enabled. (c) we reject __arm_streaming if +sme isn't enabled, so don't need to test it for individual intrinsics as well. Later patches lean into this further. This patch applies the same reasoning to the .md constructs for base streaming-only SME instructions, guarding them with TARGET_STREAMING rather than TARGET_STREAMING_SME. gcc/ * config/aarch64/aarch64.h (TARGET_SME): Expand comment. (TARGET_STREAMING_SME): Delete. * config/aarch64/aarch64-sme.md: Use TARGET_STREAMING instead of TARGET_STREAMING_SME. * config/aarch64/aarch64-sve2.md: Likewise. 
Diff: --- gcc/config/aarch64/aarch64-sme.md | 28 ++-- gcc/config/aarch64/aarch64-sve2.md | 8 gcc/config/aarch64/aarch64.h | 6 ++ 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 9215f51b01f8..8fca138314c2 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -481,7 +481,7 @@ (match_operand: 2 "register_operand" "Upl") (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")] SME_LD1))] - "TARGET_STREAMING_SME" + "TARGET_STREAMING" "ld1\t{ za%0.[%w1, 0] }, %2/z, %3" ) @@ -496,7 +496,7 @@ (match_operand: 3 "register_operand" "Upl") (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")] SME_LD1))] - "TARGET_STREAMING_SME + "TARGET_STREAMING && UINTVAL (operands[2]) < 128 / " "ld1\t{ za%0.[%w1, %2] }, %3/z, %4" ) @@ -583,7 +583,7 @@ (match_operand:SI 2 "register_operand" "Ucj") (match_operand: 3 "register_operand" "Upl")] SME_ST1))] - "TARGET_STREAMING_SME" + "TARGET_STREAMING" "st1\t{ za%1.[%w2, 0] }, %3, %0" ) @@ -598,7 +598,7 @@ (match_operand:SI 3 "const_int_operand")) (match_operand: 4 "register_operand" "Upl")] SME_ST1))] - "TARGET_STREAMING_SME + "TARGET_STREAMING && UINTVAL (operands[3]) < 128 / " "st1\t{ za%1.[%w2, %3] }, %4, %0" ) @@ -663,7 +663,7 @@ (match_operand:DI 3 "const_int_operand") (match_operand:SI 4 "register_operand" "Ucj")] SME_READ))] - "TARGET_STREAMING_SME" + "TARGET_STREAMING" "mova\t%0., %2/m, za%3.[%w4, 0]" ) @@ -678,7 +678,7 @@ (plus:SI (match_operand:SI 4 "register_operand" "Ucj") (match_operand:SI 5 "const_int_operand"))] SME_READ))] - "TARGET_STREAMING_SME + "TARGET_STREAMING && UINTVAL (operands[5]) < 128 / " "mova\t%0., %2/m, za%3.[%w4, %5]" ) @@ -693,7 +693,7 @@ (match_operand:DI 3 "const_int_operand") (match_operand:SI 4 "register_operand" "Ucj")] SME_READ))] - "TARGET_STREAMING_SME" + "TARGET_STREAMING" "mova\t%0.q, %2/m, za%3.q[%w4, 0]" ) @@ -707,7 +707,7 @@ (match_operand: 2 "register_operand" 
"Upl") (match_operand:SVE_FULL 3 "register_operand" "w")] SME_WRITE))] - "TARGET_STREAMING_SME" + "TARGET_STREAMING" "mova\tza%0.[%w1, 0], %2/m, %3." ) @@ -722,7 +722,7 @@ (match_operand: 3 "register_operand" "Upl") (match_operand:SVE_FULL 4 "register_operand" "w")] SME_WRITE))] - "TARGET_STREAMING_SME + "TARGET_STREAMING && UINTVAL (operands[2]) < 128 / " "mova\tza%0.[%w1, %2], %3/m, %4." ) @@ -737,7 +737,7 @@ (match_operand:VNx2BI 2 "register_operand" "Upl") (match_operand:SVE_FULL 3 "register_operand" "w")] SME_WRITE))] - "TARGET_STREAMING_SME" + "TARGET_STREAMING" "mova\tza%0.q[%w1, 0], %2/m, %3.q" ) @@ -917,7 +917,7 @@ (match_operand: 2 "register_operand" "Upl") (match_operand:SME_ZA_SDI 3 "register_operand" "w")] SME_BINARY_SDI))] - "TARGET_STREAMING_SME" + "TARGET_STREAMING" "\tza%0., %1/m, %2/m, %3." ) @@ -1479,7 +1479,7 @@ (match_operand:VNx16QI_ONLY 3 "register_operand" "w") (match_operand:VNx16QI_ONLY 4 "register_operand" "w")] SME_INT_MOP))] - "TARGET_STREAMI
[gcc r15-5089] aarch64: Add an abstraction for vector base addresses
https://gcc.gnu.org/g:67ba352ce13f13b9a8f4296a850b491adf025d59 commit r15-5089-g67ba352ce13f13b9a8f4296a850b491adf025d59 Author: Richard Sandiford Date: Mon Nov 11 12:32:16 2024 + aarch64: Add an abstraction for vector base addresses In the upcoming SVE2.1 svld1q and svst1q intrinsics, the relationship between the base vector and the data vector differs from existing gather/scatter intrinsics. This patch adds a new abstraction to handle the difference. gcc/ * config/aarch64/aarch64-sve-builtins.h (function_shape::vector_base_type): New member function. * config/aarch64/aarch64-sve-builtins.cc (function_shape::vector_base_type): Likewise. (function_resolver::resolve_sv_displacement): Use it. (function_resolver::resolve_gather_address): Likewise. Diff: --- gcc/config/aarch64/aarch64-sve-builtins.cc | 24 ++-- gcc/config/aarch64/aarch64-sve-builtins.h | 2 ++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index c0b5115fdebe..a259f637a290 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -1176,6 +1176,21 @@ aarch64_const_binop (enum tree_code code, tree arg1, tree arg2) return NULL_TREE; } +/* Return the type that a vector base should have in a gather load or + scatter store involving vectors of type TYPE. In an extending load, + TYPE is the result of the extension; in a truncating store, it is the + input to the truncation. + + Index vectors have the same width as base vectors, but can be either + signed or unsigned. */ +type_suffix_index +function_shape::vector_base_type (type_suffix_index type) const +{ + unsigned int required_bits = type_suffixes[type].element_bits; + gcc_assert (required_bits == 32 || required_bits == 64); + return required_bits == 32 ? TYPE_SUFFIX_u32 : TYPE_SUFFIX_u64; +} + /* Return a hash code for a function_instance. 
*/ hashval_t function_instance::hash () const @@ -2750,7 +2765,8 @@ function_resolver::resolve_sv_displacement (unsigned int argno, return mode; } - unsigned int required_bits = type_suffixes[type].element_bits; + auto base_type = shape->vector_base_type (type); + unsigned int required_bits = type_suffixes[base_type].element_bits; if (required_bits == 32 && displacement_units () == UNITS_elements && !lookup_form (MODE_s32index, type) @@ -2900,11 +2916,7 @@ function_resolver::resolve_gather_address (unsigned int argno, return MODE_none; /* Check whether the type is the right one. */ - unsigned int required_bits = type_suffixes[type].element_bits; - gcc_assert (required_bits == 32 || required_bits == 64); - type_suffix_index required_type = (required_bits == 32 -? TYPE_SUFFIX_u32 -: TYPE_SUFFIX_u64); + auto required_type = shape->vector_base_type (type); if (required_type != base_type) { error_at (location, "passing %qT to argument %d of %qE," diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index d5cc6e0a40d4..1fb7abe132fa 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -784,6 +784,8 @@ public: more common than false, so provide a default definition. */ virtual bool explicit_group_suffix_p () const { return true; } + virtual type_suffix_index vector_base_type (type_suffix_index) const; + /* Define all functions associated with the given group. */ virtual void build (function_builder &, const function_group_info &) const = 0;
[gcc r15-5085] aarch64: Make more use of TARGET_STREAMING_SME2
https://gcc.gnu.org/g:0c9a5ed01662daca5f30b3861db8680b377feb71 commit r15-5085-g0c9a5ed01662daca5f30b3861db8680b377feb71 Author: Richard Sandiford Date: Mon Nov 11 12:32:14 2024 + aarch64: Make more use of TARGET_STREAMING_SME2 Some code was checking TARGET_STREAMING and TARGET_SME2 separately, but we now have a macro to test both at once. gcc/ * config/aarch64/aarch64-sme.md: Use TARGET_STREAMING_SME2 instead of separate TARGET_STREAMING and TARGET_SME2 tests. * config/aarch64/aarch64-sve2.md: Likewise. * config/aarch64/iterators.md: Likewise. Diff: --- gcc/config/aarch64/aarch64-sme.md | 34 ++ gcc/config/aarch64/aarch64-sve2.md | 6 +++--- gcc/config/aarch64/iterators.md| 8 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 78ad2fc699f2..9215f51b01f8 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -1334,7 +1334,7 @@ (match_operand:VNx8HI_ONLY 1 "register_operand" "w") (match_operand:VNx8HI_ONLY 2 "register_operand" "x")] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" "ll\tza.d[%w0, 0:3], %1.h, %2.h" ) @@ -1348,7 +1348,7 @@ (match_operand:VNx8HI_ONLY 2 "register_operand" "w") (match_operand:VNx8HI_ONLY 3 "register_operand" "x")] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" { operands[4] = GEN_INT (INTVAL (operands[1]) + 3); return "ll\tza.d[%w0, %1:%4], %2.h, %3.h"; @@ -1364,7 +1364,7 @@ (match_operand:SME_ZA_HIx24 1 "aligned_register_operand" "Uw") (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw")] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" "ll\tza.d[%w0, 0:3, vgx], %1, %2" ) @@ -1378,7 +1378,7 @@ (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw") 
(match_operand:SME_ZA_HIx24 3 "aligned_register_operand" "Uw")] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" { operands[4] = GEN_INT (INTVAL (operands[1]) + 3); return "ll\tza.d[%w0, %1:%4, vgx], %2, %3"; @@ -1395,7 +1395,7 @@ (vec_duplicate:SME_ZA_HIx24 (match_operand: 2 "register_operand" "x"))] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" "ll\tza.d[%w0, 0:3, vgx], %1, %2.h" ) @@ -1410,7 +1410,7 @@ (vec_duplicate:SME_ZA_HIx24 (match_operand: 3 "register_operand" "x"))] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" { operands[4] = GEN_INT (INTVAL (operands[1]) + 3); return "ll\tza.d[%w0, %1:%4, vgx], %2, %3.h"; @@ -1429,7 +1429,7 @@ (match_operand:SI 3 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" "ll\tza.d[%w0, 0:3], %1, %2.h[%3]" ) @@ -1446,7 +1446,7 @@ (match_operand:SI 4 "const_int_operand")] UNSPEC_SVE_LANE_SELECT)] SME_INT_TERNARY_SLICE))] - "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME" + "TARGET_STREAMING_SME2 && TARGET_SME_I16I64" { operands[5] = GEN_INT (INTVAL (operands[1]) + 3); return "ll\tza.d[%w0, %1:%5], %2, %3.h[%4]"; @@ -1642,8 +1642,7 @@ (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw") (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw")] SME_FP_TERNARY_SLICE))] - "TARGET_SME2 - && TARGET_STREAMING_SME + "TARGET_STREAMING_SME2 && == " "\tza.[%w0, 0, vgx], %1, %2" ) @@ -1658,8 +1657,7 @@ (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw") (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw")] SME_FP_TERNARY_SLICE))] - "TARGET_SME2 - && TARGET_STREAMING_SME + "TARGET_STREAMING_SME2 && == " 
"\tza.[%w0, %1, vgx], %2, %3" ) @@ -1674,8 +1672,7 @@ (vec_duplicate:SME_ZA_SDFx24 (match_operand: 2 "register_operand" "x"))] SME_FP_TERNARY_SLICE))] - "TARGET_SME2 - && TARGET_STREAMING_SME + "TARGET_STREAMING_SME2 && == " "\tza.[%w0, 0, vgx], %1, %2." ) @@ -1691,8 +1688,7 @@ (vec_duplicate:SME_ZA_SDFx24 (match_operand: 3 "register_operand"
[gcc r15-5091] aarch64: Parameterise SVE pointer type inference
https://gcc.gnu.org/g:cb83ad9d93fa113a5d038a727b1f8a2bd48a3696 commit r15-5091-gcb83ad9d93fa113a5d038a727b1f8a2bd48a3696 Author: Richard Sandiford Date: Mon Nov 11 12:32:17 2024 + aarch64: Parameterise SVE pointer type inference All extending gather load intrinsics encode the source type in their name (e.g. svld1sb for an extending load from signed bytes). The type of the extension result has to be specified using an explicit type suffix; it isn't something that can be inferred from the arguments, since there are multiple valid choices for the same arguments. This meant that type inference for gather loads was only needed for non-extending loads, in which case the pointer target had to be a 32-bit or 64-bit element type. The gather_scatter_p argument to function_resolver::infer_pointer_type therefore controlled two things: how we should react to vector base addresses, and whether we should require a minimum element size of 32. The element size restriction doesn't apply to the upcoming SVE2.1 svld1q intrinsic, so this patch adds a separate argument for the minimum element size requirement. gcc/ * config/aarch64/aarch64-sve-builtins.h (function_resolver::target_type_restrictions): New enum. (function_resolver::infer_pointer_type): Add an extra argument that specifies what the target type can be. * config/aarch64/aarch64-sve-builtins.cc (function_resolver::infer_pointer_type): Likewise. * config/aarch64/aarch64-sve-builtins-shapes.cc (load_gather_sv_base::get_target_type_restrictions): New virtual member function. (load_gather_sv_base::resolve): Use it. Update call to infer_pointer_type. 
Diff: --- gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 10 +- gcc/config/aarch64/aarch64-sve-builtins.cc| 8 +--- gcc/config/aarch64/aarch64-sve-builtins.h | 4 +++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index e1204c283b6d..cf321540b60e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -815,14 +815,22 @@ struct load_gather_sv_base : public overloaded_base<0> unsigned int i, nargs; mode_suffix_index mode; type_suffix_index type; +auto restrictions = get_target_type_restrictions (r); if (!r.check_gp_argument (2, i, nargs) - || (type = r.infer_pointer_type (i, true)) == NUM_TYPE_SUFFIXES + || (type = r.infer_pointer_type (i, true, +restrictions)) == NUM_TYPE_SUFFIXES || (mode = r.resolve_sv_displacement (i + 1, type, true), mode == MODE_none)) return error_mark_node; return r.resolve_to (mode, type); } + + virtual function_resolver::target_type_restrictions + get_target_type_restrictions (const function_instance &) const + { +return function_resolver::TARGET_32_64; + } }; /* Base class for load_ext_gather_index and load_ext_gather_offset, diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index a259f637a290..9fb0d6fd4168 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -1998,10 +1998,12 @@ function_resolver::infer_64bit_scalar_integer_pair (unsigned int argno) corresponding type suffix. Return that type suffix on success, otherwise report an error and return NUM_TYPE_SUFFIXES. GATHER_SCATTER_P is true if the function is a gather/scatter - operation, and so requires a pointer to 32-bit or 64-bit data. */ + operation. RESTRICTIONS describes any additional restrictions + on the target type. 
*/ type_suffix_index function_resolver::infer_pointer_type (unsigned int argno, - bool gather_scatter_p) + bool gather_scatter_p, + target_type_restrictions restrictions) { tree actual = get_argument_type (argno); if (actual == error_mark_node) @@ -2027,7 +2029,7 @@ function_resolver::infer_pointer_type (unsigned int argno, return NUM_TYPE_SUFFIXES; } unsigned int bits = type_suffixes[type].element_bits; - if (gather_scatter_p && bits != 32 && bits != 64) + if (restrictions == TARGET_32_64 && bits != 32 && bits != 64) { error_at (location, "passing %qT to argument %d of %qE, which" " expects a pointer to 32-bit or 64-bit elements", diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h index 1fb7abe132fa..5bd9b88d1179 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.h +++ b/gcc/config/aarch64/aarch64-sve-builtins.h @@ -
[gcc r15-5095] aarch64: Define arm_neon.h types in arm_sve.h too
https://gcc.gnu.org/g:ddc014d2c69240ecf8a49399d1a58ebb2530b9d1 commit r15-5095-gddc014d2c69240ecf8a49399d1a58ebb2530b9d1 Author: Richard Sandiford Date: Mon Nov 11 12:32:19 2024 + aarch64: Define arm_neon.h types in arm_sve.h too This patch moves the scalar and single-vector Advanced SIMD types from arm_neon.h into a private header, so that they can be defined by arm_sve.h as well. This is needed for the upcoming SVE2.1 hybrid-VLA reductions, which return 128-bit Advanced SIMD vectors. The approach follows Claudio's patch for FP8. gcc/ * config.gcc (extra_headers): Add arm_private_neon_types.h. * config/aarch64/arm_private_neon_types.h: New file, split out from... * config/aarch64/arm_neon.h: ...here. * config/aarch64/arm_sve.h: Include arm_private_neon_types.h Diff: --- gcc/config.gcc | 2 +- gcc/config/aarch64/arm_neon.h | 49 ++ gcc/config/aarch64/arm_private_neon_types.h | 79 + gcc/config/aarch64/arm_sve.h| 5 +- 4 files changed, 84 insertions(+), 51 deletions(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index b8133524d82b..9b616bd6e1f8 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -347,7 +347,7 @@ m32c*-*-*) ;; aarch64*-*-*) cpu_type=aarch64 - extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h arm_neon_sve_bridge.h arm_private_fp8.h" + extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h arm_sme.h arm_neon_sve_bridge.h arm_private_fp8.h arm_private_neon_types.h" c_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o" d_target_objs="aarch64-d.o" diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index d3533f3ee6fe..c727302ac75f 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -30,58 +30,15 @@ #pragma GCC push_options #pragma GCC target ("+nothing+simd") +#include #include -#pragma GCC aarch64 "arm_neon.h" +#include -#include +#pragma GCC aarch64 "arm_neon.h" #define __AARCH64_UINT64_C(__C) ((uint64_t) __C) #define __AARCH64_INT64_C(__C) ((int64_t) __C) 
-typedef __Int8x8_t int8x8_t; -typedef __Int16x4_t int16x4_t; -typedef __Int32x2_t int32x2_t; -typedef __Int64x1_t int64x1_t; -typedef __Float16x4_t float16x4_t; -typedef __Float32x2_t float32x2_t; -typedef __Poly8x8_t poly8x8_t; -typedef __Poly16x4_t poly16x4_t; -typedef __Uint8x8_t uint8x8_t; -typedef __Uint16x4_t uint16x4_t; -typedef __Uint32x2_t uint32x2_t; -typedef __Float64x1_t float64x1_t; -typedef __Uint64x1_t uint64x1_t; -typedef __Int8x16_t int8x16_t; -typedef __Int16x8_t int16x8_t; -typedef __Int32x4_t int32x4_t; -typedef __Int64x2_t int64x2_t; -typedef __Float16x8_t float16x8_t; -typedef __Float32x4_t float32x4_t; -typedef __Float64x2_t float64x2_t; -typedef __Poly8x16_t poly8x16_t; -typedef __Poly16x8_t poly16x8_t; -typedef __Poly64x2_t poly64x2_t; -typedef __Poly64x1_t poly64x1_t; -typedef __Uint8x16_t uint8x16_t; -typedef __Uint16x8_t uint16x8_t; -typedef __Uint32x4_t uint32x4_t; -typedef __Uint64x2_t uint64x2_t; - -typedef __Poly8_t poly8_t; -typedef __Poly16_t poly16_t; -typedef __Poly64_t poly64_t; -typedef __Poly128_t poly128_t; - -typedef __Mfloat8x8_t mfloat8x8_t; -typedef __Mfloat8x16_t mfloat8x16_t; - -typedef __fp16 float16_t; -typedef float float32_t; -typedef double float64_t; - -typedef __Bfloat16x4_t bfloat16x4_t; -typedef __Bfloat16x8_t bfloat16x8_t; - /* __aarch64_vdup_lane internal macros. */ #define __aarch64_vdup_lane_any(__size, __q, __a, __b) \ vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b)) diff --git a/gcc/config/aarch64/arm_private_neon_types.h b/gcc/config/aarch64/arm_private_neon_types.h new file mode 100644 index ..0f588f026b7d --- /dev/null +++ b/gcc/config/aarch64/arm_private_neon_types.h @@ -0,0 +1,79 @@ +/* AArch64 type definitions for arm_neon.h + Do not include this file directly. Use one of arm_neon.h, arm_sme.h, + or arm_sve.h instead. + + Copyright (C) 2024 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the
[gcc r15-5092] aarch64: Factor out part of the SVE ext_def class
https://gcc.gnu.org/g:4a9721ca521d7a5626f1788afaf04a2b45960ea3 commit r15-5092-g4a9721ca521d7a5626f1788afaf04a2b45960ea3 Author: Richard Sandiford Date: Mon Nov 11 12:32:17 2024 + aarch64: Factor out part of the SVE ext_def class This patch factors out some of ext_def into a base class, so that it can be reused for the SVE2.1 svextq intrinsic. gcc/ * config/aarch64/aarch64-sve-builtins-shapes.cc (ext_base): New base class, extracted from... (ext_def): ...here. Diff: --- gcc/config/aarch64/aarch64-sve-builtins-shapes.cc | 32 +-- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc index cf321540b60e..62277afaeff4 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc @@ -735,6 +735,23 @@ struct binary_za_slice_opt_single_base : public overloaded_base<1> } }; +/* Base class for ext. */ +struct ext_base : public overloaded_base<0> +{ + void + build (function_builder &b, const function_group_info &group) const override + { +b.add_overloaded_functions (group, MODE_none); +build_all (b, "v0,v0,v0,su64", group, MODE_none); + } + + tree + resolve (function_resolver &r) const override + { +return r.resolve_uniform (2, 1); + } +}; + /* Base class for inc_dec and inc_dec_pat. */ struct inc_dec_base : public overloaded_base<0> { @@ -2413,21 +2430,8 @@ SHAPE (dupq) where the final argument is an integer constant expression that when multiplied by the number of bytes in t0 is in the range [0, 255]. 
*/ -struct ext_def : public overloaded_base<0> +struct ext_def : public ext_base { - void - build (function_builder &b, const function_group_info &group) const override - { -b.add_overloaded_functions (group, MODE_none); -build_all (b, "v0,v0,v0,su64", group, MODE_none); - } - - tree - resolve (function_resolver &r) const override - { -return r.resolve_uniform (2, 1); - } - bool check (function_checker &c) const override {
[gcc r15-5099] aarch64: Conditionally define __ARM_FEATURE_SVE2p1
https://gcc.gnu.org/g:e68e814d1f4360bac78d9b5f4e0feb6e41a98a27 commit r15-5099-ge68e814d1f4360bac78d9b5f4e0feb6e41a98a27 Author: Richard Sandiford Date: Mon Nov 11 12:32:21 2024 + aarch64: Conditionally define __ARM_FEATURE_SVE2p1 Previous patches are supposed to add full support for SVE2.1, so this patch advertises that through __ARM_FEATURE_SVE2p1. pragma_cpp_predefs_3.c had one fewer pop than push. The final test is triple-nested: - armv8-a (to start with a clean slate, untainted by command-line flags) - the maximal SVE set - general-regs-only gcc/ * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Handle __ARM_FEATURE_SVE2p1. gcc/testsuite/ * gcc.target/aarch64/pragma_cpp_predefs_3.c: Add SVE2p1 tests. Diff: --- gcc/config/aarch64/aarch64-c.cc| 1 + .../gcc.target/aarch64/pragma_cpp_predefs_3.c | 84 ++ 2 files changed, 85 insertions(+) diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc index f9b9e3793755..d1ae80c0bb3e 100644 --- a/gcc/config/aarch64/aarch64-c.cc +++ b/gcc/config/aarch64/aarch64-c.cc @@ -214,6 +214,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) "__ARM_FEATURE_SVE2_BITPERM", pfile); aarch64_def_or_undef (TARGET_SVE2_SHA3, "__ARM_FEATURE_SVE2_SHA3", pfile); aarch64_def_or_undef (TARGET_SVE2_SM4, "__ARM_FEATURE_SVE2_SM4", pfile); + aarch64_def_or_undef (TARGET_SVE2p1, "__ARM_FEATURE_SVE2p1", pfile); aarch64_def_or_undef (TARGET_LSE, "__ARM_FEATURE_ATOMICS", pfile); aarch64_def_or_undef (TARGET_AES, "__ARM_FEATURE_AES", pfile); diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_3.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_3.c index 39128528600f..f1f70ed7b5c1 100644 --- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_3.c +++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_3.c @@ -28,6 +28,10 @@ #error "__ARM_FEATURE_SVE2_SM4 is defined but should not be!" #endif +#ifdef __ARM_FEATURE_SVE2p1 +#error "__ARM_FEATURE_SVE2p1 is defined but should not be!" 
+#endif + #pragma GCC push_options #pragma GCC target ("arch=armv8.2-a+sve") @@ -55,6 +59,10 @@ #error "__ARM_FEATURE_SVE2_SM4 is defined but should not be!" #endif +#ifdef __ARM_FEATURE_SVE2p1 +#error "__ARM_FEATURE_SVE2p1 is defined but should not be!" +#endif + #pragma GCC pop_options #pragma GCC push_options @@ -84,6 +92,10 @@ #error "__ARM_FEATURE_SVE2_SM4 is defined but should not be!" #endif +#ifdef __ARM_FEATURE_SVE2p1 +#error "__ARM_FEATURE_SVE2p1 is defined but should not be!" +#endif + #pragma GCC pop_options #pragma GCC push_options @@ -242,6 +254,72 @@ #error "__ARM_FEATURE_SVE2_SM4 is not defined but should be!" #endif +#pragma GCC pop_options + +#pragma GCC push_options +#pragma GCC target ("arch=armv9-a+sve2p1") + +#ifndef __ARM_FEATURE_SVE +#error "__ARM_FEATURE_SVE is not defined but should be!" +#endif + +#ifndef __ARM_FEATURE_SVE2 +#error "__ARM_FEATURE_SVE2 is not defined but should be!" +#endif + +#ifdef __ARM_FEATURE_SVE2_AES +#error "__ARM_FEATURE_SVE2_AES is defined but should not be!" +#endif + +#ifdef __ARM_FEATURE_SVE2_BITPERM +#error "__ARM_FEATURE_SVE2_BITPERM is defined but should not be!" +#endif + +#ifdef __ARM_FEATURE_SVE2_SHA3 +#error "__ARM_FEATURE_SVE2_SHA3 is defined but should not be!" +#endif + +#ifdef __ARM_FEATURE_SVE2_SM4 +#error "__ARM_FEATURE_SVE2_SM4 is defined but should not be!" +#endif + +#ifndef __ARM_FEATURE_SVE2p1 +#error "__ARM_FEATURE_SVE2p1 is not defined but should be!" +#endif + +#pragma GCC pop_options + +#pragma GCC push_options +#pragma GCC target ("arch=armv9-a+sve2-aes+sve2-bitperm+sve2-sha3+sve2-sm4+sve2p1") + +#ifndef __ARM_FEATURE_SVE +#error "__ARM_FEATURE_SVE is not defined but should be!" +#endif + +#ifndef __ARM_FEATURE_SVE2 +#error "__ARM_FEATURE_SVE2 is not defined but should be!" +#endif + +#ifndef __ARM_FEATURE_SVE2_AES +#error "__ARM_FEATURE_SVE2_AES is not defined but should be!" +#endif + +#ifndef __ARM_FEATURE_SVE2_BITPERM +#error "__ARM_FEATURE_SVE2_BITPERM is not defined but should be!" 
+#endif + +#ifndef __ARM_FEATURE_SVE2_SHA3 +#error "__ARM_FEATURE_SVE2_SHA3 is not defined but should be!" +#endif + +#ifndef __ARM_FEATURE_SVE2_SM4 +#error "__ARM_FEATURE_SVE2_SM4 is not defined but should be!" +#endif + +#ifndef __ARM_FEATURE_SVE2p1 +#error "__ARM_FEATURE_SVE2p1 is not defined but should be!" +#endif + #pragma GCC push_options #pragma GCC target ("general-regs-only") @@ -269,6 +347,12 @@ #error "__ARM_FEATURE_SVE2_SM4 is defined but should not be!" #endif +#ifdef __ARM_FEATURE_SVE2p1 +#error "__ARM_FEATURE_SVE2p1 is defined but should not be!" +#endif + +#pragma GCC pop_options + #pragma GCC pop_options #pragma GCC pop_options
[gcc r15-5096] aarch64: Add common subset of SVE2p1 and SME
https://gcc.gnu.org/g:f13e0138269f16ae896d952425c04ad2e11b8dbe commit r15-5096-gf13e0138269f16ae896d952425c04ad2e11b8dbe Author: Richard Sandiford Date: Mon Nov 11 12:32:20 2024 + aarch64: Add common subset of SVE2p1 and SME Some instructions that were previously restricted to streaming mode can also be used in non-streaming mode with SVE2.1. This patch adds support for those, as well as the usual new-extension boilerplate. A later patch will add the feature macro. gcc/ * config/aarch64/aarch64-option-extensions.def (sve2p1): New extension. * doc/invoke.texi (sve2p1): Document it. * config/aarch64/aarch64-sve-builtins-sve2.def: Mark instructions that are common to both SVE2p1 and SME. * config/aarch64/aarch64.h (TARGET_SVE2p1): New macro. (TARGET_SVE2p1_OR_SME): Likewise. * config/aarch64/aarch64-sve2.md (@aarch64_sve_psel): Require TARGET_SVE2p1_OR_SME instead of TARGET_STREAMING. (*aarch64_sve_psel_plus): Likewise. (@aarch64_sve_clamp): Likewise. (*aarch64_sve_clamp_x): Likewise. (@aarch64_pred_): Likewise. (@cond_): Likewise. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_aarch64_asm_sve2p1_ok): New procedure. * gcc.target/aarch64/sve/clamp_1.c: New test. * gcc.target/aarch64/sve/clamp_2.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_s16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_s32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_s64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_s8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_u16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_u32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_u64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/clamp_u8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/psel_lane_b16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/psel_lane_b32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/psel_lane_b64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/psel_lane_b8.c: Likewise. 
* gcc.target/aarch64/sve2/acle/asm/psel_lane_c16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/psel_lane_c32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/psel_lane_c64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/psel_lane_c8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_bf16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_f16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_f32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_f64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_s16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_s32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_s64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_s8.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_u16.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_u32.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_u64.c: Likewise. * gcc.target/aarch64/sve2/acle/asm/revd_u8.c: Likewise. Diff: --- gcc/config/aarch64/aarch64-option-extensions.def | 2 + gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 2 +- gcc/config/aarch64/aarch64-sve2.md | 12 +-- gcc/config/aarch64/aarch64.h | 9 +++ gcc/doc/invoke.texi| 2 + gcc/testsuite/gcc.target/aarch64/sve/clamp_1.c | 40 ++ gcc/testsuite/gcc.target/aarch64/sve/clamp_2.c | 34 .../gcc.target/aarch64/sve2/acle/asm/clamp_s16.c | 46 +++ .../gcc.target/aarch64/sve2/acle/asm/clamp_s32.c | 46 +++ .../gcc.target/aarch64/sve2/acle/asm/clamp_s64.c | 46 +++ .../gcc.target/aarch64/sve2/acle/asm/clamp_s8.c| 46 +++ .../gcc.target/aarch64/sve2/acle/asm/clamp_u16.c | 46 +++ .../gcc.target/aarch64/sve2/acle/asm/clamp_u32.c | 46 +++ .../gcc.target/aarch64/sve2/acle/asm/clamp_u64.c | 46 +++ .../gcc.target/aarch64/sve2/acle/asm/clamp_u8.c| 46 +++ .../aarch64/sve2/acle/asm/psel_lane_b16.c | 93 ++ .../aarch64/sve2/acle/asm/psel_lane_b32.c | 93 ++ .../aarch64/sve2/acle/asm/psel_lane_b64.c | 84 +++ .../aarch64/sve2/acle/asm/psel_lane_b8.c | 93 ++ .../aarch64/sve2/acle/asm/psel_lane_c16.c | 93 ++ 
.../aarch64/sve2/acle/asm/psel_lane_c32.c | 93 ++ .../aarch64/sve2/acle/asm/psel_lane_c6
[gcc r15-5104] testsuite: arm: fast-math-complex-add-half-float.c test should not xfail
https://gcc.gnu.org/g:a2467372e7241be661c5fde80b751d84df4d567b commit r15-5104-ga2467372e7241be661c5fde80b751d84df4d567b Author: Torbjörn SVENSSON Date: Sun Nov 10 14:46:51 2024 +0100 testsuite: arm: fast-math-complex-add-half-float.c test should not xfail With the change in 15-3128-gde1923f9f4d, this test case no longer xfails. gcc/testsuite/ChangeLog: * gcc.dg/vect/complex/fast-math-complex-add-half-float.c: Remove xfail from test. Signed-off-by: Torbjörn SVENSSON Diff: --- .../gcc.dg/vect/complex/fast-math-complex-add-half-float.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-half-float.c b/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-half-float.c index 1fa914916eea..a773e796ddcd 100644 --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-half-float.c +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-complex-add-half-float.c @@ -8,7 +8,5 @@ #define N 200 #include "complex-add-template.c" -/* Vectorization is failing for these cases. They should work but for now ignore. */ - -/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
[gcc r14-10917] libstdc++: Do not define _Insert_base::try_emplace before C++17
https://gcc.gnu.org/g:2f744df601b298e31dcdb49316da93f0ce1f00b7 commit r14-10917-g2f744df601b298e31dcdb49316da93f0ce1f00b7 Author: Jonathan Wakely Date: Fri Nov 8 13:58:23 2024 + libstdc++: Do not define _Insert_base::try_emplace before C++17 This is not a reserved name in C++11 and C++14, so must not be defined. Also use the appropriate feature test macros for the try_emplace members of the Debug Mode maps. libstdc++-v3/ChangeLog: * include/bits/hashtable_policy.h (_Insert_base::try_emplace): Do not define for C++11 and C++14. * include/debug/map.h (try_emplace): Use feature test macro. * include/debug/unordered_map (try_emplace): Likewise. * testsuite/17_intro/names.cc: Define try_emplace before C++17. (cherry picked from commit b66a57c0ad300b293ebd366bc29f44f2ddb65c69) Diff: --- libstdc++-v3/include/bits/hashtable_policy.h | 2 ++ libstdc++-v3/include/debug/map.h | 2 +- libstdc++-v3/include/debug/unordered_map | 2 +- libstdc++-v3/testsuite/17_intro/names.cc | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h index 26def24f24e6..68ec8d9470a7 100644 --- a/libstdc++-v3/include/bits/hashtable_policy.h +++ b/libstdc++-v3/include/bits/hashtable_policy.h @@ -966,6 +966,7 @@ namespace __detail return __h._M_insert(__hint, __v, __node_gen, __unique_keys{}); } +#ifdef __glibcxx_unordered_map_try_emplace // C++ >= 17 && HOSTED template std::pair try_emplace(const_iterator, _KType&& __k, _Args&&... 
__args) @@ -987,6 +988,7 @@ namespace __detail __node._M_node = nullptr; return { __it, true }; } +#endif void insert(initializer_list __l) diff --git a/libstdc++-v3/include/debug/map.h b/libstdc++-v3/include/debug/map.h index d0e398f0fd97..5323a2b0d950 100644 --- a/libstdc++-v3/include/debug/map.h +++ b/libstdc++-v3/include/debug/map.h @@ -344,7 +344,7 @@ namespace __debug } -#if __cplusplus > 201402L +#ifdef __glibcxx_map_try_emplace // C++ >= 17 && HOSTED template pair try_emplace(const key_type& __k, _Args&&... __args) diff --git a/libstdc++-v3/include/debug/unordered_map b/libstdc++-v3/include/debug/unordered_map index 8a969d817402..1700da16c0f3 100644 --- a/libstdc++-v3/include/debug/unordered_map +++ b/libstdc++-v3/include/debug/unordered_map @@ -440,7 +440,7 @@ namespace __debug _M_check_rehashed(__bucket_count); } -#if __cplusplus > 201402L +#ifdef __glibcxx_unordered_map_try_emplace // C++ >= 17 && HOSTED template pair try_emplace(const key_type& __k, _Args&&... __args) diff --git a/libstdc++-v3/testsuite/17_intro/names.cc b/libstdc++-v3/testsuite/17_intro/names.cc index 9b0ffcb50b2e..ab12641b14df 100644 --- a/libstdc++-v3/testsuite/17_intro/names.cc +++ b/libstdc++-v3/testsuite/17_intro/names.cc @@ -136,6 +136,8 @@ // defines to_chars_result::ptr and to_chars_result::ec #define ec ( #define ptr ( +// and define try_emplace +#define try_emplace ( #endif // These clash with newlib so don't use them.
[gcc r15-5105] testsuite: Require atomic operations for c2y-if-decls-*
https://gcc.gnu.org/g:7b5c974dfc83edfb534dc0550dee8b0e8fd32d96 commit r15-5105-g7b5c974dfc83edfb534dc0550dee8b0e8fd32d96 Author: Dimitar Dimitrov Date: Sun Oct 27 09:49:49 2024 +0200 testsuite: Require atomic operations for c2y-if-decls-* Since some of the c2y-if-decls tests use _Atomic, add a requirement for target to support atomic operations on int and long types. This fixes spurious test link failures on pru-unknown-elf, which lacks atomic ops. The tests still pass on x86_64-linux-gnu. gcc/testsuite/ChangeLog: * gcc.dg/c2y-if-decls-1.c: Require target that supports atomic operations on int and long types. * gcc.dg/c2y-if-decls-11.c: Ditto. * gcc.dg/c2y-if-decls-4.c: Ditto. * gcc.dg/c2y-if-decls-8.c: Ditto. Signed-off-by: Dimitar Dimitrov Diff: --- gcc/testsuite/gcc.dg/c2y-if-decls-1.c | 1 + gcc/testsuite/gcc.dg/c2y-if-decls-11.c | 1 + gcc/testsuite/gcc.dg/c2y-if-decls-4.c | 1 + gcc/testsuite/gcc.dg/c2y-if-decls-8.c | 1 + 4 files changed, 4 insertions(+) diff --git a/gcc/testsuite/gcc.dg/c2y-if-decls-1.c b/gcc/testsuite/gcc.dg/c2y-if-decls-1.c index ab9b3f207003..f4e8e5d2f882 100644 --- a/gcc/testsuite/gcc.dg/c2y-if-decls-1.c +++ b/gcc/testsuite/gcc.dg/c2y-if-decls-1.c @@ -1,6 +1,7 @@ /* N3356 - if declarations. */ /* PR c/117019 */ /* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ /* { dg-options "-std=c2y -Wc23-c2y-compat" } */ /* Test C2Y if declarations. Valid usages. */ diff --git a/gcc/testsuite/gcc.dg/c2y-if-decls-11.c b/gcc/testsuite/gcc.dg/c2y-if-decls-11.c index 5ac962507c9e..065e74c530af 100644 --- a/gcc/testsuite/gcc.dg/c2y-if-decls-11.c +++ b/gcc/testsuite/gcc.dg/c2y-if-decls-11.c @@ -1,6 +1,7 @@ /* N3356 - if declarations. */ /* PR c/117019 */ /* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ /* { dg-options "-std=c2y -pedantic-errors" } */ /* Test C2Y if declarations. Valid usages. 
*/ diff --git a/gcc/testsuite/gcc.dg/c2y-if-decls-4.c b/gcc/testsuite/gcc.dg/c2y-if-decls-4.c index 6df4bb09a9e2..1648f6583bd2 100644 --- a/gcc/testsuite/gcc.dg/c2y-if-decls-4.c +++ b/gcc/testsuite/gcc.dg/c2y-if-decls-4.c @@ -1,6 +1,7 @@ /* N3356 - if declarations. */ /* PR c/117019 */ /* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ /* { dg-options "-std=c2y -Wc23-c2y-compat" } */ /* Test C2Y if declarations. Valid usages. */ diff --git a/gcc/testsuite/gcc.dg/c2y-if-decls-8.c b/gcc/testsuite/gcc.dg/c2y-if-decls-8.c index bd3c5cc4403c..432b3da77f82 100644 --- a/gcc/testsuite/gcc.dg/c2y-if-decls-8.c +++ b/gcc/testsuite/gcc.dg/c2y-if-decls-8.c @@ -1,6 +1,7 @@ /* N3356 - if declarations. */ /* PR c/117019 */ /* { dg-do run } */ +/* { dg-require-effective-target sync_int_long } */ /* { dg-options "-std=c2y -pedantic-errors" } */ /* Test C2Y if declarations. Valid usages. */
[gcc r15-5108] libstdc++: Improve exception messages in conversion classes
https://gcc.gnu.org/g:dfc9062eca47c237953c88a5614ae792792d058d commit r15-5108-gdfc9062eca47c237953c88a5614ae792792d058d Author: Jonathan Wakely Date: Fri Nov 8 11:09:47 2024 + libstdc++: Improve exception messages in conversion classes The std::logic_error exceptions thrown from misuses of std::wbuffer_convert and std::wstring_convert should use names qualified with "std::". libstdc++-v3/ChangeLog: * include/bits/locale_conv.h (wstring_convert, wbuffer_convert): Adjust strings passed to exception constructors. Diff: --- libstdc++-v3/include/bits/locale_conv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/locale_conv.h b/libstdc++-v3/include/bits/locale_conv.h index 63dee1ac8727..fa328ee28949 100644 --- a/libstdc++-v3/include/bits/locale_conv.h +++ b/libstdc++-v3/include/bits/locale_conv.h @@ -289,7 +289,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 * The object's conversion state will persist between conversions. */ wstring_convert(_Codecvt* __pcvt, state_type __state) - : _M_cvt(__pcvt, "wstring_convert"), + : _M_cvt(__pcvt, "std::wstring_convert"), _M_state(__state), _M_with_cvtstate(true) { } @@ -428,7 +428,7 @@ _GLIBCXX_END_NAMESPACE_CXX11 explicit wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt, state_type __state = state_type()) - : _M_buf(__bytebuf), _M_cvt(__pcvt, "wbuffer_convert"), + : _M_buf(__bytebuf), _M_cvt(__pcvt, "std::wbuffer_convert"), _M_state(__state), _M_always_noconv(_M_cvt->always_noconv()) { if (_M_buf)
[gcc r15-5107] libstdc++: Fix typos in iterator increment for std::text_encoding [PR117520]
https://gcc.gnu.org/g:128f6a6d7af9cf187d59c1dbd9e59f5b782e17c8 commit r15-5107-g128f6a6d7af9cf187d59c1dbd9e59f5b782e17c8 Author: Jonathan Wakely Date: Mon Nov 11 11:54:00 2024 + libstdc++: Fix typos in iterator increment for std::text_encoding [PR117520] The intended behaviour for std::text_encoding::aliases_view's iterator is that incrementing or decrementing it too far sets it to a value-initialized state, or fails an assertion when those are enabled. There were typos that used == instead of = which meant that instead of becoming singular or aborting, an out-of-range increment just did nothing. This meant erroneous operations were well-defined and didn't produce any undefined behaviour, but were not diagnosed with assertions enabled, as had been intended. This change fixes the bugs and adds more tests to verify the intended behaviour. libstdc++-v3/ChangeLog: PR libstdc++/117520 * include/std/text_encoding (aliases_view::_Iterator::operator+=): Fix typos that caused == to be used instead of =. (aliases_view::_Iterator): Fix friend declaration. * testsuite/std/text_encoding/members.cc: Adjust expected behaviour of invalid subscript. Add tests for other erroneous operations on iterators. 
Diff: --- libstdc++-v3/include/std/text_encoding | 6 +++--- libstdc++-v3/testsuite/std/text_encoding/members.cc | 19 +-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/libstdc++-v3/include/std/text_encoding b/libstdc++-v3/include/std/text_encoding index 48742dcb0765..6fcaed1ab6aa 100644 --- a/libstdc++-v3/include/std/text_encoding +++ b/libstdc++-v3/include/std/text_encoding @@ -575,7 +575,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION && _M_rep[__n - 1]._M_id == _M_id) [[likely]] _M_rep += __n; else - *this == _Iterator{}; + *this = _Iterator{}; } else if (__n < 0) { @@ -583,7 +583,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION && _M_rep[__n]._M_id == _M_id) [[likely]] _M_rep += __n; else - *this == _Iterator{}; + *this = _Iterator{}; } } if (__n != 0) @@ -645,7 +645,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } private: -friend class text_encoding; +friend struct text_encoding; constexpr explicit _Iterator(const _Rep* __r) noexcept diff --git a/libstdc++-v3/testsuite/std/text_encoding/members.cc b/libstdc++-v3/testsuite/std/text_encoding/members.cc index adbd74ab85ea..253653250c8a 100644 --- a/libstdc++-v3/testsuite/std/text_encoding/members.cc +++ b/libstdc++-v3/testsuite/std/text_encoding/members.cc @@ -70,10 +70,25 @@ test_every_id() auto end = aliases.end(); VERIFY( (begin + std::ranges::distance(aliases)) == end ); #ifndef _GLIBCXX_ASSERTIONS -// This is an error, but with assertions disabled is guaranteed safe: +// These ops violate preconditions, but as libstdc++ extensions they are +// guaranteed to either assert or have well-defined behaviour. + +// This erroneously returns ""sv: VERIFY( begin[std::ranges::distance(aliases)] == ""sv ); // Likewise: -VERIFY( begin[99] == *begin ); +VERIFY( begin[99] == ""sv ); + +auto iter = begin; +std::ranges::advance(iter, end); +// Erroneously sets iter to a value-initialized state. +++iter; +VERIFY( iter == decltype(iter){} ); +VERIFY( *iter == ""sv ); + +iter = begin; +// Erroneously sets iter to a value-initialized state. 
+--iter; +VERIFY( iter == decltype(iter){} ); #endif } }
[gcc r15-5106] libstdc++: Add parentheses around operand of |
https://gcc.gnu.org/g:d8992f70a59b178de85305c542eb45d1676bf0a9 commit r15-5106-gd8992f70a59b178de85305c542eb45d1676bf0a9 Author: Jonathan Wakely Date: Mon Nov 11 11:23:08 2024 + libstdc++: Add parentheses around operand of | libstdc++-v3/ChangeLog: * include/bits/unicode.h (_Utf_iterator::_M_read_utf16): Add parentheses. Diff: --- libstdc++-v3/include/bits/unicode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libstdc++-v3/include/bits/unicode.h b/libstdc++-v3/include/bits/unicode.h index d79824c3ddb1..24b1ac3d53d6 100644 --- a/libstdc++-v3/include/bits/unicode.h +++ b/libstdc++-v3/include/bits/unicode.h @@ -377,7 +377,7 @@ namespace __unicode { ++_M_curr(); __to_incr = 2; - uint32_t __x = (__u & 0x3F) << 10 | __u2 & 0x3FF; + uint32_t __x = (__u & 0x3F) << 10 | (__u2 & 0x3FF); uint32_t __w = (__u >> 6) & 0x1F; __c = (__w + 1) << 16 | __x; }
[gcc r15-5109] c++: Add __builtin_operator_{new,delete} support
https://gcc.gnu.org/g:417b4cc9bf218083838aeab458bbb7510e36375a commit r15-5109-g417b4cc9bf218083838aeab458bbb7510e36375a Author: Jakub Jelinek Date: Mon Nov 11 19:54:32 2024 +0100 c++: Add __builtin_operator_{new,delete} support clang++ adds __builtin_operator_{new,delete} builtins which as documented work similarly to ::operator {new,delete}, except that it is an error if the called ::operator {new,delete} is not a replaceable global operator and allow optimizations which C++ normally allows just when those are used from new/delete expressions https://eel.is/c++draft/expr.new#14 When using these builtins, the same optimizations can be done even when using those builtins. For GCC we note that in the CALL_FROM_NEW_OR_DELETE_P flag on CALL_EXPRs. The following patch implements it as a C++ FE keyword (because passing references through ... changes the argument and so BUILT_IN_FRONTEND builtin can't be used), just attempts to call the ::operator {new,delete} and if it isn't replaceable, diagnoses it. libstdc++ already uses the builtin in some cases. 2024-11-11 Jakub Jelinek gcc/c-family/ * c-common.h (enum rid): Add RID_BUILTIN_OPERATOR_NEW and RID_BUILTIN_OPERATOR_DELETE. (names_builtin_p): Change return type from bool to int. * c-common.cc (c_common_reswords): Add __builtin_operator_new and __builtin_operator_delete. gcc/c/ * c-decl.cc (names_builtin_p): Change return type from bool to int, adjust return statements. gcc/cp/ * parser.cc (cp_parser_postfix_expression): Handle RID_BUILTIN_OPERATOR_NEW and RID_BUILTIN_OPERATOR_DELETE. * cp-objcp-common.cc (names_builtin_p): Change return type from bool to int, adjust return statements. Handle RID_BUILTIN_OPERATOR_NEW and RID_BUILTIN_OPERATOR_DELETE. * pt.cc (tsubst_expr) : Handle CALL_FROM_NEW_OR_DELETE_P. gcc/ * doc/extend.texi (New/Delete Builtins): Document __builtin_operator_new and __builtin_operator_delete. gcc/testsuite/ * g++.dg/ext/builtin-operator-new-1.C: New test. 
* g++.dg/ext/builtin-operator-new-2.C: New test. * g++.dg/ext/builtin-operator-new-3.C: New test. Diff: --- gcc/c-family/c-common.cc | 2 + gcc/c-family/c-common.h | 3 +- gcc/c/c-decl.cc | 10 +- gcc/cp/cp-objcp-common.cc | 21 +++-- gcc/cp/parser.cc | 35 +++ gcc/cp/pt.cc | 24 + gcc/doc/extend.texi | 28 ++ gcc/testsuite/g++.dg/ext/builtin-operator-new-1.C | 106 ++ gcc/testsuite/g++.dg/ext/builtin-operator-new-2.C | 49 ++ gcc/testsuite/g++.dg/ext/builtin-operator-new-3.C | 47 ++ 10 files changed, 310 insertions(+), 15 deletions(-) diff --git a/gcc/c-family/c-common.cc b/gcc/c-family/c-common.cc index 06be2a37b4f6..dae5f5e1c27e 100644 --- a/gcc/c-family/c-common.cc +++ b/gcc/c-family/c-common.cc @@ -434,6 +434,8 @@ const struct c_common_resword c_common_reswords[] = { "__builtin_counted_by_ref", RID_BUILTIN_COUNTED_BY_REF, D_CONLY }, { "__builtin_has_attribute", RID_BUILTIN_HAS_ATTRIBUTE, 0 }, { "__builtin_launder", RID_BUILTIN_LAUNDER, D_CXXONLY }, + { "__builtin_operator_new", RID_BUILTIN_OPERATOR_NEW, D_CXXONLY }, + { "__builtin_operator_delete", RID_BUILTIN_OPERATOR_DELETE, D_CXXONLY }, { "__builtin_shuffle", RID_BUILTIN_SHUFFLE, 0 }, { "__builtin_shufflevector", RID_BUILTIN_SHUFFLEVECTOR, 0 }, { "__builtin_stdc_bit_ceil", RID_BUILTIN_STDC, D_CONLY }, diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index 46099b635146..3c3da66272bd 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -168,6 +168,7 @@ enum rid RID_ADDRESSOF, RID_BUILTIN_LAUNDER, RID_BUILTIN_BIT_CAST, + RID_BUILTIN_OPERATOR_NEW, RID_BUILTIN_OPERATOR_DELETE, /* C++11 */ RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT, @@ -840,7 +841,7 @@ extern bool in_late_binary_op; extern const char *c_addr_space_name (addr_space_t as); extern tree identifier_global_value (tree); extern tree identifier_global_tag (tree); -extern bool names_builtin_p (const char *); +extern int names_builtin_p (const char *); extern tree c_linkage_bindings (tree); extern void 
record_builtin_type (enum rid, const char *, tree); extern void start_fname_decls (void); diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index ac47ef24a3de..e5045c636736 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -11750,10 +11750,10 @@ identifier_global_tag (tree t) return NULL_TREE; } -/* Returns true if NAME refers to a built-in function or function-like - op
[gcc r15-5110] c++: Fix another crash with invalid new operators [PR117463]
https://gcc.gnu.org/g:f32e7339871beec0e4d49698f7e34d77ee882088 commit r15-5110-gf32e7339871beec0e4d49698f7e34d77ee882088 Author: Simon Martin Date: Mon Nov 11 20:22:32 2024 +0100 c++: Fix another crash with invalid new operators [PR117463] Even though this PR is very close to PR117101, it's not addressed by the fix I made through r15-4958-g5821f5c8c89a05 because cxx_placement_new_fn has the very same issue as std_placement_new_fn_p used to have. As suggested by Jason, this patch changes both functions so that cxx_placement_new_fn leverages std_placement_new_fn_p which reduces code duplication and fixes the PR. PR c++/117463 gcc/cp/ChangeLog: * constexpr.cc (cxx_placement_new_fn): Implement in terms of std_placement_new_fn_p. * cp-tree.h (std_placement_new_fn_p): Declare. * init.cc (std_placement_new_fn_p): Add missing checks to ensure that fndecl is a non-replaceable ::operator new. gcc/testsuite/ChangeLog: * g++.dg/init/new54.C: New test. Diff: --- gcc/cp/constexpr.cc | 13 + gcc/cp/cp-tree.h | 1 + gcc/cp/init.cc| 6 -- gcc/testsuite/g++.dg/init/new54.C | 14 ++ 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 71e6dc4ef326..c097860e6551 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -2327,18 +2327,7 @@ cxx_replaceable_global_alloc_fn (tree fndecl) static inline bool cxx_placement_new_fn (tree fndecl) { - if (cxx_dialect >= cxx20 - && IDENTIFIER_NEW_OP_P (DECL_NAME (fndecl)) - && CP_DECL_CONTEXT (fndecl) == global_namespace - && !DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl) - && TREE_CODE (TREE_TYPE (fndecl)) == FUNCTION_TYPE) -{ - tree first_arg = TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); - if (TREE_VALUE (first_arg) == ptr_type_node - && TREE_CHAIN (first_arg) == void_list_node) - return true; -} - return false; + return (cxx_dialect >= cxx20 && std_placement_new_fn_p (fndecl)); } /* Return true if FNDECL is std::construct_at. 
*/ diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 1a0d5349749d..b3c909b05c41 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -7281,6 +7281,7 @@ extern tree build_offset_ref (tree, tree, bool, extern tree throw_bad_array_new_length (void); extern bool type_has_new_extended_alignment(tree); extern unsigned malloc_alignment (void); +extern bool std_placement_new_fn_p (tree); extern tree build_new_constexpr_heap_type (tree, tree, tree); extern tree build_new (location_t, vec **, tree, diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc index 62b3d6f6ce91..a11701002c8d 100644 --- a/gcc/cp/init.cc +++ b/gcc/cp/init.cc @@ -2976,10 +2976,12 @@ malloc_alignment () /* Determine whether an allocation function is a namespace-scope non-replaceable placement new function. See DR 1748. */ -static bool +bool std_placement_new_fn_p (tree alloc_fn) { - if (DECL_NAMESPACE_SCOPE_P (alloc_fn)) + if (DECL_NAMESPACE_SCOPE_P (alloc_fn) + && IDENTIFIER_NEW_OP_P (DECL_NAME (alloc_fn)) + && !DECL_IS_REPLACEABLE_OPERATOR_NEW_P (alloc_fn)) { tree first_arg = TREE_CHAIN (TYPE_ARG_TYPES (TREE_TYPE (alloc_fn))); if (first_arg diff --git a/gcc/testsuite/g++.dg/init/new54.C b/gcc/testsuite/g++.dg/init/new54.C new file mode 100644 index ..fdff1b55f0d0 --- /dev/null +++ b/gcc/testsuite/g++.dg/init/new54.C @@ -0,0 +1,14 @@ +// PR c++/117463 +// { dg-do "compile" { target c++20 } } + +struct S {}; +void *operator new[] (unsigned long, // { dg-bogus "first parameter" "" { xfail *-*-* } } + void void *volatile p); // { dg-error "two or more" } +S *fun(void *p) { + return new(p) S[10]; +} + +void *operator new (decltype(sizeof(0)), // { dg-bogus "first parameter" "" { xfail *-*-* } } + void void * p); // { dg-error "two or more" } +void *p; +auto t = new(p) int;
[gcc r15-5103] aarch64: return scalar fp8 values in fp registers
https://gcc.gnu.org/g:2ad277478620037103379ffad6a99dc00bf0bca7 commit r15-5103-g2ad277478620037103379ffad6a99dc00bf0bca7 Author: Claudio Bantaloukas Date: Mon Nov 11 18:08:28 2024 + aarch64: return scalar fp8 values in fp registers According to the aapcs64: If the argument is an 8-bit (...) precision Floating-point or short vector type and the NSRN is less than 8, then the argument is allocated to the least significant bits of register v[NSRN]. gcc/ * config/aarch64/aarch64.cc (aarch64_vfp_is_call_or_return_candidate): use fp registers to return svmfloat8_t parameters. gcc/testsuite/ * gcc.target/aarch64/fp8_scalar_1.c: Diff: --- gcc/config/aarch64/aarch64.cc | 3 ++- gcc/testsuite/gcc.target/aarch64/fp8_scalar_1.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index e9cee0aa5801..00bcf18ae97c 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -22306,7 +22306,8 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode, if ((!composite_p && (GET_MODE_CLASS (mode) == MODE_FLOAT - || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT)) + || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT + || (type && TYPE_MAIN_VARIANT (type) == aarch64_mfp8_type_node))) || aarch64_short_vector_p (type, mode)) { *count = 1; diff --git a/gcc/testsuite/gcc.target/aarch64/fp8_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/fp8_scalar_1.c index 1bc2ac26b2a8..61edf06401b8 100644 --- a/gcc/testsuite/gcc.target/aarch64/fp8_scalar_1.c +++ b/gcc/testsuite/gcc.target/aarch64/fp8_scalar_1.c @@ -7,10 +7,10 @@ /* **stacktest1: +** umovw0, v0.b\[0\] ** sub sp, sp, #16 -** and w0, w0, 255 ** strbw0, \[sp, 15\] -** ldrbw0, \[sp, 15\] +** ldr b0, \[sp, 15\] ** add sp, sp, 16 ** ret */
[gcc r15-5117] tree-optimization/117502 - VMAT_STRIDED_SLP vs VMAT_ELEMENTWISE when considering gather
https://gcc.gnu.org/g:0b27a7dd050262a7d64d87863201e4ebbde88386 commit r15-5117-g0b27a7dd050262a7d64d87863201e4ebbde88386 Author: Richard Biener Date: Fri Nov 8 13:06:07 2024 +0100 tree-optimization/117502 - VMAT_STRIDED_SLP vs VMAT_ELEMENTWISE when considering gather The following treats both the same when considering to use gather or scatter for single-element interleaving accesses. This will cause FAIL: gcc.target/aarch64/sve/sve_iters_low_2.c scan-tree-dump-not vect "LOOP VECTORIZED" where we now vectorize the loop with VNx4QI, I'll leave it to ARM folks to investigate whether that's OK and to adjust the testcase or to see where to adjust things to make the testcase not vectorized again. The original fix for which the testcase was introduced is still effective. PR tree-optimization/117502 * tree-vect-stmts.cc (get_group_load_store_type): Also consider VMAT_STRIDED_SLP when checking to use gather/scatter for single-element interleaving access. * tree-vect-loop.cc (update_epilogue_loop_vinfo): STMT_VINFO_STRIDED_P can be classified as VMAT_GATHER_SCATTER, so update DR_REF for those as well. Diff: --- gcc/tree-vect-loop.cc | 1 + gcc/tree-vect-stmts.cc | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 6cfce5aa7e1e..f50ee2e958ef 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -12295,6 +12295,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) refs that get_load_store_type classified as VMAT_GATHER_SCATTER. */ auto vstmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo); if (STMT_VINFO_MEMORY_ACCESS_TYPE (vstmt_vinfo) == VMAT_GATHER_SCATTER + || STMT_VINFO_STRIDED_P (vstmt_vinfo) || STMT_VINFO_GATHER_SCATTER_P (vstmt_vinfo)) { /* ??? 
As we copy epilogues from the main loop incremental diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 666e0491a9e8..f77a223b0c4f 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2274,7 +2274,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, on nearby locations. Or, even if it's a win over scalar code, it might not be a win over vectorizing at a lower VF, if that allows us to use contiguous accesses. */ - if (*memory_access_type == VMAT_ELEMENTWISE + if ((*memory_access_type == VMAT_ELEMENTWISE + || *memory_access_type == VMAT_STRIDED_SLP) && single_element_p && loop_vinfo && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
[gcc r15-5118] tree-optimization/117484 - issue with SLP discovery of permuted .MASK_LOAD
https://gcc.gnu.org/g:61cd1c43b82dc9d4c3edf122d22887fdce340223 commit r15-5118-g61cd1c43b82dc9d4c3edf122d22887fdce340223 Author: Richard Biener Date: Fri Nov 8 09:43:26 2024 +0100 tree-optimization/117484 - issue with SLP discovery of permuted .MASK_LOAD When we do SLP discovery of a .MASK_LOAD for a dataref group with gaps the discovery for the mask will have gaps as well and this was unexpected in a few places. The following re-organizes things slightly to accommodate for this. PR tree-optimization/117484 * tree-vect-slp.cc (vect_build_slp_tree_2): Handle gaps in mask discovery. Fix condition to release the load permutation. (vect_lower_load_permutations): Assert we get no load permutation for the unpermuted node. * tree-vect-slp-patterns.cc (linear_loads_p): Properly identify loads (without permutation). (compatible_complex_nodes_p): Likewise. * gcc.dg/vect/pr117484-1.c: New testcase. * gcc.dg/vect/pr117484-2.c: Likewise. Diff: --- gcc/testsuite/gcc.dg/vect/pr117484-1.c | 13 + gcc/testsuite/gcc.dg/vect/pr117484-2.c | 16 gcc/tree-vect-slp-patterns.cc | 14 ++ gcc/tree-vect-slp.cc | 22 +- 4 files changed, 52 insertions(+), 13 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-1.c b/gcc/testsuite/gcc.dg/vect/pr117484-1.c new file mode 100644 index ..453556c50f96 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117484-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ + +extern int a; +extern short b[]; +extern signed char c[], d[]; +int main() +{ + for (long j = 3; j < 1024; j += 3) +if (c[j] ? 
b[j] : 0) { + b[j] = d[j - 2]; + a = d[j]; +} +} diff --git a/gcc/testsuite/gcc.dg/vect/pr117484-2.c b/gcc/testsuite/gcc.dg/vect/pr117484-2.c new file mode 100644 index ..baffe7597ba4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr117484-2.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ + +int a; +extern int d[]; +extern int b[]; +extern _Bool c[]; +extern char h[]; +int main() +{ + for (int i = 0; i < 1024; i += 4) +if (h[i] || c[i]) + { + a = d[i]; + b[i] = d[i - 3]; + } +} diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc index 8adae8a6ec0d..d62682be43c9 100644 --- a/gcc/tree-vect-slp-patterns.cc +++ b/gcc/tree-vect-slp-patterns.cc @@ -221,9 +221,15 @@ linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root) perm_cache->put (root, retval); /* If it's a load node, then just read the load permute. */ - if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) + if (SLP_TREE_DEF_TYPE (root) == vect_internal_def + && SLP_TREE_CODE (root) != VEC_PERM_EXPR + && STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root)) + && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root { - retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); + if (SLP_TREE_LOAD_PERMUTATION (root).exists ()) + retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root)); + else + retval = PERM_EVENODD; perm_cache->put (root, retval); return retval; } @@ -798,8 +804,8 @@ compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache, return false; } - if (!SLP_TREE_LOAD_PERMUTATION (a).exists () - || !SLP_TREE_LOAD_PERMUTATION (b).exists ()) + if (!STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a)) + || !STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b))) { for (unsigned i = 0; i < gimple_num_args (a_stmt); i++) { diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index d3efd53b00cb..eebac1955de7 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -2019,14 +2019,15 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, = 
STMT_VINFO_GROUPED_ACCESS (stmt_info) ? DR_GROUP_FIRST_ELEMENT (stmt_info) : stmt_info; bool any_permute = false; - bool any_null = false; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info) { int load_place; if (! load_info) { - load_place = j; - any_null = true; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + load_place = j; + else + load_place = 0; } else if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) load_place = vect_get_place_in_interleaving_chain @@ -2037,11 +2038,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, any_permute |= load_place != j; load_permutation.quick_push (load_place); } - if (any_null) - { - gcc_assert (!any_permute); - load_permutation.release ();
[gcc r15-5116] Match: Optimize log (x) CMP CST and exp (x) CMP CST operations
https://gcc.gnu.org/g:e232dc3bb5c3e8f8a3749239135b7b859a204fc7 commit r15-5116-ge232dc3bb5c3e8f8a3749239135b7b859a204fc7 Author: Soumya AR Date: Tue Nov 12 09:26:24 2024 +0530 Match: Optimize log (x) CMP CST and exp (x) CMP CST operations This patch implements transformations for the following optimizations. logN(x) CMP CST -> x CMP expN(CST) expN(x) CMP CST -> x CMP logN(CST) Where CMP expands to ge and le operations. For example: int foo (float x) { return __builtin_logf (x) <= 0.0f; } can just be: int foo (float x) { return x <= 1.0f; } The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. OK for mainline? Signed-off-by: Soumya AR gcc/ChangeLog: * match.pd: Fold logN(x) CMP CST -> x CMP expN(CST) and expN(x) CMP CST -> x CMP logN(CST) gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/log_exp.c: New test. Diff: --- gcc/match.pd| 15 - gcc/testsuite/gcc.dg/tree-ssa/log_exp.c | 40 + 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 00988241348a..fc33b9ac3b1b 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8347,7 +8347,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) /* Simplify logN(a)-logN(b) into logN(a/b). 
*/ (simplify (minus (logs:s @0) (logs:s @1)) - (logs (rdiv @0 @1) + (logs (rdiv @0 @1 + + (for cmp (le ge) +(for logs (LOG LOG2 LOG10) +exps (EXP EXP2 EXP10) +/* Simplify logN (x) CMP CST into x CMP expN (CST) */ +(simplify +(cmp:c (logs:s @0) REAL_CST@1) + (cmp @0 (exps @1))) + +/* Simplify expN (x) CMP CST into x CMP logN (CST) */ +(simplify +(cmp:c (exps:s @0) REAL_CST@1) + (cmp @0 (logs @1)) (for logs (LOG LOG2 LOG10 LOG10) exps (EXP EXP2 EXP10 POW10) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/log_exp.c b/gcc/testsuite/gcc.dg/tree-ssa/log_exp.c new file mode 100644 index ..1c5d967cdf76 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/log_exp.c @@ -0,0 +1,40 @@ +/* { dg-do link } */ +/* { dg-options "-O2 -ffast-math" } */ +/* { dg-require-effective-target c99_runtime } */ + +#include + +extern void link_error(void); + +#define T(FUNC1, FUNC2, CMP, TYPE, C_TY, ID) \ +void test_##FUNC1##_##FUNC2##_##ID (TYPE x)\ +{ \ + TYPE a = 10.0##C_TY; \ + TYPE t1 = __builtin_##FUNC1(x); \ + bool b1 = t1 CMP a; \ + TYPE t2 = __builtin_##FUNC2(a); \ + bool b2 = x CMP t2; \ + if (b1 != b2)\ +link_error(); \ +} + +#define TEST(FUNC1, FUNC2, TYPE, C_TY) \ + T(FUNC1, FUNC2, <=, TYPE, C_TY, 1) \ + T(FUNC1, FUNC2, >=, TYPE, C_TY, 2) \ + +#define TEST_ALL(TYPE, C_TY, F_TY) \ + TEST(exp##F_TY, log##F_TY, TYPE, C_TY) \ + TEST(exp2##F_TY, log2##F_TY, TYPE, C_TY) \ + TEST(exp10##F_TY, log10##F_TY, TYPE, C_TY) \ + TEST(log##F_TY, exp##F_TY, TYPE, C_TY) \ + TEST(log2##F_TY, exp2##F_TY, TYPE, C_TY) \ + TEST(log10##F_TY, exp10##F_TY, TYPE, C_TY) + +TEST_ALL(double, , ) +TEST_ALL(float, f, f) +TEST_ALL(long double, L, l) + +int main (void) +{ + return 0; +}