[gcc r15-3467] Handle unused-only-live stmts in SLP discovery
https://gcc.gnu.org/g:5df05de3d917754274cadb7d006b2011f93f4f7b commit r15-3467-g5df05de3d917754274cadb7d006b2011f93f4f7b Author: Richard Biener Date: Thu Mar 7 15:13:33 2024 +0100 Handle unused-only-live stmts in SLP discovery The following adds SLP discovery for roots that are only live but otherwise unused. These are usually inductions. This allows a few more testcases to be handled fully with SLP, for example gcc.dg/vect/no-scevccp-pr86725-1.c * tree-vect-slp.cc (vect_analyze_slp): Analyze SLP for live but otherwise unused defs. Diff: --- gcc/tree-vect-slp.cc | 30 ++ 1 file changed, 30 insertions(+) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index d35e0609174..b6839c7707b 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -4681,6 +4681,36 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) saved_stmts.release (); } } + + /* Make sure to vectorize only-live stmts, usually inductions. */ + for (edge e : get_loop_exit_edges (LOOP_VINFO_LOOP (loop_vinfo))) + for (auto gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); +gsi_next (&gsi)) + { + gphi *lc_phi = *gsi; + tree def = gimple_phi_arg_def_from_edge (lc_phi, e); + stmt_vec_info stmt_info; + if (TREE_CODE (def) == SSA_NAME + && !virtual_operand_p (def) + && (stmt_info = loop_vinfo->lookup_def (def)) + && STMT_VINFO_RELEVANT (stmt_info) == vect_used_only_live + && STMT_VINFO_LIVE_P (stmt_info) + && (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def + || (STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def + && STMT_VINFO_REDUC_IDX (stmt_info) == -1))) + { + vec stmts; + vec roots = vNULL; + vec remain = vNULL; + stmts.create (1); + stmts.quick_push (vect_stmt_to_vectorize (stmt_info)); + vect_build_slp_instance (vinfo, +slp_inst_kind_reduc_group, +stmts, roots, remain, +max_tree_size, &limit, +bst_map, NULL); + } + } } hash_set visited_patterns;
[gcc r15-3468] ada: Tweak assertions in Inline.Cannot_Inline
https://gcc.gnu.org/g:87023e0c7cd3aadb2ef60aa2592ba34d0ab8a5af commit r15-3468-g87023e0c7cd3aadb2ef60aa2592ba34d0ab8a5af Author: Ronan Desplanques Date: Wed Aug 21 17:22:20 2024 +0200 ada: Tweak assertions in Inline.Cannot_Inline The purpose of this patch is to silence a GNATSAS report. gcc/ada/ * inline.adb (Cannot_Inline): Remove assertion. * inline.ads (Cannot_Inline): Add precondition. Diff: --- gcc/ada/inline.adb | 2 -- gcc/ada/inline.ads | 5 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/ada/inline.adb b/gcc/ada/inline.adb index 519e26ecec8..5f310abafda 100644 --- a/gcc/ada/inline.adb +++ b/gcc/ada/inline.adb @@ -2136,8 +2136,6 @@ package body Inline is end; end if; - pragma Assert (Msg (Msg'Last) = '?'); - -- Legacy front-end inlining model if not Back_End_Inlining then diff --git a/gcc/ada/inline.ads b/gcc/ada/inline.ads index bc90c0ce6d8..696f4227c7b 100644 --- a/gcc/ada/inline.ads +++ b/gcc/ada/inline.ads @@ -165,7 +165,10 @@ package Inline is N : Node_Id; Subp : Entity_Id; Is_Serious: Boolean := False; - Suppress_Info : Boolean := False); + Suppress_Info : Boolean := False) + with + Pre => Msg'First <= Msg'Last + and then Msg (Msg'Last) = '?'; -- This procedure is called if the node N, an instance of a call to -- subprogram Subp, cannot be inlined. Msg is the message to be issued, -- which ends with ? (it does not end with ?p?, this routine takes care of
[gcc r15-3469] ada: Binder respects Ada version for checksum of runtime files
https://gcc.gnu.org/g:47a30d6981db282a4a0e74cf02ff60a3eb0c14cf commit r15-3469-g47a30d6981db282a4a0e74cf02ff60a3eb0c14cf Author: Jose Ruiz Date: Fri Aug 23 16:25:13 2024 + ada: Binder respects Ada version for checksum of runtime files The parsing to compute the checksums of runtime files (within the binder) was done using the default Ada version (Ada 2012 currently), while the creation of the checksum, when the runtime files are compiled, is performed in a more recent Ada version (Ada 2022 currently). This change forces the checksum computation for runtime files to be done with the same Ada version as when they were created. gcc/ada/ * ali-util.adb (Get_File_Checksum): Force the parsing for the checksum computation of runtime files to be done in the corresponding recent Ada version. Diff: --- gcc/ada/ali-util.adb | 22 -- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/gcc/ada/ali-util.adb b/gcc/ada/ali-util.adb index 61dddb94e85..4bcb06e6a1f 100644 --- a/gcc/ada/ali-util.adb +++ b/gcc/ada/ali-util.adb @@ -29,6 +29,7 @@ with Opt; use Opt; with Output; use Output; with Osint; use Osint; with Scans; use Scans; +with Fname; use Fname; with Scng; with Sinput.C; with Stringt; @@ -87,8 +88,10 @@ package body ALI.Util is --- function Get_File_Checksum (Fname : File_Name_Type) return Word is - Full_Name: File_Name_Type; - Source_Index : Source_File_Index; + Full_Name : File_Name_Type; + Source_Index: Source_File_Index; + Ada_Version_Current : Ada_Version_Type; + Internal_Unit : constant Boolean := Is_Internal_File_Name (Fname); begin Full_Name := Find_File (Fname, Osint.Source); @@ -109,6 +112,15 @@ package body ALI.Util is Scanner.Initialize_Scanner (Source_Index); + -- The runtime files are precompiled with an implicitly defined Ada + -- version that we set here to improve the parsing required to compute + -- the checksum. 
+ + if Internal_Unit then + Ada_Version_Current := Ada_Version; + Ada_Version := Ada_Version_Runtime; + end if; + -- Scan the complete file to compute its checksum loop @@ -116,6 +128,12 @@ package body ALI.Util is exit when Token = Tok_EOF; end loop; + -- Restore the Ada version if we changed it + + if Internal_Unit then + Ada_Version := Ada_Version_Current; + end if; + return Scans.Checksum; end Get_File_Checksum;
[gcc r15-3471] ada: Remove unused parameters in validity checking routine
https://gcc.gnu.org/g:23f0bfa027f81a40f8c94a657f7427ed7573b6e8 commit r15-3471-g23f0bfa027f81a40f8c94a657f7427ed7573b6e8 Author: Piotr Trojanek Date: Mon Aug 26 15:25:03 2024 +0200 ada: Remove unused parameters in validity checking routine Code cleanup; semantics is unaffected. gcc/ada/ * exp_util.ads, exp_util.adb (Duplicate_Subexpr_No_Checks): Remove parameters, which are no longer used. Diff: --- gcc/ada/exp_util.adb | 18 ++ gcc/ada/exp_util.ads | 16 +++- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb index 8e5cdb7332e..9b67384755a 100644 --- a/gcc/ada/exp_util.adb +++ b/gcc/ada/exp_util.adb @@ -5049,23 +5049,17 @@ package body Exp_Util is - function Duplicate_Subexpr_No_Checks - (Exp : Node_Id; - Name_Req : Boolean := False; - Renaming_Req : Boolean := False; - Related_Id: Entity_Id := Empty; - Is_Low_Bound : Boolean := False; - Is_High_Bound : Boolean := False) return Node_Id + (Exp : Node_Id; + Name_Req : Boolean := False; + Renaming_Req : Boolean := False) return Node_Id is New_Exp : Node_Id; begin Remove_Side_Effects -(Exp => Exp, - Name_Req => Name_Req, - Renaming_Req => Renaming_Req, - Related_Id=> Related_Id, - Is_Low_Bound => Is_Low_Bound, - Is_High_Bound => Is_High_Bound); +(Exp => Exp, + Name_Req => Name_Req, + Renaming_Req => Renaming_Req); New_Exp := New_Copy_Tree (Exp); Remove_Checks (New_Exp); diff --git a/gcc/ada/exp_util.ads b/gcc/ada/exp_util.ads index 279feb2e6fe..49e75c79d35 100644 --- a/gcc/ada/exp_util.ads +++ b/gcc/ada/exp_util.ads @@ -457,24 +457,14 @@ package Exp_Util is -- following functions allow this behavior to be modified. 
function Duplicate_Subexpr_No_Checks - (Exp : Node_Id; - Name_Req : Boolean := False; - Renaming_Req : Boolean := False; - Related_Id: Entity_Id := Empty; - Is_Low_Bound : Boolean := False; - Is_High_Bound : Boolean := False) return Node_Id; + (Exp : Node_Id; + Name_Req : Boolean := False; + Renaming_Req : Boolean := False) return Node_Id; -- Identical in effect to Duplicate_Subexpr, except that Remove_Checks is -- called on the result, so that the duplicated expression does not include -- checks. This is appropriate for use when Exp, the original expression is -- unconditionally elaborated before the duplicated expression, so that -- there is no need to repeat any checks. - -- - -- Related_Id denotes the entity of the context where Expr appears. Flags - -- Is_Low_Bound and Is_High_Bound specify whether the expression to check - -- is the low or the high bound of a range. These three optional arguments - -- signal Remove_Side_Effects to create an external symbol of the form - -- Chars (Related_Id)_FIRST/_LAST. For suggested use of these parameters - -- see the warning in the body of Sem_Ch3.Process_Range_Expr_In_Decl. function Duplicate_Subexpr_Move_Checks (Exp : Node_Id;
[gcc r15-3472] ada: Streamline handling of low-level peculiarities of record field layout
https://gcc.gnu.org/g:72c6938f29cbeddb3220720e68add4cf09ffd794 commit r15-3472-g72c6938f29cbeddb3220720e68add4cf09ffd794 Author: Eric Botcazou Date: Sun Aug 25 15:20:59 2024 +0200 ada: Streamline handling of low-level peculiarities of record field layout This factors out the interface to the low-level field layout machinery. gcc/ada/ * gcc-interface/gigi.h (default_field_alignment): New function. * gcc-interface/misc.cc: Include tm_p header file. (default_field_alignment): New function. * gcc-interface/trans.cc (addressable_p) : Replace previous alignment kludge with call to default_field_alignment. * gcc-interface/utils.cc (finish_record_type): Likewise for the alignment based on which DECL_BIT_FIELD should be cleared. Diff: --- gcc/ada/gcc-interface/gigi.h | 4 gcc/ada/gcc-interface/misc.cc | 21 + gcc/ada/gcc-interface/trans.cc | 24 +++- gcc/ada/gcc-interface/utils.cc | 2 +- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/gcc/ada/gcc-interface/gigi.h b/gcc/ada/gcc-interface/gigi.h index 40f3f0d3d13..f4b302be3e0 100644 --- a/gcc/ada/gcc-interface/gigi.h +++ b/gcc/ada/gcc-interface/gigi.h @@ -1008,6 +1008,10 @@ extern bool must_pass_by_ref (tree gnu_type); /* Return the size of the FP mode with precision PREC. */ extern int fp_prec_to_size (int prec); +/* Return the default alignment of a FIELD of TYPE declared in a record or + union type as specified by the ABI of the target architecture. */ +extern unsigned int default_field_alignment (tree field, tree type); + /* Return the precision of the FP mode with size SIZE. 
*/ extern int fp_size_to_prec (int size); diff --git a/gcc/ada/gcc-interface/misc.cc b/gcc/ada/gcc-interface/misc.cc index 13cb39e91cb..ef5de7f5651 100644 --- a/gcc/ada/gcc-interface/misc.cc +++ b/gcc/ada/gcc-interface/misc.cc @@ -28,6 +28,7 @@ #include "coretypes.h" #include "target.h" #include "tree.h" +#include "tm_p.h" #include "diagnostic.h" #include "opts.h" #include "alias.h" @@ -1129,6 +1130,26 @@ must_pass_by_ref (tree gnu_type) && TREE_CODE (TYPE_SIZE_UNIT (gnu_type)) != INTEGER_CST)); } +/* Return the default alignment of a FIELD of TYPE declared in a record or + union type as specified by the ABI of the target architecture. */ + +unsigned int +default_field_alignment (tree ARG_UNUSED (field), tree type) +{ + /* This is modeled on layout_decl. */ + unsigned int align = TYPE_ALIGN (type); + +#ifdef BIGGEST_FIELD_ALIGNMENT + align = MIN (align, (unsigned int) BIGGEST_FIELD_ALIGNMENT); +#endif + +#ifdef ADJUST_FIELD_ALIGN + align = ADJUST_FIELD_ALIGN (field, type, align); +#endif + + return align; +} + /* This function is called by the front-end to enumerate all the supported modes for the machine, as well as some predefined C types. F is a function which is called back with the parameters as listed below, first a string, diff --git a/gcc/ada/gcc-interface/trans.cc b/gcc/ada/gcc-interface/trans.cc index c99b06670d5..9e9f5f8dcba 100644 --- a/gcc/ada/gcc-interface/trans.cc +++ b/gcc/ada/gcc-interface/trans.cc @@ -10291,23 +10291,13 @@ addressable_p (tree gnu_expr, tree gnu_type) /* Even with DECL_BIT_FIELD cleared, we have to ensure that the field is sufficiently aligned, in case it is subject to a pragma Component_Alignment. But we don't need to - check the alignment of the containing record, as it is - guaranteed to be not smaller than that of its most - aligned field that is not a bit-field. 
*/ - && (DECL_ALIGN (TREE_OPERAND (gnu_expr, 1)) - >= TYPE_ALIGN (TREE_TYPE (gnu_expr)) -#ifdef TARGET_ALIGN_DOUBLE - /* Cope with the misalignment of doubles in records for - ancient 32-bit ABIs like that of x86/Linux. */ - || (DECL_ALIGN (TREE_OPERAND (gnu_expr, 1)) == 32 - && TYPE_ALIGN (TREE_TYPE (gnu_expr)) == 64 - && !TARGET_ALIGN_DOUBLE -#ifdef TARGET_64BIT - && !TARGET_64BIT -#endif - ) -#endif - )) + check the alignment of the containing record, since it + is guaranteed to be not smaller than that of its most + aligned field that is not a bit-field. However, we need + to cope with quirks of ABIs that may misalign fields. */ + && DECL_ALIGN (TREE_OPERAND (gnu_expr, 1)) + >= default_field_alignment (TREE_OPERAND (gnu_expr, 1), + TREE_TYPE (gnu_expr))) /* The field of a padding record is always addressable. */ || TYPE_IS_PADDING_P (TREE_TYPE (TREE_OPERAND (gnu_expr, 0 &
[gcc r15-3473] ada: Add bypass for internal fields on strict-alignment platforms
https://gcc.gnu.org/g:65186cab1d3c9b0dad17dc0536586bec634b93f3 commit r15-3473-g65186cab1d3c9b0dad17dc0536586bec634b93f3 Author: Eric Botcazou Date: Mon Aug 26 11:16:41 2024 +0200 ada: Add bypass for internal fields on strict-alignment platforms This is required to support misalignment of tagged types in legacy code. gcc/ada/ * gcc-interface/trans.cc (addressable_p) : Add bypass for internal fields on strict-alignment platforms. Diff: --- gcc/ada/gcc-interface/trans.cc | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gcc/ada/gcc-interface/trans.cc b/gcc/ada/gcc-interface/trans.cc index 9e9f5f8dcba..92e000686fb 100644 --- a/gcc/ada/gcc-interface/trans.cc +++ b/gcc/ada/gcc-interface/trans.cc @@ -10295,9 +10295,14 @@ addressable_p (tree gnu_expr, tree gnu_type) is guaranteed to be not smaller than that of its most aligned field that is not a bit-field. However, we need to cope with quirks of ABIs that may misalign fields. */ - && DECL_ALIGN (TREE_OPERAND (gnu_expr, 1)) - >= default_field_alignment (TREE_OPERAND (gnu_expr, 1), - TREE_TYPE (gnu_expr))) + && (DECL_ALIGN (TREE_OPERAND (gnu_expr, 1)) + >= default_field_alignment (TREE_OPERAND (gnu_expr, 1), + TREE_TYPE (gnu_expr)) + /* We do not enforce this on strict-alignment platforms for + internal fields in order to keep supporting misalignment + of tagged types in legacy code. */ + || (!STRICT_ALIGNMENT + && DECL_INTERNAL_P (TREE_OPERAND (gnu_expr, 1) /* The field of a padding record is always addressable. */ || TYPE_IS_PADDING_P (TREE_TYPE (TREE_OPERAND (gnu_expr, 0 && addressable_p (TREE_OPERAND (gnu_expr, 0), NULL_TREE));
[gcc r15-3474] testsuite: Fix xorsign.c, vect-double-2.c fails with -march=x86-64-v2
https://gcc.gnu.org/g:811204f52a111af24ba4b00df9e947a44c4c1161 commit r15-3474-g811204f52a111af24ba4b00df9e947a44c4c1161 Author: Hu, Lin1 Date: Thu Sep 5 14:51:42 2024 +0800 testsuite: Fix xorsign.c, vect-double-2.c fails with -march=x86-64-v2 These testcases fail with -march=x86-64-v2, so add -mno-sse4 to avoid these unexpected failures. gcc/testsuite/ChangeLog: PR testsuite/116608 * gcc.target/i386/vect-double-2.c: Add extra option -mno-sse4. * gcc.target/i386/xorsign.c: Ditto. Diff: --- gcc/testsuite/gcc.target/i386/vect-double-2.c | 2 +- gcc/testsuite/gcc.target/i386/xorsign.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/vect-double-2.c b/gcc/testsuite/gcc.target/i386/vect-double-2.c index eea53bfa6b1..065d2e5af08 100644 --- a/gcc/testsuite/gcc.target/i386/vect-double-2.c +++ b/gcc/testsuite/gcc.target/i386/vect-double-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats -mno-sse4" } */ extern void abort (void); diff --git a/gcc/testsuite/gcc.target/i386/xorsign.c b/gcc/testsuite/gcc.target/i386/xorsign.c index ebed5edccb6..f280dd20d7b 100644 --- a/gcc/testsuite/gcc.target/i386/xorsign.c +++ b/gcc/testsuite/gcc.target/i386/xorsign.c @@ -1,5 +1,5 @@ /* { dg-do run { target sse2_runtime } } */ -/* { dg-options "-O2 -msse2 -mfpmath=sse -ftree-vectorize -fdump-tree-vect-details -save-temps" } */ +/* { dg-options "-O2 -msse2 -mfpmath=sse -ftree-vectorize -fdump-tree-vect-details -save-temps -mno-sse4" } */ extern void abort ();
[gcc r15-3475] RISC-V: Lookup reversely in riscv_select_multilib_by_abi
https://gcc.gnu.org/g:3162abfb5098934e6ed9d4307a86a84d28823612 commit r15-3475-g3162abfb5098934e6ed9d4307a86a84d28823612 Author: YunQiang Su Date: Thu Sep 5 15:14:43 2024 +0800 RISC-V: Lookup reversely in riscv_select_multilib_by_abi When using --print-multi-os-dir or -print-multi-directory, gcc outputs different values for a full -march option than for the base one alone. $ ./gcc/xgcc --print-multi-os-dir -mabi=lp64d -march=rv64gc lib64/lp64d $ ./gcc/xgcc --print-multi-os-dir -mabi=lp64d -march=rv64gc_zba . The reason is that in multilib.h, the fallback value of multilib is listed as the 1st one in `multilib_raw[]`. gcc * common/config/riscv/riscv-common.cc (riscv_select_multilib_by_abi): Look up in reverse order, as the fallback path is listed as the 1st one. Diff: --- gcc/common/config/riscv/riscv-common.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 62c6e1dab1f..2c1ce7fc7cb 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -2079,7 +2079,7 @@ riscv_select_multilib_by_abi ( const std::string &riscv_current_abi_str, const std::vector &multilib_infos) { - for (size_t i = 0; i < multilib_infos.size (); ++i) + for (ssize_t i = multilib_infos.size (); i >= 0; --i) if (riscv_current_abi_str == multilib_infos[i].abi_str) return xstrdup (multilib_infos[i].path.c_str ());
[gcc r15-3476] vrp: Fix up diagnostics wording
https://gcc.gnu.org/g:e9e4777ca2415a73e8db64a406c06a79add621e5 commit r15-3476-ge9e4777ca2415a73e8db64a406c06a79add621e5 Author: Jakub Jelinek Date: Thu Sep 5 11:06:12 2024 +0200 vrp: Fix up diagnostics wording I've noticed non-standard wording of this diagnostic when looking at a miscompilation with --param=vrp-block-limit=0. Diagnostics generally shouldn't start with an uppercase letter (unless the upper case would also appear in the middle of a sentence) and shouldn't be separate sentences with a dot as separator; a semicolon is IMHO more frequently used. 2024-09-05 Jakub Jelinek * tree-vrp.cc (pass_vrp::execute): Start diagnostics with lowercase u rather than capital U, use semicolon instead of dot. Diff: --- gcc/tree-vrp.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/tree-vrp.cc b/gcc/tree-vrp.cc index e184e9af51e..23946c57413 100644 --- a/gcc/tree-vrp.cc +++ b/gcc/tree-vrp.cc @@ -1337,7 +1337,7 @@ public: { use_fvrp = true; warning (OPT_Wdisabled_optimization, - "Using fast VRP algorithm. %d basic blocks" + "using fast VRP algorithm; %d basic blocks" " exceeds %<--param=vrp-block-limit=%d%> limit", n_basic_blocks_for_fn (fun), param_vrp_block_limit);
[gcc r15-3477] docs: double mention of armv9-a.
https://gcc.gnu.org/g:240be78237c6d70e0b30ed187c559e359ce81557 commit r15-3477-g240be78237c6d70e0b30ed187c559e359ce81557 Author: Tamar Christina Date: Thu Sep 5 10:35:18 2024 +0100 docs: double mention of armv9-a. The list of available architectures for Arm incorrectly lists armv9-a twice. This removes the duplicate armv9-a enumeration from the part of the list having M-profile targets. gcc/ChangeLog: * doc/invoke.texi: Remove duplicate armv9-a mention. Diff: --- gcc/doc/invoke.texi | 1 - 1 file changed, 1 deletion(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 43afb0984e5..193db761d64 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -23025,7 +23025,6 @@ Permissible names are: @samp{armv7-m}, @samp{armv7e-m}, @samp{armv8-m.base}, @samp{armv8-m.main}, @samp{armv8.1-m.main}, -@samp{armv9-a}, @samp{iwmmxt} and @samp{iwmmxt2}. Additionally, the following architectures, which lack support for the
[gcc r15-3478] testsuite: remove -fwrapv from signbit-5.c
https://gcc.gnu.org/g:67eaf67360e434dd5969e1c66f043e3c751f9f52 commit r15-3478-g67eaf67360e434dd5969e1c66f043e3c751f9f52 Author: Tamar Christina Date: Thu Sep 5 10:36:02 2024 +0100 testsuite: remove -fwrapv from signbit-5.c The meaning of the testcase was changed by passing it -fwrapv. The test failures on some platforms occurred because the test was exercising some implementation-defined behavior wrt INT_MIN in generic code. Instead of using -fwrapv, this just removes the border case from the test so all the values now have defined semantics. It still relies on the handling of shifting a negative value right, but that wasn't changed with -fwrapv anyway. The -fwrapv case is being handled already by other testcases. gcc/testsuite/ChangeLog: * gcc.dg/signbit-5.c: Remove -fwrapv and change INT_MIN to INT_MIN+1. Diff: --- gcc/testsuite/gcc.dg/signbit-5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/signbit-5.c b/gcc/testsuite/gcc.dg/signbit-5.c index 57e29e3ca63..2601582ed4e 100644 --- a/gcc/testsuite/gcc.dg/signbit-5.c +++ b/gcc/testsuite/gcc.dg/signbit-5.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -fwrapv" } */ +/* { dg-options "-O3" } */ /* This test does not work when the truth type does not match vector type. */ /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */ @@ -42,8 +42,8 @@ int main () TYPE a[N]; TYPE b[N]; - a[0] = INT_MIN; - b[0] = INT_MIN; + a[0] = INT_MIN+1; + b[0] = INT_MIN+1; for (int i = 1; i < N; ++i) {
[gcc r15-3479] middle-end: have vect_recog_cond_store_pattern use pattern statement for cond if available
https://gcc.gnu.org/g:a50f54c0d06139d791b875e09471f2fc03af5b04 commit r15-3479-ga50f54c0d06139d791b875e09471f2fc03af5b04 Author: Tamar Christina Date: Thu Sep 5 10:36:55 2024 +0100 middle-end: have vect_recog_cond_store_pattern use pattern statement for cond if available When vectorizing a conditional operation we rely on the bool_recog pattern to hit and convert the bool of the operand to a valid mask. However we are currently not using the converted operand as this is in a pattern statement. This change updates it to look at the actual statement to be vectorized so we pick up the pattern. Note that there are no tests here since vectorization will fail until we correctly lower all boolean conditionals early. Tests for these are in the next patch, namely vect-conditional_store_5.c and vect-conditional_store_6.c. And the existing vect-conditional_store_[1-4].c checks that the other cases are still handled correctly. gcc/ChangeLog: * tree-vect-patterns.cc (vect_recog_cond_store_pattern): Use pattern statement. Diff: --- gcc/tree-vect-patterns.cc | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 3162250bbdd..f7c3c623ea4 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -6670,7 +6670,15 @@ vect_recog_cond_store_pattern (vec_info *vinfo, if (TREE_CODE (st_rhs) != SSA_NAME) return NULL; - gassign *cond_stmt = dyn_cast (SSA_NAME_DEF_STMT (st_rhs)); + auto cond_vinfo = vinfo->lookup_def (st_rhs); + + /* If the condition isn't part of the loop then bool recog wouldn't have seen + it and so this transformation may not be valid. */ + if (!cond_vinfo) +return NULL; + + cond_vinfo = vect_stmt_to_vectorize (cond_vinfo); + gassign *cond_stmt = dyn_cast (STMT_VINFO_STMT (cond_vinfo)); if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR) return NULL;
[gcc r14-10643] testsuite: remove -fwrapv from signbit-5.c
https://gcc.gnu.org/g:040b979a08843806ea930a8d839dd94301323ebe commit r14-10643-g040b979a08843806ea930a8d839dd94301323ebe Author: Tamar Christina Date: Thu Sep 5 10:36:02 2024 +0100 testsuite: remove -fwrapv from signbit-5.c The meaning of the testcase was changed by passing it -fwrapv. The test failures on some platforms occurred because the test was exercising some implementation-defined behavior wrt INT_MIN in generic code. Instead of using -fwrapv, this just removes the border case from the test so all the values now have defined semantics. It still relies on the handling of shifting a negative value right, but that wasn't changed with -fwrapv anyway. The -fwrapv case is being handled already by other testcases. gcc/testsuite/ChangeLog: * gcc.dg/signbit-5.c: Remove -fwrapv and change INT_MIN to INT_MIN+1. (cherry picked from commit 67eaf67360e434dd5969e1c66f043e3c751f9f52) Diff: --- gcc/testsuite/gcc.dg/signbit-5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/testsuite/gcc.dg/signbit-5.c b/gcc/testsuite/gcc.dg/signbit-5.c index 2bca640f930..e778f91ca33 100644 --- a/gcc/testsuite/gcc.dg/signbit-5.c +++ b/gcc/testsuite/gcc.dg/signbit-5.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O3 -fwrapv" } */ +/* { dg-options "-O3" } */ /* This test does not work when the truth type does not match vector type. */ /* { dg-additional-options "-march=armv8-a" { target aarch64_sve } } */ @@ -44,8 +44,8 @@ int main () TYPE a[N]; TYPE b[N]; - a[0] = INT_MIN; - b[0] = INT_MIN; + a[0] = INT_MIN+1; + b[0] = INT_MIN+1; for (int i = 1; i < N; ++i) {
[gcc r15-3480] libsanitizer: On aarch64 use hint #34 in prologue of libsanitizer functions
https://gcc.gnu.org/g:2379cbb94b2668227c237c94c82e3c49fe39fd0f commit r15-3480-g2379cbb94b2668227c237c94c82e3c49fe39fd0f Author: Jakub Jelinek Date: Thu Sep 5 12:20:57 2024 +0200 libsanitizer: On aarch64 use hint #34 in prologue of libsanitizer functions When gcc is built with -mbranch-protection=standard, running sanitized programs doesn't work properly on bti enabled kernels. This has been fixed upstream with https://github.com/llvm/llvm-project/pull/84061 The following patch cherry picks that from upstream. For trunk we should eventually do a full merge from upstream, but I'm hoping they will first fix up the _BitInt libubsan support mess. 2024-09-05 Jakub Jelinek * sanitizer_common/sanitizer_asm.h: Cherry-pick llvm-project revision 1c792d24e0a228ad49cc004a1c26bbd7cd87f030. * interception/interception.h: Likewise. Diff: --- libsanitizer/interception/interception.h | 4 ++-- libsanitizer/sanitizer_common/sanitizer_asm.h | 14 -- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/libsanitizer/interception/interception.h b/libsanitizer/interception/interception.h index 58e969378a9..73135b34bee 100644 --- a/libsanitizer/interception/interception.h +++ b/libsanitizer/interception/interception.h @@ -204,11 +204,11 @@ const interpose_substitution substitution_##func_name[] \ ".type " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", " \ ASM_TYPE_FUNCTION_STR "\n" \ SANITIZER_STRINGIFY(TRAMPOLINE(func)) ":\n" \ - SANITIZER_STRINGIFY(CFI_STARTPROC) "\n" \ + C_ASM_STARTPROC "\n" \ C_ASM_TAIL_CALL(SANITIZER_STRINGIFY(TRAMPOLINE(func)), \ "__interceptor_" \ SANITIZER_STRINGIFY(ASM_PREEMPTIBLE_SYM(func))) "\n" \ - SANITIZER_STRINGIFY(CFI_ENDPROC) "\n" \ + C_ASM_ENDPROC "\n" \ ".size " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", " \ ".-" SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \ ); diff --git a/libsanitizer/sanitizer_common/sanitizer_asm.h b/libsanitizer/sanitizer_common/sanitizer_asm.h index 3af66a4e449..30e9d15184e 100644 --- a/libsanitizer/sanitizer_common/sanitizer_asm.h 
+++ b/libsanitizer/sanitizer_common/sanitizer_asm.h @@ -42,6 +42,16 @@ # define CFI_RESTORE(reg) #endif +#if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT) +# define ASM_STARTPROC CFI_STARTPROC; hint #34 +# define C_ASM_STARTPROC SANITIZER_STRINGIFY(CFI_STARTPROC) "\nhint #34" +#else +# define ASM_STARTPROC CFI_STARTPROC +# define C_ASM_STARTPROC SANITIZER_STRINGIFY(CFI_STARTPROC) +#endif +#define ASM_ENDPROC CFI_ENDPROC +#define C_ASM_ENDPROC SANITIZER_STRINGIFY(CFI_ENDPROC) + #if defined(__x86_64__) || defined(__i386__) || defined(__sparc__) # define ASM_TAIL_CALL jmp #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ @@ -114,9 +124,9 @@ .globl __interceptor_trampoline_##name; \ ASM_TYPE_FUNCTION(__interceptor_trampoline_##name); \ __interceptor_trampoline_##name: \ - CFI_STARTPROC; \ + ASM_STARTPROC; \ ASM_TAIL_CALL ASM_PREEMPTIBLE_SYM(__interceptor_##name); \ - CFI_ENDPROC; \ + ASM_ENDPROC; \ ASM_SIZE(__interceptor_trampoline_##name) # define ASM_INTERCEPTOR_TRAMPOLINE_SUPPORT 1 # endif // Architecture supports interceptor trampoline
[gcc r14-10644] Don't call clean_symbol_name in create_tmp_var_name [PR116219]
https://gcc.gnu.org/g:6fb41c27b62b5774108455d13f5b7a67c9cbdfa3 commit r14-10644-g6fb41c27b62b5774108455d13f5b7a67c9cbdfa3 Author: Jakub Jelinek Date: Wed Aug 7 20:14:31 2024 +0200 Don't call clean_symbol_name in create_tmp_var_name [PR116219] SRA adds fancy names like offset$D94316$_M_impl$D93629$_M_start where the numbers in there are DECL_UIDs if there are unnamed FIELD_DECLs etc. Because -g0 vs. -g can cause differences between the exact DECL_UID values (add bigger gaps in between them, corresponding decls should still be ordered the same based on DECL_UID) we make sure such decls have DECL_NAMELESS set and depending on exact options either don't dump such names at all or dump_fancy_name sanitizes the D123456$ parts in there to D$. Unfortunately in tons of places we then use get_name to grab either user names or these SRA created names and use that as argument to create_tmp_var{,_name,_raw} to base other artificial temporary names based on that. Those are DECL_NAMELESS too, but unfortunately create_tmp_var_name starting with https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=725494f6e4121eace43b7db1202f8ecbf52a8276 calls clean_symbol_name which replaces the $s in there with _s and thus dump_fancy_name doesn't sanitize it anymore. I don't see any discussion of that commit (originally to TM branch, later merged) on the mailing list, but from DECL_NAME (new_decl) = create_tmp_var_name (IDENTIFIER_POINTER (DECL_NAME (old_decl))); - SET_DECL_ASSEMBLER_NAME (new_decl, NULL_TREE); + SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl)); snippet elsewhere in that commit it seems create_tmp_var_name was used at that point also to determine function names of clones, so presumably the clean_symbol_name at that point was to ensure the symbol could be emitted into assembly, maybe in case DECL_NAME is something like C++ operators or whatever could have there undesirable characters. 
Anyway, we haven't done that for years; GCC 4.5 already uses clone_function_name for such purposes, which starts from the DECL_ASSEMBLER_NAME of the old function and appends, based on supportable symbol suffix separators, the separator and some suffix and/or number, so that part doesn't go through create_tmp_var_name. I don't see problems with having the $ and . etc. characters in the names intended just to make dumps more readable, after all, we already are using those in the SRA created names. Those names shouldn't make it into the assembly in any way, neither debug info nor assembly labels. There is one theoretical case, where the gimplifier promotes automatic vars into TREE_STATIC ones and therefore those can then appear in assembly, just in case it would be on e.g. SRA created names and regimplified later. Because no cases of promotion of DECL_NAMELESS vars to static were observed in {x86_64,i686,powerpc64le}-linux bootstraps/regtests, the code simply uses C.NNN names for DECL_NAMELESS vars like it does for !DECL_NAME vars. Richi mentioned on IRC that the non-cleaned up names might make things harder to feed stuff back to the GIMPLE FE, but if so, I think it should be the dumping for GIMPLE FE purposes that cleans those up (but at that point it should also verify if some such cleaned up names don't collide with others and somehow deal with those). 2024-08-07 Jakub Jelinek PR c++/116219 * gimple-expr.cc (remove_suffix): Formatting fixes. (create_tmp_var_name): Don't call clean_symbol_name. * gimplify.cc (gimplify_init_constructor): When promoting automatic DECL_NAMELESS vars to static, don't preserve their DECL_NAME. 
(cherry picked from commit 165e3e7c3ba884345647c0f1c9a3a57a03383651) Diff: --- gcc/gimple-expr.cc | 16 ++-- gcc/gimplify.cc| 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/gcc/gimple-expr.cc b/gcc/gimple-expr.cc index f8d7185530c..0477c9d5f44 100644 --- a/gcc/gimple-expr.cc +++ b/gcc/gimple-expr.cc @@ -406,14 +406,12 @@ remove_suffix (char *name, int len) { int i; - for (i = 2; i < 7 && len > i; i++) -{ - if (name[len - i] == '.') - { - name[len - i] = '\0'; - break; - } -} + for (i = 2; i < 7 && len > i; i++) +if (name[len - i] == '.') + { + name[len - i] = '\0'; + break; + } } /* Create a new temporary name with PREFIX. Return an identifier. */ @@ -430,8 +428,6 @@ create_tmp_var_name (const char *prefix) char *preftmp = ASTRDUP (prefix); remove_suffix (preftmp, strlen (preftmp)); - clean_symbol_name (preftmp); - prefix = preftmp; } diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 457b33a4293..5753eb90ff5 100644 --- a/gcc/gimplify.cc +++ b
[gcc r14-10645] lower-bitint: Fix up __builtin_{add, sub}_overflow{, _p} bitint lowering [PR116501]
https://gcc.gnu.org/g:73afc3e47e235f3a68abb1c7ce52a9d82003bdab commit r14-10645-g73afc3e47e235f3a68abb1c7ce52a9d82003bdab Author: Jakub Jelinek Date: Tue Sep 3 10:20:44 2024 +0200 lower-bitint: Fix up __builtin_{add,sub}_overflow{,_p} bitint lowering [PR116501] The following testcase is miscompiled. The problem is in the last_ovf step. The second operand has signed _BitInt(513) type but has the MSB clear, so range_to_prec returns 512 for it (i.e. it fits into unsigned _BitInt(512)). Because of that the last step actually doesn't need to get the most significant bit from the second operand, but the code was deciding what to use purely from TYPE_UNSIGNED (type1) - if unsigned, use 0, otherwise sign-extend the last processed bit; but that in this case was set. We don't want to treat the positive operand as if it was negative regardless of the bit below that precision, and precN >= 0 indicates that the operand is in the [0, inf) range. 2024-09-03 Jakub Jelinek PR tree-optimization/116501 * gimple-lower-bitint.cc (bitint_large_huge::lower_addsub_overflow): In the last_ovf case, use build_zero_cst operand not just when TYPE_UNSIGNED (typeN), but also when precN >= 0. * gcc.dg/torture/bitint-73.c: New test. 
(cherry picked from commit d4d75a83007e884bfcd632ea3b3269704496f048) Diff: --- gcc/gimple-lower-bitint.cc | 4 ++-- gcc/testsuite/gcc.dg/torture/bitint-73.c | 20 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc index b10593035c3..58deaf253e9 100644 --- a/gcc/gimple-lower-bitint.cc +++ b/gcc/gimple-lower-bitint.cc @@ -4192,7 +4192,7 @@ bitint_large_huge::lower_addsub_overflow (tree obj, gimple *stmt) else { m_data_cnt = data_cnt; - if (TYPE_UNSIGNED (type0)) + if (TYPE_UNSIGNED (type0) || prec0 >= 0) rhs1 = build_zero_cst (m_limb_type); else { @@ -4210,7 +4210,7 @@ bitint_large_huge::lower_addsub_overflow (tree obj, gimple *stmt) rhs1 = add_cast (m_limb_type, gimple_assign_lhs (g)); } } - if (TYPE_UNSIGNED (type1)) + if (TYPE_UNSIGNED (type1) || prec1 >= 0) rhs2 = build_zero_cst (m_limb_type); else { diff --git a/gcc/testsuite/gcc.dg/torture/bitint-73.c b/gcc/testsuite/gcc.dg/torture/bitint-73.c new file mode 100644 index 000..1e15f391257 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/bitint-73.c @@ -0,0 +1,20 @@ +/* PR tree-optimization/116501 */ +/* { dg-do run { target bitint575 } } */ +/* { dg-options "-std=c23" } */ +/* { dg-skip-if "" { ! run_expensive_tests } { "*" } { "-O0" "-O2" } } */ +/* { dg-skip-if "" { ! run_expensive_tests } { "-flto" } { "" } } */ + +_BitInt (4) a; + +int +foo (_BitInt(513) b) +{ + return __builtin_sub_overflow_p (a, b, (_BitInt (511)) 0); +} + +int +main () +{ + if (!foo (0xwb)) +__builtin_abort (); +}
[gcc r14-10646] libsanitizer: On aarch64 use hint #34 in prologue of libsanitizer functions
https://gcc.gnu.org/g:24909512101d59807f6d23a9963d64390eca8f60 commit r14-10646-g24909512101d59807f6d23a9963d64390eca8f60 Author: Jakub Jelinek Date: Thu Sep 5 12:20:57 2024 +0200 libsanitizer: On aarch64 use hint #34 in prologue of libsanitizer functions When gcc is built with -mbranch-protection=standard, running sanitized programs doesn't work properly on bti enabled kernels. This has been fixed upstream with https://github.com/llvm/llvm-project/pull/84061 The following patch cherry picks that from upstream. For trunk we should eventually do a full merge from upstream, but I'm hoping they will first fix up the _BitInt libubsan support mess. 2024-09-05 Jakub Jelinek * sanitizer_common/sanitizer_asm.h: Cherry-pick llvm-project revision 1c792d24e0a228ad49cc004a1c26bbd7cd87f030. * interception/interception.h: Likewise. (cherry picked from commit 2379cbb94b2668227c237c94c82e3c49fe39fd0f) Diff: --- libsanitizer/interception/interception.h | 4 ++-- libsanitizer/sanitizer_common/sanitizer_asm.h | 14 -- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/libsanitizer/interception/interception.h b/libsanitizer/interception/interception.h index 58e969378a9..73135b34bee 100644 --- a/libsanitizer/interception/interception.h +++ b/libsanitizer/interception/interception.h @@ -204,11 +204,11 @@ const interpose_substitution substitution_##func_name[] \ ".type " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", " \ ASM_TYPE_FUNCTION_STR "\n" \ SANITIZER_STRINGIFY(TRAMPOLINE(func)) ":\n" \ - SANITIZER_STRINGIFY(CFI_STARTPROC) "\n" \ + C_ASM_STARTPROC "\n" \ C_ASM_TAIL_CALL(SANITIZER_STRINGIFY(TRAMPOLINE(func)), \ "__interceptor_" \ SANITIZER_STRINGIFY(ASM_PREEMPTIBLE_SYM(func))) "\n" \ - SANITIZER_STRINGIFY(CFI_ENDPROC) "\n" \ + C_ASM_ENDPROC "\n" \ ".size " SANITIZER_STRINGIFY(TRAMPOLINE(func)) ", " \ ".-" SANITIZER_STRINGIFY(TRAMPOLINE(func)) "\n" \ ); diff --git a/libsanitizer/sanitizer_common/sanitizer_asm.h b/libsanitizer/sanitizer_common/sanitizer_asm.h index 
3af66a4e449..30e9d15184e 100644 --- a/libsanitizer/sanitizer_common/sanitizer_asm.h +++ b/libsanitizer/sanitizer_common/sanitizer_asm.h @@ -42,6 +42,16 @@ # define CFI_RESTORE(reg) #endif +#if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT) +# define ASM_STARTPROC CFI_STARTPROC; hint #34 +# define C_ASM_STARTPROC SANITIZER_STRINGIFY(CFI_STARTPROC) "\nhint #34" +#else +# define ASM_STARTPROC CFI_STARTPROC +# define C_ASM_STARTPROC SANITIZER_STRINGIFY(CFI_STARTPROC) +#endif +#define ASM_ENDPROC CFI_ENDPROC +#define C_ASM_ENDPROC SANITIZER_STRINGIFY(CFI_ENDPROC) + #if defined(__x86_64__) || defined(__i386__) || defined(__sparc__) # define ASM_TAIL_CALL jmp #elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \ @@ -114,9 +124,9 @@ .globl __interceptor_trampoline_##name; \ ASM_TYPE_FUNCTION(__interceptor_trampoline_##name); \ __interceptor_trampoline_##name: \ - CFI_STARTPROC; \ + ASM_STARTPROC; \ ASM_TAIL_CALL ASM_PREEMPTIBLE_SYM(__interceptor_##name); \ - CFI_ENDPROC; \ + ASM_ENDPROC; \ ASM_SIZE(__interceptor_trampoline_##name) # define ASM_INTERCEPTOR_TRAMPOLINE_SUPPORT 1 # endif // Architecture supports interceptor trampoline
[gcc r15-3481] [AARCH64] adjust gcc.target/aarch64/sve/mask_gather_load_7.c
https://gcc.gnu.org/g:7b8da316089bfdf299592a88fdab2f93c444b03a commit r15-3481-g7b8da316089bfdf299592a88fdab2f93c444b03a Author: Richard Biener Date: Thu Sep 5 11:38:04 2024 +0200 [AARCH64] adjust gcc.target/aarch64/sve/mask_gather_load_7.c The following adjusts the scan-assembler to also allow predicate registers p8-15 to be used for the destination of the compares. I see that code generation with a pending vectorizer patch (the only assembler change is different predicate register allocation). * gcc.target/aarch64/sve/mask_gather_load_7.c: Allow p8-15 to be used for the destination of the compares. Diff: --- gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c index c31fae308a5..7812ae7c928 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c @@ -41,13 +41,13 @@ TEST_ALL (TEST_LOOP) /* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 36 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */ -/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 18 } } */ /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 18 } } */ /* Also used for the TEST32 indices. 
*/ /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 72 } } */ -/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ -/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */ +/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */ /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 36 } } */
[gcc r15-3482] c++: Add missing auto_diagnostic_groups
https://gcc.gnu.org/g:cb76fcf5ebf0817e6f1b7c019071362f7a5f3ae0 commit r15-3482-gcb76fcf5ebf0817e6f1b7c019071362f7a5f3ae0 Author: Nathaniel Shead Date: Wed Aug 7 19:20:19 2024 +1000 c++: Add missing auto_diagnostic_groups This patch goes through all .cc files in gcc/cp and adds in any auto_diagnostic_groups that seem to be missing by looking for any 'inform' calls that aren't grouped with their respective error/warning. Now with SARIF output support this seems to be a bit more important. The patch isn't complete; I've tried to also track helper functions used for diagnostics to group them, but some may have been missed. Additionally there are a few functions that are definitely missing groupings but I wasn't able to see an obvious way to add them without potentially grouping together unrelated messages. This list includes: - lazy_load_{binding,pendings} "during load of {binding,pendings} for" - cp_finish_decomp "in initialization of structured binding variable" - require_deduced_type "using __builtin_source_location" - convert_nontype_argument "in template argument for type %qT" - coerce_template_params "so any instantiation with a non-empty parameter pack" - tsubst_default_argument "when instantiating default argument" - invalid_nontype_parm_type_p "invalid template non-type parameter" gcc/cp/ChangeLog: * class.cc (add_method): Add missing auto_diagnostic_group. (handle_using_decl): Likewise. (maybe_warn_about_overly_private_class): Likewise. (check_field_decl): Likewise. (check_field_decls): Likewise. (resolve_address_of_overloaded_function): Likewise. (note_name_declared_in_class): Likewise. * constraint.cc (associate_classtype_constraints): Likewise. (diagnose_trait_expr): Clean up whitespace. * coroutines.cc (find_coro_traits_template_decl): Add missing auto_diagnostic_group. (coro_promise_type_found_p): Likewise. (coro_diagnose_throwing_fn): Likewise. * cvt.cc (build_expr_type_conversion): Likewise. * decl.cc (validate_constexpr_redeclaration): Likewise. 
(duplicate_function_template_decls): Likewise. (duplicate_decls): Likewise. (lookup_label_1): Likewise. (check_previous_goto_1): Likewise. (check_goto_1): Likewise. (make_typename_type): Likewise. (make_unbound_class_template): Likewise. (check_tag_decl): Likewise. (start_decl): Likewise. (maybe_commonize_var): Likewise. (check_for_uninitialized_const_var): Likewise. (reshape_init_class): Likewise. (check_initializer): Likewise. (cp_finish_decl): Likewise. (find_decomp_class_base): Likewise. (cp_finish_decomp): Likewise. (expand_static_init): Likewise. (grokfndecl): Likewise. (grokdeclarator): Likewise. (check_elaborated_type_specifier): Likewise. (lookup_and_check_tag): Likewise. (xref_tag): Likewise. (cxx_simulate_enum_decl): Likewise. (finish_function): Likewise. * decl2.cc (check_classfn): Likewise. (record_mangling): Likewise. (mark_used): Likewise. * error.cc (qualified_name_lookup_error): Likewise. * except.cc (build_throw): Likewise. * init.cc (get_nsdmi): Likewise. (diagnose_uninitialized_cst_or_ref_member_1): Likewise. (warn_placement_new_too_small): Likewise. (build_new_1): Likewise. (build_vec_delete_1): Likewise. (build_delete): Likewise. * lambda.cc (add_capture): Likewise. (add_default_capture): Likewise. * lex.cc (unqualified_fn_lookup_error): Likewise. * method.cc (synthesize_method): Likewise. (defaulted_late_check): Likewise. * module.cc (trees_in::is_matching_decl): Likewise. (trees_in::read_enum_def): Likewise. (module_state::check_not_purview): Likewise. (module_state::deferred_macro): Likewise. (module_state::read_config): Likewise. (module_state::check_read): Likewise. (declare_module): Likewise. (init_modules): Likewise. * name-lookup.cc (diagnose_name_conflict): Likewise. (lookup_using_decl): Likewise. (set_decl_namespace): Likewise. (finish_using_directive): Likewise. (push_namespace): Likewise. (add_imported_namespace): Likewise. * parser.cc (cp_parser_check_for_definition_in_return_type): Likewise. 
(cp_parser_userdef_numeric_literal): Likewise. (cp_parser_nested_na
[gcc r15-3483] Move from 'gcc.target/nvptx/nvptx.exp' into 'target-supports.exp' additions for nvptx target
https://gcc.gnu.org/g:a121af90fe9244258c8620901dd6fa22537767bb commit r15-3483-ga121af90fe9244258c8620901dd6fa22537767bb Author: Thomas Schwinge Date: Mon Jul 22 14:40:34 2024 +0200 Move from 'gcc.target/nvptx/nvptx.exp' into 'target-supports.exp' additions for nvptx target gcc/testsuite/ * gcc.target/nvptx/nvptx.exp (check_effective_target_default_ptx_isa_version_at_least) (check_effective_target_default_ptx_isa_version_at_least_6_0) (check_effective_target_runtime_ptx_isa_version_at_least) (check_effective_target_runtime_ptx_alias) (add_options_for_ptx_alias): Move... * lib/target-supports.exp (check_nvptx_default_ptx_isa_version_at_least) (check_effective_target_nvptx_default_ptx_isa_version_at_least_6_0) (check_nvptx_runtime_ptx_isa_version_at_least) (check_effective_target_nvptx_runtime_alias_ptx) (add_options_for_nvptx_alias_ptx): ... here. * gcc.target/nvptx/alias-1.c: Adjust. * gcc.target/nvptx/alias-2.c: Likewise. * gcc.target/nvptx/alias-3.c: Likewise. * gcc.target/nvptx/alias-4.c: Likewise. * gcc.target/nvptx/alias-to-alias-1.c: Likewise. * gcc.target/nvptx/alias-weak-1.c: Likewise. * gcc.target/nvptx/uniform-simt-5.c: Likewise. gcc/ * doc/sourcebuild.texi (Effective-Target Keywords): Document 'nvptx_default_ptx_isa_version_at_least_6_0', 'nvptx_runtime_alias_ptx'. (Add Options): Document 'nvptx_alias_ptx'. 
Diff: --- gcc/doc/sourcebuild.texi | 14 + gcc/testsuite/gcc.target/nvptx/alias-1.c | 4 +- gcc/testsuite/gcc.target/nvptx/alias-2.c | 4 +- gcc/testsuite/gcc.target/nvptx/alias-3.c | 4 +- gcc/testsuite/gcc.target/nvptx/alias-4.c | 4 +- gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c | 2 +- gcc/testsuite/gcc.target/nvptx/alias-weak-1.c | 2 +- gcc/testsuite/gcc.target/nvptx/nvptx.exp | 66 - gcc/testsuite/gcc.target/nvptx/uniform-simt-5.c | 4 +- gcc/testsuite/lib/target-supports.exp | 72 +++ 10 files changed, 98 insertions(+), 78 deletions(-) diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 7c7094dc5a9..6ba72fd44a2 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2424,6 +2424,17 @@ MSP430 target has the small memory model enabled (@code{-msmall}). MSP430 target has the large memory model enabled (@code{-mlarge}). @end table +@subsubsection nvptx-specific attributes + +@table @code +@item nvptx_default_ptx_isa_version_at_least_6_0 +nvptx code by default compiles for at least PTX ISA version 6.0. + +@item nvptx_runtime_alias_ptx +The nvptx runtime environment supports the PTX ISA directive +@code{.alias}. +@end table + @subsubsection PowerPC-specific attributes @table @code @@ -3302,6 +3313,9 @@ compliance mode. @code{mips16} function attributes. Only MIPS targets support this feature, and only then in certain modes. +@item nvptx_alias_ptx +Enable using the PTX ISA directive @code{.alias} on nvptx targets. + @item riscv_a Add the 'A' extension to the -march string on RISC-V targets. 
diff --git a/gcc/testsuite/gcc.target/nvptx/alias-1.c b/gcc/testsuite/gcc.target/nvptx/alias-1.c index d251eee6e42..1c0642b14d9 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-1.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-1.c @@ -1,7 +1,7 @@ /* { dg-do link } */ -/* { dg-do run { target runtime_ptx_alias } } */ +/* { dg-do run { target nvptx_runtime_alias_ptx } } */ /* { dg-options "-save-temps" } */ -/* { dg-add-options ptx_alias } */ +/* { dg-add-options nvptx_alias_ptx } */ int v; diff --git a/gcc/testsuite/gcc.target/nvptx/alias-2.c b/gcc/testsuite/gcc.target/nvptx/alias-2.c index 96cb7e2c1ef..5c4b9c787e1 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-2.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-2.c @@ -1,7 +1,7 @@ /* { dg-do link } */ -/* { dg-do run { target runtime_ptx_alias } } */ +/* { dg-do run { target nvptx_runtime_alias_ptx } } */ /* { dg-options "-save-temps -O2" } */ -/* { dg-add-options ptx_alias } */ +/* { dg-add-options nvptx_alias_ptx } */ #include "alias-1.c" diff --git a/gcc/testsuite/gcc.target/nvptx/alias-3.c b/gcc/testsuite/gcc.target/nvptx/alias-3.c index 39649e30b91..b55ff26269e 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-3.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-3.c @@ -1,7 +1,7 @@ /* { dg-do link } */ -/* { dg-do run { target runtime_ptx_alias } } */ +/* { dg-do run { target nvptx_runtime_alias_ptx } } */ /* { dg-options "-save-temps" } */ -/* { dg-add-options ptx_alias } */ +/* { dg-add-options nvptx_alias_ptx } */ /* Copy of alias-1.c, with static __f and f. */ diff --git a/gcc/testsuite/gcc.target/nvptx/alias-4.c b/gcc/tests
[gcc r15-3484] Fix 'gcc.target/nvptx/alias-2.c' comment
https://gcc.gnu.org/g:973c1bf51fb0f58fbfe43651bb0a61e1d124b35d commit r15-3484-g973c1bf51fb0f58fbfe43651bb0a61e1d124b35d Author: Thomas Schwinge Date: Mon Sep 18 22:41:56 2023 +0200 Fix 'gcc.target/nvptx/alias-2.c' comment PR target/104957 gcc/testsuite/ * gcc.target/nvptx/alias-2.c: Fix comment. Diff: --- gcc/testsuite/gcc.target/nvptx/alias-2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/nvptx/alias-2.c b/gcc/testsuite/gcc.target/nvptx/alias-2.c index 5c4b9c787e1..7a88b6f4f6f 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-2.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-2.c @@ -9,6 +9,6 @@ /* { dg-final { scan-assembler-not "\\.alias.*;" } } */ /* { dg-final { scan-assembler-not "\\.visible \\.func f;" } } */ -/* Note static and inlined, so still there. */ +/* Note extern and inlined, so still there. */ /* { dg-final { scan-assembler-times "\\.visible \\.func __f;" 1 } } */
[gcc r15-3485] Enhance 'gcc.target/nvptx/alias-*.c' assembler scanning
https://gcc.gnu.org/g:d0f02538494ded78cac12c63f5708a53f5a77bda commit r15-3485-gd0f02538494ded78cac12c63f5708a53f5a77bda Author: Thomas Schwinge Date: Wed Jul 17 15:27:51 2024 +0200 Enhance 'gcc.target/nvptx/alias-*.c' assembler scanning ... in order to demonstrate unexpected behavior (XFAILed here). PR target/104957 gcc/testsuite/ * gcc.target/nvptx/alias-1.c: Enhance assembler scanning. * gcc.target/nvptx/alias-2.c: Likewise. * gcc.target/nvptx/alias-3.c: Likewise. * gcc.target/nvptx/alias-4.c: Likewise. * gcc.target/nvptx/alias-to-alias-1.c: Likewise. Diff: --- gcc/testsuite/gcc.target/nvptx/alias-1.c | 15 --- gcc/testsuite/gcc.target/nvptx/alias-2.c | 16 gcc/testsuite/gcc.target/nvptx/alias-3.c | 15 --- gcc/testsuite/gcc.target/nvptx/alias-4.c | 17 + gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c | 22 +- 5 files changed, 66 insertions(+), 19 deletions(-) diff --git a/gcc/testsuite/gcc.target/nvptx/alias-1.c b/gcc/testsuite/gcc.target/nvptx/alias-1.c index 1c0642b14d9..0fb06495f67 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-1.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-1.c @@ -23,6 +23,15 @@ main (void) return 0; } -/* { dg-final { scan-assembler-times "\\.alias f,__f;" 1 } } */ -/* { dg-final { scan-assembler-times "\\.visible \\.func __f;" 1 } } */ -/* { dg-final { scan-assembler-times "\\.visible \\.func f;" 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: __f$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func __f;$} 1 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: __f$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func __f$} 1 } } */ + +/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: f$} 1 { xfail *-*-* } } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func f;$} 1 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: f$} 1 { xfail *-*-* } } } + { dg-final { scan-assembler-times {(?n)^\.alias 
f,__f;$} 1 } } */ + +/* { dg-final { scan-assembler-times {(?n)\tcall __f;$} 0 } } + { dg-final { scan-assembler-times {(?n)\tcall f;$} 1 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/alias-2.c b/gcc/testsuite/gcc.target/nvptx/alias-2.c index 7a88b6f4f6f..8ae8b5cfaed 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-2.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-2.c @@ -5,10 +5,18 @@ #include "alias-1.c" +/* Note extern and inlined, so still there. */ +/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: __f$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func __f;$} 1 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: __f$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func __f$} 1 } } */ + /* Inlined, so no alias. */ -/* { dg-final { scan-assembler-not "\\.alias.*;" } } */ -/* { dg-final { scan-assembler-not "\\.visible \\.func f;" } } */ -/* Note extern and inlined, so still there. */ -/* { dg-final { scan-assembler-times "\\.visible \\.func __f;" 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: f$} 0 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func f;$} 0 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: f$} 0 } } + { dg-final { scan-assembler-times {(?n)^\.alias f,__f;$} 0 } } */ +/* { dg-final { scan-assembler-times {(?n)\tcall __f;$} 0 } } + { dg-final { scan-assembler-times {(?n)\tcall f;$} 0 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/alias-3.c b/gcc/testsuite/gcc.target/nvptx/alias-3.c index b55ff26269e..1906607f95f 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-3.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-3.c @@ -25,6 +25,15 @@ main (void) return 0; } -/* { dg-final { scan-assembler-times "\\.alias f,__f;" 1 } } */ -/* { dg-final { scan-assembler-times "\\.func __f;" 1 } } */ -/* { dg-final { scan-assembler-times "\\.func f;" 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^// BEGIN 
FUNCTION DECL: __f$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.func __f;$} 1 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN FUNCTION DEF: __f$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.func __f$} 1 } } */ + +/* { dg-final { scan-assembler-times {(?n)^// BEGIN FUNCTION DECL: f$} 1 { xfail *-*-* } } } + { dg-final { scan-assembler-times {(?n)^\.func f;$} 1 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN FUNCTION DEF: f$} 1 { xfail *-*-* } } } + { dg-final { scan-assembler-times {(?n)^\.alias f,__f;$} 1 } } */ + +/* { dg-final { scan-assembler-times {(?n)\tcall __f;$} 0 } } + { dg-final { scan-assembler-times {(?n)\tcall f;$} 1 } } */ diff --git a/gcc/testsuite/gcc.
[gcc r15-3487] nvptx: Emit DECL and DEF linker markers for aliases [PR104957]
https://gcc.gnu.org/g:8f5aade15e595b288a2c4ec60ddde8dc80df1a80 commit r15-3487-g8f5aade15e595b288a2c4ec60ddde8dc80df1a80 Author: Thomas Schwinge Date: Wed Jul 17 23:56:25 2024 +0200 nvptx: Emit DECL and DEF linker markers for aliases [PR104957] With nvptx '-malias' enabled (as implemented in commit f8b15e177155960017ac0c5daef8780d1127f91c "[nvptx] Use .alias directive for mptx >= 6.3"), the C++ front end in certain cases does 'write_fn_proto' before an eventual 'alias' attribute has been added. In that case, we do emit (via 'write_fn_marker') a DECL linker marker, but then never emit a corresponding DEF linker marker for the alias. This causes hundreds of instances of link-time 'unresolved symbol [alias]' across the C++ test suite, which are regressions compared to a test run with (default) '-mno-alias' (in which case the respective functions get duplicated). PR target/104957 gcc/ * config/nvptx/nvptx.cc (write_fn_proto_1): Revert 2022-03-22 change; 'write_fn_marker' also for alias DECL. (nvptx_asm_output_def_from_decls): 'write_fn_marker' for alias DEF. gcc/testsuite/ * g++.target/nvptx/alias-g++.dg_init_dtor2-1.C: Un-XFAIL. * gcc.target/nvptx/alias-1.c: Likewise. * gcc.target/nvptx/alias-3.c: Likewise. * gcc.target/nvptx/alias-to-alias-1.c: Likewise. 
Diff: --- gcc/config/nvptx/nvptx.cc | 6 -- gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C | 4 ++-- gcc/testsuite/gcc.target/nvptx/alias-1.c | 4 ++-- gcc/testsuite/gcc.target/nvptx/alias-3.c | 4 ++-- gcc/testsuite/gcc.target/nvptx/alias-to-alias-1.c | 8 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 144b8d0c874..4a7c64f05eb 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -997,8 +997,7 @@ static void write_fn_proto_1 (std::stringstream &s, bool is_defn, const char *name, const_tree decl, bool force_public) { - if (lookup_attribute ("alias", DECL_ATTRIBUTES (decl)) == NULL) -write_fn_marker (s, is_defn, TREE_PUBLIC (decl) || force_public, name); + write_fn_marker (s, is_defn, TREE_PUBLIC (decl) || force_public, name); /* PTX declaration. */ if (DECL_EXTERNAL (decl)) @@ -7627,6 +7626,9 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value) fputs (s.str ().c_str (), stream); tree id = DECL_ASSEMBLER_NAME (name); + std::stringstream s_def; + write_fn_marker (s_def, true, TREE_PUBLIC (name), IDENTIFIER_POINTER (id)); + fputs (s_def.str ().c_str (), stream); NVPTX_ASM_OUTPUT_DEF (stream, IDENTIFIER_POINTER (id), IDENTIFIER_POINTER (value)); } diff --git a/gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C b/gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C index 747656d51d6..a30f99af308 100644 --- a/gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C +++ b/gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C @@ -1,6 +1,6 @@ /* Reduced from 'g++.dg/init/dtor2.C'. */ -/* { dg-do compile } */ +/* { dg-do link } */ /* { dg-add-options nvptx_alias_ptx } */ /* { dg-additional-options -save-temps } */ /* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. 
*/ @@ -26,7 +26,7 @@ int main() /* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: _ZN1BD1Ev$} 1 } } { dg-final { scan-assembler-times {(?n)^\.visible \.func _ZN1BD1Ev \(\.param\.u64 %in_ar0\);$} 1 } } - { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: _ZN1BD1Ev$} 1 { xfail *-*-* } } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: _ZN1BD1Ev$} 1 } } { dg-final { scan-assembler-times {(?n)^\.alias _ZN1BD1Ev,_ZN1BD2Ev;$} 1 } } */ /* { dg-final { scan-assembler-times {(?n)\tcall _ZN1BD1Ev, \(} 1 } } diff --git a/gcc/testsuite/gcc.target/nvptx/alias-1.c b/gcc/testsuite/gcc.target/nvptx/alias-1.c index 0fb06495f67..f2aeff36e69 100644 --- a/gcc/testsuite/gcc.target/nvptx/alias-1.c +++ b/gcc/testsuite/gcc.target/nvptx/alias-1.c @@ -28,9 +28,9 @@ main (void) { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: __f$} 1 } } { dg-final { scan-assembler-times {(?n)^\.visible \.func __f$} 1 } } */ -/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: f$} 1 { xfail *-*-* } } } +/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: f$} 1 } } { dg-final { scan-assembler-times {(?n)^\.visible \.func f;$} 1 } } - { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: f$} 1 { xfail *-*-* } } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: f$} 1 } } {
[gcc r15-3486] Add 'g++.target/nvptx/alias-g++.dg_init_dtor2-1.C'
https://gcc.gnu.org/g:a1865fd33897bc6c6e0109df0a12ee73ce386315 commit r15-3486-ga1865fd33897bc6c6e0109df0a12ee73ce386315 Author: Thomas Schwinge Date: Wed Jul 17 18:02:50 2024 +0200 Add 'g++.target/nvptx/alias-g++.dg_init_dtor2-1.C' ... as one minimized example for the issue that with nvptx '-malias' enabled (as implemented in commit f8b15e177155960017ac0c5daef8780d1127f91c "[nvptx] Use .alias directive for mptx >= 6.3"), there are hundreds of instances of link-time 'unresolved symbol [alias]' across the C++ test suite, which are regressions compared to a test run with (default) '-mno-alias'. PR target/104957 gcc/testsuite/ * g++.target/nvptx/alias-g++.dg_init_dtor2-1.C: Add. Diff: --- .../g++.target/nvptx/alias-g++.dg_init_dtor2-1.C | 33 ++ 1 file changed, 33 insertions(+) diff --git a/gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C b/gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C new file mode 100644 index 000..747656d51d6 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/alias-g++.dg_init_dtor2-1.C @@ -0,0 +1,33 @@ +/* Reduced from 'g++.dg/init/dtor2.C'. */ + +/* { dg-do compile } */ +/* { dg-add-options nvptx_alias_ptx } */ +/* { dg-additional-options -save-temps } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. 
*/ + +struct B +{ + ~B(); +}; + +B::~B () { +} + +int main() +{ + B b; + return 0; +} + +/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: _ZN1BD2Ev$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func _ZN1BD2Ev \(\.param\.u64 %in_ar0\);$} 1 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: _ZN1BD2Ev$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func _ZN1BD2Ev \(\.param\.u64 %in_ar0\)$} 1 } } */ + +/* { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DECL: _ZN1BD1Ev$} 1 } } + { dg-final { scan-assembler-times {(?n)^\.visible \.func _ZN1BD1Ev \(\.param\.u64 %in_ar0\);$} 1 } } + { dg-final { scan-assembler-times {(?n)^// BEGIN GLOBAL FUNCTION DEF: _ZN1BD1Ev$} 1 { xfail *-*-* } } } + { dg-final { scan-assembler-times {(?n)^\.alias _ZN1BD1Ev,_ZN1BD2Ev;$} 1 } } */ + +/* { dg-final { scan-assembler-times {(?n)\tcall _ZN1BD1Ev, \(} 1 } } + { dg-final { scan-assembler-times {(?n)\tcall _ZN1BD2Ev, \(} 0 } } */
[gcc r13-9005] Arm: Fix incorrect tailcall-generation for indirect calls [PR113780]
https://gcc.gnu.org/g:c56dc83e82af319d02a19f0703c301523ab1ef25 commit r13-9005-gc56dc83e82af319d02a19f0703c301523ab1ef25 Author: Tejas Belagod Date: Thu Jan 25 16:05:36 2024 +0530 Arm: Fix incorrect tailcall-generation for indirect calls [PR113780] This patch fixes a bug that causes indirect calls in PAC-enabled functions to be tailcalled incorrectly when all argument registers R0-R3 are used. 2024-02-07 Tejas Belagod PR target/113780 * config/arm/arm.cc (arm_function_ok_for_sibcall): Don't allow tailcalls for indirect calls with 4 or more arguments in pac-enabled functions. * lib/target-supports.exp (v8_1m_main_pacbti): Add __ARM_FEATURE_PAUTH. * gcc.target/arm/pac-sibcall.c: New. (cherry picked from commit f436a2ab6ad15968275c9bbf3bd56647e5559e68) Diff: --- gcc/config/arm/arm.cc | 11 +++ gcc/testsuite/gcc.target/arm/pac-sibcall.c | 14 ++ gcc/testsuite/lib/target-supports.exp | 2 ++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index c00c6d7c1e6..bf1c6e36dfc 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -7947,10 +7947,13 @@ arm_function_ok_for_sibcall (tree decl, tree exp) && DECL_WEAK (decl)) return false; - /* We cannot do a tailcall for an indirect call by descriptor if all the - argument registers are used because the only register left to load the - address is IP and it will already contain the static chain. */ - if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines) + /* We cannot tailcall an indirect call by descriptor if all the call-clobbered + general registers are live (r0-r3 and ip). This can happen when: + - IP contains the static chain, or + - IP is needed for validating the PAC signature. 
*/ + if (!decl + && ((CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines) + || arm_current_function_pac_enabled_p())) { tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); CUMULATIVE_ARGS cum; diff --git a/gcc/testsuite/gcc.target/arm/pac-sibcall.c b/gcc/testsuite/gcc.target/arm/pac-sibcall.c new file mode 100644 index 000..e15bd2f478d --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/pac-sibcall.c @@ -0,0 +1,14 @@ +/* If all call-clobbered general registers are live (r0-r3, ip), disable + indirect tail-call for a PAC-enabled function. */ + +/* { dg-do compile } */ +/* { dg-require-effective-target arm_arch_v8_1m_main_pacbti_ok } */ +/* { dg-add-options arm_arch_v8_1m_main_pacbti } */ +/* { dg-additional-options "-mbranch-protection=pac-ret+leaf -O2" } */ + +void fail(void (*f)(int, int, int, int)) +{ + f(1, 2, 3, 4); +} + +/* { dg-final { scan-assembler-not "bx\tip\t@ indirect register sibling call" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index c81313ea717..be97693e895 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -5057,6 +5057,8 @@ foreach { armfunc armflag armdefs } { __ARM_ARCH_8M_BASE__ v8m_main "-march=armv8-m.main -mthumb" __ARM_ARCH_8M_MAIN__ v8_1m_main "-march=armv8.1-m.main -mthumb" __ARM_ARCH_8M_MAIN__ + v8_1m_main_pacbti "-march=armv8.1-m.main+pacbti+fp -mthumb" + "__ARM_ARCH_8M_MAIN__ && __ARM_FEATURE_BTI && __ARM_FEATURE_PAUTH" v9a "-march=armv9-a" __ARM_ARCH_9A__ } { eval [string map [list FUNC $armfunc FLAG $armflag DEFS $armdefs ] { proc check_effective_target_arm_arch_FUNC_ok { } {
[gcc r15-3488] Avoid ICE when passing VLA vector to accelerator.
https://gcc.gnu.org/g:ae88e91938af364ef5613e5461b12b484b578bc5 commit r15-3488-gae88e91938af364ef5613e5461b12b484b578bc5 Author: Prathamesh Kulkarni Date: Thu Sep 5 18:52:53 2024 +0530 Avoid ICE when passing VLA vector to accelerator. gcc/ChangeLog: * gimplify.cc (omp_add_variable): Check if decl size is not poly_int_tree_p. (gimplify_adjust_omp_clauses): Likewise. * omp-low.cc (scan_sharing_clauses): Likewise. (lower_omp_target): Likewise. Signed-off-by: Prathamesh Kulkarni Diff: --- gcc/gimplify.cc | 4 ++-- gcc/omp-low.cc | 8 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 9300138aa0c..ceb53e5d5bb 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -7799,7 +7799,7 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) /* When adding a variable-sized variable, we have to handle all sorts of additional bits of data: the pointer replacement variable, and the parameters of the type. */ - if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST) + if (DECL_SIZE (decl) && !poly_int_tree_p (DECL_SIZE (decl))) { /* Add the pointer replacement variable as PRIVATE if the variable replacement is private, else FIRSTPRIVATE since we'll need the @@ -14413,7 +14413,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p, } } else if (DECL_SIZE (decl) - && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST + && !poly_int_tree_p (DECL_SIZE (decl)) && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_POINTER && OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER && (OMP_CLAUSE_MAP_KIND (c) diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 4d003f42098..241f79e34a9 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -1664,7 +1664,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) if (DECL_P (decl)) { if (DECL_SIZE (decl) - && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST) + && !poly_int_tree_p (DECL_SIZE (decl))) { tree decl2 = DECL_VALUE_EXPR (decl); gcc_assert 
(INDIRECT_REF_P (decl2)); @@ -1906,7 +1906,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) = remap_type (TREE_TYPE (decl), &ctx->cb); } else if (DECL_SIZE (decl) - && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST) + && !poly_int_tree_p (DECL_SIZE (decl))) { tree decl2 = DECL_VALUE_EXPR (decl); gcc_assert (INDIRECT_REF_P (decl2)); @@ -12750,7 +12750,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } if (DECL_SIZE (var) - && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST) + && !poly_int_tree_p (DECL_SIZE (var))) { tree var2 = DECL_VALUE_EXPR (var); gcc_assert (TREE_CODE (var2) == INDIRECT_REF); @@ -13077,7 +13077,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) else { if (DECL_SIZE (ovar) - && TREE_CODE (DECL_SIZE (ovar)) != INTEGER_CST) + && !poly_int_tree_p (DECL_SIZE (ovar))) { tree ovar2 = DECL_VALUE_EXPR (ovar); gcc_assert (TREE_CODE (ovar2) == INDIRECT_REF);
[gcc r14-10647] ipa: Don't disable function parameter analysis for fat LTO
https://gcc.gnu.org/g:6abedee0d4c8d40d412a358a3f6cb6090772d286 commit r14-10647-g6abedee0d4c8d40d412a358a3f6cb6090772d286 Author: H.J. Lu Date: Tue Aug 27 13:11:39 2024 -0700 ipa: Don't disable function parameter analysis for fat LTO Update analyze_parms not to disable function parameter analysis for -ffat-lto-objects. Tested on x86-64, there are no differences in zstd with "-O2 -flto=auto -g" vs "-O2 -flto=auto -g -ffat-lto-objects". PR ipa/116410 * ipa-modref.cc (analyze_parms): Always analyze function parameter for LTO. Signed-off-by: H.J. Lu (cherry picked from commit 2f1689ea8e631ebb4ff3720d56ef0362f5898ff6) Diff: --- gcc/ipa-modref.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc index 37221215a65..10344fbb4b5 100644 --- a/gcc/ipa-modref.cc +++ b/gcc/ipa-modref.cc @@ -2969,7 +2969,7 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, summary->arg_flags.safe_grow_cleared (count, true); summary->arg_flags[parm_index] = EAF_UNUSED; } - else if (summary_lto) + if (summary_lto) { if (parm_index >= summary_lto->arg_flags.length ()) summary_lto->arg_flags.safe_grow_cleared (count, true); @@ -3028,7 +3028,7 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, summary->arg_flags.safe_grow_cleared (count, true); summary->arg_flags[parm_index] = flags; } - else if (summary_lto) + if (summary_lto) { if (parm_index >= summary_lto->arg_flags.length ()) summary_lto->arg_flags.safe_grow_cleared (count, true);
[gcc r13-9006] ipa: Don't disable function parameter analysis for fat LTO
https://gcc.gnu.org/g:e83df98c8142235cda0bcd538907a4693e420c92 commit r13-9006-ge83df98c8142235cda0bcd538907a4693e420c92 Author: H.J. Lu Date: Tue Aug 27 13:11:39 2024 -0700 ipa: Don't disable function parameter analysis for fat LTO Update analyze_parms not to disable function parameter analysis for -ffat-lto-objects. Tested on x86-64, there are no differences in zstd with "-O2 -flto=auto -g" vs "-O2 -flto=auto -g -ffat-lto-objects". PR ipa/116410 * ipa-modref.cc (analyze_parms): Always analyze function parameter for LTO. Signed-off-by: H.J. Lu (cherry picked from commit 2f1689ea8e631ebb4ff3720d56ef0362f5898ff6) Diff: --- gcc/ipa-modref.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc index e3196df8aa9..0b42955eb3a 100644 --- a/gcc/ipa-modref.cc +++ b/gcc/ipa-modref.cc @@ -2965,7 +2965,7 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, summary->arg_flags.safe_grow_cleared (count, true); summary->arg_flags[parm_index] = EAF_UNUSED; } - else if (summary_lto) + if (summary_lto) { if (parm_index >= summary_lto->arg_flags.length ()) summary_lto->arg_flags.safe_grow_cleared (count, true); @@ -3021,7 +3021,7 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, summary->arg_flags.safe_grow_cleared (count, true); summary->arg_flags[parm_index] = flags; } - else if (summary_lto) + if (summary_lto) { if (parm_index >= summary_lto->arg_flags.length ()) summary_lto->arg_flags.safe_grow_cleared (count, true);
[gcc r12-10699] ipa: Don't disable function parameter analysis for fat LTO
https://gcc.gnu.org/g:42d4aa02c6016cc8efd896f627f692896e68c914 commit r12-10699-g42d4aa02c6016cc8efd896f627f692896e68c914 Author: H.J. Lu Date: Tue Aug 27 13:11:39 2024 -0700 ipa: Don't disable function parameter analysis for fat LTO Update analyze_parms not to disable function parameter analysis for -ffat-lto-objects. Tested on x86-64, there are no differences in zstd with "-O2 -flto=auto -g" vs "-O2 -flto=auto -g -ffat-lto-objects". PR ipa/116410 * ipa-modref.cc (analyze_parms): Always analyze function parameter for LTO. Signed-off-by: H.J. Lu (cherry picked from commit 2f1689ea8e631ebb4ff3720d56ef0362f5898ff6) Diff: --- gcc/ipa-modref.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/ipa-modref.cc b/gcc/ipa-modref.cc index 556816ab429..d41de9c59c1 100644 --- a/gcc/ipa-modref.cc +++ b/gcc/ipa-modref.cc @@ -2964,7 +2964,7 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, summary->arg_flags.safe_grow_cleared (count, true); summary->arg_flags[parm_index] = EAF_UNUSED; } - else if (summary_lto) + if (summary_lto) { if (parm_index >= summary_lto->arg_flags.length ()) summary_lto->arg_flags.safe_grow_cleared (count, true); @@ -3020,7 +3020,7 @@ analyze_parms (modref_summary *summary, modref_summary_lto *summary_lto, summary->arg_flags.safe_grow_cleared (count, true); summary->arg_flags[parm_index] = flags; } - else if (summary_lto) + if (summary_lto) { if (parm_index >= summary_lto->arg_flags.length ()) summary_lto->arg_flags.safe_grow_cleared (count, true);
[gcc r15-3489] c++: fn redecl in fn scope wrongly accepted [PR116239]
https://gcc.gnu.org/g:d44cae2d9310660e3e47f15202e86e4f73f15b37 commit r15-3489-gd44cae2d9310660e3e47f15202e86e4f73f15b37 Author: Marek Polacek Date: Fri Aug 30 14:12:22 2024 -0400 c++: fn redecl in fn scope wrongly accepted [PR116239] Redeclaration such as void f(void); consteval void f(void); is invalid. In a namespace scope, we detect the collision in validate_constexpr_redeclaration, but not when one declaration is at block scope. When we have void f(void); void g() { consteval void f(void); } we call pushdecl on the second f and call push_local_extern_decl_alias. It finds the namespace-scope f: for (ovl_iterator iter (binding); iter; ++iter) if (decls_match (decl, *iter, /*record_versions*/false)) { alias = *iter; break; } but decls_match says they match so we just set DECL_LOCAL_DECL_ALIAS (and do not call another pushdecl leading to duplicate_decls which would detect mismatching return types, for example). I don't think we want to change decls_match, so a simple fix is to detect the problem in push_local_extern_decl_alias. PR c++/116239 gcc/cp/ChangeLog: * cp-tree.h (validate_constexpr_redeclaration): Declare. * decl.cc (validate_constexpr_redeclaration): No longer static. * name-lookup.cc (push_local_extern_decl_alias): Call validate_constexpr_redeclaration. gcc/testsuite/ChangeLog: * g++.dg/diagnostic/redeclaration-6.C: New test. 
Diff: --- gcc/cp/cp-tree.h | 1 + gcc/cp/decl.cc| 2 +- gcc/cp/name-lookup.cc | 2 ++ gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C | 34 +++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 2eeb5e3e8b1..1a763b683de 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -6992,6 +6992,7 @@ extern bool member_like_constrained_friend_p (tree); extern bool fns_correspond (tree, tree); extern int decls_match (tree, tree, bool = true); extern bool maybe_version_functions(tree, tree, bool); +extern bool validate_constexpr_redeclaration (tree, tree); extern bool merge_default_template_args(tree, tree, bool); extern tree duplicate_decls(tree, tree, bool hiding = false, diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 7bad3047ad9..f4128dbccdf 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -1412,7 +1412,7 @@ check_redeclaration_exception_specification (tree new_decl, /* Return true if OLD_DECL and NEW_DECL agree on constexprness. Otherwise issue diagnostics. 
*/ -static bool +bool validate_constexpr_redeclaration (tree old_decl, tree new_decl) { old_decl = STRIP_TEMPLATE (old_decl); diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc index 7a6cc244c15..cd3947cbe4f 100644 --- a/gcc/cp/name-lookup.cc +++ b/gcc/cp/name-lookup.cc @@ -3637,6 +3637,8 @@ push_local_extern_decl_alias (tree decl) if (decls_match (decl, *iter, /*record_versions*/false)) { alias = *iter; + if (!validate_constexpr_redeclaration (alias, decl)) + return; break; } diff --git a/gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C b/gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C new file mode 100644 index 000..ed8d4af7792 --- /dev/null +++ b/gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C @@ -0,0 +1,34 @@ +// PR c++/116239 +// { dg-do compile { target c++20 } } + +consteval void f1(); +void f2(); +constexpr void f3(); +void f4(); +consteval void f5(); +constexpr void f6(); + +void +g () +{ + void f1(); // { dg-error "differs in .consteval." } + consteval void f2(); // { dg-error "differs in .consteval." } + + void f3(); // { dg-error "differs in .constexpr." } + constexpr void f4(); // { dg-error "differs in .constexpr." } + + consteval void f5(); + constexpr void f6(); + + void f7(); + consteval void f7(); // { dg-error "differs in .consteval." } + + consteval void f8(); + void f8(); // { dg-error "differs in .consteval." } + + void f9(); + constexpr void f9(); // { dg-error "differs in .constexpr." } + + constexpr void f10(); + void f10(); // { dg-error "differs in .constexpr." } +}
[gcc r15-3490] doc: remove stray character
https://gcc.gnu.org/g:c880fca6cdb16c5efe3a12ee7ecdb2435f5e7105 commit r15-3490-gc880fca6cdb16c5efe3a12ee7ecdb2435f5e7105 Author: Marek Polacek Date: Thu Sep 5 13:17:06 2024 -0400 doc: remove stray character There's an extra '+'. gcc/ChangeLog: * doc/invoke.texi: Remove an extra char in @item sme2. Diff: --- gcc/doc/invoke.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 193db761d64..0f9b1bab19f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -21847,7 +21847,7 @@ Enable the Scalable Matrix Extension. Enable the FEAT_SME_I16I64 extension to SME. @item sme-f64f64 Enable the FEAT_SME_F64F64 extension to SME. -+@item sme2 +@item sme2 Enable the Scalable Matrix Extension 2. This also enables SME instructions. @item lse128 Enable the LSE128 128-bit atomic instructions extension. This also
[gcc r15-3491] c++: local class memfn synth from noexcept context [PR113063]
https://gcc.gnu.org/g:37977343ff4f9dcb047d966d8cbaa222964763f9 commit r15-3491-g37977343ff4f9dcb047d966d8cbaa222964763f9 Author: Patrick Palka Date: Thu Sep 5 14:31:00 2024 -0400 c++: local class memfn synth from noexcept context [PR113063] Extending the PR113063 testcase to additionally constant evaluate the <=> expression causes us to trip over the assert in cxx_eval_call_expression /* We used to shortcut trivial constructor/op= here, but nowadays we can only get a trivial function here with -fno-elide-constructors. */ gcc_checking_assert (!trivial_fn_p (fun) || !flag_elide_constructors /* We don't elide constructors when processing a noexcept-expression. */ || cp_noexcept_operand); since the local class's <=> was first used and therefore synthesized in a noexcept context and so its definition contains unelided trivial constructors. This patch fixes this by clearing cp_noexcept_operand alongside cp_unevaluated_context in the function-local case of maybe_push_to_top_level. PR c++/113063 gcc/cp/ChangeLog: * name-lookup.cc (local_state_t): Clear and restore cp_noexcept_operand as well. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/spaceship-synth16.C: Also constant evaluate the <=> expression. * g++.dg/cpp2a/spaceship-synth16a.C: Likewise. 
Reviewed-by: Jason Merrill Diff: --- gcc/cp/name-lookup.cc | 4 gcc/testsuite/g++.dg/cpp2a/spaceship-synth16.C | 1 + gcc/testsuite/g++.dg/cpp2a/spaceship-synth16a.C | 1 + 3 files changed, 6 insertions(+) diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc index cd3947cbe4f..bfe17b7cb2f 100644 --- a/gcc/cp/name-lookup.cc +++ b/gcc/cp/name-lookup.cc @@ -8781,6 +8781,7 @@ struct local_state_t { int cp_unevaluated_operand; int c_inhibit_evaluation_warnings; + int cp_noexcept_operand_; static local_state_t save_and_clear () @@ -8790,6 +8791,8 @@ struct local_state_t ::cp_unevaluated_operand = 0; s.c_inhibit_evaluation_warnings = ::c_inhibit_evaluation_warnings; ::c_inhibit_evaluation_warnings = 0; +s.cp_noexcept_operand_ = ::cp_noexcept_operand; +::cp_noexcept_operand = 0; return s; } @@ -8798,6 +8801,7 @@ struct local_state_t { ::cp_unevaluated_operand = this->cp_unevaluated_operand; ::c_inhibit_evaluation_warnings = this->c_inhibit_evaluation_warnings; +::cp_noexcept_operand = this->cp_noexcept_operand_; } }; diff --git a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16.C b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16.C index 37a183de0f5..7dbe7e1db75 100644 --- a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16.C +++ b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16.C @@ -10,4 +10,5 @@ int main() { X x; static_assert(noexcept(x <=> x)); x <=> x; + constexpr auto r = x <=> x; } diff --git a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16a.C b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16a.C index 68388a680b2..bc0e7a54b7e 100644 --- a/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16a.C +++ b/gcc/testsuite/g++.dg/cpp2a/spaceship-synth16a.C @@ -13,4 +13,5 @@ int main() { X x; static_assert(noexcept(x <=> x)); x <=> x; + constexpr auto r = X{} <=> X{}; }
[gcc r15-3492] c++, coroutines: Revise promise construction/destruction.
https://gcc.gnu.org/g:7d1483921941d21d91f929ef0d59a9794b1946b4 commit r15-3492-g7d1483921941d21d91f929ef0d59a9794b1946b4 Author: Iain Sandoe Date: Sat Aug 31 13:08:42 2024 +0100 c++, coroutines: Revise promise construction/destruction. In examining the coroutine testcases for unexpected diagnostic output for 'Wall', I found a 'statement has no effect' warning for the promise construction in one case. In particular, the case is where the user's promise type has an implicit CTOR but a user-provided DTOR. Further, the type does not actually need constructing. In very early versions of the coroutines code we used to check TYPE_NEEDS_CONSTRUCTING() to determine whether to attempt to build a constructor call for the promise. During review, it was suggested to use type_build_ctor_call () instead. This latter call checks the constructors in the type (both user-defined and implicit) and returns true, amongst other cases, if any of the found CTORs are marked as deprecated. In a number of places (for example [class.copy.ctor] / 6) the standard says that some version of an implicit CTOR is deprecated when the user provides a DTOR. Thus, for this specific arrangement of promise type, type_build_ctor_call returns true, because of (for example) a deprecated implicit copy CTOR. We are not going to use any of the deprecated CTORs and thus will not see warnings from this - however, since the call returned true, we have now determined that we should attempt to build a constructor call. Note as above, the type does not actually require construction and thus one might expect either a NULL_TREE or error_mark_node in response to the build_special_member_call (). However, in practice the function returns the original instance object instead of a call or some error. When we add that as a statement it triggers the 'statement has no effect' warning. The patch here rearranges the promise construction/destruction code to allow for the case that a DTOR is required independently of a CTOR. 
In addition, we check that the return from build_special_member_call () has side effects before we add it as a statement. gcc/cp/ChangeLog: * coroutines.cc (cp_coroutine_transform::build_ramp_function): Separate the build of promise constructor and destructor. When evaluating the constructor, check that build_special_member_call returns an expression with side effects before adding it. Signed-off-by: Iain Sandoe Diff: --- gcc/cp/coroutines.cc | 30 +- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc index e605eaec7a4..d6acf09326f 100644 --- a/gcc/cp/coroutines.cc +++ b/gcc/cp/coroutines.cc @@ -4896,16 +4896,12 @@ cp_coroutine_transform::build_ramp_function () tree p = build_class_member_access_expr (deref_fp, promise_m, NULL_TREE, false, tf_warning_or_error); - tree promise_dtor = NULL_TREE; if (type_build_ctor_call (promise_type)) { - /* Do a placement new constructor for the promise type (we never call -the new operator, just the constructor on the object in place in the -frame). + /* Construct the promise object [dcl.fct.def.coroutine] / 5.7. -First try to find a constructor with the same parameter list as the -original function (if it has params), failing that find a constructor -with no parameter list. */ +First try to find a constructor with an argument list comprised of +the parameter copies. */ if (DECL_ARGUMENTS (orig_fn_decl)) { @@ -4917,19 +4913,27 @@ cp_coroutine_transform::build_ramp_function () else r = NULL_TREE; + /* If that fails then the promise constructor argument list is empty. */ if (r == NULL_TREE || r == error_mark_node) r = build_special_member_call (p, complete_ctor_identifier, NULL, promise_type, LOOKUP_NORMAL, tf_warning_or_error); - finish_expr_stmt (r); + /* If type_build_ctor_call() encounters deprecated implicit CTORs it will +return true, and therefore we will execute this code path. 
However, +we might well not actually require a CTOR and under those conditions +the build call above will not return a call expression, but the +original instance object. Do not attempt to add the statement unless +it has side-effects. */ + if (r && r != error_mark_node && TREE_SIDE_EFFECTS (r)) + finish_expr_stmt (r); +} - r = build_modify_expr (loc, coro_promise_live, boolean_type_node, -INIT_EXPR, loc, boolean_true_node, -
[gcc r15-3493] c++: vtable referring to "unavailable" virtual fn [PR116606]
https://gcc.gnu.org/g:d9d34f9a91371dea4bab0b54b2d7f762a6cc23e0 commit r15-3493-gd9d34f9a91371dea4bab0b54b2d7f762a6cc23e0 Author: Marek Polacek Date: Thu Sep 5 13:01:59 2024 -0400 c++: vtable referring to "unavailable" virtual fn [PR116606] mark_vtable_entries already has /* It's OK for the vtable to refer to deprecated virtual functions. */ warning_sentinel w(warn_deprecated_decl); but that doesn't cover __attribute__((unavailable)). We can use the following override to cover both. PR c++/116606 gcc/cp/ChangeLog: * decl2.cc (mark_vtable_entries): Temporarily override deprecated_state to UNAVAILABLE_DEPRECATED_SUPPRESS. Remove a warning_sentinel. gcc/testsuite/ChangeLog: * g++.dg/ext/attr-unavailable-13.C: New test. Diff: --- gcc/cp/decl2.cc| 3 ++- gcc/testsuite/g++.dg/ext/attr-unavailable-13.C | 8 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc index 3c4f34868ee..0279372488c 100644 --- a/gcc/cp/decl2.cc +++ b/gcc/cp/decl2.cc @@ -2180,7 +2180,8 @@ static void mark_vtable_entries (tree decl, vec &consteval_vtables) { /* It's OK for the vtable to refer to deprecated virtual functions. */ - warning_sentinel w(warn_deprecated_decl); + auto du = make_temp_override (deprecated_state, + UNAVAILABLE_DEPRECATED_SUPPRESS); bool consteval_seen = false; diff --git a/gcc/testsuite/g++.dg/ext/attr-unavailable-13.C b/gcc/testsuite/g++.dg/ext/attr-unavailable-13.C new file mode 100644 index 000..9ca40005419 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/attr-unavailable-13.C @@ -0,0 +1,8 @@ +// PR c++/116606 +// { dg-do compile } + +struct C { +__attribute__((unavailable)) virtual void f() {} +}; + +C c;
[gcc r15-3494] Fortran: fix ICE in gfc_create_module_variable [PR100273]
https://gcc.gnu.org/g:1f462b5072a5e82c35921f7e3bdf3959c4a49dc9 commit r15-3494-g1f462b5072a5e82c35921f7e3bdf3959c4a49dc9 Author: Harald Anlauf Date: Thu Sep 5 21:30:25 2024 +0200 Fortran: fix ICE in gfc_create_module_variable [PR100273] gcc/fortran/ChangeLog: PR fortran/100273 * trans-decl.cc (gfc_create_module_variable): Handle module variable also when it is needed for the result specification of a contained function. gcc/testsuite/ChangeLog: PR fortran/100273 * gfortran.dg/pr100273.f90: New test. Diff: --- gcc/fortran/trans-decl.cc | 3 ++- gcc/testsuite/gfortran.dg/pr100273.f90 | 26 ++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index 6692ac7ef4c..ee41d66e6d2 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -5540,7 +5540,8 @@ gfc_create_module_variable (gfc_symbol * sym) /* Create the variable. */ pushdecl (decl); gcc_assert (sym->ns->proc_name->attr.flavor == FL_MODULE - || (sym->ns->parent->proc_name->attr.flavor == FL_MODULE + || ((sym->ns->parent->proc_name->attr.flavor == FL_MODULE + || sym->ns->parent->proc_name->attr.flavor == FL_PROCEDURE) && sym->fn_result_spec)); DECL_CONTEXT (decl) = sym->ns->proc_name->backend_decl; rest_of_decl_compilation (decl, 1, 0); diff --git a/gcc/testsuite/gfortran.dg/pr100273.f90 b/gcc/testsuite/gfortran.dg/pr100273.f90 new file mode 100644 index 000..f71947ad802 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr100273.f90 @@ -0,0 +1,26 @@ +! { dg-do compile } +! PR fortran/100273 - ICE in gfc_create_module_variable +! +! Contributed by G.Steinmetz + +module m + implicit none +contains + character(4) function g(k) +integer :: k +g = f(k) + contains +function f(n) + character(3), parameter :: a(2) = ['1 ', '123'] + integer :: n + character(len_trim(a(n))) :: f + f = 'abc' +end + end +end +program p + use m + implicit none + print *, '>>' // g(1) // '<<' + print *, '>>' // g(2) // '<<' +end
[gcc(refs/users/meissner/heads/work177-vpair)] Rewrite vector-pair.h
https://gcc.gnu.org/g:e0d91e0cb833167e43f2bc54fd4b3668a2fe4532 commit e0d91e0cb833167e43f2bc54fd4b3668a2fe4532 Author: Michael Meissner Date: Thu Sep 5 16:29:35 2024 -0400 Rewrite vector-pair.h 2024-09-04 Michael Meissner * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Signal that we have the vector pair built-in functions. * config/rs6000/vector-pair.h: Rewrite. Diff: --- gcc/config/rs6000/rs6000-c.cc | 8 +- gcc/config/rs6000/vector-pair.h | 716 2 files changed, 519 insertions(+), 205 deletions(-) diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 82826f96a8e..2d674f9b236 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -590,9 +590,13 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, if (rs6000_cpu == PROCESSOR_CELL) rs6000_define_or_undefine_macro (define_p, "__PPU__"); - /* Tell the user if we support the MMA instructions. */ + /* Tell the user if we support the MMA instructions. Also say that we + support the vector pair built-in functions. */ if ((flags & OPTION_MASK_MMA) != 0) -rs6000_define_or_undefine_macro (define_p, "__MMA__"); +{ + rs6000_define_or_undefine_macro (define_p, "__MMA__"); + rs6000_define_or_undefine_macro (define_p, "__VPAIR__"); +} /* Whether pc-relative code is being generated. */ if ((flags & OPTION_MASK_PCREL) != 0) rs6000_define_or_undefine_macro (define_p, "__PCREL__"); diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h index 3c03e44f3f4..ebd1df9a532 100644 --- a/gcc/config/rs6000/vector-pair.h +++ b/gcc/config/rs6000/vector-pair.h @@ -30,19 +30,55 @@ #ifndef _VECTOR_PAIR_H #define _VECTOR_PAIR_H 1 -/* If we have MMA support, use power10 support. */ -#if __MMA__ -typedef __vector_pair vector_pair_t; - -#define VPAIR_FP_CONSTRAINT"wa"/* Allow all VSX registers. */ -#define VPAIR_FP_SECOND"S" /* Access 2nd VSX register. */ +/* During testing, allow vector-pair.h to be included multiple times. 
*/ +#undef vector_pair_t +#undef vector_pair_f64_t +#undef vector_pair_f32_t + +#undef vpair_f64_abs +#undef vpair_f64_add +#undef vpair_f64_div +#undef vpair_f64_fma +#undef vpair_f64_fms +#undef vpair_f64_max +#undef vpair_f64_min +#undef vpair_f64_mul +#undef vpair_f64_nabs +#undef vpair_f64_neg +#undef vpair_f64_nfma +#undef vpair_f64_nfms +#undef vpair_f64_splat +#undef vpair_f64_sqrt +#undef vpair_f64_sub + +#undef vpair_f32_abs +#undef vpair_f32_add +#undef vpair_f32_div +#undef vpair_f32_fma +#undef vpair_f32_fms +#undef vpair_f32_max +#undef vpair_f32_min +#undef vpair_f32_mul +#undef vpair_f32_nabs +#undef vpair_f32_neg +#undef vpair_f32_nfma +#undef vpair_f32_nfms +#undef vpair_f32_splat +#undef vpair_f32_sqrt +#undef vpair_f32_sub + +/* Do we have MMA support and the vector pair built-in function? */ +#if __MMA__ && __VPAIR__ && !__NO_VPAIR_BUILTIN__ +#define vector_pair_t __vector_pair +#define vector_pair_d64_t __vector_pair +#define vector_pair_d32_t __vector_pair /* vector pair double operations on power10. */ #define vpair_f64_splat(R, A) (*R) = __builtin_vpair_f64_splat (A) -#define vpair_f64_neg(R,A) (*R) = __builtin_vpair_f64_neg (*A) #define vpair_f64_abs(R,A) (*R) = __builtin_vpair_f64_abs (*A) #define vpair_f64_nabs(R,A)(*R) = __builtin_vpair_f64_nabs (*A) +#define vpair_f64_neg(R,A) (*R) = __builtin_vpair_f64_neg (*A) #define vpair_f64_sqrt(R,A)(*R) = __builtin_vpair_f64_sqrt (*A) #define vpair_f64_add(R,A,B) (*R) = __builtin_vpair_f64_add (*A, *B) @@ -57,13 +93,12 @@ typedef __vector_pair vector_pair_t; #define vpair_f64_nfma(R,A,B,C)(*R) = __builtin_vpair_f64_nfma (*A, *B, *C) #define vpair_f64_nfms(R,A,B,C)(*R) = __builtin_vpair_f64_nfms (*A, *B, *C) - /* vector pair float operations on power10. 
*/ #define vpair_f32_splat(R, A) (*R) = __builtin_vpair_f32_splat (A) -#define vpair_f32_neg(R,A) (*R) = __builtin_vpair_f32_neg (*A) #define vpair_f32_abs(R,A) (*R) = __builtin_vpair_f32_abs (*A) #define vpair_f32_nabs(R,A)(*R) = __builtin_vpair_f32_nabs (*A) +#define vpair_f32_neg(R,A) (*R) = __builtin_vpair_f32_neg (*A) #define vpair_f32_sqrt(R,A)(*R) = __builtin_vpair_f32_sqrt (*A) #define vpair_f32_add(R,A,B) (*R) = __builtin_vpair_f32_add (*A, *B) @@ -78,212 +113,487 @@ typedef __vector_pair vector_pair_t; #define vpair_f32_nfma(R,A,B,C)(*R) = __builtin_vpair_f32_nfma (*A, *B, *C) #define vpair_f32_nfms(R,A,B,C)(*R) = __builtin_vpair_f32_nfma (*A, *B, *C) + +/* Do we have the __vector_pair type available, but we don't have the built-in + functions? */ + +#elif __MMA__ && !__NO_VPAIR_ASM__ +#define vector_pair_t __vector_pair +#define vector_pair_d64_t
[gcc(refs/users/meissner/heads/work177-vpair)] Update ChangeLog.*
https://gcc.gnu.org/g:9de1da8488faf38662169e0ae5eeb9bc1180ef8c commit 9de1da8488faf38662169e0ae5eeb9bc1180ef8c Author: Michael Meissner Date: Thu Sep 5 16:31:09 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.vpair | 37 + 1 file changed, 37 insertions(+) diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair index 7717495e866..99b7da2d2a0 100644 --- a/gcc/ChangeLog.vpair +++ b/gcc/ChangeLog.vpair @@ -1,3 +1,40 @@ + Branch work177-vpair, patch #405 + +Rewrite vector-pair.h + +2024-09-04 Michael Meissner + + * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Signal that + we have the vector pair built-in functions. + * config/rs6000/vector-pair.h: Rewrite. + + Branch work177-vpair, patch #404 + +Add vector pair optimizations. + +2024-09-04 Michael Meissner + +gcc/ + + * config/rs6000/vector-pair.md (vpair_add_neg_3): New + combiner insn to convert vector plus/neg into a minus operation. + (vpair_fma__merge): Optimize multiply, add/subtract, and + negation into fma operations if the user specifies to create fmas. + (vpair_fma__merge): Likewise. + (vpair_fma__merge2): Likewise. + (vpair_nfma__merge): Likewise. + (vpair_nfms__merge): Likewise. + (vpair_nfms__merge2): Likewise. + +gcc/testsuite/ + + * gcc.target/powerpc/vector-pair-7.c: New test. + * gcc.target/powerpc/vector-pair-8.c: Likewise. + * gcc.target/powerpc/vector-pair-9.c: Likewise. + * gcc.target/powerpc/vector-pair-10.c: Likewise. + * gcc.target/powerpc/vector-pair-11.c: Likewise. + * gcc.target/powerpc/vector-pair-12xs.c: Likewise. + Branch work177-vpair, patch #403 Add vector pair init and splat.
[gcc r15-3495] c++: tweak redeclaration-6.C
https://gcc.gnu.org/g:b214ffeecb3f480dd7864bcbfc9ac30100c126f3 commit r15-3495-gb214ffeecb3f480dd7864bcbfc9ac30100c126f3 Author: Jason Merrill Date: Thu Sep 5 16:39:55 2024 -0400 c++: tweak redeclaration-6.C gcc/testsuite/ChangeLog: * g++.dg/diagnostic/redeclaration-6.C: Add -fno-implicit-constexpr. Diff: --- gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C b/gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C index ed8d4af7792..5058bfe74b6 100644 --- a/gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C +++ b/gcc/testsuite/g++.dg/diagnostic/redeclaration-6.C @@ -1,5 +1,6 @@ // PR c++/116239 // { dg-do compile { target c++20 } } +// { dg-additional-options "-fno-implicit-constexpr" } consteval void f1(); void f2();
[gcc r15-3496] [V2][RISC-V] Avoid unnecessary extensions after sCC insns
https://gcc.gnu.org/g:b567e5ead5d54f022c57b48f31653f6ae6ece007 commit r15-3496-gb567e5ead5d54f022c57b48f31653f6ae6ece007 Author: Jeff Law Date: Thu Sep 5 15:45:25 2024 -0600 [V2][RISC-V] Avoid unnecessary extensions after sCC insns So the first patch failed the pre-commit CI; it didn't fail in my testing because I'm using --with-arch to set a default configuration that includes things like zicond to ensure that's always tested. And the failing test is skipped when zicond is enabled by default. The failing test is designed to ensure that we don't miss an if-conversion due to costing issues around the extension that was typically done in an sCC sequence (which is why it's only run when zicond is off). The test failed because we have a little routine that is highly dependent on the code generated by the sCC expander and will adjust the costing to account for expansion quirks that usually go away in register allocation. That code needs to be enhanced to work after the sCC expansion change. Essentially it needs to account for the subreg extraction that shows up in the sequence as well as being a bit looser on mode checking. I kept the code working for the old sequences -- in theory a user could conjure up the old sequence so handling them seems useful. This also drops the testsuite changes. Palmer's change makes them unnecessary. --- So I was looking at a performance regression in spec with Ventana's internal tree. Ultimately the problem was a bad interaction with an internal patch (REP_MODE_EXTENDED), fwprop and ext-dce. The details of that problem aren't particularly important. Removal of the local patch went reasonably well. But I did see some secondary cases where we had redundant sign extensions. The most notable cases come from the integer sCC insns. Expansion of those cases for rv64 can be improved using Jivan's trick. 
ie, if the target is not DImode, then create a DImode temporary for the result and copy the low bits out with a promoted subreg to the real target. With the change in expansion the final code we generate is slightly different for a few tests at -O1/-Og, but should perform the same. The key for the affected tests is we're not seeing the introduction of unnecessary extensions. Rather than adjust the regexps to handle the -O1/-Og output, skipping for those seemed OK to me. I didn't extract a testcase. I'm a bit fried from digging through LTO'd code right now. gcc/ * config/riscv/riscv.cc (riscv_expand_int_scc): For rv64, use a DI temporary for the output and a promoted subreg to extract it into SI target. (riscv_noce_conversion_profitable_p): Recognize new output from sCC expansion too. Diff: --- gcc/config/riscv/riscv.cc | 46 +- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index a38cb72f09f..39489c4377e 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -4218,11 +4218,29 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq, riscv_if_info.original_cost += COSTS_N_INSNS (1); riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); } - last_dest = NULL_RTX; + rtx dest = SET_DEST (x); - if (COMPARISON_P (src) + + /* Do something similar for the moves that are likely to +turn into NOP moves by the time the register allocator is +done. These are also side effects of how our sCC expanders +work. We'll want to check and update LAST_DEST here too. 
*/ + if (last_dest && REG_P (dest) - && GET_MODE (dest) == SImode) + && GET_MODE (dest) == SImode + && SUBREG_P (src) + && SUBREG_PROMOTED_VAR_P (src) + && REGNO (SUBREG_REG (src)) == REGNO (last_dest)) + { + riscv_if_info.original_cost += COSTS_N_INSNS (1); + riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); + if (last_dest) + last_dest = dest; + } + else + last_dest = NULL_RTX; + + if (COMPARISON_P (src) && REG_P (dest)) last_dest = dest; } else @@ -4904,13 +4922,31 @@ riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *in riscv_extend_comparands (code, &op0, &op1); op0 = force_reg (word_mode, op0); + /* For sub-word targets on rv64, do the computation in DImode + then extract the lowpart for the final target, marking it + as sign extended. Note that it's also properly zero extended, + but it's probably more profitable to expose it as sign extended. */ + rtx t; + if (TARGET_64BIT &&
[gcc r14-10648] c++: vtable referring to "unavailable" virtual fn [PR116606]
https://gcc.gnu.org/g:fe668633f6b5819ba04d80f13343ee7f5bba1c5b commit r14-10648-gfe668633f6b5819ba04d80f13343ee7f5bba1c5b Author: Marek Polacek Date: Thu Sep 5 13:01:59 2024 -0400 c++: vtable referring to "unavailable" virtual fn [PR116606] mark_vtable_entries already has /* It's OK for the vtable to refer to deprecated virtual functions. */ warning_sentinel w(warn_deprecated_decl); but that doesn't cover __attribute__((unavailable)). We can use the following override to cover both. PR c++/116606 gcc/cp/ChangeLog: * decl2.cc (mark_vtable_entries): Temporarily override deprecated_state to UNAVAILABLE_DEPRECATED_SUPPRESS. Remove a warning_sentinel. gcc/testsuite/ChangeLog: * g++.dg/ext/attr-unavailable-13.C: New test. (cherry picked from commit d9d34f9a91371dea4bab0b54b2d7f762a6cc23e0) Diff: --- gcc/cp/decl2.cc| 3 ++- gcc/testsuite/g++.dg/ext/attr-unavailable-13.C | 8 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc index af2e08c8a63..f6c760338cc 100644 --- a/gcc/cp/decl2.cc +++ b/gcc/cp/decl2.cc @@ -2172,7 +2172,8 @@ static void mark_vtable_entries (tree decl, vec &consteval_vtables) { /* It's OK for the vtable to refer to deprecated virtual functions. */ - warning_sentinel w(warn_deprecated_decl); + auto du = make_temp_override (deprecated_state, + UNAVAILABLE_DEPRECATED_SUPPRESS); bool consteval_seen = false; diff --git a/gcc/testsuite/g++.dg/ext/attr-unavailable-13.C b/gcc/testsuite/g++.dg/ext/attr-unavailable-13.C new file mode 100644 index 000..9ca40005419 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/attr-unavailable-13.C @@ -0,0 +1,8 @@ +// PR c++/116606 +// { dg-do compile } + +struct C { +__attribute__((unavailable)) virtual void f() {} +}; + +C c;
[gcc r15-3498] Handle const0_operand for *avx2_pcmp3_1.
https://gcc.gnu.org/g:a51f2fc0d80869ab079a93cc3858f24a1fd28237 commit r15-3498-ga51f2fc0d80869ab079a93cc3858f24a1fd28237 Author: liuhongt Date: Wed Sep 4 15:39:17 2024 +0800 Handle const0_operand for *avx2_pcmp3_1. *_eq3_1 supports nonimm_or_0_operand for op1 and op2, pass_combine would fail to lower avx512 comparison back to avx2 one when op1/op2 is const0_rtx. It's because the splitter only supports nonimmediate_operand. Failed to match this instruction: (set (reg/i:V16QI 20 xmm0) (vec_merge:V16QI (const_vector:V16QI [ (const_int -1 [0xffffffffffffffff]) repeated x16 ]) (const_vector:V16QI [ (const_int 0 [0]) repeated x16 ]) (unspec:HI [ (reg:V16QI 105 [ a ]) (const_vector:V16QI [ (const_int 0 [0]) repeated x16 ]) (const_int 0 [0]) ] UNSPEC_PCMP))) The patch extends predicates of the splitter to handle that. gcc/ChangeLog: PR target/115517 * config/i386/sse.md (*avx2_pcmp3_1): Change predicate of operands[1] and operands[2] from nonimmediate_operand to nonimm_or_0_operand. gcc/testsuite/ChangeLog: * gcc.target/i386/pr115517.c: New test. 
Diff: --- gcc/config/i386/sse.md | 9 ++-- gcc/testsuite/gcc.target/i386/pr115517.c | 38 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3bf95f0b0e5..1946d3513be 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17908,8 +17908,8 @@ (match_operand:VI_128_256 1 "vector_all_ones_operand") (match_operand:VI_128_256 2 "const0_operand") (unspec: - [(match_operand:VI_128_256 3 "nonimmediate_operand") -(match_operand:VI_128_256 4 "nonimmediate_operand") + [(match_operand:VI_128_256 3 "nonimm_or_0_operand") +(match_operand:VI_128_256 4 "nonimm_or_0_operand") (match_operand:SI 5 "const_0_to_7_operand")] UNSPEC_PCMP)))] "TARGET_AVX512VL && ix86_pre_reload_split () @@ -17928,6 +17928,11 @@ { if (INTVAL (operands[5]) == 1) std::swap (operands[3], operands[4]); + + operands[3] = force_reg (mode, operands[3]); + if (operands[4] == CONST0_RTX (mode)) +operands[4] = force_reg (mode, operands[4]); + enum rtx_code code = INTVAL (operands[5]) ? 
GT : EQ; emit_move_insn (operands[0], gen_rtx_fmt_ee (code, mode, operands[3], operands[4])); diff --git a/gcc/testsuite/gcc.target/i386/pr115517.c b/gcc/testsuite/gcc.target/i386/pr115517.c new file mode 100644 index 000..e91d2c23a6b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115517.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-times "vpcmpeq" 4 } } */ +/* { dg-final { scan-assembler-not {(?n)%k[0-9]} } } */ + +typedef char v16qi __attribute__((vector_size(16))); +typedef short v8hi __attribute__((vector_size(16))); +typedef int v4si __attribute__((vector_size(16))); +typedef long long v2di __attribute__((vector_size(16))); + +v16qi +foo (v16qi a) +{ + v16qi b = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + return a == b; +} + +v8hi +foo2 (v8hi a) +{ + v8hi b = {0, 0, 0, 0, 0, 0, 0, 0}; + return a == b; +} + +v4si +foo3 (v4si a) +{ + v4si b = {0, 0, 0, 0}; + return a == b; +} + +v2di +foo4 (v2di a) +{ + v2di b = {0, 0}; + return a == b; +} +
[gcc r15-3499] libstdc++: -Wswitch and ios::openmode
https://gcc.gnu.org/g:3dafb65bb5c31b169dae180e0664dfcaee64afe6 commit r15-3499-g3dafb65bb5c31b169dae180e0664dfcaee64afe6 Author: Jason Merrill Date: Tue Aug 27 13:16:27 2024 -0400 libstdc++: -Wswitch and ios::openmode In addition to marking it as flag_enum, we want to avoid warnings about not having a case for the implementation detail enumerators _S_ios_openmode_*. And also for _S_noreplace in standard modes before it was added. libstdc++-v3/ChangeLog: * include/bits/ios_base.h (_GLIBCXX_NOREPLACE_UNUSED): New. (_Ios_Openmode): Add unused attributes. * testsuite/27_io/ios_base/types/openmode/case_label.cc: Handle noreplace. Diff: --- libstdc++-v3/include/bits/ios_base.h | 19 ++- .../27_io/ios_base/types/openmode/case_label.cc | 4 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/libstdc++-v3/include/bits/ios_base.h b/libstdc++-v3/include/bits/ios_base.h index b69151c49ad..6e343658096 100644 --- a/libstdc++-v3/include/bits/ios_base.h +++ b/libstdc++-v3/include/bits/ios_base.h @@ -114,8 +114,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION operator^=(_Ios_Fmtflags& __a, _Ios_Fmtflags __b) _GLIBCXX_NOTHROW { return __a = __a ^ __b; } + // If std::ios_base::noreplace isn't available, -Wswitch should ignore + // _S_noreplace. 
+#ifdef __glibcxx_ios_noreplace +#define _NOREPLACE_UNUSED +#else +#define _NOREPLACE_UNUSED __attribute__((__unused__)) +#endif - enum _Ios_Openmode + enum __attribute__((__flag_enum__)) _Ios_Openmode { _S_app = 1L << 0, _S_ate = 1L << 1, @@ -123,12 +130,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _S_in= 1L << 3, _S_out = 1L << 4, _S_trunc = 1L << 5, - _S_noreplace = 1L << 6, - _S_ios_openmode_end = 1L << 16, - _S_ios_openmode_max = __INT_MAX__, - _S_ios_openmode_min = ~__INT_MAX__ + _S_noreplace _NOREPLACE_UNUSED = 1L << 6, + _S_ios_openmode_end __attribute__((__unused__)) = 1L << 16, + _S_ios_openmode_max __attribute__((__unused__)) = __INT_MAX__, + _S_ios_openmode_min __attribute__((__unused__)) = ~__INT_MAX__ }; +#undef _NOREPLACE_UNUSED + _GLIBCXX_NODISCARD _GLIBCXX_CONSTEXPR inline _Ios_Openmode operator&(_Ios_Openmode __a, _Ios_Openmode __b) _GLIBCXX_NOTHROW diff --git a/libstdc++-v3/testsuite/27_io/ios_base/types/openmode/case_label.cc b/libstdc++-v3/testsuite/27_io/ios_base/types/openmode/case_label.cc index 95c86588a54..9e388a51294 100644 --- a/libstdc++-v3/testsuite/27_io/ios_base/types/openmode/case_label.cc +++ b/libstdc++-v3/testsuite/27_io/ios_base/types/openmode/case_label.cc @@ -44,6 +44,10 @@ case_labels(bitmask_type b) break; case std::ios_base::trunc: break; +#ifdef __glibcxx_ios_noreplace +case std::ios_base::noreplace: + break; +#endif case std::_S_ios_openmode_end: break; case __INT_MAX__:
[gcc r15-3500] c-family: add attribute flag_enum [PR81665]
https://gcc.gnu.org/g:1914ca8791ce4e0ba821e818cb6f86c76afdb6f2 commit r15-3500-g1914ca8791ce4e0ba821e818cb6f86c76afdb6f2 Author: Jason Merrill Date: Thu Aug 29 11:09:21 2024 -0400 c-family: add attribute flag_enum [PR81665] Several PRs complain about -Wswitch warning about a case for a bitwise combination of enumerators. Clang has an attribute flag_enum to prevent this; let's adopt that approach as well. This also recognizes the attribute as [[clang::flag_enum]], introducing handling of the clang attribute namespace. PR c++/46457 PR c++/81665 gcc/c-family/ChangeLog: * c-attribs.cc (handle_flag_enum_attribute): New. (c_common_gnu_attributes): Add it. (c_common_clang_attributes, c_common_clang_attribute_table): New. * c-common.h: Declare c_common_clang_attribute_table. * c-warn.cc (c_do_switch_warnings): Handle flag_enum. gcc/c/ChangeLog: * c-objc-common.h (c_objc_attribute_table): Add c_common_clang_attribute_table. gcc/cp/ChangeLog: * cp-objcp-common.h (cp_objcp_attribute_table): Add c_common_clang_attribute_table. gcc/testsuite/ChangeLog: * c-c++-common/attr-flag-enum-1.c: New test. gcc/ChangeLog: * doc/extend.texi: Document flag_enum attribute. * doc/invoke.texi: Mention flag_enum in -Wswitch. libstdc++-v3/ChangeLog: * include/bits/regex_constants.h: Use flag_enum. Diff: --- gcc/doc/extend.texi | 7 + gcc/doc/invoke.texi | 11 gcc/c-family/c-common.h | 1 + gcc/c/c-objc-common.h | 1 + gcc/cp/cp-objcp-common.h | 1 + libstdc++-v3/include/bits/regex_constants.h | 2 +- gcc/c-family/c-attribs.cc | 30 ++ gcc/c-family/c-warn.cc| 4 +++ gcc/testsuite/c-c++-common/attr-flag-enum-1.c | 37 +++ 9 files changed, 88 insertions(+), 6 deletions(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index ebfa6779bec..af0c45b42e0 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -9191,6 +9191,13 @@ initialization will result in future breakage. GCC emits warnings based on this attribute by default; use @option{-Wno-designated-init} to suppress them. 
+@cindex @code{flag_enum} type attribute +@item flag_enum +This attribute may be applied to an enumerated type to indicate that +its enumerators are used in bitwise operations, so e.g. @option{-Wswitch} +should not warn about a @code{case} that corresponds to a bitwise +combination of enumerators. + @cindex @code{hardbool} type attribute @item hardbool @itemx hardbool (@var{false_value}) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0f9b1bab19f..019e0a5ca80 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -7672,9 +7672,9 @@ unless C++14 mode (or newer) is active. Warn whenever a @code{switch} statement has an index of enumerated type and lacks a @code{case} for one or more of the named codes of that enumeration. (The presence of a @code{default} label prevents this -warning.) @code{case} labels outside the enumeration range also -provoke warnings when this option is used (even if there is a -@code{default} label). +warning.) @code{case} labels that do not correspond to enumerators also +provoke warnings when this option is used, unless the enumeration is marked +with the @code{flag_enum} attribute. This warning is enabled by @option{-Wall}. @opindex Wswitch-default @@ -7688,8 +7688,9 @@ case. @item -Wswitch-enum Warn whenever a @code{switch} statement has an index of enumerated type and lacks a @code{case} for one or more of the named codes of that -enumeration. @code{case} labels outside the enumeration range also -provoke warnings when this option is used. The only difference +enumeration. @code{case} labels that do not correspond to enumerators also +provoke warnings when this option is used, unless the enumeration is marked +with the @code{flag_enum} attribute. The only difference between @option{-Wswitch} and this option is that this option gives a warning about an omitted enumeration code even if there is a @code{default} label. 
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index d3827573a36..027f077d51b 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -821,6 +821,7 @@ extern struct visibility_flags visibility_options; /* Attribute table common to the C front ends. */ extern const struct scoped_attribute_specs c_common_gnu_attribute_table; +extern const struct scoped_attribute_specs c_common_clang_attribute_table; extern const struct scoped_attribute_specs c_common_format_attribute_table; /* Pointer to function to lazily
[gcc r15-3501] RISC-V: Fix out of index in riscv_select_multilib_by_abi
https://gcc.gnu.org/g:ead5f587dad3206e45db7ac31f5c34c1530ae529 commit r15-3501-gead5f587dad3206e45db7ac31f5c34c1530ae529 Author: YunQiang Su Date: Thu Sep 5 19:55:20 2024 +0800 RISC-V: Fix out of index in riscv_select_multilib_by_abi commit b5c2aae48723c9098a8a3dab1409b30fd87bbf56 Author: YunQiang Su Date: Thu Sep 5 15:14:43 2024 +0800 RISC-V: Lookup reversely in riscv_select_multilib_by_abi The last element should use index multilib_infos.size () - 1 gcc * common/config/riscv/riscv-common.cc(riscv_select_multilib_by_abi): Fix out of index problem. Diff: --- gcc/common/config/riscv/riscv-common.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 2c1ce7fc7cb..bd42fd01532 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -2079,7 +2079,7 @@ riscv_select_multilib_by_abi ( const std::string &riscv_current_abi_str, const std::vector &multilib_infos) { - for (ssize_t i = multilib_infos.size (); i >= 0; --i) + for (ssize_t i = multilib_infos.size () - 1; i >= 0; --i) if (riscv_current_abi_str == multilib_infos[i].abi_str) return xstrdup (multilib_infos[i].path.c_str ());
[gcc r15-3502] Match: Add int type fits check for form 1 of .SAT_SUB imm operand
https://gcc.gnu.org/g:019335b404c8d7fb2d234bb179745cc28693dd20 commit r15-3502-g019335b404c8d7fb2d234bb179745cc28693dd20 Author: Pan Li Date: Mon Sep 2 09:48:46 2024 +0800 Match: Add int type fits check for form 1 of .SAT_SUB imm operand This patch would like to add strict check for imm operand of .SAT_SUB matching. We had no type checking for imm operand previously, which may result in unexpected IL to be caught by .SAT_SUB pattern. We leverage the int_fits_type_p here to make sure the imm operand is an integer that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (12, a); uint8_t sum = .SAT_SUB (12u, a); uint8_t sum = .SAT_SUB (126u, a); uint8_t sum = .SAT_SUB (128u, a); uint8_t sum = .SAT_SUB (228, a); uint8_t sum = .SAT_SUB (223u, a); Not fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (-1, a); uint8_t sum = .SAT_SUB (256u, a); uint8_t sum = .SAT_SUB (257, a); The below test suites passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-53.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-54.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-55.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-56.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/match.pd | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-53.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-54.c | 27 ++ .../gcc.target/riscv/sat_u_add_imm_type_check-55.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-56.c | 27 ++ 6 files changed, 105 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 621306213e4..4b86d20e165 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3269,7 +3269,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_sub @0 @1) (cond^ (le @1 INTEGER_CST@2) (minus INTEGER_CST@0 @1) integer_zerop) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @1)) + && types_match (type, @1) && int_fits_type_p (@0, type)) (with { unsigned precision = TYPE_PRECISION (type); diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 86cd6bea8df..187c94795f7 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -284,6 +284,20 @@ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ #define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort () +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_1 (T y)\ +{ \ + return IMM >= y ? IMM - y : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_2(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ +{ \ + return IMM > y ? 
IMM - y : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c new file mode 100644 index 000..c959eeb0d86 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (6, uint32_t, -62) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (7, uint32_t, 4294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not
[gcc r15-3503] Match: Add int type fits check for form 2 of .SAT_SUB imm operand
https://gcc.gnu.org/g:a2e28b105cea4c44c3903d8d979c7a4afa1193f0 commit r15-3503-ga2e28b105cea4c44c3903d8d979c7a4afa1193f0 Author: Pan Li Date: Mon Sep 2 11:33:08 2024 +0800 Match: Add int type fits check for form 2 of .SAT_SUB imm operand This patch would like to add strict check for imm operand of .SAT_SUB matching. We had no type checking for imm operand previously, which may result in unexpected IL to be caught by .SAT_SUB pattern. We leverage the int_fits_type_p here to make sure the imm operand is an integer that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, 12); uint8_t sum = .SAT_SUB (a, 12u); uint8_t sum = .SAT_SUB (a, 126u); uint8_t sum = .SAT_SUB (a, 128u); uint8_t sum = .SAT_SUB (a, 228); uint8_t sum = .SAT_SUB (a, 223u); Not fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, -1); uint8_t sum = .SAT_SUB (a, 256u); uint8_t sum = .SAT_SUB (a, 257); The below test suites passed for this patch: * The rv64gcv fully regression test. * The x86 bootstrap test. * The x86 fully regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-57.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-58.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-59.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-60.c: New test. 
Signed-off-by: Pan Li Diff: --- gcc/match.pd | 2 +- gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-57.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-58.c | 27 ++ .../gcc.target/riscv/sat_u_add_imm_type_check-59.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-60.c | 27 ++ 6 files changed, 105 insertions(+), 1 deletion(-) diff --git a/gcc/match.pd b/gcc/match.pd index 4b86d20e165..bc6a83b47fb 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3288,7 +3288,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (match (unsigned_integer_sat_sub @0 @1) (plus (max @0 INTEGER_CST@1) INTEGER_CST@2) (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type) - && types_match (type, @1)) + && types_match (type, @1) && int_fits_type_p (@1, type)) (with { unsigned precision = TYPE_PRECISION (type); diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 187c94795f7..a8672f66322 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -298,6 +298,20 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ return IMM > y ? IMM - y : 0; \ } +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_3 (T x)\ +{ \ + return x >= IMM ? x - IMM : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_4(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\ +{ \ + return x > IMM ? 
x - IMM : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c new file mode 100644 index 000..1b193bcfb26 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (6, uint32_t, -62l) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (7, uint32_t, 6294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */ diff --git a/gcc/testsuite/gcc.target/ris
[gcc(refs/users/meissner/heads/work177-vpair)] Tweak vector-pair.h.
https://gcc.gnu.org/g:9a08dcf8a5e0df45c387a43a65824341af478f65 commit 9a08dcf8a5e0df45c387a43a65824341af478f65 Author: Michael Meissner Date: Thu Sep 5 23:00:29 2024 -0400 Tweak vector-pair.h. 2024-09-04 Michael Meissner * config/rs6000/vector-pair.h: Add fixes. Diff: --- gcc/config/rs6000/vector-pair.h | 32 ++-- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h index ebd1df9a532..ad42e246c08 100644 --- a/gcc/config/rs6000/vector-pair.h +++ b/gcc/config/rs6000/vector-pair.h @@ -67,11 +67,23 @@ #undef vpair_f32_sqrt #undef vpair_f32_sub +#if !__VPAIR_BUILTIN__ && !__VPAIR_ASM__ && !__VPAIR_NOP10__ +#if __MMA__ && __VPAIR__ +#define __VPAIR_BUILTIN__ 1 + +#elif __MMA__ +#define __VPAIR_ASM__ 1 + +#else +#define __VPAIR_NOP10__1 +#endif +#endif + /* Do we have MMA support and the vector pair built-in function? */ -#if __MMA__ && __VPAIR__ && !__NO_VPAIR_BUILTIN__ +#if __VPAIR_BUILTIN__ #define vector_pair_t __vector_pair -#define vector_pair_d64_t __vector_pair -#define vector_pair_d32_t __vector_pair +#define vector_pair_f64_t __vector_pair +#define vector_pair_f32_t __vector_pair /* vector pair double operations on power10. */ #define vpair_f64_splat(R, A) (*R) = __builtin_vpair_f64_splat (A) @@ -117,10 +129,10 @@ /* Do we have the __vector_pair type available, but we don't have the built-in functions? */ -#elif __MMA__ && !__NO_VPAIR_ASM__ +#elif __VPAIR_ASM__ #define vector_pair_t __vector_pair -#define vector_pair_d64_t __vector_pair -#define vector_pair_d32_t __vector_pair +#define vector_pair_f64_t __vector_pair +#define vector_pair_f32_t __vector_pair #undef __VPAIR_FP_UNARY_ASM #define __VPAIR_FP_UNARY_ASM(OPCODE, R, A) \ @@ -189,7 +201,7 @@ #define vpair_f32_nfms(R,A,B,C)__VPAIR_FP_FMA_ASM ("xvnmsubasp", R, A, B, C) -#else /* !__MMA__. */ +#else /* !__VPAIR_BUILTIN__ && !__VPAIR_ASM__. 
*/ #ifndef __VECTOR_PAIR_UNION__ #define __VECTOR_PAIR_UNION__ 1 @@ -205,8 +217,8 @@ union vpair_union { #endif /* __VECTOR_PAIR_UNION__. */ #define vector_pair_t union vpair_union -#define vector_pair_d64_t union vpair_union -#define vector_pair_d32_t union vpair_union +#define vector_pair_f64_t union vpair_union +#define vector_pair_f32_t union vpair_union /* vector pair double operations on power8/power9. */ #define vpair_f64_splat(R, A) \ @@ -595,6 +607,6 @@ union vpair_union { } \ while (0) -#endif /* __MMA__. */ +#endif /* !__VPAIR_BUILTIN__ && !__VPAIR_ASM__. */ #endif /* _VECTOR_PAIR_H. */
[gcc(refs/users/meissner/heads/work177-vpair)] Update ChangeLog.*
https://gcc.gnu.org/g:ca41b40970735d9e50b8719702e1a966c66df5c5 commit ca41b40970735d9e50b8719702e1a966c66df5c5 Author: Michael Meissner Date: Thu Sep 5 23:02:14 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.vpair | 8 1 file changed, 8 insertions(+) diff --git a/gcc/ChangeLog.vpair b/gcc/ChangeLog.vpair index 99b7da2d2a0..b10bf854938 100644 --- a/gcc/ChangeLog.vpair +++ b/gcc/ChangeLog.vpair @@ -1,3 +1,11 @@ + Branch work177-vpair, patch #406 + +Tweak vector-pair.h. + +2024-09-04 Michael Meissner + + * config/rs6000/vector-pair.h: Add fixes. + Branch work177-vpair, patch #405 Rewrite vector-pair.h
[gcc r15-3504] [PATCH 1/2 v2] RISC-V: Additional large constant synthesis improvements
https://gcc.gnu.org/g:ecdb9f59d0915f154a4c8fa56e11d81479f535eb commit r15-3504-gecdb9f59d0915f154a4c8fa56e11d81479f535eb Author: Raphael Moreira Zinsly Date: Thu Sep 5 21:50:54 2024 -0600 [PATCH 1/2 v2] RISC-V: Additional large constant synthesis improvements Changes since v1: - Fix bit31. - Remove negative shift checks. - Fix synthesis-7.c expected output. -- >8 -- Improve handling of large constants in riscv_build_integer, generate better code for constants where the high half can be constructed by shifting/shiftNadding the low half or if the halves differ by less than 2k. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_build_integer): Detect new case of constants that can be improved. (riscv_move_integer): Add synthesis for concatenating constants without Zbkb. gcc/testsuite/ChangeLog: * gcc.target/riscv/synthesis-7.c: Adjust expected output. * gcc.target/riscv/synthesis-12.c: New test. * gcc.target/riscv/synthesis-13.c: New test. * gcc.target/riscv/synthesis-14.c: New test. Diff: --- gcc/config/riscv/riscv.cc | 138 -- gcc/testsuite/gcc.target/riscv/synthesis-12.c | 26 + gcc/testsuite/gcc.target/riscv/synthesis-13.c | 26 + gcc/testsuite/gcc.target/riscv/synthesis-14.c | 28 ++ gcc/testsuite/gcc.target/riscv/synthesis-7.c | 2 +- 5 files changed, 213 insertions(+), 7 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 39489c4377e..064ffa4ea06 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1231,6 +1231,122 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value, } } + else if (cost > 4 && TARGET_64BIT && can_create_pseudo_p () + && allow_new_pseudos) +{ + struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS]; + int alt_cost; + + unsigned HOST_WIDE_INT loval = value & 0xffffffff; + unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32; + bool bit31 = (loval & 0x80000000) != 0; + int trailing_shift = ctz_hwi (loval) - ctz_hwi (hival); + int leading_shift = clz_hwi (loval) - clz_hwi (hival); + int 
shiftval = 0; + + /* Adjust the shift into the high half accordingly. */ + if ((trailing_shift > 0 && hival == (loval >> trailing_shift))) + shiftval = 32 - trailing_shift; + else if ((leading_shift > 0 && hival == (loval << leading_shift))) + shiftval = 32 + leading_shift; + + if (shiftval && !bit31) + alt_cost = 2 + riscv_build_integer_1 (alt_codes, sext_hwi (loval, 32), + mode); + + /* For constants where the upper half is a shift of the lower half we +can do a shift followed by an or. */ + if (shiftval && !bit31 && alt_cost < cost) + { + /* We need to save the first constant we build. */ + alt_codes[alt_cost - 3].save_temporary = true; + + /* Now we want to shift the previously generated constant into the +high half. */ + alt_codes[alt_cost - 2].code = ASHIFT; + alt_codes[alt_cost - 2].value = shiftval; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + + /* And the final step, IOR the two halves together. Since this uses +the saved temporary, use CONCAT similar to what we do for Zbkb. */ + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + alt_codes[alt_cost - 1].save_temporary = false; + + memcpy (codes, alt_codes, sizeof (alt_codes)); + cost = alt_cost; + } + + if (cost > 4 && !bit31 && TARGET_ZBA) + { + int value = 0; + + /* Check for a shNadd. */ + if (hival == loval * 3) + value = 3; + else if (hival == loval * 5) + value = 5; + else if (hival == loval * 9) + value = 9; + + if (value) + alt_cost = 2 + riscv_build_integer_1 (alt_codes, + sext_hwi (loval, 32), mode); + + /* For constants where the upper half is a shNadd of the lower half +we can do a similar transformation. 
*/ + if (value && alt_cost < cost) + { + alt_codes[alt_cost - 3].save_temporary = true; + alt_codes[alt_cost - 2].code = FMA; + alt_codes[alt_cost - 2].value = value; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + al
[gcc r15-3505] [PATCH 2/2 v2] RISC-V: Constant synthesis of inverted halves
https://gcc.gnu.org/g:1dd175a0ccdd0ff4e7cb6668164a4fe99e47015d commit r15-3505-g1dd175a0ccdd0ff4e7cb6668164a4fe99e47015d Author: Raphael Moreira Zinsly Date: Thu Sep 5 22:14:32 2024 -0600 [PATCH 2/2 v2] RISC-V: Constant synthesis of inverted halves Changes since v1: - Fix synthesis-15.c. -- >8 -- Improve handling of constants where the high half can be constructed by inverting the lower half. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_build_integer): Detect constants were the higher half is the lower half inverted. gcc/testsuite/ChangeLog: * gcc.target/riscv/synthesis-15.c: New test. Diff: --- gcc/config/riscv/riscv.cc | 30 +++ gcc/testsuite/gcc.target/riscv/synthesis-15.c | 26 +++ 2 files changed, 56 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 064ffa4ea06..6efe14ff199 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1341,6 +1341,36 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value, alt_codes[alt_cost - 1].use_uw = false; alt_codes[alt_cost - 1].save_temporary = false; + memcpy (codes, alt_codes, sizeof (alt_codes)); + cost = alt_cost; + } + } + } + + if (cost > 5 && !bit31) + { + /* For constants where the upper half is the lower half inverted we can flip +it with an xor and do a shift 32 followed by an or. 
*/ + if (hival == (~loval & 0xffffffff)) + { + alt_cost = 3 + riscv_build_integer_1 (alt_codes, + sext_hwi (loval, 32), mode); + if (alt_cost < cost) + { + alt_codes[alt_cost - 4].save_temporary = true; + alt_codes[alt_cost - 3].code = XOR; + alt_codes[alt_cost - 3].value = -1; + alt_codes[alt_cost - 3].use_uw = false; + alt_codes[alt_cost - 3].save_temporary = false; + alt_codes[alt_cost - 2].code = ASHIFT; + alt_codes[alt_cost - 2].value = 32; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + alt_codes[alt_cost - 1].save_temporary = false; + memcpy (codes, alt_codes, sizeof (alt_codes)); cost = alt_cost; } diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-15.c b/gcc/testsuite/gcc.target/riscv/synthesis-15.c new file mode 100644 index 000..dc1f8b70194 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/synthesis-15.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv64 } */ +/* We aggressively skip as we really just need to test the basic synthesis + which shouldn't vary based on the optimization level. -O1 seems to work + and eliminates the usual sources of extraneous dead code that would throw + off the counts. */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */ +/* { dg-options "-march=rv64gc" } */ + +/* Rather than test for a specific synthesis of all these constants or + having thousands of tests each testing one variant, we just test the + total number of instructions. + + This isn't expected to change much and any change is worthy of a look. 
*/ +/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 60 } } */ + +unsigned long foo_0xcafe605f35019fa0(void) { return 0xcafe605f35019fa0UL; } +unsigned long foo_0x87a80d217857f2de(void) { return 0x87a80d217857f2deUL; } +unsigned long foo_0x6699f19c19660e63(void) { return 0xe699f19c19660e63UL; } +unsigned long foo_0xec80e48a137f1b75(void) { return 0xec80e48a137f1b75UL; } +unsigned long foo_0xc7d7193e3828e6c1(void) { return 0xc7d7193e3828e6c1UL; } +unsigned long foo_0xfc627816039d87e9(void) { return 0xfc627816039d87e9UL; } +unsigned long foo_0xbd69e83e429617c1(void) { return 0xbd69e83e429617c1UL; } +unsigned long foo_0xdbee7ee624118119(void) { return 0xdbee7ee624118119UL; } +unsigned long foo_0xf3fe20820c01df7d(void) { return 0xf3fe20820c01df7dUL; } +unsigned long foo_0x8f1dc29470e23d6b(void) { return 0x8f1dc29470e23d6bUL; }
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH] RISC-V: Make the setCC/REE tests robust to instruction selection
https://gcc.gnu.org/g:ba4e0f2174b7b807e37255f5b6e5f2572338afc5 commit ba4e0f2174b7b807e37255f5b6e5f2572338afc5 Author: Palmer Dabbelt Date: Wed Sep 4 21:34:31 2024 -0600 [PATCH] RISC-V: Make the setCC/REE tests robust to instruction selection These tests were checking that the output of the setCC instruction was bit flipped, but it looks like they're really designed to test that redundant sign extension elimination fires on conditionals from function inputs. Jeff just posted a patch to clean this code up which trips up on the arbitrary xori/snez instruction selection decision changing, so let's just robustify the tests. gcc/testsuite/ChangeLog: * gcc.target/riscv/sge.c: Adjust regex to match the input. * gcc.target/riscv/sgeu.c: Likewise. * gcc.target/riscv/sle.c: Likewise. * gcc.target/riscv/sleu.c: Likewise. (cherry picked from commit de3ca363811a3974e4398ecdb1db2274efd61a1c) Diff: --- gcc/testsuite/gcc.target/riscv/sge.c | 2 +- gcc/testsuite/gcc.target/riscv/sgeu.c | 2 +- gcc/testsuite/gcc.target/riscv/sle.c | 2 +- gcc/testsuite/gcc.target/riscv/sleu.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gcc/testsuite/gcc.target/riscv/sge.c b/gcc/testsuite/gcc.target/riscv/sge.c index 5f7e7ae82db..70f934c4d0f 100644 --- a/gcc/testsuite/gcc.target/riscv/sge.c +++ b/gcc/testsuite/gcc.target/riscv/sge.c @@ -8,5 +8,5 @@ sge (int x, int y) return x >= y; } -/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */ +/* { dg-final { scan-assembler "slt\\sa0,a0,a1" } } */ /* { dg-final { scan-assembler-not "andi|sext\\.w" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sgeu.c b/gcc/testsuite/gcc.target/riscv/sgeu.c index 234b9aa52bd..0ff21cfe5e0 100644 --- a/gcc/testsuite/gcc.target/riscv/sgeu.c +++ b/gcc/testsuite/gcc.target/riscv/sgeu.c @@ -8,5 +8,5 @@ sgeu (unsigned int x, unsigned int y) return x >= y; } -/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */ +/* { dg-final { scan-assembler "sltu\\sa0,a0,a1" } } */ /* { 
dg-final { scan-assembler-not "andi|sext\\.w" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sle.c b/gcc/testsuite/gcc.target/riscv/sle.c index 3259c191598..770840d0564 100644 --- a/gcc/testsuite/gcc.target/riscv/sle.c +++ b/gcc/testsuite/gcc.target/riscv/sle.c @@ -8,5 +8,5 @@ sle (int x, int y) return x <= y; } -/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */ +/* { dg-final { scan-assembler "sgt\\sa0,a0,a1" } } */ /* { dg-final { scan-assembler-not "andi|sext\\.w" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sleu.c b/gcc/testsuite/gcc.target/riscv/sleu.c index 301b8c32eb7..ae00ccc2067 100644 --- a/gcc/testsuite/gcc.target/riscv/sleu.c +++ b/gcc/testsuite/gcc.target/riscv/sleu.c @@ -8,5 +8,5 @@ sleu (unsigned int x, unsigned int y) return x <= y; } -/* { dg-final { scan-assembler "\\sxori\\sa0,a0,1\n\\sret\n" } } */ +/* { dg-final { scan-assembler "sgtu\\sa0,a0,a1"} } */ /* { dg-final { scan-assembler-not "andi|sext\\.w" } } */
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Lookup reversely in riscv_select_multilib_by_abi
https://gcc.gnu.org/g:e3dcc815e3d72d6e63503e6c72a70576c5108a6a commit e3dcc815e3d72d6e63503e6c72a70576c5108a6a Author: YunQiang Su Date: Thu Sep 5 15:14:43 2024 +0800 RISC-V: Lookup reversely in riscv_select_multilib_by_abi When using --print-multi-os-dir or -print-multi-directory, gcc outputs a different value for a full -march option than for the base one alone. $ ./gcc/xgcc --print-multi-os-dir -mabi=lp64d -march=rv64gc lib64/lp64d $ ./gcc/xgcc --print-multi-os-dir -mabi=lp64d -march=rv64gc_zba . The reason is that in multilib.h, the fallback value of multilib is listed as the 1st one in `multilib_raw[]`. gcc * common/config/riscv/riscv-common.cc(riscv_select_multilib_by_abi): look up reversely as the fallback path is listed as the 1st one. (cherry picked from commit 3162abfb5098934e6ed9d4307a86a84d28823612) Diff: --- gcc/common/config/riscv/riscv-common.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 62c6e1dab1f..2c1ce7fc7cb 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -2079,7 +2079,7 @@ riscv_select_multilib_by_abi ( const std::string &riscv_current_abi_str, const std::vector &multilib_infos) { - for (size_t i = 0; i < multilib_infos.size (); ++i) + for (ssize_t i = multilib_infos.size (); i >= 0; --i) if (riscv_current_abi_str == multilib_infos[i].abi_str) return xstrdup (multilib_infos[i].path.c_str ());
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [V2][RISC-V] Avoid unnecessary extensions after sCC insns
https://gcc.gnu.org/g:2cc7dc042140083dee3a9459fd79e1eb0b4c1a71 commit 2cc7dc042140083dee3a9459fd79e1eb0b4c1a71 Author: Jeff Law Date: Thu Sep 5 15:45:25 2024 -0600 [V2][RISC-V] Avoid unnecessary extensions after sCC insns So the first patch failed the pre-commit CI; it didn't fail in my testing because I'm using --with-arch to set a default configuration that includes things like zicond to ensure that's always tested. And the failing test is skipped when zicond is enabled by default. The failing test is designed to ensure that we don't miss an if-conversion due to costing issues around the extension that was typically done in an sCC sequence (which is why it's only run when zicond is off). The test failed because we have a little routine that is highly dependent on the code generated by the sCC expander and will adjust the costing to account for expansion quirks that usually go away in register allocation. That code needs to be enhanced to work after the sCC expansion change. Essentially it needs to account for the subreg extraction that shows up in the sequence as well as being a bit looser on mode checking. I kept the code working for the old sequences -- in theory a user could conjure up the old sequence so handling them seems useful. This also drops the testsuite changes. Palmer's change makes them unnecessary. --- So I was looking at a performance regression in spec with Ventana's internal tree. Ultimately the problem was a bad interaction with an internal patch (REP_MODE_EXTENDED), fwprop and ext-dce. The details of that problem aren't particularly important. Removal of the local patch went reasonably well. But I did see some secondary cases where we had redundant sign extensions. The most notable cases come from the integer sCC insns. Expansion of those cases for rv64 can be improved using Jivan's trick. ie, if the target is not DImode, then create a DImode temporary for the result and copy the low bits out with a promoted subreg to the real target. 
With the change in expansion the final code we generate is slightly different for a few tests at -O1/-Og, but should perform the same. The key for the affected tests is we're not seeing the introduction of unnecessary extensions. Rather than adjust the regexps to handle the -O1/-Og output, skipping for those seemed OK to me. I didn't extract a testcase. I'm a bit fried from digging through LTO'd code right now. gcc/ * config/riscv/riscv.cc (riscv_expand_int_scc): For rv64, use a DI temporary for the output and a promoted subreg to extract it into SI target. (riscv_noce_conversion_profitable_p): Recognize new output from sCC expansion too. (cherry picked from commit b567e5ead5d54f022c57b48f31653f6ae6ece007) Diff: --- gcc/config/riscv/riscv.cc | 46 +- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 74de8c7f3d7..e2e2d2db342 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -4217,11 +4217,29 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq, riscv_if_info.original_cost += COSTS_N_INSNS (1); riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); } - last_dest = NULL_RTX; + rtx dest = SET_DEST (x); - if (COMPARISON_P (src) + + /* Do something similar for the moves that are likely to +turn into NOP moves by the time the register allocator is +done. These are also side effects of how our sCC expanders +work. We'll want to check and update LAST_DEST here too. 
*/ + if (last_dest && REG_P (dest) - && GET_MODE (dest) == SImode) + && GET_MODE (dest) == SImode + && SUBREG_P (src) + && SUBREG_PROMOTED_VAR_P (src) + && REGNO (SUBREG_REG (src)) == REGNO (last_dest)) + { + riscv_if_info.original_cost += COSTS_N_INSNS (1); + riscv_if_info.max_seq_cost += COSTS_N_INSNS (1); + if (last_dest) + last_dest = dest; + } + else + last_dest = NULL_RTX; + + if (COMPARISON_P (src) && REG_P (dest)) last_dest = dest; } else @@ -4903,13 +4921,31 @@ riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *in riscv_extend_comparands (code, &op0, &op1); op0 = force_reg (word_mode, op0); + /* For sub-word targets on rv64, do the computation in DImode + then extract the lowpart for the final target, marking it + as sign extended. Note that it's also properly zero extended, + but it's probably more profitable
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix out of index in riscv_select_multilib_by_abi
https://gcc.gnu.org/g:36acd0cc9bbbab875238a7d186577f19964d471f commit 36acd0cc9bbbab875238a7d186577f19964d471f Author: YunQiang Su Date: Thu Sep 5 19:55:20 2024 +0800 RISC-V: Fix out of index in riscv_select_multilib_by_abi commit b5c2aae48723c9098a8a3dab1409b30fd87bbf56 Author: YunQiang Su Date: Thu Sep 5 15:14:43 2024 +0800 RISC-V: Lookup reversely in riscv_select_multilib_by_abi The last element should use index multilib_infos.size () - 1 gcc * common/config/riscv/riscv-common.cc(riscv_select_multilib_by_abi): Fix out of index problem. (cherry picked from commit ead5f587dad3206e45db7ac31f5c34c1530ae529) Diff: --- gcc/common/config/riscv/riscv-common.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 2c1ce7fc7cb..bd42fd01532 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -2079,7 +2079,7 @@ riscv_select_multilib_by_abi ( const std::string &riscv_current_abi_str, const std::vector &multilib_infos) { - for (ssize_t i = multilib_infos.size (); i >= 0; --i) + for (ssize_t i = multilib_infos.size () - 1; i >= 0; --i) if (riscv_current_abi_str == multilib_infos[i].abi_str) return xstrdup (multilib_infos[i].path.c_str ());
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Partial -- just the testsuite changes.
https://gcc.gnu.org/g:15880b511b5e04c9e47f07012fe95b43ffc6ae9b commit 15880b511b5e04c9e47f07012fe95b43ffc6ae9b Author: Pan Li Date: Mon Sep 2 09:48:46 2024 +0800 Partial -- just the testsuite changes. Match: Add int type fits check for form 1 of .SAT_SUB imm operand This patch adds a strict check for the imm operand of .SAT_SUB matching. Previously there was no type checking for the imm operand, which could result in unexpected IL being caught by the .SAT_SUB pattern. We leverage int_fits_type_p here to make sure the imm operand is an int type that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (12, a); uint8_t sum = .SAT_SUB (12u, a); uint8_t sum = .SAT_SUB (126u, a); uint8_t sum = .SAT_SUB (128u, a); uint8_t sum = .SAT_SUB (228, a); uint8_t sum = .SAT_SUB (223u, a); Does not fit uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (-1, a); uint8_t sum = .SAT_SUB (256u, a); uint8_t sum = .SAT_SUB (257, a); The test suites below pass with this patch: * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-53.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-54.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-55.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-56.c: New test. 
Signed-off-by: Pan Li (cherry picked from commit 019335b404c8d7fb2d234bb179745cc28693dd20) Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-53.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-54.c | 27 ++ .../gcc.target/riscv/sat_u_add_imm_type_check-55.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-56.c | 27 ++ 5 files changed, 104 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 86cd6bea8df..187c94795f7 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -284,6 +284,20 @@ sat_u_sub_imm##IMM##_##T##_fmt_4 (T x) \ #define RUN_SAT_U_SUB_IMM_FMT_4(T, x, IMM, expect) \ if (sat_u_sub_imm##IMM##_##T##_fmt_4(x) != expect) __builtin_abort () +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_1 (T y)\ +{ \ + return IMM >= y ? IMM - y : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_2(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ +{ \ + return IMM > y ? 
IMM - y : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c new file mode 100644 index 000..c959eeb0d86 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-53.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (6, uint32_t, -62) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (7, uint32_t, 4294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_1 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c new file mode 100644 index 000..abc19e22be4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-54.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Partial -- just the testsuite changes.
https://gcc.gnu.org/g:00d5bc4c546e9f256a8980de53265033fcac4bbb commit 00d5bc4c546e9f256a8980de53265033fcac4bbb Author: Pan Li Date: Mon Sep 2 11:33:08 2024 +0800 Partial -- just the testsuite changes. Match: Add int type fits check for form 2 of .SAT_SUB imm operand This patch adds a strict check for the imm operand of .SAT_SUB matching. Previously there was no type checking for the imm operand, which could result in unexpected IL being caught by the .SAT_SUB pattern. We leverage int_fits_type_p here to make sure the imm operand is an int type that fits the result type of the .SAT_SUB. For example: Fits uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, 12); uint8_t sum = .SAT_SUB (a, 12u); uint8_t sum = .SAT_SUB (a, 126u); uint8_t sum = .SAT_SUB (a, 128u); uint8_t sum = .SAT_SUB (a, 228); uint8_t sum = .SAT_SUB (a, 223u); Does not fit uint8_t: uint8_t a; uint8_t sum = .SAT_SUB (a, -1); uint8_t sum = .SAT_SUB (a, 256u); uint8_t sum = .SAT_SUB (a, 257); The test suites below pass with this patch: * The rv64gcv full regression test. * The x86 bootstrap test. * The x86 full regression test. gcc/ChangeLog: * match.pd: Add int_fits_type_p check for .SAT_SUB imm operand. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macros. * gcc.target/riscv/sat_u_add_imm_type_check-57.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-58.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-59.c: New test. * gcc.target/riscv/sat_u_add_imm_type_check-60.c: New test. 
Signed-off-by: Pan Li (cherry picked from commit a2e28b105cea4c44c3903d8d979c7a4afa1193f0) Diff: --- gcc/testsuite/gcc.target/riscv/sat_arith.h | 14 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-57.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-58.c | 27 ++ .../gcc.target/riscv/sat_u_add_imm_type_check-59.c | 18 +++ .../gcc.target/riscv/sat_u_add_imm_type_check-60.c | 27 ++ 5 files changed, 104 insertions(+) diff --git a/gcc/testsuite/gcc.target/riscv/sat_arith.h b/gcc/testsuite/gcc.target/riscv/sat_arith.h index 187c94795f7..a8672f66322 100644 --- a/gcc/testsuite/gcc.target/riscv/sat_arith.h +++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h @@ -298,6 +298,20 @@ sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_2 (T y)\ return IMM > y ? IMM - y : 0; \ } +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_3 (T x)\ +{ \ + return x >= IMM ? x - IMM : 0; \ +} + +#define DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_4(INDEX, T, IMM) \ +T __attribute__((noinline)) \ +sat_u_sub_imm_type_check##_##INDEX##_##T##_fmt_4 (T x)\ +{ \ + return x > IMM ? 
x - IMM : 0; \ +} + /**/ /* Saturation Truncate (unsigned and signed) */ /**/ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c new file mode 100644 index 000..1b193bcfb26 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-57.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, -43) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (1, uint8_t, 269) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (2, uint8_t, 369u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (3, uint16_t, -4) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (4, uint16_t, 65579) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (5, uint16_t, 65679u) + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (6, uint32_t, -62l) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (7, uint32_t, 6294967342ll) +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (8, uint32_t, 4394967342ull) + +/* { dg-final { scan-rtl-dump-not ".SAT_ADD " "expand" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c new file mode 100644 index 000..b7dc71b42ee --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat_u_add_imm_type_check-58.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details" } */ + +#include "sat_arith.h" + +DEF_SAT_U_SUB_IMM_TYPE_CHECK_FMT_3 (0, uint8_t, 123u) +DE
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 1/2 v2] RISC-V: Additional large constant synthesis improvements
https://gcc.gnu.org/g:c9993e022343a20b9293d2852aa50ab7d90b1c06 commit c9993e022343a20b9293d2852aa50ab7d90b1c06 Author: Raphael Moreira Zinsly Date: Thu Sep 5 21:50:54 2024 -0600 [PATCH 1/2 v2] RISC-V: Additional large constant synthesis improvements Changes since v1: - Fix bit31. - Remove negative shift checks. - Fix synthesis-7.c expected output. -- >8 -- Improve handling of large constants in riscv_build_integer, generate better code for constants where the high half can be constructed by shifting/shiftNadding the low half or if the halves differ by less than 2k. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_build_integer): Detect new case of constants that can be improved. (riscv_move_integer): Add synthesis for concatenating constants without Zbkb. gcc/testsuite/ChangeLog: * gcc.target/riscv/synthesis-7.c: Adjust expected output. * gcc.target/riscv/synthesis-12.c: New test. * gcc.target/riscv/synthesis-13.c: New test. * gcc.target/riscv/synthesis-14.c: New test. (cherry picked from commit ecdb9f59d0915f154a4c8fa56e11d81479f535eb) Diff: --- gcc/config/riscv/riscv.cc | 138 -- gcc/testsuite/gcc.target/riscv/synthesis-12.c | 26 + gcc/testsuite/gcc.target/riscv/synthesis-13.c | 26 + gcc/testsuite/gcc.target/riscv/synthesis-14.c | 28 ++ gcc/testsuite/gcc.target/riscv/synthesis-7.c | 2 +- 5 files changed, 213 insertions(+), 7 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e2e2d2db342..f7482c4f5cc 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1230,6 +1230,122 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value, } } + else if (cost > 4 && TARGET_64BIT && can_create_pseudo_p () + && allow_new_pseudos) +{ + struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS]; + int alt_cost; + + unsigned HOST_WIDE_INT loval = value & 0xffffffff; + unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32; + bool bit31 = (loval & 0x80000000) != 0; + int trailing_shift = ctz_hwi (loval) - ctz_hwi (hival); + int 
leading_shift = clz_hwi (loval) - clz_hwi (hival); + int shiftval = 0; + + /* Adjust the shift into the high half accordingly. */ + if ((trailing_shift > 0 && hival == (loval >> trailing_shift))) + shiftval = 32 - trailing_shift; + else if ((leading_shift > 0 && hival == (loval << leading_shift))) + shiftval = 32 + leading_shift; + + if (shiftval && !bit31) + alt_cost = 2 + riscv_build_integer_1 (alt_codes, sext_hwi (loval, 32), + mode); + + /* For constants where the upper half is a shift of the lower half we +can do a shift followed by an or. */ + if (shiftval && !bit31 && alt_cost < cost) + { + /* We need to save the first constant we build. */ + alt_codes[alt_cost - 3].save_temporary = true; + + /* Now we want to shift the previously generated constant into the +high half. */ + alt_codes[alt_cost - 2].code = ASHIFT; + alt_codes[alt_cost - 2].value = shiftval; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + + /* And the final step, IOR the two halves together. Since this uses +the saved temporary, use CONCAT similar to what we do for Zbkb. */ + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + alt_codes[alt_cost - 1].save_temporary = false; + + memcpy (codes, alt_codes, sizeof (alt_codes)); + cost = alt_cost; + } + + if (cost > 4 && !bit31 && TARGET_ZBA) + { + int value = 0; + + /* Check for a shNadd. */ + if (hival == loval * 3) + value = 3; + else if (hival == loval * 5) + value = 5; + else if (hival == loval * 9) + value = 9; + + if (value) + alt_cost = 2 + riscv_build_integer_1 (alt_codes, + sext_hwi (loval, 32), mode); + + /* For constants where the upper half is a shNadd of the lower half +we can do a similar transformation. 
*/ + if (value && alt_cost < cost) + { + alt_codes[alt_cost - 3].save_temporary = true; + alt_codes[alt_cost - 2].code = FMA; + alt_codes[alt_cost - 2].value = value; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; +
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PATCH 2/2 v2] RISC-V: Constant synthesis of inverted halves
https://gcc.gnu.org/g:43677eed7d22953c677bcfc905acb830e6d33be6 commit 43677eed7d22953c677bcfc905acb830e6d33be6 Author: Raphael Moreira Zinsly Date: Thu Sep 5 22:14:32 2024 -0600 [PATCH 2/2 v2] RISC-V: Constant synthesis of inverted halves Changes since v1: - Fix synthesis-15.c. -- >8 -- Improve handling of constants where the high half can be constructed by inverting the lower half. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_build_integer): Detect constants were the higher half is the lower half inverted. gcc/testsuite/ChangeLog: * gcc.target/riscv/synthesis-15.c: New test. (cherry picked from commit 1dd175a0ccdd0ff4e7cb6668164a4fe99e47015d) Diff: --- gcc/config/riscv/riscv.cc | 30 +++ gcc/testsuite/gcc.target/riscv/synthesis-15.c | 26 +++ 2 files changed, 56 insertions(+) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index f7482c4f5cc..5c9b92de275 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1340,6 +1340,36 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value, alt_codes[alt_cost - 1].use_uw = false; alt_codes[alt_cost - 1].save_temporary = false; + memcpy (codes, alt_codes, sizeof (alt_codes)); + cost = alt_cost; + } + } + } + + if (cost > 5 && !bit31) + { + /* For constants where the upper half is the lower half inverted we can flip +it with an xor and do a shift 32 followed by an or. 
*/ + if (hival == (~loval & 0xffffffff)) + { + alt_cost = 3 + riscv_build_integer_1 (alt_codes, + sext_hwi (loval, 32), mode); + if (alt_cost < cost) + { + alt_codes[alt_cost - 4].save_temporary = true; + alt_codes[alt_cost - 3].code = XOR; + alt_codes[alt_cost - 3].value = -1; + alt_codes[alt_cost - 3].use_uw = false; + alt_codes[alt_cost - 3].save_temporary = false; + alt_codes[alt_cost - 2].code = ASHIFT; + alt_codes[alt_cost - 2].value = 32; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + alt_codes[alt_cost - 1].save_temporary = false; + memcpy (codes, alt_codes, sizeof (alt_codes)); cost = alt_cost; } diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-15.c b/gcc/testsuite/gcc.target/riscv/synthesis-15.c new file mode 100644 index 000..dc1f8b70194 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/synthesis-15.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv64 } */ +/* We aggressively skip as we really just need to test the basic synthesis + which shouldn't vary based on the optimization level. -O1 seems to work + and eliminates the usual sources of extraneous dead code that would throw + off the counts. */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */ +/* { dg-options "-march=rv64gc" } */ + +/* Rather than test for a specific synthesis of all these constants or + having thousands of tests each testing one variant, we just test the + total number of instructions. + + This isn't expected to change much and any change is worthy of a look. 
*/ +/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 60 } } */ + +unsigned long foo_0xcafe605f35019fa0(void) { return 0xcafe605f35019fa0UL; } +unsigned long foo_0x87a80d217857f2de(void) { return 0x87a80d217857f2deUL; } +unsigned long foo_0x6699f19c19660e63(void) { return 0xe699f19c19660e63UL; } +unsigned long foo_0xec80e48a137f1b75(void) { return 0xec80e48a137f1b75UL; } +unsigned long foo_0xc7d7193e3828e6c1(void) { return 0xc7d7193e3828e6c1UL; } +unsigned long foo_0xfc627816039d87e9(void) { return 0xfc627816039d87e9UL; } +unsigned long foo_0xbd69e83e429617c1(void) { return 0xbd69e83e429617c1UL; } +unsigned long foo_0xdbee7ee624118119(void) { return 0xdbee7ee624118119UL; } +unsigned long foo_0xf3fe20820c01df7d(void) { return 0xf3fe20820c01df7dUL; } +unsigned long foo_0x8f1dc29470e23d6b(void) { return 0x8f1dc29470e23d6bUL; }