[gcc r13-8936] Fixup unaligned load/store cost for znver4
https://gcc.gnu.org/g:b35276655e6767a6e037e58edfa4738317498337 commit r13-8936-gb35276655e6767a6e037e58edfa4738317498337 Author: Richard Biener Date: Mon Jul 15 13:01:24 2024 +0200 Fixup unaligned load/store cost for znver4 Currently unaligned YMM and ZMM load and store costs are cheaper than aligned which causes the vectorizer to purposely mis-align accesses by adding an alignment prologue. It looks like the unaligned costs were simply left untouched from znver3 where they equate the aligned costs when tweaking aligned costs for znver4. The following makes the unaligned costs equal to the aligned costs. This avoids the miscompile seen in PR115843 but it's of course not a real fix for the issue uncovered there. But it makes it qualify as a regression fix. PR tree-optimization/115843 * config/i386/x86-tune-costs.h (znver4_cost): Update unaligned load and store cost from the aligned costs. (cherry picked from commit 1e3aa9c9278db69d4bdb661a750a7268789188d6) Diff: --- gcc/config/i386/x86-tune-costs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 4f7a67ca5c5e..14c5507a601f 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -1924,8 +1924,8 @@ struct processor_costs znver4_cost = { in 32bit, 64bit, 128bit, 256bit and 512bit */ {8, 8, 8, 12, 12}, /* cost of storing SSE register in 32bit, 64bit, 128bit, 256bit and 512bit */ - {6, 6, 6, 6, 6}, /* cost of unaligned loads. */ - {8, 8, 8, 8, 8}, /* cost of unaligned stores. */ + {6, 6, 10, 10, 12}, /* cost of unaligned loads. */ + {8, 8, 8, 12, 12}, /* cost of unaligned stores. */ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */
[gcc r12-10636] Fixup unaligned load/store cost for znver4
https://gcc.gnu.org/g:f78eb9524bd97679c8baa47a62e82147272719ae commit r12-10636-gf78eb9524bd97679c8baa47a62e82147272719ae Author: Richard Biener Date: Mon Jul 15 13:01:24 2024 +0200 Fixup unaligned load/store cost for znver4 Currently unaligned YMM and ZMM load and store costs are cheaper than aligned which causes the vectorizer to purposely mis-align accesses by adding an alignment prologue. It looks like the unaligned costs were simply left untouched from znver3 where they equate the aligned costs when tweaking aligned costs for znver4. The following makes the unaligned costs equal to the aligned costs. This avoids the miscompile seen in PR115843 but it's of course not a real fix for the issue uncovered there. But it makes it qualify as a regression fix. PR tree-optimization/115843 * config/i386/x86-tune-costs.h (znver4_cost): Update unaligned load and store cost from the aligned costs. (cherry picked from commit 1e3aa9c9278db69d4bdb661a750a7268789188d6) Diff: --- gcc/config/i386/x86-tune-costs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index f105d57cae79..d58827888994 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -1894,8 +1894,8 @@ struct processor_costs znver4_cost = { in 32bit, 64bit, 128bit, 256bit and 512bit */ {8, 8, 8, 12, 12}, /* cost of storing SSE register in 32bit, 64bit, 128bit, 256bit and 512bit */ - {6, 6, 6, 6, 6}, /* cost of unaligned loads. */ - {8, 8, 8, 8, 8}, /* cost of unaligned stores. */ + {6, 6, 10, 10, 12}, /* cost of unaligned loads. */ + {8, 8, 8, 12, 12}, /* cost of unaligned stores. */ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */
[gcc r15-2218] [v2] rtl-optimization/116002 - cselib hash is bad
https://gcc.gnu.org/g:44e065a52fa6069d6c8cacebc8f876840d278dd0 commit r15-2218-g44e065a52fa6069d6c8cacebc8f876840d278dd0 Author: Richard Biener Date: Fri Jul 19 16:23:51 2024 +0200 [v2] rtl-optimization/116002 - cselib hash is bad The following addresses the bad hash function of cselib which uses integer plus for merging. This causes a huge number of collisions for the testcase in the PR and thus very large compile-time. The following rewrites it to use inchash, eliding duplicate mixing of RTX code and mode in some cases and more consistently avoiding a return value of zero as well as treating zero as fatal. An important part is to preserve mixing of hashes of commutative operators as commutative. For cselib_hash_plus_const_int this removes the apparent attempt of making sure to hash the same as a PLUS as cselib_hash_rtx makes sure to dispatch to cselib_hash_plus_const_int consistently. This reduces compile-time for the testcase in the PR from unknown to 22s and for a reduced testcase from 73s to 9s. There's another pending patchset to improve the speed of inchash mixing, but it's not in the profile for this testcase (PTA pops up now). The generated code is equal. I've also compared cc1 builds with and without the patch and they are now comparing equal after retaining commutative hashing for commutative operators. PR rtl-optimization/116002 * cselib.cc (cselib_hash_rtx): Use inchash to get proper mixing. Consistently avoid a zero return value when hashing successfully. Consistently treat a zero hash value from recursing as fatal. Use hashval_t where appropriate. (cselib_hash_plus_const_int): Likewise. (new_cselib_val): Use hashval_t. (cselib_lookup_1): Likewise. 
Diff: --- gcc/cselib.cc | 224 -- 1 file changed, 122 insertions(+), 102 deletions(-) diff --git a/gcc/cselib.cc b/gcc/cselib.cc index cbaab7d515cc..7beaca424244 100644 --- a/gcc/cselib.cc +++ b/gcc/cselib.cc @@ -51,7 +51,7 @@ static void unchain_one_value (cselib_val *); static void unchain_one_elt_list (struct elt_list **); static void unchain_one_elt_loc_list (struct elt_loc_list **); static void remove_useless_values (void); -static unsigned int cselib_hash_rtx (rtx, int, machine_mode); +static hashval_t cselib_hash_rtx (rtx, int, machine_mode); static cselib_val *new_cselib_val (unsigned int, machine_mode, rtx); static void add_mem_for_addr (cselib_val *, cselib_val *, rtx); static cselib_val *cselib_lookup_mem (rtx, int); @@ -1244,7 +1244,7 @@ cselib_redundant_set_p (rtx set) /* Helper function for cselib_hash_rtx. Arguments like for cselib_hash_rtx, except that it hashes (plus:P x c). */ -static unsigned int +static hashval_t cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int create, machine_mode memmode) { @@ -1266,14 +1266,13 @@ cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int create, if (c == 0) return e->hash; - unsigned hash = (unsigned) PLUS + (unsigned) GET_MODE (x); - hash += e->hash; - unsigned int tem_hash = (unsigned) CONST_INT + (unsigned) VOIDmode; - tem_hash += ((unsigned) CONST_INT << 7) + (unsigned HOST_WIDE_INT) c; - if (tem_hash == 0) -tem_hash = (unsigned int) CONST_INT; - hash += tem_hash; - return hash ? hash : 1 + (unsigned int) PLUS; + inchash::hash hash; + hash.add_int (PLUS); + hash.add_int (GET_MODE (x)); + hash.merge_hash (e->hash); + hash.add_hwi (c); + + return hash.end () ? hash.end () : 1 + (unsigned int) PLUS; } /* Hash an rtx. Return 0 if we couldn't hash the rtx. @@ -1298,7 +1297,7 @@ cselib_hash_plus_const_int (rtx x, HOST_WIDE_INT c, int create, If the mode is important in any context, it must be checked specifically in a comparison anyway, since relying on hash differences is unsafe. 
*/ -static unsigned int +static hashval_t cselib_hash_rtx (rtx x, int create, machine_mode memmode) { cselib_val *e; @@ -1306,10 +1305,11 @@ cselib_hash_rtx (rtx x, int create, machine_mode memmode) int i, j; enum rtx_code code; const char *fmt; - unsigned int hash = 0; + inchash::hash hash; code = GET_CODE (x); - hash += (unsigned) code + (unsigned) GET_MODE (x); + hash.add_int (code); + hash.add_int (GET_MODE (x)); switch (code) { @@ -1326,19 +1326,16 @@ cselib_hash_rtx (rtx x, int create, machine_mode memmode) return e->hash; case DEBUG_EXPR: - hash += ((unsigned) DEBUG_EXPR << 7) - + DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x)); - return hash ? hash : (unsigned int) DEBUG_EXPR; + hash.add_int (DEBUG_TEMP_UID (DEBUG_EXPR_TREE_DECL (x))); + return hash.end () ? hash.end() : (unsigned int) DEBUG_EXPR; case DEBUG_IMPLICIT_PTR: - hash += ((unsigned) DEBUG_IMPLICIT_PTR <<
[gcc r15-2219] c++: Remove CHECK_CONSTR
https://gcc.gnu.org/g:58756c9f5507e5db0eaddcbaaa2de7f39c34b5d0 commit r15-2219-g58756c9f5507e5db0eaddcbaaa2de7f39c34b5d0 Author: Jakub Jelinek Date: Tue Jul 23 10:39:08 2024 +0200 c++: Remove CHECK_CONSTR On Mon, Jul 22, 2024 at 11:48:51AM -0400, Patrick Palka wrote: > FWIW this tree code seems to be a vestige of the initial Concepts TS > implementation and is effectively unused, we can remove it outright. Here is a patch which removes that. 2024-07-23 Jakub Jelinek * cp-tree.def (CHECK_CONSTR): Remove. * cp-tree.h (CHECK_CONSTR_CONCEPT, CHECK_CONSTR_ARGS): Remove. * cp-objcp-common.cc (cp_common_init_ts): Don't handle CHECK_CONSTR. * tree.cc (cp_tree_equal): Likewise. * error.cc (dump_expr): Likewise. * cxx-pretty-print.cc (cxx_pretty_printer::expression): Likewise. (pp_cxx_check_constraint): Remove. (pp_cxx_constraint): Don't handle CHECK_CONSTR. Diff: --- gcc/cp/cp-objcp-common.cc | 1 - gcc/cp/cp-tree.def | 8 gcc/cp/cp-tree.h | 8 gcc/cp/cxx-pretty-print.cc | 28 gcc/cp/error.cc| 1 - gcc/cp/tree.cc | 5 - 6 files changed, 51 deletions(-) diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc index 86e0b49d46b8..cd379514991d 100644 --- a/gcc/cp/cp-objcp-common.cc +++ b/gcc/cp/cp-objcp-common.cc @@ -701,7 +701,6 @@ cp_common_init_ts (void) MARK_TS_EXP (UNARY_RIGHT_FOLD_EXPR); /* Constraints. */ - MARK_TS_EXP (CHECK_CONSTR); MARK_TS_EXP (COMPOUND_REQ); MARK_TS_EXP (CONJ_CONSTR); MARK_TS_EXP (DISJ_CONSTR); diff --git a/gcc/cp/cp-tree.def b/gcc/cp/cp-tree.def index a0a47c3950fa..18f75108c7bd 100644 --- a/gcc/cp/cp-tree.def +++ b/gcc/cp/cp-tree.def @@ -538,14 +538,6 @@ DEFTREECODE (ATOMIC_CONSTR, "atomic_constr", tcc_expression, 1) DEFTREECODE (CONJ_CONSTR, "conj_constr", tcc_expression, 2) DEFTREECODE (DISJ_CONSTR, "disj_constr", tcc_expression, 2) -/* A check constraint represents the checking of a concept - C. It has two operands: the template defining the concept - and a sequence of template arguments. 
- - CHECK_CONSTR_CONCEPT has the concept definition - CHECK_CONSTR_ARGS are the template arguments. */ -DEFTREECODE (CHECK_CONSTR, "check_constr", tcc_expression, 2) - /* The co_await expression is used to support coroutines. Op 0 is the cast expresssion (potentially modified by the diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 609d8941cf72..76ac9c31763c 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -1723,14 +1723,6 @@ check_constraint_info (tree t) #define ATOMIC_CONSTR_EXPR(NODE) \ CONSTR_EXPR (ATOMIC_CONSTR_CHECK (NODE)) -/* The concept of a concept check. */ -#define CHECK_CONSTR_CONCEPT(NODE) \ - TREE_OPERAND (TREE_CHECK (NODE, CHECK_CONSTR), 0) - -/* The template arguments of a concept check. */ -#define CHECK_CONSTR_ARGS(NODE) \ - TREE_OPERAND (TREE_CHECK (NODE, CHECK_CONSTR), 1) - /* Whether a PARM_DECL represents a local parameter in a requires-expression. */ #define CONSTRAINT_VAR_P(NODE) \ diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc index 806aebff174b..e690354e08e9 100644 --- a/gcc/cp/cxx-pretty-print.cc +++ b/gcc/cp/cxx-pretty-print.cc @@ -1257,7 +1257,6 @@ cxx_pretty_printer::expression (tree t) break; case ATOMIC_CONSTR: -case CHECK_CONSTR: case CONJ_CONSTR: case DISJ_CONSTR: pp_cxx_constraint (this, t); @@ -2815,29 +2814,6 @@ pp_cxx_nested_requirement (cxx_pretty_printer *pp, tree t) pp_cxx_semicolon (pp); } -void -pp_cxx_check_constraint (cxx_pretty_printer *pp, tree t) -{ - tree decl = CHECK_CONSTR_CONCEPT (t); - tree tmpl = DECL_TI_TEMPLATE (decl); - tree args = CHECK_CONSTR_ARGS (t); - tree id = build_nt (TEMPLATE_ID_EXPR, tmpl, args); - - if (TREE_CODE (decl) == CONCEPT_DECL) -pp->expression (id); - else if (VAR_P (decl)) -pp->expression (id); - else if (TREE_CODE (decl) == FUNCTION_DECL) -{ - tree call = build_vl_exp (CALL_EXPR, 2); - TREE_OPERAND (call, 0) = integer_two_node; - TREE_OPERAND (call, 1) = id; - pp->expression (call); -} - else -gcc_unreachable (); -} - /* Output the "[with ...]" clause 
for a parameter mapping of an atomic constraint. */ @@ -2917,10 +2893,6 @@ pp_cxx_constraint (cxx_pretty_printer *pp, tree t) pp_cxx_atomic_constraint (pp, t); break; -case CHECK_CONSTR: - pp_cxx_check_constraint (pp, t); - break; - case CONJ_CONSTR: pp_cxx_conjunction (pp, t); break; diff --git a/gcc/cp/error.cc b/gcc/cp/error.cc index 6d99cb277038..d80bac822ba2 100644 --- a/gcc/cp/error.cc +++ b/gcc/cp/error.cc @@ -3095,7 +3095,6 @@ dump_expr (cxx_pretty_printer *pp, tree t, int flags) break; case ATOMIC_CONSTR: -case CHECK_CONSTR: case CONJ_CONSTR: case DISJ
[gcc r15-2220] ssa: Fix up maybe_rewrite_mem_ref_base complex type handling [PR116034]
https://gcc.gnu.org/g:b9cefd67a2a464a3c9413e6b3f28e7dc7a9ef162 commit r15-2220-gb9cefd67a2a464a3c9413e6b3f28e7dc7a9ef162 Author: Jakub Jelinek Date: Tue Jul 23 10:50:29 2024 +0200 ssa: Fix up maybe_rewrite_mem_ref_base complex type handling [PR116034] The folding into REALPART_EXPR is correct, used only when the mem_offset is zero, but for IMAGPART_EXPR it didn't check the exact offset value (just that it is not 0). The following patch fixes that by using IMAGPART_EXPR only if the offset is right and using BITFIELD_REF or whatever else otherwise. 2024-07-23 Jakub Jelinek Andrew Pinski PR tree-optimization/116034 * tree-ssa.cc (maybe_rewrite_mem_ref_base): Only use IMAGPART_EXPR if MEM_REF offset is equal to element type size. * gcc.dg/pr116034.c: New test. Diff: --- gcc/testsuite/gcc.dg/pr116034.c | 22 ++ gcc/tree-ssa.cc | 5 - 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/pr116034.c b/gcc/testsuite/gcc.dg/pr116034.c new file mode 100644 index ..9a31de034246 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr116034.c @@ -0,0 +1,22 @@ +/* PR tree-optimization/116034 */ +/* { dg-do run } */ +/* { dg-options "-O1 -fno-strict-aliasing" } */ + +int g; + +static inline int +foo (_Complex unsigned short c) +{ + __builtin_memmove (&g, 1 + (char *) &c, 2); + return g; +} + +int +main () +{ + if (__SIZEOF_SHORT__ == 2 + && __CHAR_BIT__ == 8 + && (foo (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ ? 0x100 : 1) + != (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ ? 
1 : 0x100))) +__builtin_abort (); +} diff --git a/gcc/tree-ssa.cc b/gcc/tree-ssa.cc index 27ab9cfac823..f4fa4e98c5d0 100644 --- a/gcc/tree-ssa.cc +++ b/gcc/tree-ssa.cc @@ -1506,7 +1506,10 @@ maybe_rewrite_mem_ref_base (tree *tp, bitmap suitable_for_renaming) } else if (TREE_CODE (TREE_TYPE (sym)) == COMPLEX_TYPE && useless_type_conversion_p (TREE_TYPE (*tp), -TREE_TYPE (TREE_TYPE (sym +TREE_TYPE (TREE_TYPE (sym))) + && (integer_zerop (TREE_OPERAND (*tp, 1)) + || tree_int_cst_equal (TREE_OPERAND (*tp, 1), + TYPE_SIZE_UNIT (TREE_TYPE (*tp) { *tp = build1 (integer_zerop (TREE_OPERAND (*tp, 1)) ? REALPART_EXPR : IMAGPART_EXPR,
[gcc r14-10501] ssa: Fix up maybe_rewrite_mem_ref_base complex type handling [PR116034]
https://gcc.gnu.org/g:084768c865cd50a6f7ff177db2dbdbb7aadaeee0 commit r14-10501-g084768c865cd50a6f7ff177db2dbdbb7aadaeee0 Author: Jakub Jelinek Date: Tue Jul 23 10:50:29 2024 +0200 ssa: Fix up maybe_rewrite_mem_ref_base complex type handling [PR116034] The folding into REALPART_EXPR is correct, used only when the mem_offset is zero, but for IMAGPART_EXPR it didn't check the exact offset value (just that it is not 0). The following patch fixes that by using IMAGPART_EXPR only if the offset is right and using BITFIELD_REF or whatever else otherwise. 2024-07-23 Jakub Jelinek Andrew Pinski PR tree-optimization/116034 * tree-ssa.cc (maybe_rewrite_mem_ref_base): Only use IMAGPART_EXPR if MEM_REF offset is equal to element type size. * gcc.dg/pr116034.c: New test. (cherry picked from commit b9cefd67a2a464a3c9413e6b3f28e7dc7a9ef162) Diff: --- gcc/testsuite/gcc.dg/pr116034.c | 22 ++ gcc/tree-ssa.cc | 5 - 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/pr116034.c b/gcc/testsuite/gcc.dg/pr116034.c new file mode 100644 index ..9a31de034246 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr116034.c @@ -0,0 +1,22 @@ +/* PR tree-optimization/116034 */ +/* { dg-do run } */ +/* { dg-options "-O1 -fno-strict-aliasing" } */ + +int g; + +static inline int +foo (_Complex unsigned short c) +{ + __builtin_memmove (&g, 1 + (char *) &c, 2); + return g; +} + +int +main () +{ + if (__SIZEOF_SHORT__ == 2 + && __CHAR_BIT__ == 8 + && (foo (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ ? 0x100 : 1) + != (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ ? 
1 : 0x100))) +__builtin_abort (); +} diff --git a/gcc/tree-ssa.cc b/gcc/tree-ssa.cc index 27ab9cfac823..f4fa4e98c5d0 100644 --- a/gcc/tree-ssa.cc +++ b/gcc/tree-ssa.cc @@ -1506,7 +1506,10 @@ maybe_rewrite_mem_ref_base (tree *tp, bitmap suitable_for_renaming) } else if (TREE_CODE (TREE_TYPE (sym)) == COMPLEX_TYPE && useless_type_conversion_p (TREE_TYPE (*tp), -TREE_TYPE (TREE_TYPE (sym +TREE_TYPE (TREE_TYPE (sym))) + && (integer_zerop (TREE_OPERAND (*tp, 1)) + || tree_int_cst_equal (TREE_OPERAND (*tp, 1), + TYPE_SIZE_UNIT (TREE_TYPE (*tp) { *tp = build1 (integer_zerop (TREE_OPERAND (*tp, 1)) ? REALPART_EXPR : IMAGPART_EXPR,
[gcc r15-2221] libstdc++: Do not use isatty on avr [PR115482]
https://gcc.gnu.org/g:8439405e38c56b774cf3c65bdafae5f9e11d470a commit r15-2221-g8439405e38c56b774cf3c65bdafae5f9e11d470a Author: Detlef Vollmann Date: Tue Jul 23 09:25:22 2024 +0100 libstdc++: Do not use isatty on avr [PR115482] avrlibc has an incomplete unistd.h that doesn't have isatty. So building libstdc++ fails when compiling c++23/print.cc. As a workaround I added a check for AVR. libstdc++-v3/ChangeLog: PR libstdc++/115482 * src/c++23/print.cc (__open_terminal) [__AVR__]: Do not use isatty. Diff: --- libstdc++-v3/src/c++23/print.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc index 99a19cd45002..558dc149d125 100644 --- a/libstdc++-v3/src/c++23/print.cc +++ b/libstdc++-v3/src/c++23/print.cc @@ -75,7 +75,7 @@ namespace #ifdef _WIN32 if (int fd = ::_fileno(f); fd >= 0) return check_for_console((void*)_get_osfhandle(fd)); -#elifdef _GLIBCXX_HAVE_UNISTD_H +#elif defined _GLIBCXX_HAVE_UNISTD_H && ! defined __AVR__ if (int fd = (::fileno)(f); fd >= 0 && ::isatty(fd)) return f; #endif @@ -100,7 +100,7 @@ namespace #ifdef _WIN32 if (auto fb = dynamic_cast(sb)) return check_for_console(fb->native_handle()); -#elifdef _GLIBCXX_HAVE_UNISTD_H +#elif defined _GLIBCXX_HAVE_UNISTD_H && ! defined __AVR__ if (auto fb = dynamic_cast(sb)) if (int fd = fb->native_handle(); fd >= 0 && ::isatty(fd)) return ::fdopen(::dup(fd), "w"); // Caller must call fclose.
[gcc r15-2222] libstdc++: Use [[maybe_unused]] attribute in src/c++23/print.cc
https://gcc.gnu.org/g:b40156d69153364315e071dc968227ce1c3bd2a8 commit r15-2222-gb40156d69153364315e071dc968227ce1c3bd2a8 Author: Jonathan Wakely Date: Tue Jul 23 10:08:52 2024 +0100 libstdc++: Use [[maybe_unused]] attribute in src/c++23/print.cc This avoids some warnings when the preprocessor conditions are not met. libstdc++-v3/ChangeLog: * src/c++23/print.cc (__open_terminal): Use [[maybe_unused]] on parameter. Diff: --- libstdc++-v3/src/c++23/print.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc index 558dc149d125..8ba714059672 100644 --- a/libstdc++-v3/src/c++23/print.cc +++ b/libstdc++-v3/src/c++23/print.cc @@ -67,7 +67,7 @@ namespace // This returns intptr_t that is either a Windows HANDLE // or 1 + a POSIX file descriptor. A zero return indicates failure. void* - __open_terminal(FILE* f) + __open_terminal([[maybe_unused]] FILE* f) { #ifndef _GLIBCXX_USE_STDIO_PURE if (f) @@ -85,7 +85,7 @@ namespace } void* - __open_terminal(std::streambuf* sb) + __open_terminal([[maybe_unused]] std::streambuf* sb) { #if ! defined _GLIBCXX_USE_STDIO_PURE && defined __cpp_rtti using namespace __gnu_cxx;
[gcc r14-10502] libstdc++: Do not use isatty on avr [PR115482]
https://gcc.gnu.org/g:5fad8874300ef67c577cc204e339dca6bca15467 commit r14-10502-g5fad8874300ef67c577cc204e339dca6bca15467 Author: Detlef Vollmann Date: Tue Jul 23 09:25:22 2024 +0100 libstdc++: Do not use isatty on avr [PR115482] avrlibc has an incomplete unistd.h that doesn't have isatty. So building libstdc++ fails when compiling c++23/print.cc. As a workaround I added a check for AVR. libstdc++-v3/ChangeLog: PR libstdc++/115482 * src/c++23/print.cc (__open_terminal) [__AVR__]: Do not use isatty. (cherry picked from commit 8439405e38c56b774cf3c65bdafae5f9e11d470a) Diff: --- libstdc++-v3/src/c++23/print.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc index 99a19cd45002..558dc149d125 100644 --- a/libstdc++-v3/src/c++23/print.cc +++ b/libstdc++-v3/src/c++23/print.cc @@ -75,7 +75,7 @@ namespace #ifdef _WIN32 if (int fd = ::_fileno(f); fd >= 0) return check_for_console((void*)_get_osfhandle(fd)); -#elifdef _GLIBCXX_HAVE_UNISTD_H +#elif defined _GLIBCXX_HAVE_UNISTD_H && ! defined __AVR__ if (int fd = (::fileno)(f); fd >= 0 && ::isatty(fd)) return f; #endif @@ -100,7 +100,7 @@ namespace #ifdef _WIN32 if (auto fb = dynamic_cast(sb)) return check_for_console(fb->native_handle()); -#elifdef _GLIBCXX_HAVE_UNISTD_H +#elif defined _GLIBCXX_HAVE_UNISTD_H && ! defined __AVR__ if (auto fb = dynamic_cast(sb)) if (int fd = fb->native_handle(); fd >= 0 && ::isatty(fd)) return ::fdopen(::dup(fd), "w"); // Caller must call fclose.
[gcc r14-10503] libstdc++: Use [[maybe_unused]] attribute in src/c++23/print.cc
https://gcc.gnu.org/g:b41487a883282b28a136fa16e1d941e14dae commit r14-10503-gb41487a883282b28a136fa16e1d941e14dae Author: Jonathan Wakely Date: Tue Jul 23 10:08:52 2024 +0100 libstdc++: Use [[maybe_unused]] attribute in src/c++23/print.cc This avoids some warnings when the preprocessor conditions are not met. libstdc++-v3/ChangeLog: * src/c++23/print.cc (__open_terminal): Use [[maybe_unused]] on parameter. (cherry picked from commit b40156d69153364315e071dc968227ce1c3bd2a8) Diff: --- libstdc++-v3/src/c++23/print.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/src/c++23/print.cc b/libstdc++-v3/src/c++23/print.cc index 558dc149d125..8ba714059672 100644 --- a/libstdc++-v3/src/c++23/print.cc +++ b/libstdc++-v3/src/c++23/print.cc @@ -67,7 +67,7 @@ namespace // This returns intptr_t that is either a Windows HANDLE // or 1 + a POSIX file descriptor. A zero return indicates failure. void* - __open_terminal(FILE* f) + __open_terminal([[maybe_unused]] FILE* f) { #ifndef _GLIBCXX_USE_STDIO_PURE if (f) @@ -85,7 +85,7 @@ namespace } void* - __open_terminal(std::streambuf* sb) + __open_terminal([[maybe_unused]] std::streambuf* sb) { #if ! defined _GLIBCXX_USE_STDIO_PURE && defined __cpp_rtti using namespace __gnu_cxx;
[gcc r15-2223] tree-optimization/116002 - PTA solving slow with degenerate graph
https://gcc.gnu.org/g:15d3b2dab9182eff036a604169b5e6f4ab3b2a40 commit r15-2223-g15d3b2dab9182eff036a604169b5e6f4ab3b2a40 Author: Richard Biener Date: Tue Jul 23 10:29:58 2024 +0200 tree-optimization/116002 - PTA solving slow with degenerate graph When the constraint graph consists of N nodes with only complex constraints and no copy edges we have to be lucky to arrive at a constraint solving order that requires the optimal number of iterations. What happens in the testcase is that we bottle-neck on computing the visitation order but propagate changes only very slowly. Luckily the testcase complex constraints are all copy-with-offset and those do provide a way to order visitation. The following adds this which reduces the iteration count to one. PR tree-optimization/116002 * tree-ssa-structalias.cc (topo_visit): Also consider SCALAR = SCALAR complex constraints as edges. Diff: --- gcc/tree-ssa-structalias.cc | 12 1 file changed, 12 insertions(+) diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index 330e64e65da1..65f9132a94fd 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -1908,6 +1908,18 @@ topo_visit (constraint_graph_t graph, vec &topo_order, topo_visit (graph, topo_order, visited, k); } + /* Also consider copy with offset complex constraints as implicit edges. */ + for (auto c : graph->complex[n]) +{ + /* Constraints are ordered so that SCALAR = SCALAR appear first. */ + if (c->lhs.type != SCALAR || c->rhs.type != SCALAR) + break; + gcc_checking_assert (c->rhs.var == n); + unsigned k = find (c->lhs.var); + if (!bitmap_bit_p (visited, k)) + topo_visit (graph, topo_order, visited, k); +} + topo_order.quick_push (n); }
[gcc r15-2224] testsuite: Disable finite math only for test [PR115826]
https://gcc.gnu.org/g:7793f5b4194253acaac0b53d8a1c95d9b5c8f4bb commit r15-2224-g7793f5b4194253acaac0b53d8a1c95d9b5c8f4bb Author: Torbjörn SVENSSON Date: Mon Jul 15 12:10:12 2024 +0200 testsuite: Disable finite math only for test [PR115826] As the test case requires +-Inf and NaN to work and -ffast-math is added by default for arm-none-eabi, re-enable non-finite math. gcc/testsuite/ChangeLog: PR testsuite/115826 * gcc.dg/vect/tsvc/vect-tsvc-s1281.c: Use -fno-finite-math-only. Signed-off-by: Torbjörn SVENSSON Diff: --- gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c index dba95a819737..3e619a3fa5aa 100644 --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c @@ -4,6 +4,9 @@ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ /* { dg-require-effective-target vect_float } */ +/* This test requires +-Inf and NaN, so disable finite-math-only */ +/* { dg-additional-options "-fno-finite-math-only" } */ + #include "tsvc.h" real_t s1281(struct args_t * func_args)
[gcc r14-10504] testsuite: Disable finite math only for test [PR115826]
https://gcc.gnu.org/g:a544898f6dd6a1689bb25abfdc20d577c93b1162 commit r14-10504-ga544898f6dd6a1689bb25abfdc20d577c93b1162 Author: Torbjörn SVENSSON Date: Mon Jul 15 12:10:12 2024 +0200 testsuite: Disable finite math only for test [PR115826] As the test case requires +-Inf and NaN to work and -ffast-math is added by default for arm-none-eabi, re-enable non-finite math. gcc/testsuite/ChangeLog: PR testsuite/115826 * gcc.dg/vect/tsvc/vect-tsvc-s1281.c: Use -fno-finite-math-only. Signed-off-by: Torbjörn SVENSSON (cherry picked from commit 7793f5b4194253acaac0b53d8a1c95d9b5c8f4bb) Diff: --- gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c index dba95a819737..3e619a3fa5aa 100644 --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s1281.c @@ -4,6 +4,9 @@ /* { dg-additional-options "--param vect-epilogues-nomask=0" } */ /* { dg-require-effective-target vect_float } */ +/* This test requires +-Inf and NaN, so disable finite-math-only */ +/* { dg-additional-options "-fno-finite-math-only" } */ + #include "tsvc.h" real_t s1281(struct args_t * func_args)
[gcc r15-2225] report message for operator %a on unaddressible operand
https://gcc.gnu.org/g:472eab9ab1fdfd0ba3a555ea9eb50e20307c7052 commit r15-2225-g472eab9ab1fdfd0ba3a555ea9eb50e20307c7052 Author: Jiufu Guo Date: Tue Jul 23 13:34:20 2024 +0800 report message for operator %a on unaddressible operand Hi, For PR96866, when printing asm code for modifier "%a", an addressable operand is required. While the constraint "X" allow any kind of operand even which is hard to get the address directly. e.g. extern symbol whose address is in TOC. An error message would be reported to indicate the invalid asm operand. Compare with previous version, test case is updated with -mno-pcrel. Bootstrap&regtest pass on ppc64{,le}. Is this ok for trunk? BR, Jeff(Jiufu Guo) PR target/96866 gcc/ChangeLog: * config/rs6000/rs6000.cc (print_operand_address): Emit message for unsupported operand. gcc/testsuite/ChangeLog: * gcc.target/powerpc/pr96866-1.c: New test. * gcc.target/powerpc/pr96866-2.c: New test. Diff: --- gcc/config/rs6000/rs6000.cc | 7 ++- gcc/testsuite/gcc.target/powerpc/pr96866-1.c | 18 ++ gcc/testsuite/gcc.target/powerpc/pr96866-2.c | 13 + 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 85211565eb4c..0bcc6a2d0ab6 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -14706,7 +14706,12 @@ print_operand_address (FILE *file, rtx x) fprintf (file, "@%s(%s)", SMALL_DATA_RELOC, reg_names[SMALL_DATA_REG]); else - gcc_assert (!TARGET_TOC); + { + /* Do not support getting address directly from TOC, emit error. +No more work is needed for !TARGET_TOC. 
*/ + if (TARGET_TOC) + output_operand_lossage ("%%a requires an address of memory"); + } } else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))) diff --git a/gcc/testsuite/gcc.target/powerpc/pr96866-1.c b/gcc/testsuite/gcc.target/powerpc/pr96866-1.c new file mode 100644 index ..72e59a19753a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr96866-1.c @@ -0,0 +1,18 @@ +/* The "%a" modifier can't get the address of extern symbol directly from TOC + with -fPIC, even the symbol is propagated for "X" constraint under -O2. */ +/* { dg-options "-fPIC -O2 -mno-pcrel" } */ + +/* It's to verify no ICE here, ignore error messages about invalid 'asm'. */ +/* { dg-excess-errors "pr96866-1.c" } */ + +int x[2]; + +int __attribute__ ((noipa)) +f1 (void) +{ + int n; + int *p = x; + *p++; + __asm__ volatile("ld %0, %a1" : "=r"(n) : "X"(p)); + return n; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr96866-2.c b/gcc/testsuite/gcc.target/powerpc/pr96866-2.c new file mode 100644 index ..72bb15fa04f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr96866-2.c @@ -0,0 +1,13 @@ +/* The "%a" modifier can't get the address of extern symbol directly from TOC + with -fPIC. */ +/* { dg-options "-fPIC -O2 -mno-pcrel" } */ + +/* It's to verify no ICE here, ignore error messages about invalid 'asm'. */ +/* { dg-excess-errors "pr96866-2.c" } */ + +void +f (void) +{ + extern int x; + __asm__ volatile("#%a0" ::"X"(&x)); +}
[gcc r15-2226] install.texi (gcn): Suggest newer commit for Newlib
https://gcc.gnu.org/g:b95c82d60c8c88f6346c5602f2e22a4531afe47c commit r15-2226-gb95c82d60c8c88f6346c5602f2e22a4531afe47c Author: Tobias Burnus Date: Tue Jul 23 12:41:40 2024 +0200 install.texi (gcn): Suggest newer commit for Newlib Newlib 4.4.0 lacks two commits: 7dd4eb1db (2024-03-25) to fix device console output for GFX10/GFX11 and ed50a50b9 (2024-04-04) to make the added lock.h compilable with C++. This commit now also mentions the second commit. gcc/ChangeLog: * doc/install.texi (amdgcn-x-amdhsa): Suggest newer git version for newlib. Diff: --- gcc/doc/install.texi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index b54569925837..dda623f4410a 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -3952,9 +3952,9 @@ Instead of GNU Binutils, you will need to install LLVM 15, or later, and copy by specifying a @code{--with-multilib-list=} that does not list @code{gfx1100} and @code{gfx1103}. -Use Newlib (4.3.0 or newer; 4.4.0 contains some improvements and git commit -7dd4eb1db (2024-03-25, post-4.4.0) fixes device console output for GFX10 and -GFX11 devices). +Use Newlib (4.3.0 or newer; 4.4.0 contains some improvements and git commits +7dd4eb1db and ed50a50b9 (2024-04-04, post-4.4.0) fix device console output +for GFX10 and GFX11 devices). To run the binaries, install the HSA Runtime from the @uref{https://rocm.docs.amd.com/,,ROCm Platform}, and use
[gcc(refs/users/meissner/heads/work173)] Revert changes
https://gcc.gnu.org/g:f5939dfbe6a215e0c488f8ed2f2d82f27e15236f commit f5939dfbe6a215e0c488f8ed2f2d82f27e15236f Author: Michael Meissner Date: Tue Jul 23 10:09:06 2024 -0400 Revert changes Diff: --- gcc/config/rs6000/rs6000-c.cc | 23 +++-- gcc/config/rs6000/rs6000-protos.h | 5 +- gcc/config/rs6000/rs6000.cc | 181 +- gcc/config/rs6000/rs6000.h| 36 gcc/config/rs6000/rs6000.opt | 8 -- 5 files changed, 54 insertions(+), 199 deletions(-) diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 2ffaee165885..68519e1397f1 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -338,8 +338,7 @@ rs6000_define_or_undefine_macro (bool define_p, const char *name) #pragma GCC target, we need to adjust the macros dynamically. */ void -rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, -HOST_WIDE_INT arch_flags) +rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags) { if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) fprintf (stderr, @@ -412,7 +411,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, summary of the flags associated with particular cpu definitions. */ - /* rs6000_isa_flags and rs6000_arch_flags based options. */ + /* rs6000_isa_flags based options. 
*/ rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC"); if ((flags & OPTION_MASK_PPC_GPOPT) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ"); @@ -422,21 +421,21 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64"); if ((flags & OPTION_MASK_MFCRF) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4"); - if ((arch_flags & ARCH_MASK_POWER4) != 0) + if ((flags & OPTION_MASK_POPCNTB) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5"); - if ((arch_flags & ARCH_MASK_POWER5) != 0) + if ((flags & OPTION_MASK_FPRND) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X"); - if ((arch_flags & ARCH_MASK_POWER6) != 0) + if ((flags & OPTION_MASK_CMPB) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6"); - if ((arch_flags & ARCH_MASK_POWER7) != 0) + if ((flags & OPTION_MASK_POPCNTD) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); - if ((arch_flags & ARCH_MASK_POWER8) != 0) + if ((flags & OPTION_MASK_POWER8) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); - if ((arch_flags & ARCH_MASK_POWER9) != 0) + if ((flags & OPTION_MASK_MODULO) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR9"); - if ((arch_flags & ARCH_MASK_POWER10) != 0) + if ((flags & OPTION_MASK_POWER10) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR10"); - if ((arch_flags & ARCH_MASK_POWER11) != 0) + if ((flags & OPTION_MASK_POWER11) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR11"); if ((flags & OPTION_MASK_SOFT_FLOAT) != 0) rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT"); @@ -606,7 +605,7 @@ void rs6000_cpu_cpp_builtins (cpp_reader *pfile) { /* Define all of the common macros. 
*/ - rs6000_target_modify_macros (true, rs6000_isa_flags, rs6000_arch_flags); + rs6000_target_modify_macros (true, rs6000_isa_flags); if (TARGET_FRE) builtin_define ("__RECIP__"); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index da658cd5ab2e..b40557a85577 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -323,9 +323,8 @@ extern void rs6000_cpu_cpp_builtins (struct cpp_reader *); extern bool rs6000_pragma_target_parse (tree, tree); #endif extern void rs6000_activate_target_options (tree new_tree); -extern void rs6000_target_modify_macros (bool, HOST_WIDE_INT, HOST_WIDE_INT); -extern void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, - HOST_WIDE_INT); +extern void rs6000_target_modify_macros (bool, HOST_WIDE_INT); +extern void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT); #ifdef NO_DOLLAR_IN_LABEL const char * rs6000_xcoff_strip_dollar (const char *); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index d38b407f05bb..eddd2adbab59 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -251,17 +251,17 @@ enum { /* Map compiler ISA bits into HWCAP names. */ struct clone_map { - HOST_WIDE_INT arch_mask; /* rs6000_arch_mask. */ + HOST_WIDE_INT isa_mask; /* rs6000_isa mask */ const char *name;/* name to use in __builtin_cpu_supports. */ }; static const struct clone_map rs6000_clone_map[CLONE_MAX] = { - { 0, "" }, /* Default options. */ - { ARCH_MASK_POWER6, "arch_2_05" }, /* ISA 2.05 (power6). */ - { ARCH_MASK_POWER7, "a
[gcc(refs/users/meissner/heads/work173)] Move architecture flags from isa flags
https://gcc.gnu.org/g:5021a6c04765ed9f06013827155f7c65b9d785fe commit 5021a6c04765ed9f06013827155f7c65b9d785fe Author: Michael Meissner Date: Tue Jul 23 10:11:54 2024 -0400 Move architecture flags from isa flags 2024-07-22 Michael Meissner gcc/ * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Add support for separating the architecture flags from the ISA flags. (rs6000_cpu_cpp_builtins): Likewise. * config/rs6000/rs6000-protos.h (rs6000_target_modify_macros): Update declaration. (rs6000_target_modify_macros_ptr): Likewise. * config/rs6000/rs6000.cc (struct clone_map): Switch to using architecture flags instead of isa flags. (rs6000_clone_map): Likewise. (rs6000_target_modify_macros_ptr): Update declaration. (get_arch_flags): New function. (rs6000_debug_reg_global): Add support for separating architecture flags from the ISA flags. (rs6000_print_isa_options):Likewise. (rs6000_option_override_internal): Likewise. (rs6000_machine_from_flags): Likewise. (struct rs6000_arch_mask): New structure. (rs6000_arch_masks): Likewise. (rs6000_pragma_target_parse): Likewise. (rs6000_function_specific_save): Likewise. (rs6000_function_specific_restore): Likewise. (rs6000_function_specific_print): Likewise. (rs6000_print_options_internal): Likewise. (rs6000_print_isa_options): Likewise. (rs6000_clone_priority): Switch to using architecture flags. (rs6000_can_inline_p): Check if the arch flags match along with the isa flags. * config/rs6000/rs6000.h (enum arch_bits): New enumeration. (ARCH_MASK_*): New architecture masks for a specific processor. (ARCH_FLAGS_*): New architecture masks for all of the architecture masks set for a specific processor. * config/rs6000/rs6000.opt (rs6000_arch_flags): New target variable. (x_rs6000_arch_flags): New target save area. 
Diff: --- gcc/config/rs6000/rs6000-c.cc | 23 ++--- gcc/config/rs6000/rs6000-protos.h | 5 +- gcc/config/rs6000/rs6000.cc | 188 +- gcc/config/rs6000/rs6000.h| 36 gcc/config/rs6000/rs6000.opt | 8 ++ 5 files changed, 205 insertions(+), 55 deletions(-) diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 68519e1397f1..2ffaee165885 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -338,7 +338,8 @@ rs6000_define_or_undefine_macro (bool define_p, const char *name) #pragma GCC target, we need to adjust the macros dynamically. */ void -rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags) +rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, +HOST_WIDE_INT arch_flags) { if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) fprintf (stderr, @@ -411,7 +412,7 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags) summary of the flags associated with particular cpu definitions. */ - /* rs6000_isa_flags based options. */ + /* rs6000_isa_flags and rs6000_arch_flags based options. 
*/ rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC"); if ((flags & OPTION_MASK_PPC_GPOPT) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PPCSQ"); @@ -421,21 +422,21 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags) rs6000_define_or_undefine_macro (define_p, "_ARCH_PPC64"); if ((flags & OPTION_MASK_MFCRF) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR4"); - if ((flags & OPTION_MASK_POPCNTB) != 0) + if ((arch_flags & ARCH_MASK_POWER4) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5"); - if ((flags & OPTION_MASK_FPRND) != 0) + if ((arch_flags & ARCH_MASK_POWER5) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR5X"); - if ((flags & OPTION_MASK_CMPB) != 0) + if ((arch_flags & ARCH_MASK_POWER6) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6"); - if ((flags & OPTION_MASK_POPCNTD) != 0) + if ((arch_flags & ARCH_MASK_POWER7) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7"); - if ((flags & OPTION_MASK_POWER8) != 0) + if ((arch_flags & ARCH_MASK_POWER8) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8"); - if ((flags & OPTION_MASK_MODULO) != 0) + if ((arch_flags & ARCH_MASK_POWER9) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR9"); - if ((flags & OPTION_MASK_POWER10) != 0) + if ((arch_flags & ARCH_MASK_POWER10) != 0) rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR10"); - if ((flags & OPTION_MASK_POWER11) != 0) + if ((arch_flags & ARCH_MASK_POWER11
[gcc(refs/users/meissner/heads/work173)] Update ChangeLog.*
https://gcc.gnu.org/g:191fcce31172349eacedc462c47cd76893cb437e commit 191fcce31172349eacedc462c47cd76893cb437e Author: Michael Meissner Date: Tue Jul 23 10:13:09 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 4 1 file changed, 4 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index 00e2e045c60b..e1a07e54c56c 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -25,10 +25,14 @@ gcc/ (struct rs6000_arch_mask): New structure. (rs6000_arch_masks): Likewise. (rs6000_pragma_target_parse): Likewise. + (rs6000_function_specific_save): Likewise. + (rs6000_function_specific_restore): Likewise. (rs6000_function_specific_print): Likewise. (rs6000_print_options_internal): Likewise. (rs6000_print_isa_options): Likewise. (rs6000_clone_priority): Switch to using architecture flags. + (rs6000_can_inline_p): Check if the arch flags match along with the isa + flags. * config/rs6000/rs6000.h (enum arch_bits): New enumeration. (ARCH_MASK_*): New architecture masks for a specific processor. (ARCH_FLAGS_*): New architecture masks for all of the architecture masks
[gcc r15-2227] cp/coroutines: add a test for PR c++/103953
https://gcc.gnu.org/g:826134760c49518d97769c8bb7ecbc264b78cac9 commit r15-2227-g826134760c49518d97769c8bb7ecbc264b78cac9 Author: Arsen Arsenović Date: Tue Jul 23 13:01:03 2024 +0200 cp/coroutines: add a test for PR c++/103953 This PR seems to have been fixed by a fix for a seemingly unrelated PR. Lets add a regression test to make sure it stays fixed. PR c++/103953 - Leak of coroutine return object PR c++/103953 gcc/testsuite/ChangeLog: * g++.dg/coroutines/torture/pr103953.C: New test. Reviewed-by: Iain Sandoe Diff: --- gcc/testsuite/g++.dg/coroutines/torture/pr103953.C | 75 ++ 1 file changed, 75 insertions(+) diff --git a/gcc/testsuite/g++.dg/coroutines/torture/pr103953.C b/gcc/testsuite/g++.dg/coroutines/torture/pr103953.C new file mode 100644 index ..da559f8fa0d1 --- /dev/null +++ b/gcc/testsuite/g++.dg/coroutines/torture/pr103953.C @@ -0,0 +1,75 @@ +// { dg-do run } +// https://gcc.gnu.org/PR103953 +#include +#include + +static int ctor_dtor_count = 0; + +struct task { +struct promise_type; + +using handle_type = std::coroutine_handle; + +task(handle_type h) : handle(h) { +ctor_dtor_count++; +} +task(const task & t) : handle(t.handle) { +ctor_dtor_count++; +} +task(task && t) : handle(std::move(t.handle)) { +ctor_dtor_count++; +} +~task() { + if (--ctor_dtor_count < 0) + __builtin_abort (); +} + +struct promise_type { +auto get_return_object() { +return task{handle_type::from_promise(*this)}; +} + +auto initial_suspend() { +return std::suspend_always {}; +} + +auto unhandled_exception() {} + +auto final_suspend() noexcept { +return std::suspend_always{}; +} + +void return_void() {} +}; + + handle_type handle; + + void await_resume() { + handle.resume(); + } + + auto await_suspend(handle_type) { + return handle; + } + + auto await_ready() { + return false; + } +}; + +int main() { +{ + task coroutine_A = []() ->task { + co_return; + }(); + + task coroutine_B = [&coroutine_A]() ->task { + co_await coroutine_A; + }(); + + coroutine_B.handle.resume(); +} + +if 
(ctor_dtor_count != 0) + __builtin_abort (); +}
[gcc r15-2228] PR modula2/116048 ICE when encountering wrong kind of qualident
https://gcc.gnu.org/g:7f8064ff0e2ac90c5bb6c30cc61acc5a28ebbe4c commit r15-2228-g7f8064ff0e2ac90c5bb6c30cc61acc5a28ebbe4c Author: Gaius Mulley Date: Tue Jul 23 15:54:16 2024 +0100 PR modula2/116048 ICE when encountering wrong kind of qualident Following on from PR-115957, further ICEs can be generated by using the wrong kind of qualident symbol. For example, using a variable instead of a type or using a type instead of a const. This fix tracks the expected qualident kind state when parsing const, type and variable declarations. If the error is unrecoverable then a detailed message explaining the context of the qualident (and why the seen qualident is wrong) is generated. gcc/m2/ChangeLog: PR modula2/116048 * Make-lang.in (GM2-COMP-BOOT-DEFS): Add M2StateCheck.def. (GM2-COMP-BOOT-MODS): Add M2StateCheck.mod. (GM2-COMP-DEFS): Add M2StateCheck.def. (GM2-COMP-MODS): Add M2StateCheck.mod. * gm2-compiler/M2Quads.mod (StartBuildWith): Generate unrecoverable error if the qualident type is NulSym. Replace MetaError1 with MetaErrorT1 and position the error to the qualident. * gm2-compiler/P3Build.bnf (M2StateCheck): Import procedures. (seenError): New variable. (WasNoError): Remove variable. (BlockState): New variable. (ErrorString): Rewrite using seenError. (CompilationUnit): Ditto. (QualidentCheck): New rule. (ConstantDeclaration): Bookend with InclConst and ExclConst. (Constructor): Add InclConstructor, ExclConstructor and call CheckQualident. (ConstActualParameters): Call PushState, PopState, InclConstFunc and CheckQualident. (TypeDeclaration): Bookend with InclType and ExclType. (SimpleType): Call QualidentCheck. (CaseTag): Ditto. (OptReturnType): Ditto. (VariableDeclaration): Bookend with InclVar and ExclVar. (Designator): Call QualidentCheck. (Formal;Type): Ditto. * gm2-compiler/PCBuild.bnf (M2StateCheck): Import procedures. (ConstantDeclaration): Rewrite using InclConst and ExclConst. (Constructor): Bookend with InclConstructor and ExclConstructor. Call CheckQualident. 
(ConstructorOrConstActualParameters): Rewrite and call CheckQualident. (ConstActualParameters): Bookend with PushState PopState. Call InclConstFunc and CheckQualident. * gm2-gcc/init.cc (_M2_M2StateCheck_init): New declaration. (_M2_P3Build_init): New declaration. (init_PerCompilationInit): Call _M2_M2StateCheck_init and _M2_P3Build_init. * gm2-compiler/M2StateCheck.def: New file. * gm2-compiler/M2StateCheck.mod: New file. gcc/testsuite/ChangeLog: PR modula2/116048 * gm2/errors/fail/errors-fail.exp: Remove -Wstudents and add -Wuninit-variable-checking=all. Replace gm2_init_pim with gm2_init_iso. * gm2/errors/fail/testfio.mod: Modify test code to provoke an error in the first basic block. * gm2/errors/fail/testparam.mod: Ditto. * gm2/errors/fail/array1.mod: Ditto. * gm2/errors/fail/badtype.mod: New test. * gm2/errors/fail/badvar.mod: New test. Signed-off-by: Gaius Mulley Diff: --- gcc/m2/Make-lang.in | 4 + gcc/m2/gm2-compiler/M2Quads.mod | 45 ++-- gcc/m2/gm2-compiler/M2StateCheck.def | 154 gcc/m2/gm2-compiler/M2StateCheck.mod | 344 ++ gcc/m2/gm2-compiler/P3Build.bnf | 65 +++-- gcc/m2/gm2-compiler/PCBuild.bnf | 45 ++-- gcc/m2/gm2-gcc/init.cc| 4 + gcc/testsuite/gm2/errors/fail/array1.mod | 5 + gcc/testsuite/gm2/errors/fail/badtype.mod | 10 + gcc/testsuite/gm2/errors/fail/badvar.mod | 10 + gcc/testsuite/gm2/errors/fail/errors-fail.exp | 2 +- gcc/testsuite/gm2/errors/fail/testfio.mod | 8 + gcc/testsuite/gm2/errors/fail/testparam.mod | 5 + 13 files changed, 645 insertions(+), 56 deletions(-) diff --git a/gcc/m2/Make-lang.in b/gcc/m2/Make-lang.in index daa7ef6747a5..2bd60ca29393 100644 --- a/gcc/m2/Make-lang.in +++ b/gcc/m2/Make-lang.in @@ -808,6 +808,7 @@ GM2-COMP-BOOT-DEFS = \ M2Size.def \ M2StackAddress.def \ M2StackWord.def \ + M2StateCheck.def \ M2Students.def \ M2Swig.def \ M2SymInit.def \ @@ -882,6 +883,7 @@ GM2-COMP-BOOT-MODS = \ M2Size.mod \ M2StackAddress.mod \ M2StackWord.mod \ + M2StateCheck.mod \ M2Students.mod \ M2Swig.mod \ M2SymInit.mod \ @@ -1090,6 
+1092,7 @@ GM2-COMP-DEFS = \ M2Size.def \
[gcc(refs/users/meissner/heads/work173)] Remove -mpower10 and -mpower8-internal
https://gcc.gnu.org/g:d64c5ffbfe1f13536d8cc5f6ffd2fbbdc3e1873c commit d64c5ffbfe1f13536d8cc5f6ffd2fbbdc3e1873c Author: Michael Meissner Date: Tue Jul 23 11:14:05 2024 -0400 Remove -mpower10 and -mpower8-internal 2024-07-23 Michael Meissner gcc/ * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove internal options -mpower8-internal, -mpower10, and -mpower11. (ISA_3_1_MASKS_SERVER): Likewise. (POWER11_MASKS_SERVER): Likewise. (POWERPC_MASKS): Likewise. * config/rs6000/rs6000.cc (rs6000_opt_masks): Likewise. * config/rs6000/rs6000.h (TARGET_POWER8): Define in terms of hardware flags. (TARGET_POWER10): Likewise. * config/rs6000/rs6000.opt (-mpower8-internal): Do not create ISA bit. (-mpower10): Likewise. (-mpower11): Delete. Diff: --- gcc/config/rs6000/rs6000-cpus.def | 8 +--- gcc/config/rs6000/rs6000.cc | 2 -- gcc/config/rs6000/rs6000.h| 5 + gcc/config/rs6000/rs6000.opt | 11 ++- 4 files changed, 8 insertions(+), 18 deletions(-) diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 84fac8bdac1d..a3568898b0b6 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -47,7 +47,6 @@ fusion here, instead set it in rs6000.cc if we are tuning for a power8 system. */ #define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \ -| OPTION_MASK_POWER8 \ | OPTION_MASK_P8_VECTOR\ | OPTION_MASK_CRYPTO \ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \ @@ -83,11 +82,9 @@ | OPTION_MASK_PREFIXED) #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \ -| OPTION_MASK_POWER10 \ | OTHER_POWER10_MASKS) -#define POWER11_MASKS_SERVER (ISA_3_1_MASKS_SERVER \ - | OPTION_MASK_POWER11) +#define POWER11_MASKS_SERVER ISA_3_1_MASKS_SERVER /* Flags that need to be turned off if -mno-vsx. 
*/ #define OTHER_VSX_VECTOR_MASKS (OPTION_MASK_EFFICIENT_UNALIGNED_VSX\ @@ -125,8 +122,6 @@ | OPTION_MASK_FLOAT128_HW \ | OPTION_MASK_FLOAT128_KEYWORD \ | OPTION_MASK_FPRND\ -| OPTION_MASK_POWER10 \ -| OPTION_MASK_POWER11 \ | OPTION_MASK_P10_FUSION \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ @@ -135,7 +130,6 @@ | OPTION_MASK_MODULO \ | OPTION_MASK_MULHW\ | OPTION_MASK_NO_UPDATE\ -| OPTION_MASK_POWER8 \ | OPTION_MASK_P8_FUSION\ | OPTION_MASK_P8_VECTOR\ | OPTION_MASK_P9_MINMAX\ diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 381d439c0cf3..4550fd44036e 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -24526,8 +24526,6 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "float128",OPTION_MASK_FLOAT128_KEYWORD, false, true }, { "float128-hardware", OPTION_MASK_FLOAT128_HW,false, true }, { "fprnd", OPTION_MASK_FPRND, false, true }, - { "power10", OPTION_MASK_POWER10,false, true }, - { "power11", OPTION_MASK_POWER11,false, false }, { "hard-dfp",OPTION_MASK_DFP,false, true }, { "htm", OPTION_MASK_HTM,false, true }, { "isel",OPTION_MASK_ISEL, false, true }, diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 2053de1fc176..548ef9c0fa50 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2517,3 +2517,8 @@ enum arch_bits { #define ARCH_FLAGS_POWER9 (ARCH_FLAGS_POWER8 | ARCH_MASK_POWER9) #define ARCH_FLAGS_POWER10 (ARCH_FLAGS_POWER9 | ARCH_MASK_POWER10) #define ARCH_FLAGS_POWER11 (ARCH_FLAGS_POWER10 | ARCH_MASK_POWER11) + +/* We used to use -mpower8-internal and -mp
[gcc(refs/users/meissner/heads/work173)] Update ChangeLog.*
https://gcc.gnu.org/g:93b27dd2636726ef51500e0068891ca250242c3d commit 93b27dd2636726ef51500e0068891ca250242c3d Author: Michael Meissner Date: Tue Jul 23 11:14:51 2024 -0400 Update ChangeLog.* Diff: --- gcc/ChangeLog.meissner | 21 + 1 file changed, 21 insertions(+) diff --git a/gcc/ChangeLog.meissner b/gcc/ChangeLog.meissner index e1a07e54c56c..d50a89e9c876 100644 --- a/gcc/ChangeLog.meissner +++ b/gcc/ChangeLog.meissner @@ -1,3 +1,24 @@ + Branch work173, patch #2 + +Remove -mpower10 and -mpower8-internal + +2024-07-23 Michael Meissner + +gcc/ + + * config/rs6000/rs6000-cpus.def (ISA_2_7_MASKS_SERVER): Remove internal + options -mpower8-internal, -mpower10, and -mpower11. + (ISA_3_1_MASKS_SERVER): Likewise. + (POWER11_MASKS_SERVER): Likewise. + (POWERPC_MASKS): Likewise. + * config/rs6000/rs6000.cc (rs6000_opt_masks): Likewise. + * config/rs6000/rs6000.h (TARGET_POWER8): Define in terms of hardware + flags. + (TARGET_POWER10): Likewise. + * config/rs6000/rs6000.opt (-mpower8-internal): Do not create ISA bit. + (-mpower10): Likewise. + (-mpower11): Delete. + Branch work173, patch #1 Move architecture flags from isa flags
[gcc r15-2229] c++: missing SFINAE during alias CTAD [PR115296]
https://gcc.gnu.org/g:f70281222df432a7bec1271904c5ebefd7f2c934 commit r15-2229-gf70281222df432a7bec1271904c5ebefd7f2c934 Author: Patrick Palka Date: Tue Jul 23 11:37:31 2024 -0400 c++: missing SFINAE during alias CTAD [PR115296] During the alias CTAD transformation, if substitution failed for some guide we should just silently discard the guide. We currently do discard the guide, but not silently, as in the below testcase which we diagnose forming a too-large array type when transforming the user-defined deduction guides. This patch fixes this by using complain=tf_none instead of tf_warning_or_error throughout alias_ctad_tweaks. PR c++/115296 gcc/cp/ChangeLog: * pt.cc (alias_ctad_tweaks): Use complain=tf_none instead of tf_warning_or_error. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/class-deduction-alias23.C: New test. Reviewed-by: Jason Merrill Diff: --- gcc/cp/pt.cc | 2 +- gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C | 19 +++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 108e929b8ee6..8cc5e21c520d 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -30287,7 +30287,7 @@ alias_ctad_tweaks (tree tmpl, tree uguides) (INNERMOST_TEMPLATE_PARMS (fullatparms))); } - tsubst_flags_t complain = tf_warning_or_error; + tsubst_flags_t complain = tf_none; tree aguides = NULL_TREE; tree atparms = INNERMOST_TEMPLATE_PARMS (fullatparms); unsigned natparms = TREE_VEC_LENGTH (atparms); diff --git a/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C new file mode 100644 index ..117212c67de7 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/class-deduction-alias23.C @@ -0,0 +1,19 @@ +// PR c++/115296 +// { dg-do compile { target c++20 } } + +using size_t = decltype(sizeof(0)); + +template +struct span { span(T); }; + +template +span(T(&)[N]) -> span; // { dg-bogus "array exceeds maximum" } + +template +requires (sizeof(T[N]) != 42) // { dg-bogus "array exceeds maximum" } +span(T*) 
-> span; + +template +using array_view = span; + +array_view x = 0;
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PR rtl-optimization/115876] Fix one of two ubsan reported issues in new ext-dce.cc code
https://gcc.gnu.org/g:ead3454f089bc864e448b1bf6ace6b445eca3152 commit ead3454f089bc864e448b1bf6ace6b445eca3152 Author: Jeff Law Date: Fri Jul 12 13:11:33 2024 -0600 [PR rtl-optimization/115876] Fix one of two ubsan reported issues in new ext-dce.cc code David Binderman did a bootstrap build with ubsan enabled which triggered a few errors in the new ext-dce.cc code. This fixes the trivial case of shifting negative values. Bootstrapped and regression tested on x86. Pushing to the trunk. gcc/ PR rtl-optimization/115876 * ext-dce.cc (carry_backpropagate): Make mask and mmask unsigned. (cherry picked from commit a6f551d079de1d151b272bcdd3d42316857c9d4e) Diff: --- gcc/ext-dce.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index adc9084df57d..91789d283fcd 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -374,13 +374,13 @@ binop_implies_op2_fully_live (rtx_code code) exclusively pertain to the first operand. */ HOST_WIDE_INT -carry_backpropagate (HOST_WIDE_INT mask, enum rtx_code code, rtx x) +carry_backpropagate (unsigned HOST_WIDE_INT mask, enum rtx_code code, rtx x) { if (mask == 0) return 0; enum machine_mode mode = GET_MODE_INNER (GET_MODE (x)); - HOST_WIDE_INT mmask = GET_MODE_MASK (mode); + unsigned HOST_WIDE_INT mmask = GET_MODE_MASK (mode); switch (code) { case PLUS:
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add vector type of BFloat16 format
https://gcc.gnu.org/g:a5ca595e63500d080294f81eada2b25e320dd572 commit a5ca595e63500d080294f81eada2b25e320dd572 Author: Feng Wang Date: Thu Jun 13 00:32:14 2024 + RISC-V: Add vector type of BFloat16 format v3: Rebase v2: Rebase The vector type of BFloat16 format is added in this patch, subsequent extensions to zvfbfmin and zvfwma need to be based on this patch. Signed-off-by: Feng Wang gcc/ChangeLog: * config/riscv/genrvv-type-indexer.cc (bfloat16_type): Generate bf16 vector_type and scalar_type in DEF_RVV_TYPE_INDEX. (bfloat16_wide_type): Ditto. (same_ratio_eew_bf16_type): Ditto. (main): Ditto. * config/riscv/riscv-modes.def (ADJUST_BYTESIZE): Add vector type for BFloat16. (RVV_WHOLE_MODES): Add vector type for BFloat16. (RVV_FRACT_MODE): Ditto. (RVV_NF4_MODES): Ditto. (RVV_NF8_MODES): Ditto. (RVV_NF2_MODES): Ditto. * config/riscv/riscv-vector-builtins-types.def (vbfloat16mf4_t): Add builtin vector type for BFloat16. (vbfloat16mf2_t): Add builtin vector type for BFloat16. (vbfloat16m1_t): Ditto. (vbfloat16m2_t): Ditto. (vbfloat16m4_t): Ditto. (vbfloat16m8_t): Ditto. (vbfloat16mf4x2_t): Ditto. (vbfloat16mf4x3_t): Ditto. (vbfloat16mf4x4_t): Ditto. (vbfloat16mf4x5_t): Ditto. (vbfloat16mf4x6_t): Ditto. (vbfloat16mf4x7_t): Ditto. (vbfloat16mf4x8_t): Ditto. (vbfloat16mf2x2_t): Ditto. (vbfloat16mf2x3_t): Ditto. (vbfloat16mf2x4_t): Ditto. (vbfloat16mf2x5_t): Ditto. (vbfloat16mf2x6_t): Ditto. (vbfloat16mf2x7_t): Ditto. (vbfloat16mf2x8_t): Ditto. (vbfloat16m1x2_t): Ditto. (vbfloat16m1x3_t): Ditto. (vbfloat16m1x4_t): Ditto. (vbfloat16m1x5_t): Ditto. (vbfloat16m1x6_t): Ditto. (vbfloat16m1x7_t): Ditto. (vbfloat16m1x8_t): Ditto. (vbfloat16m2x2_t): Ditto. (vbfloat16m2x3_t): Ditto. (vbfloat16m2x4_t): Ditto. (vbfloat16m4x2_t): Ditto. * config/riscv/riscv-vector-builtins.cc (check_required_extensions): Add required_ext checking for BFloat16. * config/riscv/riscv-vector-builtins.def (vbfloat16mf4_t): Add vector_type for BFloat16 in builtins.def. (vbfloat16mf4x2_t): Ditto. 
(vbfloat16mf4x3_t): Ditto. (vbfloat16mf4x4_t): Ditto. (vbfloat16mf4x5_t): Ditto. (vbfloat16mf4x6_t): Ditto. (vbfloat16mf4x7_t): Ditto. (vbfloat16mf4x8_t): Ditto. (vbfloat16mf2_t): Ditto. (vbfloat16mf2x2_t): Ditto. (vbfloat16mf2x3_t): Ditto. (vbfloat16mf2x4_t): Ditto. (vbfloat16mf2x5_t): Ditto. (vbfloat16mf2x6_t): Ditto. (vbfloat16mf2x7_t): Ditto. (vbfloat16mf2x8_t): Ditto. (vbfloat16m1_t): Ditto. (vbfloat16m1x2_t): Ditto. (vbfloat16m1x3_t): Ditto. (vbfloat16m1x4_t): Ditto. (vbfloat16m1x5_t): Ditto. (vbfloat16m1x6_t): Ditto. (vbfloat16m1x7_t): Ditto. (vbfloat16m1x8_t): Ditto. (vbfloat16m2_t): Ditto. (vbfloat16m2x2_t): Ditto. (vbfloat16m2x3_t): Ditto. (vbfloat16m2x4_t): Ditto. (vbfloat16m4_t): Ditto. (vbfloat16m4x2_t): Ditto. (vbfloat16m8_t): Ditto. (double_trunc_bfloat_scalar): Add scalar_type def for BFloat16. (double_trunc_bfloat_vector): Add vector_type def for BFloat16. * config/riscv/riscv-vector-builtins.h (RVV_REQUIRE_ELEN_BF_16): Add required definition of BFloat16 ext. * config/riscv/riscv-vector-switch.def (ENTRY): Add vector_type information for BFloat16. (TUPLE_ENTRY): Add tuple vector_type information for BFloat16. (cherry picked from commit 666f167bec09d1234e6496c86b566fe1a71f61f0) Diff: --- gcc/config/riscv/genrvv-type-indexer.cc | 115 +++ gcc/config/riscv/riscv-modes.def | 30 +- gcc/config/riscv/riscv-vector-builtins-types.def | 50 ++ gcc/config/riscv/riscv-vector-builtins.cc| 7 +- gcc/config/riscv/riscv-vector-builtins.def | 55 ++- gcc/config/riscv/riscv-vector-builtins.h | 1 + gcc/config/riscv/riscv-vector-switch.def | 36 +++ 7 files changed, 291 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/genrvv-type-indexer.cc b/gcc/config/riscv/genrvv-type-indexer.cc index 27cbd14982c1..8626ddeaaa8b 100644 --- a/gcc/config/riscv/genrvv-type-indexer.cc +++ b/
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic
https://gcc.gnu.org/g:958e43c1baa3d40fcbb206bb8469c7782e044e7a commit 958e43c1baa3d40fcbb206bb8469c7782e044e7a Author: Feng Wang Date: Mon Jun 17 01:59:57 2024 +0000 RISC-V: Add Zvfbfmin and Zvfbfwma intrinsic v3: Modify warning message in riscv.cc v2: Rebase According to the intrinsic doc, the 'Zvfbfmin' and 'Zvfbfwma' intrinsic functions are added by this patch. Signed-off-by: Feng Wang gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (class vfncvtbf16_f): Add 'Zvfbfmin' intrinsic in bases. (class vfwcvtbf16_f): Ditto. (class vfwmaccbf16): Add 'Zvfbfwma' intrinsic in bases. (BASE): Add BASE macro for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv-vector-builtins-bases.h: Add declaration for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv-vector-builtins-functions.def (REQUIRED_EXTENSIONS): Add builtins def for 'Zvfbfmin' and 'Zvfbfwma'. (vfncvtbf16_f): Ditto. (vfncvtbf16_f_frm): Ditto. (vfwcvtbf16_f): Ditto. (vfwmaccbf16): Ditto. (vfwmaccbf16_frm): Ditto. * config/riscv/riscv-vector-builtins-shapes.cc (supports_vectype_p): Add vector intrinsic build judgment for BFloat16. (build_all): Ditto. (BASE_NAME_MAX_LEN): Adjust max length. * config/riscv/riscv-vector-builtins-types.def (DEF_RVV_F32_OPS): Add new operand type for BFloat16. (vfloat32mf2_t): Ditto. (vfloat32m1_t): Ditto. (vfloat32m2_t): Ditto. (vfloat32m4_t): Ditto. (vfloat32m8_t): Ditto. * config/riscv/riscv-vector-builtins.cc (DEF_RVV_F32_OPS): Ditto. (validate_instance_type_required_extensions): Add required_ext checking for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv-vector-builtins.h (enum required_ext): Add required_ext declaration for 'Zvfbfmin' and 'Zvfbfwma'. (reqired_ext_to_isa_name): Ditto. (required_extensions_specified): Ditto. (struct function_group_info): Add match case for 'Zvfbfmin' and 'Zvfbfwma'. * config/riscv/riscv.cc (riscv_validate_vector_type): Add required_ext checking for 'Zvfbfmin' and 'Zvfbfwma'. 
(cherry picked from commit 281f021ed4fbf9c2336048e34b6b40c6f7119baa) Diff: --- gcc/config/riscv/riscv-vector-builtins-bases.cc| 69 ++ gcc/config/riscv/riscv-vector-builtins-bases.h | 7 +++ .../riscv/riscv-vector-builtins-functions.def | 15 + gcc/config/riscv/riscv-vector-builtins-shapes.cc | 31 +- gcc/config/riscv/riscv-vector-builtins-types.def | 13 gcc/config/riscv/riscv-vector-builtins.cc | 67 + gcc/config/riscv/riscv-vector-builtins.h | 34 +++ gcc/config/riscv/riscv.cc | 13 ++-- 8 files changed, 232 insertions(+), 17 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 6483faba39c4..193392fbcc2a 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2417,6 +2417,60 @@ public: } }; +/* Implements vfncvtbf16_f. */ +template +class vfncvtbf16_f : public function_base +{ +public: + bool has_rounding_mode_operand_p () const override + { +return FRM_OP == HAS_FRM; + } + + bool may_require_frm_p () const override { return true; } + + rtx expand (function_expander &e) const override + { +return e.use_exact_insn (code_for_pred_trunc_to_bf16 (e.vector_mode ())); + } +}; + +/* Implements vfwcvtbf16_f. */ +class vfwcvtbf16_f : public function_base +{ +public: + rtx expand (function_expander &e) const override + { +return e.use_exact_insn (code_for_pred_extend_bf16_to (e.vector_mode ())); + } +}; + +/* Implements vfwmaccbf16. 
*/ +template +class vfwmaccbf16 : public function_base +{ +public: + bool has_rounding_mode_operand_p () const override + { +return FRM_OP == HAS_FRM; + } + + bool may_require_frm_p () const override { return true; } + + bool has_merge_operand_p () const override { return false; } + + rtx expand (function_expander &e) const override + { +if (e.op_info->op == OP_TYPE_vf) + return e.use_widen_ternop_insn ( + code_for_pred_widen_bf16_mul_scalar (e.vector_mode ())); +if (e.op_info->op == OP_TYPE_vv) + return e.use_widen_ternop_insn ( + code_for_pred_widen_bf16_mul (e.vector_mode ())); +gcc_unreachable (); + } +}; + static CONSTEXPR const vsetvl vsetvl_obj; static CONSTEXPR const vsetvl vsetvlmax_obj; static CONSTEXPR const loadstore vle_obj; @@ -2734,6 +2788,14 @@ static CONSTEXPR const crypto_vv vsm4r_obj; static CONSTEXPR const vsm3me vsm3me_obj; static CONSTEXPR const vaeskf2_vsm3c
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Add md files for vector BFloat16
https://gcc.gnu.org/g:c2b212fbed411183ca108323d674fcd62028c851 commit c2b212fbed411183ca108323d674fcd62028c851 Author: Feng Wang Date: Tue Jun 18 06:13:35 2024 + RISC-V: Add md files for vector BFloat16 V3: Add Bfloat16 vector insn in generic-vector-ooo.md v2: Rebase According to the BFloat16 spec, some vector iterators and new pattern are added in md files. Signed-off-by: Feng Wang gcc/ChangeLog: * config/riscv/generic-vector-ooo.md: Add def_insn_reservation for vector BFloat16. * config/riscv/riscv.md: Add new insn name for vector BFloat16. * config/riscv/vector-iterators.md: Add some iterators for vector BFloat16. * config/riscv/vector.md: Add some attribute for vector BFloat16. * config/riscv/vector-bfloat16.md: New file. Add insn pattern vector BFloat16. (cherry picked from commit 9f521632dd9ce71ce28ff1da9c161f76bc20fe3e) Diff: --- gcc/config/riscv/generic-vector-ooo.md | 4 +- gcc/config/riscv/riscv.md | 13 ++- gcc/config/riscv/vector-bfloat16.md| 135 ++ gcc/config/riscv/vector-iterators.md | 169 - gcc/config/riscv/vector.md | 103 +--- 5 files changed, 407 insertions(+), 17 deletions(-) diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md index 5e933c838418..efe6bc41e864 100644 --- a/gcc/config/riscv/generic-vector-ooo.md +++ b/gcc/config/riscv/generic-vector-ooo.md @@ -53,7 +53,7 @@ (define_insn_reservation "vec_fcmp" 3 (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\ vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\ - vfncvtftoi,vfncvtftof") + vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16") "vxu_ooo_issue,vxu_ooo_alu") ;; Vector integer multiplication. @@ -69,7 +69,7 @@ ;; Vector float multiplication and FMA. (define_insn_reservation "vec_fmul" 6 - (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd") + (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16") "vxu_ooo_issue,vxu_ooo_alu") ;; Vector crypto, assumed to be a generic operation for now. 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 5dee837a5878..379015c60de8 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -200,6 +200,7 @@ RVVMF64BI,RVVMF32BI,RVVMF16BI,RVVMF8BI,RVVMF4BI,RVVMF2BI,RVVM1BI, RVVM8QI,RVVM4QI,RVVM2QI,RVVM1QI,RVVMF2QI,RVVMF4QI,RVVMF8QI, RVVM8HI,RVVM4HI,RVVM2HI,RVVM1HI,RVVMF2HI,RVVMF4HI, + RVVM8BF,RVVM4BF,RVVM2BF,RVVM1BF,RVVMF2BF,RVVMF4BF, RVVM8HF,RVVM4HF,RVVM2HF,RVVM1HF,RVVMF2HF,RVVMF4HF, RVVM8SI,RVVM4SI,RVVM2SI,RVVM1SI,RVVMF2SI, RVVM8SF,RVVM4SF,RVVM2SF,RVVM1SF,RVVMF2SF, @@ -219,6 +220,11 @@ RVVM2x4HI,RVVM1x4HI,RVVMF2x4HI,RVVMF4x4HI, RVVM2x3HI,RVVM1x3HI,RVVMF2x3HI,RVVMF4x3HI, RVVM4x2HI,RVVM2x2HI,RVVM1x2HI,RVVMF2x2HI,RVVMF4x2HI, + RVVM1x8BF,RVVMF2x8BF,RVVMF4x8BF,RVVM1x7BF,RVVMF2x7BF, + RVVMF4x7BF,RVVM1x6BF,RVVMF2x6BF,RVVMF4x6BF,RVVM1x5BF, + RVVMF2x5BF,RVVMF4x5BF,RVVM2x4BF,RVVM1x4BF,RVVMF2x4BF, + RVVMF4x4BF,RVVM2x3BF,RVVM1x3BF,RVVMF2x3BF,RVVMF4x3BF, + RVVM4x2BF,RVVM2x2BF,RVVM1x2BF,RVVMF2x2BF,RVVMF4x2BF, RVVM1x8HF,RVVMF2x8HF,RVVMF4x8HF,RVVM1x7HF,RVVMF2x7HF, RVVMF4x7HF,RVVM1x6HF,RVVMF2x6HF,RVVMF4x6HF,RVVM1x5HF, RVVMF2x5HF,RVVMF4x5HF,RVVM2x4HF,RVVM1x4HF,RVVMF2x4HF, @@ -462,6 +468,10 @@ ;; vsm4rcrypto vector SM4 Rounds instructions ;; vsm3me crypto vector SM3 Message Expansion instructions ;; vsm3ccrypto vector SM3 Compression instructions +;; 18.Vector BF16 instrctions +;; vfncvtbf16 vector narrowing single floating-point to brain floating-point instruction +;; vfwcvtbf16 vector widening brain floating-point to single floating-point instruction +;; vfwmaccbf16 vector BF16 widening multiply-accumulate (define_attr "type" "unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore, mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, @@ -483,7 +493,7 @@ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down, vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll, 
vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz, - vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c" + vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16" (cond [(eq_attr "got" "load") (const_string "load") ;; If a doubleword move uses these expensive instructions, @@ -4373,6 +4383,7 @@ (include "generic-ooo.md") (include "vector.md") (include "vector-crypto.md") +(include "vector-bfloat16.md") (include "zicond.md") (include "sfb.md") (include "zc.md") diff --git a/gcc/config/riscv/vector-bfloat16.md b/gcc/config/riscv/vector-bfloat16.md new file mode 100644 index ..562aa8ee5ed7 --- /dev/null +++ b/g
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix testcase for vector .SAT_SUB in zip benchmark
https://gcc.gnu.org/g:21814631a11523712913a1fdec4055176aa89e28 commit 21814631a11523712913a1fdec4055176aa89e28 Author: Edwin Lu Date: Fri Jul 12 11:31:16 2024 -0700 RISC-V: Fix testcase for vector .SAT_SUB in zip benchmark The following testcase was not properly testing anything due to an uninitialized variable. As a result, the loop was not iterating through the testing data, but instead on undefined values which could cause an unexpected abort. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: initialize variable Signed-off-by: Edwin Lu (cherry picked from commit 4306f76192bc7ab71c5997a7e2c95320505029ab) Diff: --- gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h index d238c6392def..309d63377d53 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h @@ -9,6 +9,7 @@ main () for (i = 0; i < sizeof (DATA) / sizeof (DATA[0]); i++) { + d = DATA[i]; RUN_BINARY_VX (&d.x[N], d.b, N); for (k = 0; k < N; k++)
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement locality for __builtin_prefetch
https://gcc.gnu.org/g:e8be9b1c419d1b59a6f5fe5f166c43bfea27ec0e commit e8be9b1c419d1b59a6f5fe5f166c43bfea27ec0e Author: Monk Chiang Date: Thu Jul 6 14:05:17 2023 +0800 RISC-V: Implement locality for __builtin_prefetch The patch adds the Zihintntl instructions in the prefetch pattern. Zicbop has prefetch instructions. Zihintntl has NTL instructions. Insert NTL instructions before prefetch instruction, if target has Zihintntl extension. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_print_operand): Add 'L' letter to print zihintntl instructions string. * config/riscv/riscv.md (prefetch): Add zihintntl instructions. gcc/testsuite/ChangeLog: * gcc.target/riscv/prefetch-zicbop.c: New test. * gcc.target/riscv/prefetch-zihintntl.c: New test. (cherry picked from commit bf26413fc4081dfd18b915580b35bdb71481327e) Diff: --- gcc/config/riscv/riscv.cc | 22 ++ gcc/config/riscv/riscv.md | 10 +++--- gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c | 20 .../gcc.target/riscv/prefetch-zihintntl.c | 20 4 files changed, 69 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index d4553aacee96..9bedefa74c35 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -6488,6 +6488,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char *p) 'A' Print the atomic operation suffix for memory model OP. 'I' Print the LR suffix for memory model OP. 'J' Print the SC suffix for memory model OP. + 'L' Print a non-temporal locality hints instruction. 'z' Print x0 if OP is zero, otherwise print OP normally. 'i' Print i if the operand is not a register. 'S' Print shift-index of single-bit mask OP. 
@@ -6682,6 +6683,27 @@ riscv_print_operand (FILE *file, rtx op, int letter) break; } +case 'L': + { + const char *ntl_hint = NULL; + switch (INTVAL (op)) + { + case 0: + ntl_hint = "ntl.all"; + break; + case 1: + ntl_hint = "ntl.pall"; + break; + case 2: + ntl_hint = "ntl.p1"; + break; + } + + if (ntl_hint) + asm_fprintf (file, "%s\n\t", ntl_hint); + break; + } + case 'i': if (code != REG) fputs ("i", file); diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 379015c60de8..46c46039c33a 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -4113,12 +4113,16 @@ { switch (INTVAL (operands[1])) { -case 0: return "prefetch.r\t%a0"; -case 1: return "prefetch.w\t%a0"; +case 0: return TARGET_ZIHINTNTL ? "%L2prefetch.r\t%a0" : "prefetch.r\t%a0"; +case 1: return TARGET_ZIHINTNTL ? "%L2prefetch.w\t%a0" : "prefetch.w\t%a0"; default: gcc_unreachable (); } } - [(set_attr "type" "store")]) + [(set_attr "type" "store") + (set (attr "length") (if_then_else (and (match_test "TARGET_ZIHINTNTL") + (match_test "IN_RANGE (INTVAL (operands[2]), 0, 2)")) + (const_string "8") + (const_string "4")))]) (define_insn "riscv_prefetchi_" [(unspec_volatile:X [(match_operand:X 0 "address_operand" "r") diff --git a/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c b/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c new file mode 100644 index ..0faa120f1f79 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/prefetch-zicbop.c @@ -0,0 +1,20 @@ +/* { dg-do compile target { { rv64-*-*}}} */ +/* { dg-options "-march=rv64gc_zicbop -mabi=lp64" } */ + +void foo (char *p) +{ + __builtin_prefetch (p, 0, 0); + __builtin_prefetch (p, 0, 1); + __builtin_prefetch (p, 0, 2); + __builtin_prefetch (p, 0, 3); + __builtin_prefetch (p, 1, 0); + __builtin_prefetch (p, 1, 1); + __builtin_prefetch (p, 1, 2); + __builtin_prefetch (p, 1, 3); +} + +/* { dg-final { scan-assembler-not "ntl.all\t" } } */ +/* { dg-final { scan-assembler-not "ntl.pall\t" } } */ +/* { dg-final { 
scan-assembler-not "ntl.p1\t" } } */ +/* { dg-final { scan-assembler-times "prefetch.r" 4 } } */ +/* { dg-final { scan-assembler-times "prefetch.w" 4 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c b/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c new file mode 100644 index ..78a3afe68333 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/prefetch-zihintntl.c @@ -0,0 +1,20 @@ +/* { dg-do compile target { { rv64-*-*}}} */ +/* { dg-options "-march=rv64gc_zicbop_zihintntl -mabi=lp64" } */ + +void foo (char *p) +{ + __builtin_prefetch (p, 0, 0); + __builtin_prefetch (p, 0, 1); + __builtin_prefetch (p, 0, 2); + __builtin_prefetch (p, 0, 3); + __builtin_prefetch (p, 1, 0);
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Attribute parser: Use alloca() instead of new + std::unique_ptr
https://gcc.gnu.org/g:64b4b5211aa664e224e3cd722ab5aa11f278aa68 commit 64b4b5211aa664e224e3cd722ab5aa11f278aa68 Author: Christoph Müllner Date: Fri Jul 5 04:48:15 2024 +0200 RISC-V: Attribute parser: Use alloca() instead of new + std::unique_ptr Allocating an object on the heap with new, wrapping it in a std::unique_ptr and finally getting the buffer via buf.get() is a correct way to allocate a buffer that is automatically freed on return. However, a simple invocation of alloca() does the same with less overhead. gcc/ChangeLog: * config/riscv/riscv-target-attr.cc (riscv_target_attr_parser::parse_arch): Replace new + std::unique_ptr by alloca(). (riscv_process_one_target_attr): Likewise. (riscv_process_target_attr): Likewise. Signed-off-by: Christoph Müllner (cherry picked from commit 5040c273484d7123a40a99cdeb434cecbd17a2e9) Diff: --- gcc/config/riscv/riscv-target-attr.cc | 9 +++-- 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 0bbe7df25d19..3d7753f64574 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -109,8 +109,7 @@ riscv_target_attr_parser::parse_arch (const char *str) { /* Parsing the extension list like "+[,+]*". 
*/ size_t len = strlen (str); - std::unique_ptr buf (new char[len+1]); - char *str_to_check = buf.get (); + char *str_to_check = (char *) alloca (len + 1); strcpy (str_to_check, str); const char *token = strtok_r (str_to_check, ",", &str_to_check); m_subset_list = riscv_cmdline_subset_list ()->clone (); @@ -247,8 +246,7 @@ riscv_process_one_target_attr (char *arg_str, return false; } - std::unique_ptr buf (new char[len+1]); - char *str_to_check = buf.get(); + char *str_to_check = (char *) alloca (len + 1); strcpy (str_to_check, arg_str); char *arg = strchr (str_to_check, '='); @@ -334,8 +332,7 @@ riscv_process_target_attr (tree fndecl, tree args, location_t loc, return false; } - std::unique_ptr buf (new char[len+1]); - char *str_to_check = buf.get (); + char *str_to_check = (char *) alloca (len + 1); strcpy (str_to_check, TREE_STRING_POINTER (args)); /* Used to catch empty spaces between semi-colons i.e.
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Allow adding enabled extension via target arch attributes
https://gcc.gnu.org/g:4157d59413a5f35808603e06de06cfb388811a65 commit 4157d59413a5f35808603e06de06cfb388811a65 Author: Christoph Müllner Date: Sat Jul 6 17:03:18 2024 +0200 RISC-V: Allow adding enabled extension via target arch attributes The set of enabled extensions can be extended via target arch function attributes by listing each extension with a '+' prefix and a comma as list separator. E.g.: __attribute__((target("arch=+zba,+zbb"))) void foo(); The programmer intends to ensure that one or more extensions are enabled when building the code. This is independent of the arch string that is passed at build time via the -march= option. Therefore, it is reasonable to allow enabling extensions via target arch attributes, which have already been enabled via the -march= string. The subset list code already supports such duplication for implied extensions. This patch adds an interface so the subset list parser can be switched into a mode where duplication is allowed. This commit fixes the following regressed test cases: * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-39.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-42.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-43.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-44.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-45.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-46.c gcc/ChangeLog: * common/config/riscv/riscv-common.cc (riscv_subset_list::add): Allow adding enabled extension if m_allow_adding_dup is set. * config/riscv/riscv-subset.h: Add m_allow_adding_dup and setter. * config/riscv/riscv-target-attr.cc (riscv_target_attr_parser::parse_arch): Allow adding enabled extensions. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr115554.c: Change expected fail to expected pass. * gcc.target/riscv/target-attr-16.c: New test. 
Signed-off-by: Christoph Müllner (cherry picked from commit 61c21a719e205f70bd046c6a0275d1a3fd6341a4) Diff: --- gcc/common/config/riscv/riscv-common.cc | 17 +-- gcc/config/riscv/riscv-subset.h | 5 + gcc/config/riscv/riscv-target-attr.cc | 3 +++ gcc/testsuite/gcc.target/riscv/pr115554.c | 2 -- gcc/testsuite/gcc.target/riscv/target-attr-16.c | 28 + 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 8e9beb6801f9..682826c0e344 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -702,12 +702,17 @@ riscv_subset_list::add (const char *subset, int major_version, ext->minor_version = minor_version; } else - error_at ( - m_loc, - "%<-march=%s%>: extension %qs appear more than one time", - m_arch, - subset); - + { + /* The extension is already in the list. */ + if (!m_allow_adding_dup + || ext->major_version != major_version + || ext->minor_version != minor_version) + error_at ( + m_loc, + "%<-march=%s%>: extension %qs appear more than one time", + m_arch, + subset); + } return; } else if (strlen (subset) == 1 && !standard_extensions_p (subset)) diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h index 279716feab57..dace4de65753 100644 --- a/gcc/config/riscv/riscv-subset.h +++ b/gcc/config/riscv/riscv-subset.h @@ -65,6 +65,9 @@ private: /* Number of subsets. */ unsigned m_subset_num; + /* Allow adding the same extension more than once. 
*/ + bool m_allow_adding_dup; + riscv_subset_list (const char *, location_t); const char *parsing_subset_version (const char *, const char *, unsigned *, @@ -109,6 +112,8 @@ public: void set_loc (location_t); + void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; } + void finalize (); }; diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 317806143949..57235c9c0a7e 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -109,6 +109,8 @@ riscv_target_attr_parser::parse_arch (const char *str) ? riscv_subset_list::parse (local_arch_str, m_loc) : riscv_cmdline_subset_list ()->clone (); m_subset_list->set_loc (m_loc); + m_subset_list->set_allow_adding_dup (true); + while (token) { if (token[0] != '+') @@ -134,6 +136,7 @@ riscv_target_attr_parser::parse_arch (const char *str) token = strtok_r (NULL, ",", &str_to_check); } +
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Rewrite target attribute handling
https://gcc.gnu.org/g:fa716b37c5c85663b8f73d725a2d4020116e2a77 commit fa716b37c5c85663b8f73d725a2d4020116e2a77 Author: Christoph Müllner Date: Sat Jun 22 21:59:04 2024 +0200 RISC-V: Rewrite target attribute handling The target-arch attribute handling in RISC-V is only a few months old, but already saw a rewrite (9941f0295a14), which addressed an important issue. This rewrite introduced a hash table in the backend, which is used to keep track of target-arch attributes of all functions. The index of this hash table is the pointer to the function declaration object (fndecl). However, objects like these don't have the lifetime that is assumed here, which resulted in observing two fndecl objects with the same address for different objects (triggering the assertion in riscv_func_target_put() -- see also PR115562). This patch removes the hash table approach in favor of storing target specific options using the DECL_FUNCTION_SPECIFIC_TARGET() macro, which is also used by other backends and is specifically designed for this purpose (https://gcc.gnu.org/onlinedocs/gccint/Function-Properties.html). To have an accessible field in the target options, we need to adjust riscv.opt and introduce the field riscv_arch_string (for the already existing option '-march='). Using this macro allows to remove much code from riscv-common.cc, which controls access to the objects 'func_target_table' and 'current_subset_list'. One thing to mention is, that we had two subset lists: current_subset_list and cmdline_subset_list, with the latter being introduced recently for target attribute handling. This patch reduces them back to one (cmdline_subset_list) which contains the list of extensions that have been enabled by the command line arguments. Note that the patch keeps the existing behavior of rejecting duplications of extensions when added via the '+' operator in a function target attribute. E.g. "-march=rv64gc_zbb" and "arch=+zbb" will trigger an error (see pr115554.c). 
However, at the same time this patch breaks the acceptance of adding implied extensions, which causes the following six regressions (with the error "extension 'EXT' appear more than one time"): * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-39.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-42.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-43.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-44.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-45.c * gcc.target/riscv/rvv/base/target_attribute_v_with_intrinsic-46.c New tests were added to document the behavior and to ensure it won't regress. This patch did not show any regressions for rv32/rv64 and fixes the ICEs from PR115554 and PR115562. PR target/115554 PR target/115562 gcc/ChangeLog: * common/config/riscv/riscv-common.cc (struct riscv_func_target_info): Remove. (struct riscv_func_target_hasher): Likewise. (riscv_func_decl_hash): Likewise. (riscv_func_target_hasher::hash): Likewise. (riscv_func_target_hasher::equal): Likewise. (riscv_current_subset_list): Likewise. (riscv_cmdline_subset_list): Remove obsolete space. (riscv_func_target_table_lazy_init): Remove. (riscv_func_target_get): Likewise. (riscv_func_target_put): Likewise. (riscv_func_target_remove_and_destory): Likewise. (riscv_arch_str): Generate from cmdline_subset_list. (riscv_set_arch_by_subset_list): Don't set current_subset_list. (riscv_parse_arch_string): Remove current_subset_list. * config/riscv/riscv-c.cc (riscv_cpu_cpp_builtins): Get subset list via riscv_cmdline_subset_list(). * config/riscv/riscv-subset.h (riscv_current_subset_list): Remove prototype. (riscv_func_target_get): Likewise. (riscv_func_target_put): Likewise. (riscv_func_target_remove_and_destory): Likewise. * config/riscv/riscv-target-attr.cc (riscv_target_attr_parser::parse_arch): Build base arch string from existing target options, if any. 
(riscv_target_attr_parser::update_settings): Store new arch string in target options. (riscv_process_one_target_attr): Whitespace fix. (riscv_process_target_attr): Drop opts argument. (riscv_option_valid_attribute_p): Properly save, change and restore target options. * config/riscv/riscv.cc (get_arch_str): New function. (riscv_declare_function_name): Get arch string for option-arch directive from function's target options. *
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Fix liveness computation for shift/rotate counts in ext-dce
https://gcc.gnu.org/g:fa543ce46c2e205f2813e13fd9d4df65e8544b87 commit fa543ce46c2e205f2813e13fd9d4df65e8544b87 Author: Jeff Law Date: Mon Jul 15 18:15:33 2024 -0600 Fix liveness computation for shift/rotate counts in ext-dce So as I've noted before I believe the control flow in ext-dce.cc is horribly messy. While investigating a fix for 115877 I came across another problem related to control flow handling. Specifically, if we have a binary op which implies the 2nd operand is fully live, then we'd actually fail to mark that operand as live. We essentially broke out of the loop which was supposed to be safe. But Y was a REG and if Y is a REG or CONST_INT we skip sub-rtxs and thus failed to process that operand (the shift count) at all. Rather than muck around with control flow, we can just set all the bits as live in DST_MASK and let normal processing continue. With all the bits live in DST_MASK all the bits implied by the mode of the argument will also be live. No testcase. Bootstrapped and regression tested on x86. Pushing to the trunk. gcc/ * ext-dce.cc (ext_dce_process_uses): Simplify control flow and fix liveness computation for shift/rotate counts. (cherry picked from commit b31b8af807f5459674b0b310cb62a5bc81b676e7) Diff: --- gcc/ext-dce.cc | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 91789d283fcd..7ecb99fef81d 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -632,10 +632,11 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap live_tmp) else if (!CONSTANT_P (y)) break; - /* We might have (ashift (const_int 1) (reg...)) */ - /* XXX share this logic with code below. */ + /* We might have (ashift (const_int 1) (reg...)) +By setting dst_mask we can continue iterating on the +the next operand and it will be considered fully live. */ if (binop_implies_op2_fully_live (GET_CODE (src))) - break; + dst_mask = -1; /* If this was anything but a binary operand, break the inner loop. 
This is conservatively correct as it will cause the
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Revert "RISC-V: Attribute parser: Use alloca() instead of new + std::unique_ptr"
https://gcc.gnu.org/g:20fe3e21e824daeb20679a24d3de78969c17710c commit 20fe3e21e824daeb20679a24d3de78969c17710c Author: Christoph Müllner Date: Mon Jul 15 23:42:39 2024 +0200 Revert "RISC-V: Attribute parser: Use alloca() instead of new + std::unique_ptr" This reverts commit 5040c273484d7123a40a99cdeb434cecbd17a2e9. (cherry picked from commit eb0c163aada970b8351067b17121f013fc58dbc9) Diff: --- gcc/config/riscv/riscv-target-attr.cc | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 57235c9c0a7e..1645a6692177 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -101,7 +101,8 @@ riscv_target_attr_parser::parse_arch (const char *str) { /* Parsing the extension list like "+[,+]*". */ size_t len = strlen (str); - char *str_to_check = (char *) alloca (len + 1); + std::unique_ptr buf (new char[len+1]); + char *str_to_check = buf.get (); strcpy (str_to_check, str); const char *token = strtok_r (str_to_check, ",", &str_to_check); const char *local_arch_str = global_options.x_riscv_arch_string; @@ -253,7 +254,8 @@ riscv_process_one_target_attr (char *arg_str, return false; } - char *str_to_check = (char *) alloca (len + 1); + std::unique_ptr buf (new char[len+1]); + char *str_to_check = buf.get(); strcpy (str_to_check, arg_str); char *arg = strchr (str_to_check, '='); @@ -339,7 +341,8 @@ riscv_process_target_attr (tree args, location_t loc) return false; } - char *str_to_check = (char *) alloca (len + 1); + std::unique_ptr buf (new char[len+1]); + char *str_to_check = buf.get (); strcpy (str_to_check, TREE_STRING_POINTER (args)); /* Used to catch empty spaces between semi-colons i.e.
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] Add debug counter for ext_dce
https://gcc.gnu.org/g:b33c9eebd9581a86d56e3cdba2bef96fda1727f4 commit b33c9eebd9581a86d56e3cdba2bef96fda1727f4 Author: Andrew Pinski Date: Tue Jul 16 09:53:20 2024 -0700 Add debug counter for ext_dce Like r15-1610-gb6215065a5b143 (which adds one for late_combine), adding one for ext_dce is useful to debug some issues with this pass. Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * dbgcnt.def (ext_dce): New debug counter. * ext-dce.cc (ext_dce_try_optimize_insn): Reject the insn if the debug counter says so. (ext_dce): Rename to ... (ext_dce_execute): This. (pass_ext_dce::execute): Update for the name of ext_dce. Signed-off-by: Andrew Pinski (cherry picked from commit 7c3287f3613210d4f98c8095bc739bea6582bfbb) Diff: --- gcc/dbgcnt.def | 1 + gcc/ext-dce.cc | 16 +--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def index ed9f062eac2c..4e7aaeae2da5 100644 --- a/gcc/dbgcnt.def +++ b/gcc/dbgcnt.def @@ -162,6 +162,7 @@ DEBUG_COUNTER (dom_unreachable_edges) DEBUG_COUNTER (dse) DEBUG_COUNTER (dse1) DEBUG_COUNTER (dse2) +DEBUG_COUNTER (ext_dce) DEBUG_COUNTER (form_fma) DEBUG_COUNTER (gcse2_delete) DEBUG_COUNTER (gimple_unroll) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 7ecb99fef81d..7270de2a3bfe 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see #include "rtl-iter.h" #include "df.h" #include "print-rtl.h" +#include "dbgcnt.h" /* These should probably move into a C++ class. */ static vec livein; @@ -312,6 +313,15 @@ ext_dce_try_optimize_insn (rtx_insn *insn, rtx set) print_rtl_single (dump_file, SET_SRC (set)); } + /* We decided to turn do the optimization but allow it to be rejected for + bisection purposes. 
*/ + if (!dbg_cnt (::ext_dce)) +{ + if (dump_file) + fprintf (dump_file, "Rejected due to debug counter.\n"); + return; +} + new_pattern = simplify_gen_subreg (GET_MODE (src), inner, GET_MODE (inner), 0); /* simplify_gen_subreg may fail in which case NEW_PATTERN will be NULL. @@ -881,8 +891,8 @@ static bool ext_dce_rd_confluence_n (edge) { return true; } are never read. Turn such extensions into SUBREGs instead which can often be propagated away. */ -static void -ext_dce (void) +void +ext_dce_execute (void) { df_analyze (); ext_dce_init (); @@ -929,7 +939,7 @@ public: virtual bool gate (function *) { return flag_ext_dce && optimize > 0; } virtual unsigned int execute (function *) { - ext_dce (); + ext_dce_execute (); return 0; }
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Fix testcase missing arch attribute
https://gcc.gnu.org/g:bdb2115f7ee854a6daecf6079274700321f1a2b5 commit bdb2115f7ee854a6daecf6079274700321f1a2b5 Author: Edwin Lu Date: Tue Jul 16 17:43:45 2024 -0700 RISC-V: Fix testcase missing arch attribute The C + F extension implies the zcf extension on rv32. Add missing zcf extension for the rv32 target. gcc/testsuite/ChangeLog: * gcc.target/riscv/target-attr-16.c: Update expected assembly Signed-off-by: Edwin Lu (cherry picked from commit 5bb01e91d40c34e8f8230b142f7ebff3d6aa88d1) Diff: --- gcc/testsuite/gcc.target/riscv/target-attr-16.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/riscv/target-attr-16.c b/gcc/testsuite/gcc.target/riscv/target-attr-16.c index 1c7badccdeee..c6b626d0c6ce 100644 --- a/gcc/testsuite/gcc.target/riscv/target-attr-16.c +++ b/gcc/testsuite/gcc.target/riscv/target-attr-16.c @@ -24,5 +24,5 @@ void bar (void) { } -/* { dg-final { scan-assembler-times ".option arch, rv32i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zba1p0_zbb1p0" 4 { target { rv32 } } } } */ +/* { dg-final { scan-assembler-times ".option arch, rv32i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zcf1p0_zba1p0_zbb1p0" 4 { target { rv32 } } } } */ /* { dg-final { scan-assembler-times ".option arch, rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zaamo1p0_zalrsc1p0_zca1p0_zcd1p0_zba1p0_zbb1p0" 4 { target { rv64 } } } } */
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PR rtl-optimization/115877][2/n] Improve liveness computation for constant initialization
https://gcc.gnu.org/g:faadb6b663ea7bda38ebfcd1ed772882cdc725da commit faadb6b663ea7bda38ebfcd1ed772882cdc725da Author: Jeff Law Date: Sun Jul 21 08:41:28 2024 -0600 [PR rtl-optimization/115877][2/n] Improve liveness computation for constant initialization While debugging pr115877, I noticed we were failing to remove the destination register from LIVENOW bitmap when it was set to a constant value. ie (set (dest) (const_int)). This was a trivial oversight in safe_for_live_propagation. I don't have an example of this affecting code generation, but it certainly could. More importantly, by making LIVENOW more accurate it's easier to debug when LIVENOW differs from expectations. As with the prior patch this has been tested as part of a larger patchset with the crosses as well as individually on x86_64. Pushing to the trunk, PR rtl-optimization/115877 gcc/ * ext-dce.cc (safe_for_live_propagation): Handle RTX_CONST_OBJ. (cherry picked from commit 9d8ef2711dfecd093077aef6123d9e93ea23454e) Diff: --- gcc/ext-dce.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index d431f8ac12d4..59bcc4572d57 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -69,6 +69,7 @@ safe_for_live_propagation (rtx_code code) switch (GET_RTX_CLASS (code)) { case RTX_OBJ: + case RTX_CONST_OBJ: return true; case RTX_COMPARE:
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Rearrange the test helper files for vector .SAT_*
https://gcc.gnu.org/g:97d90509f1d6b8189d6492d51383c06239c57bbe commit 97d90509f1d6b8189d6492d51383c06239c57bbe Author: Pan Li Date: Sat Jul 20 10:43:44 2024 +0800 RISC-V: Rearrange the test helper files for vector .SAT_* Rearrange the test help header files, as well as align the name conventions. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvv_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_scalar.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vvx_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx.h: Move to... * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary_vx_run.h: ...here. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: Adjust the include file names. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-24.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-26.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-28.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-29.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-30.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-31.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-32.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-5.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-6.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-7.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-8.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-9.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-10.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-11.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-12.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-13.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-14.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-15.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-16.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-17.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-18.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-19.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-20.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-21.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-22.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-23.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-24.c: Ditto. 
* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-25.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-26.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-27.c: Ditto. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-28.c: Ditto.
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [PR rtl-optimization/115877] Fix livein computation for ext-dce
https://gcc.gnu.org/g:3e82d5753917b74abcbb9212465eec8d89fef824 commit 3e82d5753917b74abcbb9212465eec8d89fef824 Author: Jeff Law Date: Sun Jul 21 07:36:37 2024 -0600 [PR rtl-optimization/115877] Fix livein computation for ext-dce So I'm not yet sure how I'm going to break everything down, but this is easy enough to break out as 1/N of ext-dce fixes/improvements. When handling uses in an insn, we first determine what bits are set in the destination which is represented in DST_MASK. Then we use that to refine what bits are live in the source operands. In the source operand handling section we *modify* DST_MASK if the source operand is a SUBREG (ugh!). So if the first operand is a SUBREG, then we can incorrectly compute which bit groups are live in the second operand, especially if it is a SUBREG as well. This was seen when testing a larger set of patches on the rl78 port (builtin-arith-overflow-p-7 & pr71631 execution failures), so no new test for this bugfix. Run through my tester (in conjunction with other ext-dce changes) on the various cross targets. Run individually through a bootstrap and regression test cycle on x86_64 as well. Pushing to the trunk. PR rtl-optimization/115877 gcc/ * ext-dce.cc (ext_dce_process_uses): Restore the value of DST_MASK for each operand. (cherry picked from commit 91e468b72dafc9dcd5dcf7915f1d0ef172264d53) Diff: --- gcc/ext-dce.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 7270de2a3bfe..d431f8ac12d4 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -591,8 +591,10 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap live_tmp) making things live. Breaking from this loop will cause the iterator to work on sub-rtxs, so it is safe to break if we see something we don't know how to handle. */ + unsigned HOST_WIDE_INT save_mask = dst_mask; for (;;) { + dst_mask = save_mask; /* Strip an outer paradoxical subreg. The bits outside the inner mode are don't cares. 
So we can just strip and process the inner object. */
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [NFC][PR rtl-optimization/115877] Avoid setting irrelevant bit groups as live in ext-dce
https://gcc.gnu.org/g:316f9617dcf040ccad190d853ee7f94b2f9caace commit 316f9617dcf040ccad190d853ee7f94b2f9caace Author: Jeff Law Date: Mon Jul 22 08:45:10 2024 -0600 [NFC][PR rtl-optimization/115877] Avoid setting irrelevant bit groups as live in ext-dce Another patch to refine liveness computations. This should be NFC and is designed to help debugging. In simplest terms the patch avoids setting bit groups outside the size of a pseudo as live. Consider a HImode pseudo, bits 16..63 for such a pseudo don't really have meaning, yet we often set bit groups related to bits 16.63 on in the liveness bitmaps. This makes debugging harder than it needs to be by simply having larger bitmaps to verify when walking through the code in a debugger. This has been bootstrapped and regression tested on x86_64. It's also been tested on the crosses in my tester without regressions. Pushing to the trunk, PR rtl-optimization/115877 gcc/ * ext-dce.cc (group_limit): New function. (mark_reg_live): Likewise. (ext_dce_process_sets): Use new functions. (ext_dce_process_uses): Likewise. (ext_dce_init): Likewise. (cherry picked from commit 88d16194d0c8a6bdc2896c8944bfbf3e6038c9d2) Diff: --- gcc/ext-dce.cc | 64 +++--- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 59bcc4572d57..d1a31e1819e2 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -48,6 +48,57 @@ static bool modify; bit 16..31 bit 32..BITS_PER_WORD-1 */ +/* For the given REG, return the number of bit groups implied by the + size of the REG's mode, up to a maximum of 4 (number of bit groups + tracked by this pass). + + For partial integer and variable sized modes also return 4. This + could possibly be refined for something like PSI mode, but it + does not seem worth the effort. 
*/ + +static int +group_limit (const_rtx reg) +{ + machine_mode mode = GET_MODE (reg); + + if (!GET_MODE_BITSIZE (mode).is_constant ()) +return 4; + + int size = GET_MODE_SIZE (mode).to_constant (); + + size = exact_log2 (size); + + if (size < 0) +return 4; + + size++; + return (size > 4 ? 4 : size); +} + +/* Make all bit groups live for REGNO in bitmap BMAP. For hard regs, + we assume all groups are live. For a pseudo we consider the size + of the pseudo to avoid creating unnecessarily live chunks of data. */ + +static void +make_reg_live (bitmap bmap, int regno) +{ + int limit; + + /* For pseudos we can use the mode to limit how many bit groups + are marked as live since a pseudo only has one mode. Hard + registers have to be handled more conservatively. */ + if (regno > FIRST_PSEUDO_REGISTER) +{ + rtx reg = regno_reg_rtx[regno]; + limit = group_limit (reg); +} + else +limit = 4; + + for (int i = 0; i < limit; i++) +bitmap_set_bit (bmap, regno * 4 + i); +} + /* Note this pass could be used to narrow memory loads too. It's not clear if that's profitable or not in general. */ @@ -196,7 +247,8 @@ ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp) /* Transfer all the LIVENOW bits for X into LIVE_TMP. */ HOST_WIDE_INT rn = REGNO (SUBREG_REG (x)); - for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + 4; i++) + int limit = group_limit (SUBREG_REG (x)); + for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++) if (bitmap_bit_p (livenow, i)) bitmap_set_bit (live_tmp, i); @@ -260,7 +312,8 @@ ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp) /* Transfer the appropriate bits from LIVENOW into LIVE_TMP. 
*/ HOST_WIDE_INT rn = REGNO (x); - for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + 4; i++) + int limit = group_limit (x); + for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++) if (bitmap_bit_p (livenow, i)) bitmap_set_bit (live_tmp, i); @@ -692,7 +745,7 @@ ext_dce_process_uses (rtx_insn *insn, rtx obj, bitmap live_tmp) /* If we have a register reference that is not otherwise handled, just assume all the chunks are live. */ else if (REG_P (x)) - bitmap_set_range (livenow, REGNO (x) * 4, 4); + bitmap_set_range (livenow, REGNO (x) * 4, group_limit (x)); } } @@ -819,10 +872,7 @@ ext_dce_init (void) unsigned i; bitmap_iterator bi; EXECUTE_IF_SET_IN_BITMAP (refs, 0, i, bi) -{ - for (int j = 0; j < 4; j++) - bitmap_set_bit (&livein[EXIT_BLOCK], i * 4 + j); -} +make_reg_live (&livein[EXIT_BLOCK], i); livenow = BITMAP_ALLOC (NULL); all_blocks = BITMAP_ALLOC (NULL);
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] RISC-V: Implement the .SAT_TRUNC for scalar
https://gcc.gnu.org/g:0ee41b02916d9c4c68ae6dbaa364cefcd79bb7da commit 0ee41b02916d9c4c68ae6dbaa364cefcd79bb7da Author: Pan Li Date: Mon Jul 1 16:36:35 2024 +0800 RISC-V: Implement the .SAT_TRUNC for scalar This patch would like to implement the simple .SAT_TRUNC pattern in the riscv backend. Aka: Form 1: #define DEF_SAT_U_TRUC_FMT_1(NT, WT) \ NT __attribute__((noinline)) \ sat_u_truc_##WT##_to_##NT##_fmt_1 (WT x) \ {\ bool overflow = x > (WT)(NT)(-1); \ return ((NT)x) | (NT)-overflow;\ } DEF_SAT_U_TRUC_FMT_1(uint32_t, uint64_t) Before this patch: __attribute__((noinline)) uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x) { _Bool overflow; unsigned char _1; unsigned char _2; unsigned char _3; uint8_t _6; ;; basic block 2, loop depth 0 ;;pred: ENTRY overflow_5 = x_4(D) > 255; _1 = (unsigned char) x_4(D); _2 = (unsigned char) overflow_5; _3 = -_2; _6 = _1 | _3; return _6; ;;succ: EXIT } After this patch: __attribute__((noinline)) uint8_t sat_u_truc_uint16_t_to_uint8_t_fmt_1 (uint16_t x) { uint8_t _6; ;; basic block 2, loop depth 0 ;;pred: ENTRY _6 = .SAT_TRUNC (x_4(D)); [tail call] return _6; ;;succ: EXIT } The below tests suites are passed for this patch 1. The rv64gcv fully regression test. 2. The rv64gcv build with glibc gcc/ChangeLog: * config/riscv/iterators.md (ANYI_DOUBLE_TRUNC): Add new iterator for int double truncation. (ANYI_DOUBLE_TRUNCATED): Add new attr for int double truncation. (anyi_double_truncated): Ditto but for lowercase. * config/riscv/riscv-protos.h (riscv_expand_ustrunc): Add new func decl for expanding ustrunc * config/riscv/riscv.cc (riscv_expand_ustrunc): Add new func impl to expand ustrunc. * config/riscv/riscv.md (ustrunc2): Impl the new pattern ustrunc2 for int. gcc/testsuite/ChangeLog: * gcc.target/riscv/sat_arith.h: Add test helper macro. * gcc.target/riscv/sat_arith_data.h: New test. * gcc.target/riscv/sat_u_trunc-1.c: New test. * gcc.target/riscv/sat_u_trunc-2.c: New test. * gcc.target/riscv/sat_u_trunc-3.c: New test. 
* gcc.target/riscv/sat_u_trunc-run-1.c: New test. * gcc.target/riscv/sat_u_trunc-run-2.c: New test. * gcc.target/riscv/sat_u_trunc-run-3.c: New test. * gcc.target/riscv/scalar_sat_unary.h: New test. Signed-off-by: Pan Li (cherry picked from commit 5d2115b850df63b0ecdf56efb720ad848e7afe21) Diff: --- gcc/config/riscv/iterators.md | 10 gcc/config/riscv/riscv-protos.h| 1 + gcc/config/riscv/riscv.cc | 40 gcc/config/riscv/riscv.md | 10 gcc/testsuite/gcc.target/riscv/sat_arith.h | 16 +++ gcc/testsuite/gcc.target/riscv/sat_arith_data.h| 56 ++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-1.c | 17 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-2.c | 20 gcc/testsuite/gcc.target/riscv/sat_u_trunc-3.c | 19 gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-1.c | 16 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-2.c | 16 +++ gcc/testsuite/gcc.target/riscv/sat_u_trunc-run-3.c | 16 +++ gcc/testsuite/gcc.target/riscv/scalar_sat_unary.h | 22 + 13 files changed, 259 insertions(+) diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index d61ed53a8b1b..734da041f0cb 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -65,6 +65,16 @@ ;; Iterator for hardware-supported integer modes. (define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")]) +(define_mode_iterator ANYI_DOUBLE_TRUNC [HI SI (DI "TARGET_64BIT")]) + +(define_mode_attr ANYI_DOUBLE_TRUNCATED [ + (HI "QI") (SI "HI") (DI "SI") +]) + +(define_mode_attr anyi_double_truncated [ + (HI "qi") (SI "hi") (DI "si") +]) + ;; Iterator for hardware-supported floating-point modes. 
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX") (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX") diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 7c0ea1b445b1..ce5e38d3dbbf 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -135,6 +135,7 @@ riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int); extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx); extern void riscv_expand_usadd (rtx, rtx, rtx
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [4/n][PR rtl-optimization/115877] Correct SUBREG handling in a destination
https://gcc.gnu.org/g:d4f5e86b8cf0666aefd9c1f10188274af147df46 commit d4f5e86b8cf0666aefd9c1f10188274af147df46 Author: Jeff Law Date: Mon Jul 22 10:11:57 2024 -0600 [4/n][PR rtl-optimization/115877] Correct SUBREG handling in a destination If we encounter something during SET handling that we can not handle, the safe thing to do is to ignore the destination and continue the loop. We've actually been trying to do slightly better with SUBREG destinations by iterating into SUBREG_REG. It turns out that wasn't working as expected. The problem is once we "continue" we lose the state that we were inside the SET and thus we ended up ignoring the destination completely rather than tracking the SUBREG_REG object. This could be fixed by restarting SET processing, but I just don't see this as all that important to handle. So rather than leave the code as-is, not working per design, I'm twiddling it to use the common 'skip subrtxs and continue' idiom used elsewhere. This is a prerequisite for another patch in this series. Specifically I have a patch that explicitly tracks if we skipped a destination rather than trying to imply it from the state of LIVE_TMP. So this is probably NFC right now, but that's a short-lived NFC. Bootstrapped and regression tested on x86 and also run as part of a larger kit on the crosses in my tester. PR rtl-optimization/115877 gcc/ * ext-dce.cc (ext_dce_process_sets): More correctly handle SUBREG destinations. (cherry picked from commit ab7c0aed52054976d0b5e12c52e82239d4277b98) Diff: --- gcc/ext-dce.cc | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index d1a31e1819e2..7f0a6d725f1e 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -270,11 +270,18 @@ ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp) = GET_MODE_MASK (GET_MODE_INNER (GET_MODE (x))); if (SUBREG_P (x)) { - /* If we have a SUBREG that is too wide, just continue the loop -and let the iterator go down into SUBREG_REG. 
*/ + /* If we have a SUBREG destination that is too wide, just +skip the destination rather than continuing this iterator. +While continuing would be better, we'd need to strip the +subreg and restart within the SET processing rather than +the top of the loop which just complicates the flow even +more. */ if (!is_a (GET_MODE (SUBREG_REG (x)), &outer_mode) || GET_MODE_BITSIZE (outer_mode) > 64) - continue; + { + iter.skip_subrtxes (); + continue; + } /* We can safely strip a paradoxical subreg. The inner mode will be narrower than the outer mode. We'll clear fewer bits in
[gcc(refs/vendors/riscv/heads/gcc-14-with-riscv-opts)] [5/n][PR rtl-optimization/115877] Fix handling of input/output operands
https://gcc.gnu.org/g:99d15ac27522519377f7019cf6e5cb67b1497458 commit 99d15ac27522519377f7019cf6e5cb67b1497458 Author: Jeff Law Date: Mon Jul 22 21:48:28 2024 -0600 [5/n][PR rtl-optimization/115877] Fix handling of input/output operands So in this patch we're correcting a failure to mark objects live in scenarios like (set (dest) (plus (dest) (src)) When handling set pseudos, we transfer the liveness information from LIVENOW into LIVE_TMP. LIVE_TMP is subsequently used to narrow what bit groups are live for the inputs. The first time we process the block we may not have DEST in the LIVENOW set (it may be live across the loop, but not live after the loop). Thus we can totally miss making certain objects live, resulting in incorrect code. The fix is pretty simple. If LIVE_TMP is empty, then we should go ahead and mark all the bit groups for the set object in LIVE_TMP. This also removes an invalid gcc_assert on the state of the liveness bitmaps. This showed up on pru, rl78 and/or msp430 in the testsuite. So no new test. Bootstrapped and regression tested on x86_64 and also run through my tester on all the cross platforms. Pushing to the trunk. PR rtl-optimization/115877 gcc/ * ext-dce.cc (ext_dce_process_sets): Reasonably handle input/output operands. (ext_dce_rd_transfer_n): Drop bogus assertion. (cherry picked from commit ad642d2c950657539777ea436b787e7fff4ec09e) Diff: --- gcc/ext-dce.cc | 31 ++- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/gcc/ext-dce.cc b/gcc/ext-dce.cc index 7f0a6d725f1e..43d2447acb5d 100644 --- a/gcc/ext-dce.cc +++ b/gcc/ext-dce.cc @@ -245,13 +245,25 @@ ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp) continue; } - /* Transfer all the LIVENOW bits for X into LIVE_TMP. */ + /* LIVE_TMP contains the set groups that are live-out and set in +this insn. It is used to narrow the groups live-in for the +inputs of this insn. 
+ +The simple thing to do is mark all the groups as live, but +that will significantly inhibit optimization. + +We also need to be careful in the case where we have an in-out +operand. If we're not careful we'd clear LIVE_TMP +incorrectly. */ HOST_WIDE_INT rn = REGNO (SUBREG_REG (x)); int limit = group_limit (SUBREG_REG (x)); for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++) if (bitmap_bit_p (livenow, i)) bitmap_set_bit (live_tmp, i); + if (bitmap_empty_p (live_tmp)) + make_reg_live (live_tmp, rn); + /* The mode of the SUBREG tells us how many bits we can clear. */ machine_mode mode = GET_MODE (x); @@ -316,14 +328,25 @@ ext_dce_process_sets (rtx_insn *insn, rtx obj, bitmap live_tmp) /* Now handle the actual object that was changed. */ if (REG_P (x)) { - /* Transfer the appropriate bits from LIVENOW into -LIVE_TMP. */ + /* LIVE_TMP contains the set groups that are live-out and set in +this insn. It is used to narrow the groups live-in for the +inputs of this insn. + +The simple thing to do is mark all the groups as live, but +that will significantly inhibit optimization. + +We also need to be careful in the case where we have an in-out +operand. If we're not careful we'd clear LIVE_TMP +incorrectly. */ HOST_WIDE_INT rn = REGNO (x); int limit = group_limit (x); for (HOST_WIDE_INT i = 4 * rn; i < 4 * rn + limit; i++) if (bitmap_bit_p (livenow, i)) bitmap_set_bit (live_tmp, i); + if (bitmap_empty_p (live_tmp)) + make_reg_live (live_tmp, rn); + /* Now clear the bits known written by this instruction. Note that BIT need not be a power of two, consider a ZERO_EXTRACT destination. */ @@ -935,8 +958,6 @@ ext_dce_rd_transfer_n (int bb_index) the generic dataflow code that something changed. */ if (!bitmap_equal_p (&livein[bb_index], livenow)) { - gcc_assert (!bitmap_intersect_compl_p (&livein[bb_index], livenow)); - bitmap_copy (&livein[bb_index], livenow); return true; }
[gcc r15-2230] c++: normalizing ttp constraints [PR115656]
https://gcc.gnu.org/g:2861eb34e30973cb991a7964af7cfeae014a98b0 commit r15-2230-g2861eb34e30973cb991a7964af7cfeae014a98b0 Author: Patrick Palka Date: Tue Jul 23 13:16:14 2024 -0400 c++: normalizing ttp constraints [PR115656] Here we normalize the constraint same_as for the first time during ttp coercion of B / UU, specifically constraint subsumption checking. During this normalization the set of in-scope template parameters i.e. current_template_parms is empty, which we rely on during normalization of the ttp constraints since we pass in_decl=NULL_TREE to norm_info. And this tricks the satisfaction cache into thinking that the satisfaction value of same_as is independent of its template parameters, and we incorrectly conflate the satisfaction value with T = bool vs T = long and accept the specialization A. Since is_compatible_template_arg rewrites the ttp's constraints to be in terms of the argument template's parameters, and since it's the only caller of weakly_subsumes, the latter funcion can instead pass in_decl=tmpl to avoid relying on current_template_parms. This patch implements this, and in turns renames weakly_subsumes to ttp_subsumes to reflect that this predicate is now hardcoded for this one caller. PR c++/115656 gcc/cp/ChangeLog: * constraint.cc (weakly_subsumes): Pass in_decl=tmpl to get_normalized_constraints_from_info. Rename to ... (ttp_subsumes): ... this. * cp-tree.h (weakly_subsumes): Rename to ... (ttp_subsumes): ... this. * pt.cc (is_compatible_template_arg): Adjust after renaming. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/concepts-ttp7.C: New test. 
Reviewed-by: Jason Merrill Diff: --- gcc/cp/constraint.cc | 9 + gcc/cp/cp-tree.h | 2 +- gcc/cp/pt.cc | 2 +- gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C | 12 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index afd5435cc3ed..7fce78f508e9 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3328,13 +3328,14 @@ strictly_subsumes (tree ci, tree tmpl) return subsumes (n1, n2) && !subsumes (n2, n1); } -/* Returns true when the constraints in CI subsume the - associated constraints of TMPL. */ +/* Returns true when the template template parameter constraints in CI + subsume the associated constraints of the template template argument + TMPL. */ bool -weakly_subsumes (tree ci, tree tmpl) +ttp_subsumes (tree ci, tree tmpl) { - tree n1 = get_normalized_constraints_from_info (ci, NULL_TREE); + tree n1 = get_normalized_constraints_from_info (ci, tmpl); tree n2 = get_normalized_constraints_from_decl (tmpl); return subsumes (n1, n2); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 76ac9c31763c..856699de82f2 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -8609,7 +8609,7 @@ extern tree find_template_parameters (tree, tree); extern bool equivalent_constraints (tree, tree); extern bool equivalently_constrained(tree, tree); extern bool strictly_subsumes (tree, tree); -extern bool weakly_subsumes(tree, tree); +extern bool ttp_subsumes (tree, tree); extern int more_constrained (tree, tree); extern bool at_least_as_constrained (tree, tree); extern bool constraints_equivalent_p(tree, tree); diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 8cc5e21c520d..393913294b50 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -8482,7 +8482,7 @@ is_compatible_template_arg (tree parm, tree arg, tree args) return false; } - return weakly_subsumes (parm_cons, arg); + return ttp_subsumes (parm_cons, arg); } // Convert a placeholder argument into a binding to the original diff --git 
a/gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C b/gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C new file mode 100644 index ..2ce884b995c7 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-ttp7.C @@ -0,0 +1,12 @@ +// PR c++/115656 +// { dg-do compile { target c++20 } } + +template concept same_as = __is_same(T, U); + +template T, template> class UU> +struct A { }; + +template> class B; + +A a1; +A a2; // { dg-error "constraint failure" }
[gcc r15-2231] C++: Support clang compatible [[musttail]] (PR83324)
https://gcc.gnu.org/g:2bd8177256b6d87f6e75819218cf22c2c0bfc1ac commit r15-2231-g2bd8177256b6d87f6e75819218cf22c2c0bfc1ac Author: Andi Kleen Date: Tue Jan 23 23:44:48 2024 -0800 C++: Support clang compatible [[musttail]] (PR83324) This patch implements a clang compatible [[musttail]] attribute for returns. musttail is useful as an alternative to computed goto for interpreters. With computed goto the interpreter function usually ends up very big which causes problems with register allocation and other per function optimizations not scaling. With musttail the interpreter can be instead written as a sequence of smaller functions that call each other. To avoid unbounded stack growth this requires forcing a sibling call, which this attribute does. It guarantees an error if the call cannot be tail called which allows the programmer to fix it instead of risking a stack overflow. Unlike computed goto it is also type-safe. It turns out that David Malcolm had already implemented middle/backend support for a musttail attribute back in 2016, but it wasn't exposed to any frontend other than a special plugin. This patch adds a [[gnu::musttail]] attribute for C++ that can be added to return statements. The return statement must be a direct call (it does not follow dependencies), which is similar to what clang implements. It then uses the existing must tail infrastructure. For compatibility it also detects clang::musttail Passes bootstrap and full test gcc/c-family/ChangeLog: * c-attribs.cc (set_musttail_on_return): New function. * c-common.h (set_musttail_on_return): Declare new function. gcc/cp/ChangeLog: PR c/83324 * cp-tree.h (AGGR_INIT_EXPR_MUST_TAIL): Add. * parser.cc (cp_parser_statement): Handle musttail. (cp_parser_jump_statement): Ditto. * pt.cc (tsubst_expr): Copy CALL_EXPR_MUST_TAIL_CALL. * semantics.cc (simplify_aggr_init_expr): Handle musttail. 
Diff: --- gcc/c-family/c-attribs.cc | 20 gcc/c-family/c-common.h | 1 + gcc/cp/cp-tree.h | 4 gcc/cp/parser.cc | 32 +--- gcc/cp/pt.cc | 9 - gcc/cp/semantics.cc | 1 + 6 files changed, 63 insertions(+), 4 deletions(-) diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc index 5adc7b775eaf..685f212683f4 100644 --- a/gcc/c-family/c-attribs.cc +++ b/gcc/c-family/c-attribs.cc @@ -672,6 +672,26 @@ attribute_takes_identifier_p (const_tree attr_id) return targetm.attribute_takes_identifier_p (attr_id); } +/* Set a musttail attribute MUSTTAIL_P on return expression RETVAL + at LOC. */ + +void +set_musttail_on_return (tree retval, location_t loc, bool musttail_p) +{ + if (retval && musttail_p) +{ + tree t = retval; + if (TREE_CODE (t) == TARGET_EXPR) + t = TARGET_EXPR_INITIAL (t); + if (TREE_CODE (t) != CALL_EXPR) + error_at (loc, "cannot tail-call: return value must be a call"); + else + CALL_EXPR_MUST_TAIL_CALL (t) = 1; +} + else if (musttail_p && !retval) +error_at (loc, "cannot tail-call: return value must be a call"); +} + /* Verify that argument value POS at position ARGNO to attribute NAME applied to function FN (which is either a function declaration or function type) refers to a function parameter at position POS and the expected type diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index adee822a3ae0..2510ee4dbc9d 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -1648,6 +1648,7 @@ extern tree handle_noreturn_attribute (tree *, tree, tree, int, bool *); extern tree handle_musttail_attribute (tree *, tree, tree, int, bool *); extern bool has_attribute (location_t, tree, tree, tree (*)(tree)); extern tree build_attr_access_from_parms (tree, bool); +extern void set_musttail_on_return (tree, location_t, bool); /* In c-format.cc. 
*/ extern bool valid_format_string_type_p (tree); diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 856699de82f2..e2cec2f2c16c 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -4228,6 +4228,10 @@ templated_operator_saved_lookups (tree t) #define AGGR_INIT_FROM_THUNK_P(NODE) \ (AGGR_INIT_EXPR_CHECK (NODE)->base.protected_flag) +/* Nonzero means that the call was marked musttail. */ +#define AGGR_INIT_EXPR_MUST_TAIL(NODE) \ + (AGGR_INIT_EXPR_CHECK (NODE)->base.static_flag) + /* AGGR_INIT_EXPR accessors. These are equivalent to the CALL_EXPR accessors, except for AGGR_INIT_EXPR_SLOT (which takes the place of CALL_EXPR_STATIC_CHAIN). */ diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index efd5d6f29a71..1fa0780944b6 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -2467,7 +2467,7 @@ static tree cp_parser_perform_range_for_lookup static tree
[gcc r15-2232] C: Implement musttail attribute for returns
https://gcc.gnu.org/g:78bbdbd5352df527feccf0a8c2f862f25a2e88b4 commit r15-2232-g78bbdbd5352df527feccf0a8c2f862f25a2e88b4 Author: Andi Kleen Date: Wed Jan 24 07:44:23 2024 -0800 C: Implement musttail attribute for returns Implement a C23 clang compatible musttail attribute similar to the earlier C++ implementation in the C parser. gcc/c/ChangeLog: PR c/83324 * c-parser.cc (struct attr_state): Define with musttail_p. (c_parser_statement_after_labels): Handle [[musttail]]. (c_parser_std_attribute): Dito. (c_parser_handle_musttail): Dito. (c_parser_compound_statement_nostart): Dito. (c_parser_all_labels): Dito. (c_parser_statement): Dito. * c-tree.h (c_finish_return): Add musttail_p flag. * c-typeck.cc (c_finish_return): Handle musttail_p flag. Diff: --- gcc/c/c-parser.cc | 71 +-- gcc/c/c-tree.h| 2 +- gcc/c/c-typeck.cc | 7 -- 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index 12c5ed5d92c7..9b9284b1ba4d 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -1621,6 +1621,12 @@ struct omp_for_parse_data { bool fail : 1; }; +struct attr_state +{ + /* True if we parsed a musttail attribute for return. */ + bool musttail_p; +}; + static bool c_parser_nth_token_starts_std_attributes (c_parser *, unsigned int); static tree c_parser_std_attribute_specifier_sequence (c_parser *); @@ -1665,7 +1671,8 @@ static location_t c_parser_compound_statement_nostart (c_parser *); static void c_parser_label (c_parser *, tree); static void c_parser_statement (c_parser *, bool *, location_t * = NULL); static void c_parser_statement_after_labels (c_parser *, bool *, -vec * = NULL); +vec * = NULL, +attr_state = {}); static tree c_parser_c99_block_statement (c_parser *, bool *, location_t * = NULL); static void c_parser_if_statement (c_parser *, bool *, vec *); @@ -6982,6 +6989,29 @@ c_parser_handle_directive_omp_attributes (tree &attrs, } } +/* Check if STD_ATTR contains a musttail attribute and remove if it + precedes a return. 
PARSER is the parser and ATTR is the output + attr_state. */ + +static tree +c_parser_handle_musttail (c_parser *parser, tree std_attrs, attr_state &attr) +{ + if (c_parser_next_token_is_keyword (parser, RID_RETURN)) +{ + if (lookup_attribute ("gnu", "musttail", std_attrs)) + { + std_attrs = remove_attribute ("gnu", "musttail", std_attrs); + attr.musttail_p = true; + } + if (lookup_attribute ("clang", "musttail", std_attrs)) + { + std_attrs = remove_attribute ("clang", "musttail", std_attrs); + attr.musttail_p = true; + } +} + return std_attrs; +} + /* Parse a compound statement except for the opening brace. This is used for parsing both compound statements and statement expressions (which follow different paths to handling the opening). */ @@ -6998,6 +7028,7 @@ c_parser_compound_statement_nostart (c_parser *parser) bool in_omp_loop_block = omp_for_parse_state ? omp_for_parse_state->want_nested_loop : false; tree sl = NULL_TREE; + attr_state a = {}; if (c_parser_next_token_is (parser, CPP_CLOSE_BRACE)) { @@ -7138,7 +7169,10 @@ c_parser_compound_statement_nostart (c_parser *parser) = c_parser_nth_token_starts_std_attributes (parser, 1); tree std_attrs = NULL_TREE; if (have_std_attrs) - std_attrs = c_parser_std_attribute_specifier_sequence (parser); + { + std_attrs = c_parser_std_attribute_specifier_sequence (parser); + std_attrs = c_parser_handle_musttail (parser, std_attrs, a); + } if (c_parser_next_token_is_keyword (parser, RID_CASE) || c_parser_next_token_is_keyword (parser, RID_DEFAULT) || (c_parser_next_token_is (parser, CPP_NAME) @@ -7286,7 +7320,7 @@ c_parser_compound_statement_nostart (c_parser *parser) last_stmt = true; mark_valid_location_for_stdc_pragma (false); if (!omp_for_parse_state) - c_parser_statement_after_labels (parser, NULL); + c_parser_statement_after_labels (parser, NULL, NULL, a); else { /* In canonical loop nest form, nested loops can only appear @@ -7328,15 +7362,20 @@ c_parser_compound_statement_nostart (c_parser *parser) /* Parse all 
consecutive labels, possibly preceded by standard attributes. In this context, a statement is required, not a declaration, so attributes must be followed by a statement that is - not just a semicolon. */ + not just a semicolon. Retur
[gcc r15-2233] Add tests for C/C++ musttail attributes
https://gcc.gnu.org/g:8d1af8f904a0c08656d976cbf8ca56dba35197b0 commit r15-2233-g8d1af8f904a0c08656d976cbf8ca56dba35197b0 Author: Andi Kleen Date: Tue Jan 23 23:54:56 2024 -0800 Add tests for C/C++ musttail attributes Some adopted from the existing C musttail plugin tests. Also extends the ability to query the sibcall capabilities of the target. gcc/testsuite/ChangeLog: * lib/target-supports.exp: (check_effective_target_struct_tail_call): New function. * c-c++-common/musttail1.c: New test. * c-c++-common/musttail12.c: New test. * c-c++-common/musttail13.c: New test. * c-c++-common/musttail2.c: New test. * c-c++-common/musttail3.c: New test. * c-c++-common/musttail4.c: New test. * c-c++-common/musttail5.c: New test. * c-c++-common/musttail7.c: New test. * c-c++-common/musttail8.c: New test. * g++.dg/musttail10.C: New test. * g++.dg/musttail11.C: New test. * g++.dg/musttail6.C: New test. * g++.dg/musttail9.C: New test. Diff: --- gcc/testsuite/c-c++-common/musttail1.c | 14 gcc/testsuite/c-c++-common/musttail12.c | 15 + gcc/testsuite/c-c++-common/musttail13.c | 5 +++ gcc/testsuite/c-c++-common/musttail2.c | 33 ++ gcc/testsuite/c-c++-common/musttail3.c | 29 gcc/testsuite/c-c++-common/musttail4.c | 17 ++ gcc/testsuite/c-c++-common/musttail5.c | 28 +++ gcc/testsuite/c-c++-common/musttail7.c | 14 gcc/testsuite/c-c++-common/musttail8.c | 17 ++ gcc/testsuite/g++.dg/musttail10.C | 40 ++ gcc/testsuite/g++.dg/musttail11.C | 33 ++ gcc/testsuite/g++.dg/musttail6.C| 60 + gcc/testsuite/g++.dg/musttail9.C| 10 ++ gcc/testsuite/lib/target-supports.exp | 12 +++ 14 files changed, 327 insertions(+) diff --git a/gcc/testsuite/c-c++-common/musttail1.c b/gcc/testsuite/c-c++-common/musttail1.c new file mode 100644 index ..74efcc2a0bc6 --- /dev/null +++ b/gcc/testsuite/c-c++-common/musttail1.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target { tail_call && { c || c++11 } } } } */ +/* { dg-additional-options "-fdelayed-branch" { target sparc*-*-* } } */ + +int 
__attribute__((noinline,noclone,noipa)) +callee (int i) +{ + return i * i; +} + +int __attribute__((noinline,noclone,noipa)) +caller (int i) +{ + [[gnu::musttail]] return callee (i + 1); +} diff --git a/gcc/testsuite/c-c++-common/musttail12.c b/gcc/testsuite/c-c++-common/musttail12.c new file mode 100644 index ..4140bcd00950 --- /dev/null +++ b/gcc/testsuite/c-c++-common/musttail12.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { struct_tail_call && { c || c++11 } } } } */ +/* { dg-additional-options "-fdelayed-branch" { target sparc*-*-* } } */ + +struct str +{ + int a, b; +}; +struct str +cstruct (int x) +{ + if (x < 10) +L: +[[gnu::musttail]] return cstruct (x + 1); + return ((struct str){ x, 0 }); +} diff --git a/gcc/testsuite/c-c++-common/musttail13.c b/gcc/testsuite/c-c++-common/musttail13.c new file mode 100644 index ..6bd212fbeb8f --- /dev/null +++ b/gcc/testsuite/c-c++-common/musttail13.c @@ -0,0 +1,5 @@ +/* { dg-do compile { target { c || c++11 } } } */ +void f(void) +{ + [[gnu::musttail]] return; /* { dg-error "cannot tail-call.*return value must be a call" } */ +} diff --git a/gcc/testsuite/c-c++-common/musttail2.c b/gcc/testsuite/c-c++-common/musttail2.c new file mode 100644 index ..86f2c3d77404 --- /dev/null +++ b/gcc/testsuite/c-c++-common/musttail2.c @@ -0,0 +1,33 @@ +/* { dg-do compile { target { tail_call && { c || c++11 } } } } */ + +struct box { char field[256]; int i; }; + +int __attribute__((noinline,noclone,noipa)) +test_2_callee (int i, struct box b) +{ + if (b.field[0]) +return 5; + return i * i; +} + +int __attribute__((noinline,noclone,noipa)) +test_2_caller (int i) +{ + struct box b; + [[gnu::musttail]] return test_2_callee (i + 1, b); /* { dg-error "cannot tail-call: " } */ +} + +extern void setjmp (void); +void +test_3 (void) +{ + [[gnu::musttail]] return setjmp (); /* { dg-error "cannot tail-call: " } */ +} + +extern float f7(void); + +int +test_6 (void) +{ + [[gnu::musttail]] return f7(); /* { dg-error "cannot tail-call: " } */ +} 
diff --git a/gcc/testsuite/c-c++-common/musttail3.c b/gcc/testsuite/c-c++-common/musttail3.c new file mode 100644 index ..ea9589c59ef2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/musttail3.c @@ -0,0 +1,29 @@ +/* { dg-do compile { target { tail_call && { c || c++11 } } } } */ + +extern int foo2 (int x, ...); + +struct str +{ + int a, b; +}; + +struct str +cstruct (int x) +{ + if (x < 10) +[[clang::musttail]] return cstruct (x + 1); + return ((struct str){ x, 0 }); +} + +int +foo (int x) +{ + if
[gcc r15-2234] Add documentation for musttail attribute
https://gcc.gnu.org/g:8daae81113eeff37b4ae2e08a9797295fbc8b81e commit r15-2234-g8daae81113eeff37b4ae2e08a9797295fbc8b81e Author: Andi Kleen Date: Tue Jan 23 23:38:23 2024 -0800 Add documentation for musttail attribute gcc/ChangeLog: PR c/83324 * doc/extend.texi: Document [[musttail]] Diff: --- gcc/doc/extend.texi | 25 +++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 4b77599380b5..b0273927b256 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -9921,7 +9921,7 @@ same manner as the @code{deprecated} attribute. @section Statement Attributes @cindex Statement Attributes -GCC allows attributes to be set on null statements. @xref{Attribute Syntax}, +GCC allows attributes to be set on statements. @xref{Attribute Syntax}, for details of the exact syntax for using attributes. Other attributes are available for functions (@pxref{Function Attributes}), variables (@pxref{Variable Attributes}), labels (@pxref{Label Attributes}), enumerators @@ -9978,6 +9978,25 @@ foo (int x, int y) @code{y} is not actually incremented and the compiler can but does not have to optimize it to just @code{return 42 + 42;}. +@cindex @code{musttail} statement attribute +@item musttail + +The @code{gnu::musttail} or @code{clang::musttail} attribute +can be applied to a @code{return} statement with a return-value expression +that is a function call. It asserts that the call must be a tail call that +does not allocate extra stack space, so it is safe to use tail recursion +to implement long running loops. + +@smallexample +[[gnu::musttail]] return foo(); +@end smallexample + +If the compiler cannot generate a @code{musttail} tail call it will report +an error. On some targets tail calls may never be supported. +Tail calls cannot reference locals in memory, which may affect +builds without optimization when passing small structures, or passing +or returning large structures. 
Enabling -O1 or -O2 can improve +the success of tail calls. @end table @node Attribute Syntax @@ -10101,7 +10120,9 @@ the constant expression, if present. @subsubheading Statement Attributes In GNU C, an attribute specifier list may appear as part of a null -statement. The attribute goes before the semicolon. +statement. The attribute goes before the semicolon. +Some attributes in new style syntax are also supported +on non-null statements. @subsubheading Type Attributes
[gcc r15-2235] doc: add missing @option for musttail
https://gcc.gnu.org/g:e8c40aed0f81ca8aac1ae43f140f489eda2d3a07 commit r15-2235-ge8c40aed0f81ca8aac1ae43f140f489eda2d3a07 Author: Marek Polacek Date: Tue Jul 23 16:32:20 2024 -0400 doc: add missing @option for musttail gcc/ChangeLog: * doc/extend.texi: Add missing @option. Diff: --- gcc/doc/extend.texi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index b0273927b256..66c99ef7a667 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -9995,8 +9995,8 @@ If the compiler cannot generate a @code{musttail} tail call it will report an error. On some targets tail calls may never be supported. Tail calls cannot reference locals in memory, which may affect builds without optimization when passing small structures, or passing -or returning large structures. Enabling -O1 or -O2 can improve -the success of tail calls. +or returning large structures. Enabling @option{-O1} or @option{-O2} can +improve the success of tail calls. @end table @node Attribute Syntax
[gcc r15-2236] RISC-V: Fix snafu in SI mode splitters patch
https://gcc.gnu.org/g:806927111cf388a2d8cd54072269601f677767cf commit r15-2236-g806927111cf388a2d8cd54072269601f677767cf Author: Vineet Gupta Date: Tue Jul 23 15:12:11 2024 -0700 RISC-V: Fix snafu in SI mode splitters patch SPEC2017 perlbench for RISC-V was broken with a runtime output mismatch failure. > 3830: mbox2: dWshe3Aa1EULre4CT5O/ErYFrk+o/EOoebA1kTVjQVQQH2EjT5fHcYnwjj2MdBmZu5y3Ce4Ei4QQZo/SNrry9g >mbox2: uuWPimQiU0D4UrwFP+LS0lFNph4qL43WV1A6T3tHleatIOUaHixhrJU9NoA2lc9KjwYpdEL0lNTXkvo8ymNHzA > ^ > 3832: mbox3: 8f4jdv6GIf0lX3DcdwRdEm6/aZwnmGX6n86GzCvmkwTKFXQjwlwVHc8jy8XlcyiIPr3yXTkgVOiP3cRYvyYQPg >mbox3: 9xQySgP6qbhfxl8Usu1WfGA5UhStB5AN31wueGM6OF4Jp59DkqJPu6ksGblOU5u0nQapQC1e9oYIs16a2mq2NA > ^ > specdiff run completed Edwin bisected this to 273f16a125c4 ("[v3][RISC-V] Handle bit manipulation of SImode values") which had the operands swapped in one of the new splitters introduced. No test is added, as the reducer narrows it down to the exact test introduced by the original commit. gcc/ChangeLog: * config/riscv/bitmanip.md: Fix splitter. Reported-by: Edwin Lu Signed-off-by: Vineet Gupta Diff: --- gcc/config/riscv/bitmanip.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index f403ba8dbbad..d262430485e7 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -675,7 +675,7 @@ "#" "&& reload_completed" [(set (match_dup 4) (match_dup 2)) -(set (match_dup 4) (and:DI (not:DI (match_dup 4)) (match_dup 1))) +(set (match_dup 4) (and:DI (not:DI (match_dup 1)) (match_dup 4))) (set (match_dup 0) (any_or:DI (ashift:DI (const_int 1) (match_dup 5)) (match_dup 3)))] { operands[5] = gen_lowpart (QImode, operands[4]); } [(set_attr "type" "bitmanip")])
[gcc r15-2237] Output CodeView type information for references
https://gcc.gnu.org/g:7341607544e01a4a155613470b2ef099b051b881 commit r15-2237-g7341607544e01a4a155613470b2ef099b051b881 Author: Mark Harmstone Date: Sat Jul 20 20:12:30 2024 +0100 Output CodeView type information for references Translates DW_TAG_reference_type DIEs into LF_POINTER types. gcc/ * dwarf2codeview.cc (get_type_num_reference_type): New function. (get_type_num_array_type): Add DW_TAG_reference_type to switch. (get_type_num): Handle DW_TAG_reference_type DIEs. * dwarf2codeview.h (CV_PTR_MODE_LVREF): Define. Diff: --- gcc/dwarf2codeview.cc | 44 gcc/dwarf2codeview.h | 1 + 2 files changed, 45 insertions(+) diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc index c174f3204803..23175204acd3 100644 --- a/gcc/dwarf2codeview.cc +++ b/gcc/dwarf2codeview.cc @@ -2293,6 +2293,45 @@ get_type_num_pointer_type (dw_die_ref type, bool in_struct) return ct->num; } +/* Process a DW_TAG_reference_type DIE, add a new LF_POINTER type, and return + its number. */ + +static uint32_t +get_type_num_reference_type (dw_die_ref type, bool in_struct) +{ + uint32_t base_type_num, byte_size; + dw_die_ref base_type; + codeview_custom_type *ct; + + byte_size = get_AT_unsigned (type, DW_AT_byte_size); + if (byte_size != 4 && byte_size != 8) +return 0; + + base_type = get_AT_ref (type, DW_AT_type); + + base_type_num = get_type_num (base_type, in_struct, false); + if (base_type_num == 0) +return 0; + + ct = (codeview_custom_type *) xmalloc (sizeof (codeview_custom_type)); + + ct->next = NULL; + ct->kind = LF_POINTER; + ct->lf_pointer.base_type = base_type_num; + ct->lf_pointer.attributes = CV_PTR_MODE_LVREF; + + if (byte_size == 4) +ct->lf_pointer.attributes |= CV_PTR_NEAR32; + else +ct->lf_pointer.attributes |= CV_PTR_64; + + ct->lf_pointer.attributes |= byte_size << 13; + + add_custom_type (ct); + + return ct->num; +} + /* Process a DW_TAG_const_type DIE, adding an LF_MODIFIER type and returning its number. 
*/ @@ -3024,6 +3063,7 @@ get_type_num_array_type (dw_die_ref type, bool in_struct) case DW_TAG_class_type: case DW_TAG_union_type: case DW_TAG_pointer_type: + case DW_TAG_reference_type: size = get_AT_unsigned (t, DW_AT_byte_size); break; @@ -3151,6 +3191,10 @@ get_type_num (dw_die_ref type, bool in_struct, bool no_fwd_ref) num = get_type_num_pointer_type (type, in_struct); break; +case DW_TAG_reference_type: + num = get_type_num_reference_type (type, in_struct); + break; + case DW_TAG_const_type: num = get_type_num_const_type (type, in_struct); break; diff --git a/gcc/dwarf2codeview.h b/gcc/dwarf2codeview.h index 8fd3632e524c..7d4e3ab1db43 100644 --- a/gcc/dwarf2codeview.h +++ b/gcc/dwarf2codeview.h @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see /* LF_POINTER attributes. */ #define CV_PTR_NEAR32 0x0a #define CV_PTR_64 0x0c +#define CV_PTR_MODE_LVREF 0x20 /* LF_MODIFIER values. */ #define MOD_const 0x1
[gcc r15-2238] Output CodeView type information for rvalue references
https://gcc.gnu.org/g:1ca7a12807a7fa0d9c27a5c8c45fa99ac9e7e027 commit r15-2238-g1ca7a12807a7fa0d9c27a5c8c45fa99ac9e7e027 Author: Mark Harmstone Date: Sat Jul 20 20:18:14 2024 +0100 Output CodeView type information for rvalue references Translates DW_TAG_rvalue_reference_type DIEs into LF_POINTER types. gcc/ * dwarf2codeview.cc (get_type_num_reference_type): Handle rvalue refs. (get_type_num_array_type): Add DW_TAG_rvalue_reference_type to switch. (get_type_num): Handle DW_TAG_rvalue_reference_type DIEs. * dwarf2codeview.h (CV_PTR_MODE_RVREF): Define. Diff: --- gcc/dwarf2codeview.cc | 15 ++- gcc/dwarf2codeview.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/gcc/dwarf2codeview.cc b/gcc/dwarf2codeview.cc index 23175204acd3..b16c6960f63e 100644 --- a/gcc/dwarf2codeview.cc +++ b/gcc/dwarf2codeview.cc @@ -2293,11 +2293,11 @@ get_type_num_pointer_type (dw_die_ref type, bool in_struct) return ct->num; } -/* Process a DW_TAG_reference_type DIE, add a new LF_POINTER type, and return - its number. */ +/* Process a DW_TAG_reference_type or DW_TAG_rvalue_reference_type DIE, add a + new LF_POINTER type, and return its number. */ static uint32_t -get_type_num_reference_type (dw_die_ref type, bool in_struct) +get_type_num_reference_type (dw_die_ref type, bool in_struct, bool rvref) { uint32_t base_type_num, byte_size; dw_die_ref base_type; @@ -2318,7 +2318,7 @@ get_type_num_reference_type (dw_die_ref type, bool in_struct) ct->next = NULL; ct->kind = LF_POINTER; ct->lf_pointer.base_type = base_type_num; - ct->lf_pointer.attributes = CV_PTR_MODE_LVREF; + ct->lf_pointer.attributes = rvref ? 
CV_PTR_MODE_RVREF : CV_PTR_MODE_LVREF; if (byte_size == 4) ct->lf_pointer.attributes |= CV_PTR_NEAR32; @@ -3064,6 +3064,7 @@ get_type_num_array_type (dw_die_ref type, bool in_struct) case DW_TAG_union_type: case DW_TAG_pointer_type: case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: size = get_AT_unsigned (t, DW_AT_byte_size); break; @@ -3192,7 +3193,11 @@ get_type_num (dw_die_ref type, bool in_struct, bool no_fwd_ref) break; case DW_TAG_reference_type: - num = get_type_num_reference_type (type, in_struct); + num = get_type_num_reference_type (type, in_struct, false); + break; + +case DW_TAG_rvalue_reference_type: + num = get_type_num_reference_type (type, in_struct, true); break; case DW_TAG_const_type: diff --git a/gcc/dwarf2codeview.h b/gcc/dwarf2codeview.h index 7d4e3ab1db43..8ede1b295294 100644 --- a/gcc/dwarf2codeview.h +++ b/gcc/dwarf2codeview.h @@ -56,6 +56,7 @@ along with GCC; see the file COPYING3. If not see #define CV_PTR_NEAR32 0x0a #define CV_PTR_64 0x0c #define CV_PTR_MODE_LVREF 0x20 +#define CV_PTR_MODE_RVREF 0x80 /* LF_MODIFIER values. */ #define MOD_const 0x1
[gcc r15-2240] [PR rtl-optimization/115877][6/n] Add testcase from pr115877
https://gcc.gnu.org/g:f9a60d575f02822852aa22513c636be38f9c63ea commit r15-2240-gf9a60d575f02822852aa22513c636be38f9c63ea Author: Jeff Law Date: Tue Jul 23 19:11:04 2024 -0600 [PR rtl-optimization/115877][6/n] Add testcase from pr115877 This just adds the testcase from pr115877. It's working now on the trunk. I'm not done with cleanups/bugfixing, but there's no reason to not have the testcase installed at this point. PR rtl-optimization/115877 gcc/testsuite * gcc.dg/torture/pr115877.c: New test. Diff: --- gcc/testsuite/gcc.dg/torture/pr115877.c | 20 1 file changed, 20 insertions(+) diff --git a/gcc/testsuite/gcc.dg/torture/pr115877.c b/gcc/testsuite/gcc.dg/torture/pr115877.c new file mode 100644 index ..432b1280b177 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr115877.c @@ -0,0 +1,20 @@ +/* { dg-do run { target int128 } } */ + +char a[16]; +unsigned short u; + +__int128 +foo (int i) +{ + i -= (unsigned short) ~u; + a[(unsigned short) i] = 1; + return i; +} + +int +main () +{ + __int128 x = foo (0); + if (x != -0xffff) +__builtin_abort(); +}
[gcc r13-8938] rs6000: Update ELFv2 stack frame comment showing the correct ROP save location
https://gcc.gnu.org/g:9bbdec4d94f9120b75d03a610e0338bb05ee40f7 commit r13-8938-g9bbdec4d94f9120b75d03a610e0338bb05ee40f7 Author: Peter Bergner Date: Fri Jun 7 16:03:08 2024 -0500 rs6000: Update ELFv2 stack frame comment showing the correct ROP save location The ELFv2 stack frame layout comment in rs6000-logue.cc shows the ROP hash save slot in the wrong location. Update the comment to show the correct ROP hash save location in the frame. 2024-06-07 Peter Bergner gcc/ * config/rs6000/rs6000-logue.cc (rs6000_stack_info): Update comment. (cherry picked from commit e91cf26a954a5c1bf431e36f3a1e69f94e9fa4fe) Diff: --- gcc/config/rs6000/rs6000-logue.cc | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc index baadbbd692e1..ddd46ae459ae 100644 --- a/gcc/config/rs6000/rs6000-logue.cc +++ b/gcc/config/rs6000/rs6000-logue.cc @@ -591,21 +591,21 @@ rs6000_savres_strategy (rs6000_stack_t *info, +---+ | Parameter save area (+padding*) (P) | 32 +---+ - | Optional ROP hash slot (R)| 32+P + | Alloca space (A) | 32+P +---+ - | Alloca space (A) | 32+P+R + | Local variable space (L) | 32+P+A +---+ - | Local variable space (L) | 32+P+R+A + | Optional ROP hash slot (R)| 32+P+A+L +---+ - | Save area for AltiVec registers (W) | 32+P+R+A+L + | Save area for AltiVec registers (W) | 32+P+A+L+R +---+ - | AltiVec alignment padding (Y) | 32+P+R+A+L+W + | AltiVec alignment padding (Y) | 32+P+A+L+R+W +---+ - | Save area for GP registers (G)| 32+P+R+A+L+W+Y + | Save area for GP registers (G)| 32+P+A+L+R+W+Y +---+ - | Save area for FP registers (F)| 32+P+R+A+L+W+Y+G + | Save area for FP registers (F)| 32+P+A+L+R+W+Y+G +---+ - old SP->| back chain to caller's caller | 32+P+R+A+L+W+Y+G+F + old SP->| back chain to caller's caller | 32+P+A+L+R+W+Y+G+F +---+ * If the alloca area is present, the parameter save area is
[gcc r13-8939] rs6000: Compute rop_hash_save_offset for non-Altivec compiles [PR115389]
https://gcc.gnu.org/g:bc51e5abcd9cf9a4f74384f2df7c0c8c5ae07c1c commit r13-8939-gbc51e5abcd9cf9a4f74384f2df7c0c8c5ae07c1c Author: Peter Bergner Date: Fri Jun 14 14:36:20 2024 -0500 rs6000: Compute rop_hash_save_offset for non-Altivec compiles [PR115389] We currently only compute the offset for the ROP hash save location in the stack frame for Altivec compiles. For non-Altivec compiles when we emit ROP mitigation instructions, we use a default offset of zero which corresponds to the backchain save location which will get clobbered on any call. The fix is to compute the ROP hash save location for all compiles. 2024-06-14 Peter Bergner gcc/ PR target/115389 * config/rs6000/rs6000-logue.cc (rs6000_stack_info): Compute rop_hash_save_offset for non-Altivec compiles. gcc/testsuite PR target/115389 * gcc.target/powerpc/pr115389.c: New test. (cherry picked from commit c70eea0dba5f223d49c80cfb3e80e87b74330aac) Diff: --- gcc/config/rs6000/rs6000-logue.cc | 9 - gcc/testsuite/gcc.target/powerpc/pr115389.c | 17 + 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc index ddd46ae459ae..d38b90b61070 100644 --- a/gcc/config/rs6000/rs6000-logue.cc +++ b/gcc/config/rs6000/rs6000-logue.cc @@ -817,17 +817,16 @@ rs6000_stack_info (void) gcc_assert (info->altivec_size == 0 || info->altivec_save_offset % 16 == 0); - /* Adjust for AltiVec case. */ - info->ehrd_offset = info->altivec_save_offset - ehrd_size; - /* Adjust for ROP protection. */ info->rop_hash_save_offset = info->altivec_save_offset - info->rop_hash_size; - info->ehrd_offset -= info->rop_hash_size; } else - info->ehrd_offset = info->gp_save_offset - ehrd_size; + /* Adjust for ROP protection. */ + info->rop_hash_save_offset + = info->gp_save_offset - info->rop_hash_size; + info->ehrd_offset = info->rop_hash_save_offset - ehrd_size; info->ehcr_offset = info->ehrd_offset - ehcr_size; info->cr_save_offset = reg_size; /* first word when 64-bit. 
*/ info->lr_save_offset = 2*reg_size; diff --git a/gcc/testsuite/gcc.target/powerpc/pr115389.c b/gcc/testsuite/gcc.target/powerpc/pr115389.c new file mode 100644 index ..a091ee8a1be0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr115389.c @@ -0,0 +1,17 @@ +/* PR target/115389 */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect -mno-vsx -mno-altivec -mabi=no-altivec -save-temps" } */ +/* { dg-require-effective-target rop_ok } */ + +/* Verify we do not emit invalid offsets for our ROP insns. */ + +extern void foo (void); +long +bar (void) +{ + foo (); + return 0; +} + +/* { dg-final { scan-assembler-times {\mhashst\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mhashchk\M} 1 } } */
[gcc r13-8942] rs6000: Catch unsupported ABI errors when using -mrop-protect [PR114759, PR115988]
https://gcc.gnu.org/g:9a4603d323d890dfab6d27ede17dc904abdccd9b commit r13-8942-g9a4603d323d890dfab6d27ede17dc904abdccd9b Author: Peter Bergner Date: Thu Jul 18 18:01:46 2024 -0500 rs6000: Catch unsupported ABI errors when using -mrop-protect [PR114759,PR115988] 2024-07-18 Peter Bergner gcc/testsuite/ PR target/114759 PR target/115988 * gcc.target/powerpc/pr114759-3.c: Catch unsupported ABI errors. (cherry picked from commit b2f47a5c1d5204131660ea0372a08e692df8844e) Diff: --- gcc/testsuite/gcc.target/powerpc/pr114759-3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.target/powerpc/pr114759-3.c b/gcc/testsuite/gcc.target/powerpc/pr114759-3.c index 6770a9aec3b5..e2f1d42e111f 100644 --- a/gcc/testsuite/gcc.target/powerpc/pr114759-3.c +++ b/gcc/testsuite/gcc.target/powerpc/pr114759-3.c @@ -2,7 +2,8 @@ /* { dg-do compile } */ /* { dg-options "-O2 -mdejagnu-cpu=power7 -mrop-protect" } */ -/* Verify we emit an error if we use -mrop-protect with an unsupported cpu. */ +/* Verify we emit an error if we use -mrop-protect with an unsupported cpu + or ABI. */ extern void foo (void); @@ -17,3 +18,4 @@ bar (void) in the final line (which is all that dg-error inspects). Hence, we have to tell dg-error to ignore the line number. */ /* { dg-error "'-mrop-protect' requires '-mcpu=power8'" "PR114759" { target *-*-* } 0 } */ +/* { dg-error "'-mrop-protect' requires the ELFv2 ABI" "PR114759" { target { ! rop_ok } } 0 } */
[gcc r13-8940] rs6000: ROP - Emit hashst and hashchk insns on Power8 and later [PR114759]
https://gcc.gnu.org/g:77fd352a47137d79e6b7a480503ce4368f13c3e5 commit r13-8940-g77fd352a47137d79e6b7a480503ce4368f13c3e5 Author: Peter Bergner Date: Wed Jun 19 16:07:29 2024 -0500 rs6000: ROP - Emit hashst and hashchk insns on Power8 and later [PR114759] We currently only emit the ROP-protect hash* insns for Power10, where the insns were added to the architecture. We want to emit them for earlier cpus (where they operate as NOPs), so that if those older binaries are ever executed on a Power10, then they'll be protected from ROP attacks. Binutils accepts hashst and hashchk back to Power8, so change GCC to emit them for Power8 and later. This matches clang's behavior. 2024-06-19 Peter Bergner gcc/ PR target/114759 * config/rs6000/rs6000-logue.cc (rs6000_stack_info): Use TARGET_POWER8. (rs6000_emit_prologue): Likewise. * config/rs6000/rs6000.md (hashchk): Likewise. (hashst): Likewise. Fix whitespace. gcc/testsuite/ PR target/114759 * gcc.target/powerpc/pr114759-2.c: New test. * lib/target-supports.exp (rop_ok): Use check_effective_target_has_arch_pwr8. (cherry picked from commit a05c3d23d1e1c8d2971b123804fc7a61a3561adb) Diff: --- gcc/config/rs6000/rs6000-logue.cc | 6 +++--- gcc/config/rs6000/rs6000.md | 6 +++--- gcc/testsuite/gcc.target/powerpc/pr114759-2.c | 17 + gcc/testsuite/lib/target-supports.exp | 2 +- 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc index d38b90b61070..9e6b4ca5533f 100644 --- a/gcc/config/rs6000/rs6000-logue.cc +++ b/gcc/config/rs6000/rs6000-logue.cc @@ -716,7 +716,7 @@ rs6000_stack_info (void) info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame); info->rop_hash_size = 0; - if (TARGET_POWER10 + if (TARGET_POWER8 && info->calls_p && DEFAULT_ABI == ABI_ELFv2 && rs6000_rop_protect) @@ -3275,7 +3275,7 @@ rs6000_emit_prologue (void) /* NOTE: The hashst isn't needed if we're going to do a sibcall, but there's no way to know that here. 
Harmless except for performance, of course. */ - if (TARGET_POWER10 && rs6000_rop_protect && info->rop_hash_size != 0) + if (TARGET_POWER8 && rs6000_rop_protect && info->rop_hash_size != 0) { gcc_assert (DEFAULT_ABI == ABI_ELFv2); rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); @@ -5054,7 +5054,7 @@ rs6000_emit_epilogue (enum epilogue_type epilogue_type) /* The ROP hash check must occur after the stack pointer is restored (since the hash involves r1), and is not performed for a sibcall. */ - if (TARGET_POWER10 + if (TARGET_POWER8 && rs6000_rop_protect && info->rop_hash_size != 0 && epilogue_type != EPILOGUE_TYPE_SIBCALL) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 8d8118197da7..79aaf490837f 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -15665,9 +15665,9 @@ (define_insn "hashst" [(set (match_operand:DI 0 "simple_offsettable_mem_operand" "=m") -(unspec_volatile:DI [(match_operand:DI 1 "int_reg_operand" "r")] + (unspec_volatile:DI [(match_operand:DI 1 "int_reg_operand" "r")] UNSPEC_HASHST))] - "TARGET_POWER10 && rs6000_rop_protect" + "TARGET_POWER8 && rs6000_rop_protect" { static char templ[32]; const char *p = rs6000_privileged ? "p" : ""; @@ -15680,7 +15680,7 @@ [(unspec_volatile [(match_operand:DI 0 "int_reg_operand" "r") (match_operand:DI 1 "simple_offsettable_mem_operand" "m")] UNSPEC_HASHCHK)] - "TARGET_POWER10 && rs6000_rop_protect" + "TARGET_POWER8 && rs6000_rop_protect" { static char templ[32]; const char *p = rs6000_privileged ? "p" : ""; diff --git a/gcc/testsuite/gcc.target/powerpc/pr114759-2.c b/gcc/testsuite/gcc.target/powerpc/pr114759-2.c new file mode 100644 index ..3881ebd416e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr114759-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mdejagnu-cpu=power8 -mrop-protect" } */ +/* { dg-require-effective-target rop_ok } Only enable on supported ABIs. 
*/ + +/* Verify we generate ROP-protect hash insns when compiling for Power8. */ + +extern void foo (void); + +int +bar (void) +{ + foo (); + return 5; +} + +/* { dg-final { scan-assembler-times {\mhashst\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mhashchk\M} 1 } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index be8ddc8e0f85..c81313ea7177 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -6919,7 +6919,7 @@ proc check_effective_target_powerpc_elfv2 { } { # Return 1
[gcc r13-8941] rs6000: Error on CPUs and ABIs that don't support the ROP protection insns [PR114759]
https://gcc.gnu.org/g:63b1b3e23c3c589c2859d481705dc706cbff35a1 commit r13-8941-g63b1b3e23c3c589c2859d481705dc706cbff35a1 Author: Peter Bergner Date: Mon Jul 15 16:57:32 2024 -0500 rs6000: Error on CPUs and ABIs that don't support the ROP protection insns [PR114759] We currently silently ignore the -mrop-protect option for old CPUs we don't support with the ROP hash insns, but we throw an error for unsupported ABIs. This patch treats unsupported CPUs and ABIs similarly by throwing an error for both. This matches clang behavior and allows us to simplify our tests in the code that generates our prologue and epilogue code. 2024-06-26 Peter Bergner gcc/ PR target/114759 * config/rs6000/rs6000.cc (rs6000_option_override_internal): Disallow CPUs and ABIs that do not support the ROP protection insns. * config/rs6000/rs6000-logue.cc (rs6000_stack_info): Remove now unneeded tests. (rs6000_emit_prologue): Likewise. Remove unneeded gcc_assert. (rs6000_emit_epilogue): Likewise. * config/rs6000/rs6000.md: Likewise. gcc/testsuite/ PR target/114759 * gcc.target/powerpc/pr114759-3.c: New test. (cherry picked from commit 6f2bab9b5d1ce1914c748b7dcd8638dafaa98df7) Diff: --- gcc/config/rs6000/rs6000-logue.cc | 22 ++ gcc/config/rs6000/rs6000.cc | 12 gcc/config/rs6000/rs6000.md | 4 ++-- gcc/testsuite/gcc.target/powerpc/pr114759-3.c | 19 +++ 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc index 9e6b4ca5533f..208404e68640 100644 --- a/gcc/config/rs6000/rs6000-logue.cc +++ b/gcc/config/rs6000/rs6000-logue.cc @@ -716,17 +716,11 @@ rs6000_stack_info (void) info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame); info->rop_hash_size = 0; - if (TARGET_POWER8 - && info->calls_p - && DEFAULT_ABI == ABI_ELFv2 - && rs6000_rop_protect) + /* If we want ROP protection and this function makes a call, indicate + we need to create a stack slot to save the hashed return address in. 
*/ + if (rs6000_rop_protect + && info->calls_p) info->rop_hash_size = 8; - else if (rs6000_rop_protect && DEFAULT_ABI != ABI_ELFv2) -{ - /* We can't check this in rs6000_option_override_internal since -DEFAULT_ABI isn't established yet. */ - error ("%qs requires the ELFv2 ABI", "-mrop-protect"); -} /* Determine if we need to save the condition code registers. */ if (save_reg_p (CR2_REGNO) @@ -3275,9 +3269,8 @@ rs6000_emit_prologue (void) /* NOTE: The hashst isn't needed if we're going to do a sibcall, but there's no way to know that here. Harmless except for performance, of course. */ - if (TARGET_POWER8 && rs6000_rop_protect && info->rop_hash_size != 0) + if (info->rop_hash_size) { - gcc_assert (DEFAULT_ABI == ABI_ELFv2); rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); rtx addr = gen_rtx_PLUS (Pmode, stack_ptr, GEN_INT (info->rop_hash_save_offset)); @@ -5054,12 +5047,9 @@ rs6000_emit_epilogue (enum epilogue_type epilogue_type) /* The ROP hash check must occur after the stack pointer is restored (since the hash involves r1), and is not performed for a sibcall. */ - if (TARGET_POWER8 - && rs6000_rop_protect - && info->rop_hash_size != 0 + if (info->rop_hash_size && epilogue_type != EPILOGUE_TYPE_SIBCALL) { - gcc_assert (DEFAULT_ABI == ABI_ELFv2); rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); rtx addr = gen_rtx_PLUS (Pmode, stack_ptr, GEN_INT (info->rop_hash_save_offset)); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 29475ffcb7f0..2b876c90e6fa 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -4850,6 +4850,18 @@ rs6000_option_override_internal (bool global_init_p) } } + /* We only support ROP protection on certain targets. */ + if (rs6000_rop_protect) +{ + /* Disallow CPU targets we don't support. */ + if (!TARGET_POWER8) + error ("%<-mrop-protect%> requires %<-mcpu=power8%> or later"); + + /* Disallow ABI targets we don't support. 
*/ + if (DEFAULT_ABI != ABI_ELFv2) + error ("%<-mrop-protect%> requires the ELFv2 ABI"); +} + /* Initialize all of the registers. */ rs6000_init_hard_regno_mode_ok (global_init_p); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 79aaf490837f..7e6fcc45a204 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -15667,7 +15667,7 @@ [(set (match_operand:DI 0 "simple_offsettable_mem_operand" "=m")
[gcc r15-2241] Internal-fn: Only allow modes describe types for internal fn[PR115961]
https://gcc.gnu.org/g:905973410957891fec8a3e42eeefa4618780e0ce commit r15-2241-g905973410957891fec8a3e42eeefa4618780e0ce Author: Pan Li Date: Thu Jul 18 17:23:36 2024 +0800 Internal-fn: Only allow modes describe types for internal fn[PR115961] The direct_internal_fn_supported_p has no restrictions for the type modes. For example, a bitfield like the one below will be recognized as .SAT_TRUNC. struct e { unsigned pre : 12; unsigned a : 4; }; __attribute__((noipa)) void bug (e * v, unsigned def, unsigned use) { e & defE = *v; defE.a = min_u (use + 1, 0xf); } This patch adds checks to direct_internal_fn_supported_p so that it only allows tree types that are described by modes. The following test suites pass with this patch: 1. The rv64gcv full regression tests. 2. The x86 bootstrap tests. 3. The x86 full regression tests. PR target/115961 gcc/ChangeLog: * internal-fn.cc (type_strictly_matches_mode_p): Add new func impl to check type strictly matches mode or not. (type_pair_strictly_matches_mode_p): Ditto but for tree type pair. (direct_internal_fn_supported_p): Add above check for the tree type pair. gcc/testsuite/ChangeLog: * g++.dg/torture/pr115961-run-1.C: New test. Signed-off-by: Pan Li Diff: --- gcc/internal-fn.cc| 32 +++ gcc/testsuite/g++.dg/torture/pr115961-run-1.C | 32 +++ 2 files changed, 64 insertions(+) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 95946bfd6839..8a2e07f2f965 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4164,6 +4164,35 @@ direct_internal_fn_optab (internal_fn fn) gcc_unreachable (); } +/* Return true if TYPE's mode has the same format as TYPE, and if there is + a 1:1 correspondence between the values that the mode can store and the + values that the type can store. 
*/ + +static bool +type_strictly_matches_mode_p (const_tree type) +{ + if (VECTOR_TYPE_P (type)) +return VECTOR_MODE_P (TYPE_MODE (type)); + + if (INTEGRAL_TYPE_P (type)) +return type_has_mode_precision_p (type); + + if (SCALAR_FLOAT_TYPE_P (type) || COMPLEX_FLOAT_TYPE_P (type)) +return true; + + return false; +} + +/* Returns true if both types of TYPE_PAIR strictly match their modes, + else returns false. */ + +static bool +type_pair_strictly_matches_mode_p (tree_pair type_pair) +{ + return type_strictly_matches_mode_p (type_pair.first) +&& type_strictly_matches_mode_p (type_pair.second); +} + /* Return true if FN is supported for the types in TYPES when the optimization type is OPT_TYPE. The types are those associated with the "type0" and "type1" fields of FN's direct_internal_fn_info @@ -4173,6 +4202,9 @@ bool direct_internal_fn_supported_p (internal_fn fn, tree_pair types, optimization_type opt_type) { + if (!type_pair_strictly_matches_mode_p (types)) +return false; + switch (fn) { #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \ diff --git a/gcc/testsuite/g++.dg/torture/pr115961-run-1.C b/gcc/testsuite/g++.dg/torture/pr115961-run-1.C new file mode 100644 index ..787f7245457d --- /dev/null +++ b/gcc/testsuite/g++.dg/torture/pr115961-run-1.C @@ -0,0 +1,32 @@ +/* PR target/115961 */ +/* { dg-do run } */ + +struct e +{ + unsigned pre : 12; + unsigned a : 4; +}; + +static unsigned min_u (unsigned a, unsigned b) +{ + return (b < a) ? b : a; +} + +__attribute__((noipa)) +void bug (e * v, unsigned def, unsigned use) { + e & defE = *v; + defE.a = min_u (use + 1, 0xf); +} + +__attribute__((noipa, optimize(0))) +int main(void) +{ + e v = { 0xded, 3 }; + + bug(&v, 32, 33); + + if (v.a != 0xf) +__builtin_abort (); + + return 0; +}
[gcc r15-2242] x86: Don't enable APX_F in 32-bit mode
https://gcc.gnu.org/g:9d312ba54428d70f0703c8774a6fe6a4755930e7 commit r15-2242-g9d312ba54428d70f0703c8774a6fe6a4755930e7 Author: Lingling Kong Date: Wed Jul 24 14:52:47 2024 +0800 x86: Don't enable APX_F in 32-bit mode gcc/ChangeLog: PR target/115978 * config/i386/driver-i386.cc (host_detect_local_cpu): Enable APX_F only for 64-bit codegen. * config/i386/i386-options.cc (DEF_PTA): Skip PTA_APX_F if not in 64-bit mode. gcc/testsuite/ChangeLog: PR target/115978 * gcc.target/i386/pr115978-1.c: New test. * gcc.target/i386/pr115978-2.c: Ditto. Diff: --- gcc/config/i386/driver-i386.cc | 3 ++- gcc/config/i386/i386-options.cc| 3 ++- gcc/testsuite/gcc.target/i386/pr115978-1.c | 22 ++ gcc/testsuite/gcc.target/i386/pr115978-2.c | 6 ++ 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc index 11470eaea125..445f5640155a 100644 --- a/gcc/config/i386/driver-i386.cc +++ b/gcc/config/i386/driver-i386.cc @@ -900,7 +900,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) if (has_feature (isa_names_table[i].feature)) { if (codegen_x86_64 - || isa_names_table[i].feature != FEATURE_UINTR) + || (isa_names_table[i].feature != FEATURE_UINTR + && isa_names_table[i].feature != FEATURE_APX_F)) options = concat (options, " ", isa_names_table[i].option, NULL); } diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 059ef3ae6ad4..1c8f7835af2b 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2351,7 +2351,8 @@ ix86_option_override_internal (bool main_args_p, #define DEF_PTA(NAME) \ if (((processor_alias_table[i].flags & PTA_ ## NAME) != 0) \ && PTA_ ## NAME != PTA_64BIT \ - && (TARGET_64BIT || PTA_ ## NAME != PTA_UINTR) \ + && (TARGET_64BIT || (PTA_ ## NAME != PTA_UINTR \ +&& PTA_ ## NAME != PTA_APX_F))\ && !TARGET_EXPLICIT_ ## NAME ## _P (opts)) \ SET_TARGET_ ## NAME (opts); #include "i386-isa.def" diff --git 
a/gcc/testsuite/gcc.target/i386/pr115978-1.c b/gcc/testsuite/gcc.target/i386/pr115978-1.c new file mode 100644 index ..18a1c5f153a3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115978-1.c @@ -0,0 +1,22 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=native" } */ + +int +main () +{ + if (__builtin_cpu_supports ("apxf")) +{ +#ifdef __x86_64__ +# ifndef __APX_F__ + __builtin_abort (); +# endif +#else +# ifdef __APX_F__ + __builtin_abort (); +# endif +#endif + return 0; +} + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr115978-2.c b/gcc/testsuite/gcc.target/i386/pr115978-2.c new file mode 100644 index ..900d6eb096ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr115978-2.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=native -mno-apxf" } */ + +#ifdef __APX_F__ +# error APX_F should be disabled +#endif