https://gcc.gnu.org/g:acdf8105a7fcead27bd4924bf8dc7f76c9273943
commit r15-8289-gacdf8105a7fcead27bd4924bf8dc7f76c9273943 Author: Jakub Jelinek <ja...@redhat.com> Date: Wed Mar 19 09:34:10 2025 +0100 c++: Perform immediate invocation evaluation separately from cp_fold_r [PR118068] The r14-4140 change moved consteval evaluation from build_over_call to cp_fold_r. The following testcase is a regression caused by that change. There is a cast around immediate invocation, (int) foo (0x23) where consteval foo returns unsigned char. When the consteval call has been folded early to 0x23 (with unsigned char type), cp_fold sees (int) 0x23 and folds that to 0x23 with int type. But when the immediate invocation is handled only during cp_fold_r, cp_fold_r first calls cp_fold on the NOP_EXPR, which calls cp_fold on its operand, it is CALL_EXPR, nothing is folded at that point. Then cp_fold_r continues to walk the NOP_EXPR's operand, sees it is an immediate function invocation and cp_fold_immediate_r calls cxx_constant_value on it and replaces the CALL_EXPR with the INTEGER_CST 0x23. Nothing comes back to folding the containing NOP_EXPR though. Sure, with optimizations enabled some GIMPLE optimization folds that later, but e.g. with -O0 nothing does that. I think there could be arbitrarily complex expressions on top of the immediate invocation(s) that used to be folded by cp_fold before and aren't folded anymore. One possibility would be to do the immediate invocation evaluation in cp_fold rather than cp_fold_r (or in addition to cp_fold_r). The following patch instead first evaluates all immediate invocations and does cp_fold_r in a separate step. That not only allows the folding of expressions which contain immediate invocations, but also simplifies some of the quirks that had to be done when it was in cp_fold_r. Though, I had to add an extra case to cp_genericize_r RETURN_EXPR handling to avoid a regression where after emitting errors in RETURN_EXPR argument we've emitted a -Wreturn-type false positive. 
Previously we ended up with RETURN_EXPR with CLEANUP_POINT_EXPR with INIT_EXPR of RESULT_DECL to error_mark_node, now we fold it more and have RETURN_EXPR with error_mark_node operand. The former would result during gimplification into something -Wreturn-type was quiet about, the latter doesn't. BTW, r14-4140 changed behavior on consteval bool foo (bool x) { if (x) throw 1; return false; } constexpr void foo () { if constexpr (false) { bool a = foo (true); } } where GCC 13 emitted error: expression ‘<throw-expression>’ is not a constant expression error and GCC 14/trunk including the patch below doesn't reject it. And clang++ trunk rejects it. It isn't immediately clear to me what is right, if immediate invocations in discarded statements should be evaluated or not. 2025-03-19 Jakub Jelinek <ja...@redhat.com> PR target/118068 gcc/cp/ * cp-gimplify.cc (cp_fold_immediate): Use cp_walk_tree rather than cp_walk_tree_without_duplicates. (cp_fold_immediate_r): For IF_STMT_CONSTEVAL_P IF_STMT don't walk into THEN_CLAUSE subtree, only ELSE_CLAUSE. For non-call related stmts call data->pset.add and if it returns true, don't walk subtrees. (cp_fold_r): Don't call cp_fold_immediate_r here. (cp_fold_function): For C++20 or later call cp_walk_tree with cp_fold_immediate_r callback first before calling cp_walk_tree with cp_fold_r callback and call data.pset.empty () in between. (cp_fully_fold_init): Likewise. (cp_genericize_r) <case RETURN_EXPR>: Suppress -Wreturn-type warning if RETURN_EXPR has erroneous argument. gcc/testsuite/ * g++.target/i386/pr118068.C: New test. 
Diff: --- gcc/cp/cp-gimplify.cc | 74 +++++++++++++++----------------- gcc/testsuite/g++.target/i386/pr118068.C | 17 ++++++++ 2 files changed, 52 insertions(+), 39 deletions(-) diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc index 04e430801d5f..550cea29dd29 100644 --- a/gcc/cp/cp-gimplify.cc +++ b/gcc/cp/cp-gimplify.cc @@ -519,7 +519,7 @@ cp_fold_immediate (tree *tp, mce_value manifestly_const_eval, cp_fold_data data (flags); int save_errorcount = errorcount; - tree r = cp_walk_tree_without_duplicates (tp, cp_fold_immediate_r, &data); + tree r = cp_walk_tree (tp, cp_fold_immediate_r, &data, NULL); if (errorcount > save_errorcount) return integer_one_node; return r; @@ -1204,7 +1204,8 @@ cp_build_init_expr_for_ctor (tree call, tree init) return init; } -/* A subroutine of cp_fold_r to handle immediate functions. */ +/* A walk_tree callback for cp_fold_function and cp_fully_fold_init to handle + immediate functions. */ static tree cp_fold_immediate_r (tree *stmt_p, int *walk_subtrees, void *data_) @@ -1250,7 +1251,19 @@ cp_fold_immediate_r (tree *stmt_p, int *walk_subtrees, void *data_) if (!ADDR_EXPR_DENOTES_CALL_P (stmt)) decl = TREE_OPERAND (stmt, 0); break; + case IF_STMT: + if (IF_STMT_CONSTEVAL_P (stmt)) + { + if (!data->pset.add (stmt)) + cp_walk_tree (&ELSE_CLAUSE (stmt), cp_fold_immediate_r, data_, + NULL); + *walk_subtrees = 0; + return NULL_TREE; + } + /* FALLTHRU */ default: + if (data->pset.add (stmt)) + *walk_subtrees = 0; return NULL_TREE; } @@ -1370,45 +1383,8 @@ cp_fold_r (tree *stmt_p, int *walk_subtrees, void *data_) tree stmt = *stmt_p; enum tree_code code = TREE_CODE (stmt); - if (cxx_dialect >= cxx20) - { - /* Unfortunately we must handle code like - false ? bar () : 42 - where we have to check bar too. The cp_fold call below could - fold the ?: into a constant before we've checked it. */ - if (code == COND_EXPR) - { - auto then_fn = cp_fold_r, else_fn = cp_fold_r; - /* See if we can figure out if either of the branches is dead. 
If it - is, we don't need to do everything that cp_fold_r does. */ - cp_walk_tree (&TREE_OPERAND (stmt, 0), cp_fold_r, data, nullptr); - if (integer_zerop (TREE_OPERAND (stmt, 0))) - then_fn = cp_fold_immediate_r; - else if (integer_nonzerop (TREE_OPERAND (stmt, 0))) - else_fn = cp_fold_immediate_r; - - if (TREE_OPERAND (stmt, 1)) - cp_walk_tree (&TREE_OPERAND (stmt, 1), then_fn, data, - nullptr); - if (TREE_OPERAND (stmt, 2)) - cp_walk_tree (&TREE_OPERAND (stmt, 2), else_fn, data, - nullptr); - *walk_subtrees = 0; - /* Don't return yet, still need the cp_fold below. */ - } - else - cp_fold_immediate_r (stmt_p, walk_subtrees, data); - } - *stmt_p = stmt = cp_fold (*stmt_p, data->flags); - /* For certain trees, like +foo(), the cp_fold above will remove the +, - and the subsequent tree walk would go straight down to the CALL_EXPR's - operands, meaning that cp_fold_immediate_r would never see the - CALL_EXPR. Ew :(. */ - if (TREE_CODE (stmt) == CALL_EXPR && code != CALL_EXPR) - cp_fold_immediate_r (stmt_p, walk_subtrees, data); - if (data->pset.add (stmt)) { /* Don't walk subtrees of stmts we've already walked once, otherwise @@ -1537,6 +1513,16 @@ cp_fold_function (tree fndecl) been constant-evaluated already if possible, so we can safely pass ff_mce_false. */ cp_fold_data data (ff_genericize | ff_mce_false); + /* Do cp_fold_immediate_r in separate whole IL walk instead of during + cp_fold_r, as otherwise expressions using results of immediate functions + might not be folded as cp_fold is called on those before cp_fold_r is + called on their argument. 
*/ + if (cxx_dialect >= cxx20) + { + cp_walk_tree (&DECL_SAVED_TREE (fndecl), cp_fold_immediate_r, + &data, NULL); + data.pset.empty (); + } cp_walk_tree (&DECL_SAVED_TREE (fndecl), cp_fold_r, &data, NULL); /* This is merely an optimization: if FNDECL has no i-e expressions, @@ -1717,6 +1703,11 @@ cp_genericize_r (tree *stmt_p, int *walk_subtrees, void *data) case RETURN_EXPR: if (TREE_OPERAND (stmt, 0)) { + if (error_operand_p (TREE_OPERAND (stmt, 0)) + && warn_return_type) + /* Suppress -Wreturn-type for this function. */ + suppress_warning (current_function_decl, OPT_Wreturn_type); + if (is_invisiref_parm (TREE_OPERAND (stmt, 0))) /* Don't dereference an invisiref RESULT_DECL inside a RETURN_EXPR. */ @@ -2922,6 +2913,11 @@ cp_fully_fold_init (tree x) return x; x = cp_fully_fold (x, mce_false); cp_fold_data data (ff_mce_false); + if (cxx_dialect >= cxx20) + { + cp_walk_tree (&x, cp_fold_immediate_r, &data, NULL); + data.pset.empty (); + } cp_walk_tree (&x, cp_fold_r, &data, NULL); return x; } diff --git a/gcc/testsuite/g++.target/i386/pr118068.C b/gcc/testsuite/g++.target/i386/pr118068.C new file mode 100644 index 000000000000..c5cc61fd1ce9 --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr118068.C @@ -0,0 +1,17 @@ +// PR target/118068 +// { dg-do compile { target c++20 } } +// { dg-options "-O0 -mavx" } + +typedef float V __attribute__((vector_size (32))); + +consteval unsigned char +foo (int x) +{ + return x; +} + +V +bar (V x, V y) +{ + return __builtin_ia32_blendps256 (x, y, (int) foo (0x23)); +}