https://gcc.gnu.org/g:c738d4ef524e66c1aa13e949e701473a7dcc4db6
commit r16-5456-gc738d4ef524e66c1aa13e949e701473a7dcc4db6 Author: Kito Cheng <[email protected]> Date: Wed Nov 5 17:55:39 2025 +0800 RISC-V: Add RTL pass to combine cm.popret with zero return value This patch implements a new RTL pass that combines "li a0, 0" and "cm.popret" into a single "cm.popretz" instruction for the Zcmp extension. This optimization cannot be done during prologue/epilogue expansion because it would cause shrink-wrapping to generate incorrect code as documented in PR113715. The dedicated RTL pass runs after shrink-wrap but before branch shortening, safely performing this combination. Changes since v2: - Apply Jeff's comment - Use CONST0_RTX rather than const0_rtx; this makes the pass able to handle (const_double:SF 0.0) as well. - Add a test case for float/double zero return values. Changes since v1: - Tweak the testcase. gcc/ChangeLog: * config/riscv/riscv-opt-popretz.cc: New file. * config/riscv/riscv-passes.def: Insert pass_combine_popretz before pass_shorten_branches. * config/riscv/riscv-protos.h (make_pass_combine_popretz): New declaration. * config/riscv/t-riscv: Add riscv-opt-popretz.o build rule. * config.gcc (riscv*): Add riscv-opt-popretz.o to extra_objs. gcc/testsuite/ChangeLog: * gcc.target/riscv/pr113715.c: New test. * gcc.target/riscv/rv32e_zcmp.c: Update expected output for test_popretz. * gcc.target/riscv/rv32i_zcmp.c: Likewise. 
Diff: --- gcc/config.gcc | 2 +- gcc/config/riscv/riscv-opt-popretz.cc | 294 ++++++++++++++++++++++++++++ gcc/config/riscv/riscv-passes.def | 1 + gcc/config/riscv/riscv-protos.h | 1 + gcc/config/riscv/t-riscv | 6 + gcc/testsuite/gcc.target/riscv/pr113715.c | 98 ++++++++++ gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c | 3 +- gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c | 3 +- 8 files changed, 403 insertions(+), 5 deletions(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index 20c974e8e052..0947d6317de9 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -560,7 +560,7 @@ riscv*) extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-string.o" extra_objs="${extra_objs} riscv-v.o riscv-vsetvl.o riscv-vector-costs.o riscv-avlprop.o riscv-vect-permconst.o" extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o sifive-vector-builtins-bases.o andes-vector-builtins-bases.o" - extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-bclr-lowest-set-bit.o" + extra_objs="${extra_objs} thead.o riscv-target-attr.o riscv-zicfilp.o riscv-bclr-lowest-set-bit.o riscv-opt-popretz.o" d_target_objs="riscv-d.o" extra_headers="riscv_vector.h riscv_crypto.h riscv_bitmanip.h riscv_th_vector.h sifive_vector.h andes_vector.h" target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc" diff --git a/gcc/config/riscv/riscv-opt-popretz.cc b/gcc/config/riscv/riscv-opt-popretz.cc new file mode 100644 index 000000000000..43b2d5e2a527 --- /dev/null +++ b/gcc/config/riscv/riscv-opt-popretz.cc @@ -0,0 +1,294 @@ +/* RISC-V cm.popretz optimization pass. + Copyright (C) 2025 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* + This pass combines "li a0, 0" + "cm.popret" into "cm.popretz" instruction + for the RISC-V Zcmp extension. + + Rationale: + --------- + Ideally, cm.popretz should be generated during prologue/epilogue expansion. + However, as documented in PR113715 [1], this approach causes shrink-wrapping + analysis to fail, resulting in incorrect code generation. + + To address this issue, we use a dedicated RTL pass to combine these + instructions later in the compilation pipeline, after shrink-wrapping has + completed. + + Why not use peephole2? + ---------------------- + An alternative approach would be to use a peephole2 pattern to perform this + optimization. However, between "li a0, 0" and "cm.popret", there can be + STACK_TIE and other instructions that make it difficult to write a robust + peephole pattern that handles all cases. + + For example, in RV32, when the return value is in DImode but the low part + (a0) is zero, this pattern is hard to describe effectively in peephole2. + Using a dedicated pass gives us more flexibility to handle these cases. + + [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113715 */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "tm_p.h" +#include "emit-rtl.h" +#include "dumpfile.h" +#include "tree-pass.h" +#include "insn-config.h" +#include "insn-opinit.h" +#include "recog.h" + +namespace { + +const pass_data pass_data_combine_popretz = +{ + RTL_PASS, /* type. */ + "popretz", /* name. 
*/ + OPTGROUP_NONE, /* optinfo_flags. */ + TV_MACH_DEP, /* tv_id. */ + 0, /* properties_required. */ + 0, /* properties_provided. */ + 0, /* properties_destroyed. */ + 0, /* todo_flags_start. */ + 0, /* todo_flags_finish. */ +}; + +class pass_combine_popretz : public rtl_opt_pass +{ +public: + pass_combine_popretz (gcc::context *ctxt) + : rtl_opt_pass (pass_data_combine_popretz, ctxt) + {} + + virtual bool gate (function *) + { + return TARGET_ZCMP && !frame_pointer_needed; + } + + virtual unsigned int execute (function *); +}; // class pass_combine_popretz + + +/* Check if the given instruction code is a cm.popret instruction. + Returns true if the code corresponds to any variant of gpr_multi_popret + (for different register bounds and modes). */ +static bool +riscv_popret_insn_p (int code) +{ +#define CASE_CODE_FOR_POPRET_(REG_BOUND, MODE) \ + case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: +#define CASE_CODE_FOR_POPRET(REG_BOUND) \ + CASE_CODE_FOR_POPRET_(REG_BOUND, si) \ + CASE_CODE_FOR_POPRET_(REG_BOUND, di) +#define ALL_CASE_CODE_FOR_POPRET \ + CASE_CODE_FOR_POPRET(ra) \ + CASE_CODE_FOR_POPRET(s0) \ + CASE_CODE_FOR_POPRET(s1) \ + CASE_CODE_FOR_POPRET(s2) \ + CASE_CODE_FOR_POPRET(s3) \ + CASE_CODE_FOR_POPRET(s4) \ + CASE_CODE_FOR_POPRET(s5) \ + CASE_CODE_FOR_POPRET(s6) \ + CASE_CODE_FOR_POPRET(s7) \ + CASE_CODE_FOR_POPRET(s8) \ + CASE_CODE_FOR_POPRET(s9) \ + CASE_CODE_FOR_POPRET(s11) \ + + switch (code) + { + ALL_CASE_CODE_FOR_POPRET + return true; + default: + return false; + } + +#undef CASE_CODE_FOR_POPRET_ +#undef CASE_CODE_FOR_POPRET +#undef ALL_CASE_CODE_FOR_POPRET +} + +/* Convert a cm.popret instruction code to its corresponding cm.popretz code. + Given an instruction code for gpr_multi_popret, returns the equivalent + gpr_multi_popretz instruction code. Returns CODE_FOR_nothing if the + input is not a valid popret instruction. 
*/ +static int +riscv_code_for_popretz (int code) +{ +#define CASE_CODE_FOR_POPRETZ_(REG_BOUND, MODE) \ + case CODE_FOR_gpr_multi_popret_up_to_##REG_BOUND##_##MODE: \ + return CODE_FOR_gpr_multi_popretz_up_to_##REG_BOUND##_##MODE; + +#define CASE_CODE_FOR_POPRETZ(REG_BOUND) \ + CASE_CODE_FOR_POPRETZ_(REG_BOUND, si) \ + CASE_CODE_FOR_POPRETZ_(REG_BOUND, di) + +#define ALL_CASE_CODE_FOR_POPRETZ \ + CASE_CODE_FOR_POPRETZ(ra) \ + CASE_CODE_FOR_POPRETZ(s0) \ + CASE_CODE_FOR_POPRETZ(s1) \ + CASE_CODE_FOR_POPRETZ(s2) \ + CASE_CODE_FOR_POPRETZ(s3) \ + CASE_CODE_FOR_POPRETZ(s4) \ + CASE_CODE_FOR_POPRETZ(s5) \ + CASE_CODE_FOR_POPRETZ(s6) \ + CASE_CODE_FOR_POPRETZ(s7) \ + CASE_CODE_FOR_POPRETZ(s8) \ + CASE_CODE_FOR_POPRETZ(s9) \ + CASE_CODE_FOR_POPRETZ(s11) \ + + switch (code) + { + ALL_CASE_CODE_FOR_POPRETZ + default: + return CODE_FOR_nothing; + } + +#undef CASE_CODE_FOR_POPRETZ_ +#undef CASE_CODE_FOR_POPRETZ +#undef ALL_CASE_CODE_FOR_POPRETZ +} + +/* Combine "li a0, 0" with "cm.popret" to form "cm.popretz". + + This pass scans basic blocks that precede the exit block, looking for + the following pattern: + 1. A cm.popret instruction (function epilogue with return) + 2. A (use a0) pseudo-instruction before the cm.popret + 3. A "li a0, 0" instruction (set a0 to zero) before the use + + When this pattern is found AND a0 is not referenced by any other + instructions between the "li a0, 0" and the (use a0), we can safely + combine them into a single cm.popretz instruction, which performs + the same operations more efficiently. + + This is a late RTL pass that runs before branch shortening. */ +unsigned int +pass_combine_popretz::execute (function *fn) +{ + timevar_push (TV_MACH_DEP); + edge e; + edge_iterator ei; + + /* Only visit exit block's pred since popret will only appear there. 
*/ + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (fn)->preds) + { + basic_block bb = e->src; + rtx_insn *popret_insn = BB_END (bb); + if (!JUMP_P (popret_insn)) + continue; + int code = recog_memoized (popret_insn); + if (!riscv_popret_insn_p (code)) + continue; + + rtx_insn *def_a0_insn = NULL; + rtx_insn *use_a0_insn = NULL; + rtx a0_reg = NULL; + /* Scan backwards from popret to find the pattern: + 1. First, find the (use a0) pseudo-instruction + 2. Continue scanning to find "li a0, 0" (set a0 to const0_rtx) + 3. Ensure a0 is not referenced by any instructions between them + 4. Stop at the first definition of a0 (to ensure we have the + last/most recent def before the use). */ + for (rtx_insn *def_insn = PREV_INSN (popret_insn); + def_insn && def_insn != PREV_INSN (BB_HEAD (bb)); + def_insn = PREV_INSN (def_insn)) + { + if (!INSN_P (def_insn)) + continue; + rtx def_pat = PATTERN (def_insn); + if (GET_CODE (def_pat) == USE + && REG_P (XEXP (def_pat, 0)) + && REGNO (XEXP (def_pat, 0)) == A0_REGNUM) + { + a0_reg = XEXP (def_pat, 0); + use_a0_insn = def_insn; + continue; + } + + if (use_a0_insn && reg_referenced_p (a0_reg, def_pat)) + { + /* a0 is used by other instruction before its use in popret. */ + use_a0_insn = NULL; + break; + } + + if (use_a0_insn + && GET_CODE (def_pat) == SET + && REG_P (SET_DEST (def_pat)) + && REGNO (SET_DEST (def_pat)) == A0_REGNUM) + { + if (SET_SRC (def_pat) == CONST0_RTX (GET_MODE (SET_SRC (def_pat)))) + def_a0_insn = def_insn; + /* Stop the search regardless of the value assigned to a0, + because we only want to match the last (most recent) + definition of a0 before the (use a0). */ + break; + } + } + + /* If we found a def of a0 before its use, and the value is zero, + we can replace the popret with popretz. 
*/ + if (!def_a0_insn || !use_a0_insn) + continue; + + int code_for_popretz = riscv_code_for_popretz (code); + gcc_assert (code_for_popretz != CODE_FOR_nothing); + + /* Extract the stack adjustment value from the popret instruction. + The popret pattern is a PARALLEL, and the first element is the + stack pointer adjustment: (set sp (plus sp const_int)). */ + rtx stack_adj_rtx = XVECEXP (PATTERN (popret_insn), 0, 0); + gcc_assert (GET_CODE (stack_adj_rtx) == SET + && REG_P (SET_DEST (stack_adj_rtx)) + && REGNO (SET_DEST (stack_adj_rtx)) == SP_REGNUM + && GET_CODE (SET_SRC (stack_adj_rtx)) == PLUS + && CONST_INT_P (XEXP (SET_SRC (stack_adj_rtx), 1))); + + rtx stack_adj_val = XEXP (SET_SRC (stack_adj_rtx), 1); + + /* Generate and insert the popretz instruction at the position of + the original popret. emit_insn_after places the new instruction + after PREV_INSN(popret_insn). */ + rtx popretz = GEN_FCN (code_for_popretz) (stack_adj_val); + emit_insn_after (popretz, PREV_INSN (popret_insn)); + + /* Clean up those instructions. 
*/ + remove_insn (popret_insn); + remove_insn (use_a0_insn); + remove_insn (def_a0_insn); + } + + timevar_pop (TV_MACH_DEP); + return 0; +} + +} // anon namespace + +rtl_opt_pass * +make_pass_combine_popretz (gcc::context *ctxt) +{ + return new pass_combine_popretz (ctxt); +} diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def index 5aa41228e1fe..d41cc58c1dcb 100644 --- a/gcc/config/riscv/riscv-passes.def +++ b/gcc/config/riscv/riscv-passes.def @@ -22,5 +22,6 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs); INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop); INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl); INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad); +INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_combine_popretz); INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst); diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 570acb14f585..a372779cf9f5 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -208,6 +208,7 @@ rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt); rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt); rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt); rtl_opt_pass * make_pass_bclr_lowest_set_bit (gcc::context *ctxt); +rtl_opt_pass * make_pass_combine_popretz (gcc::context *ctxt); /* Routines implemented in riscv-vsetvl.cc. 
*/ extern bool has_vtype_op (rtx_insn *); diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index b53a2dff2cf7..3f92feab50ec 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -89,6 +89,12 @@ riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-sr.cc +riscv-opt-popretz.o: $(srcdir)/config/riscv/riscv-opt-popretz.cc $(CONFIG_H) \ + $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(TARGET_H) recog.h insn-opinit.h \ + tree-pass.h emit-rtl.h insn-config.h + $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ + $(srcdir)/config/riscv/riscv-opt-popretz.cc + riscv-c.o: $(srcdir)/config/riscv/riscv-c.cc $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ diff --git a/gcc/testsuite/gcc.target/riscv/pr113715.c b/gcc/testsuite/gcc.target/riscv/pr113715.c new file mode 100644 index 000000000000..953a7bed9515 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr113715.c @@ -0,0 +1,98 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32ima_zca_zcmp -mabi=ilp32 -mcmodel=medlow -fno-pic" }*/ +/* { dg-skip-if "" { *-*-* } {"-O0" "-O1" "-O2" "-Og" "-O3" "-flto"} } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +void test_1(int); + +/* +**test_err: +** ... +** li a0,1 +** call test_1 +** cm.popretz {ra}, 16 +** ... +*/ +int test_err(int mode) +{ + if (mode == 2) { + test_1(1); + } + + return 0; +} + +/* +**test_err2: +** ... +** li a0,1 +** call test_1 +** li a1,0 +** cm.popretz {ra}, 16 +** ... +*/ +long long test_err2(int mode) +{ + if (mode == 2) { + test_1(1); + } + + return 0; +} + + +/* +**test_err3: +** ... +** li a0,1 +** call test_1 +** li a1,1 +** cm.popretz {ra}, 16 +** ... 
+*/ +long long test_err3(int mode) +{ + if (mode == 2) { + test_1(1); + return 0x100000000ll; + } + + return 0; +} + +/* +**test_err4: +** ... +** li a0,1 +** call test_1 +** cm.popretz {ra}, 16 +** ... +*/ +float test_err4(int mode) +{ + if (mode == 2) { + test_1(1); + return 0.0f; + } + + return 1.0f; +} + +/* +**test_err5: +** ... +** li a0,1 +** call test_1 +** li a1,0 +** cm.popretz {ra}, 16 +** ... +*/ +double test_err5(int mode) +{ + if (mode == 2) { + test_1(1); + return 0.0; + } + + return 1.0; +} diff --git a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c index fd845f533359..8e3a36db586a 100644 --- a/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c +++ b/gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c @@ -259,8 +259,7 @@ foo (void) **test_popretz: ** cm.push {ra}, -16 ** call f1(?:@plt)? -** li a0,0 -** cm.popret {ra}, 16 +** cm.popretz {ra}, 16 */ long test_popretz () diff --git a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c index d90f4f47c8dd..7bcffebacb5d 100644 --- a/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c +++ b/gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c @@ -259,8 +259,7 @@ foo (void) **test_popretz: ** cm.push {ra}, -16 ** call f1(?:@plt)? -** li a0,0 -** cm.popret {ra}, 16 +** cm.popretz {ra}, 16 */ long test_popretz ()
