Hi Sterling, Attached please find version 2 of the patch.
I applied this updated patch (with small adaptations) to gcc-4.8.2 and carried out some tests. I can execute the testcases in a simulator, which support zero-overhead looping instructions. First of all, I can successfully build libgcc, libstdc++ and newlibc for xtensa with this patch. The newly built xtensa gcc also passed testsuite which comes with newlibc. I also tested the cases under gcc/testsuite/gcc.c-torture/execute/ directory. There are about 800+ cases tested. Test result shows no new failed case with this patch, compared with the original gcc version. Is that OK? I also double checked the loop relaxation issue with binutils-2.24 (the latest version). The result show that the assember can do loop relaxation when the loop target is too far ( > 256 Byte). And this is the reason why I don't check the size of the loop. Index: gcc/ChangeLog =================================================================== --- gcc/ChangeLog (revision 206463) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,18 @@ +2014-01-09 Felix Yang <fei.yang0...@gmail.com> + + * config/xtensa/xtensa.c (xtensa_reorg): New. + (xtensa_reorg_loops): New. + (xtensa_can_use_doloop_p): New. + (xtensa_invalid_within_doloop): New. + (hwloop_optimize): New. + (hwloop_fail): New. + (hwloop_pattern_reg): New. + (xtensa_emit_loop_end): Modified to emit the zero-overhead loop end label. + (xtensa_doloop_hooks): Define. + * config/xtensa/xtensa.md (doloop_end): New. + (zero_cost_loop_start): Rewritten. + (zero_cost_loop_end): Rewritten. + 2014-01-09 Richard Biener <rguent...@suse.de> PR tree-optimization/59715 Index: gcc/config/xtensa/xtensa.md =================================================================== --- gcc/config/xtensa/xtensa.md (revision 206463) +++ gcc/config/xtensa/xtensa.md (working copy) @@ -1,6 +1,7 @@ ;; GCC machine description for Tensilica's Xtensa architecture. ;; Copyright (C) 2001-2014 Free Software Foundation, Inc. ;; Contributed by Bob Wilson (bwil...@tensilica.com) at Tensilica. +;; Zero-overhead looping support by Felix Yang (fei.yang0...@gmail.com). ;; This file is part of GCC. @@ -35,6 +36,8 @@ (UNSPEC_TLS_CALL 9) (UNSPEC_TP 10) (UNSPEC_MEMW 11) + (UNSPEC_LSETUP_START 12) + (UNSPEC_LSETUP_END 13) (UNSPECV_SET_FP 1) (UNSPECV_ENTRY 2) @@ -1289,41 +1292,67 @@ (set_attr "length" "3")]) +;; Hardware loop support. + ;; Define the loop insns used by bct optimization to represent the -;; start and end of a zero-overhead loop (in loop.c). This start -;; template generates the loop insn; the end template doesn't generate -;; any instructions since loop end is handled in hardware. +;; start and end of a zero-overhead loop. This start template generates +;; the loop insn; the end template doesn't generate any instructions since +;; loop end is handled in hardware. (define_insn "zero_cost_loop_start" [(set (pc) - (if_then_else (eq (match_operand:SI 0 "register_operand" "a") - (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (reg:SI 19) - (plus:SI (match_dup 0) (const_int -1)))] + (if_then_else (ne (match_operand:SI 0 "register_operand" "a") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 2 "register_operand" "+a0") + (plus (match_dup 2) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] "" - "loopnez\t%0, %l1" + "loop\t%0, %l1_LEND" [(set_attr "type" "jump") (set_attr "mode" "none") (set_attr "length" "3")]) (define_insn "zero_cost_loop_end" [(set (pc) - (if_then_else (ne (reg:SI 19) (const_int 0)) - (label_ref (match_operand 0 "" "")) - (pc))) - (set (reg:SI 19) - (plus:SI (reg:SI 19) (const_int -1)))] + (if_then_else (ne (match_operand:SI 0 "register_operand" "a") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 2 "register_operand" "+a0") + (plus (match_dup 2) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] "" { - xtensa_emit_loop_end (insn, operands); - return ""; + xtensa_emit_loop_end (insn, operands); + return ""; } [(set_attr "type" "jump") (set_attr "mode" "none") (set_attr "length" "0")]) +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else + (ne (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)])] + "" +{ + /* The loop optimizer doesn't check the predicates... */ + if (GET_MODE (operands[0]) != SImode) + FAIL; +}) + ;; Setting a register from a comparison. Index: gcc/config/xtensa/xtensa.c =================================================================== --- gcc/config/xtensa/xtensa.c (revision 206463) +++ gcc/config/xtensa/xtensa.c (working copy) @@ -1,6 +1,7 @@ /* Subroutines for insn-output.c for Tensilica's Xtensa architecture. Copyright (C) 2001-2014 Free Software Foundation, Inc. Contributed by Bob Wilson (bwil...@tensilica.com) at Tensilica. + Zero-overhead looping support by Felix Yang (fei.yang0...@gmail.com). This file is part of GCC. @@ -61,8 +62,9 @@ along with GCC; see the file COPYING3. If not see #include "gimple.h" #include "gimplify.h" #include "df.h" +#include "hw-doloop.h" +#include "dumpfile.h" - /* Enumeration for all of the relational tests, so that we can build arrays indexed by the test type, and not worry about the order of EQ, NE, etc. */ @@ -186,6 +188,10 @@ static reg_class_t xtensa_secondary_reload (bool, static bool constantpool_address_p (const_rtx addr); static bool xtensa_legitimate_constant_p (enum machine_mode, rtx); +static void xtensa_reorg (void); +static bool xtensa_can_use_doloop_p (double_int, double_int iterations_max, + unsigned int, bool); +static const char *xtensa_invalid_within_doloop (const_rtx); static bool xtensa_member_type_forces_blk (const_tree, enum machine_mode mode); @@ -312,6 +318,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p + +#undef TARGET_INVALID_WITHIN_DOLOOP +#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop + struct gcc_target targetm = TARGET_INITIALIZER; @@ -1676,7 +1691,7 @@ xtensa_emit_loop_end (rtx insn, rtx *operands) } } - output_asm_insn ("# loop end for %0", operands); + output_asm_insn ("%1_LEND:", operands); } @@ -3709,4 +3724,224 @@ xtensa_legitimate_constant_p (enum machine_mode mo return !xtensa_tls_referenced_p (x); } +/* Implement TARGET_CAN_USE_DOLOOP_P. */ + +static bool +xtensa_can_use_doloop_p (double_int, double_int, + unsigned int level, bool entered_at_top) +{ + /* Considering limitations in the hardware, only use doloop for innermost loops + which must be entered from the top. */ + if (level != 1 || !entered_at_top) + return false; + + return true; +} + +/* NULL if INSN insn is valid within a low-overhead loop. + Otherwise return why doloop cannot be applied. */ + +static const char * +xtensa_invalid_within_doloop (const_rtx insn) +{ + if (CALL_P (insn)) + return "Function call in the loop."; + + return NULL; +} + +/* Optimize LOOP. */ + +static bool +hwloop_optimize (hwloop_info loop) +{ + int i; + edge entry_edge; + basic_block entry_bb; + rtx insn, seq, iter_reg, entry_after; + + if (loop->depth > 1) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not innermost\n", loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has more than one entry\n", loop->loop_no); + return false; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not entered from head\n", loop->loop_no); + return false; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has invalid insn\n", loop->loop_no); + return false; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no); + return false; + } + + /* Check if start_label appears before doloop_end. */ + insn = loop->start_label; + while (insn && insn != loop->loop_end) + insn = NEXT_INSN (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + return false; + } + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + gcc_assert (REG_P (iter_reg)); + + entry_edge = NULL; + + FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge) + if (entry_edge->flags & EDGE_FALLTHRU) + break; + + if (entry_edge == NULL) + return false; + + /* Place the zero_cost_loop_start instruction before the loop. */ + entry_bb = entry_edge->src; + + start_sequence (); + + insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg, + loop->start_label, + loop->iter_reg)); + + seq = get_insns (); + + if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1) + { + basic_block new_bb; + edge e; + edge_iterator ei; + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU)) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + make_edge (new_bb, loop->head, 0); + } + else + { + entry_after = BB_END (entry_bb); + while (DEBUG_INSN_P (entry_after) + || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK)) + entry_after = PREV_INSN (entry_after); + emit_insn_after (seq, entry_after); + } + + end_sequence (); + + return true; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the loop_end pattern into + a subtract and a test. */ + +static void +hwloop_fail (hwloop_info loop) +{ + rtx test, insn = loop->loop_end; + + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, + loop->iter_reg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a doloop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ + +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_zero_cost_loop_end) + return NULL_RTX; + + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + + +static struct hw_doloop_hooks xtensa_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns + and tries to rewrite the RTL of these loops so that proper Xtensa + hardware loops are generated. */ + +static void +xtensa_reorg_loops (void) +{ + reorg_loops (false, &xtensa_doloop_hooks); +} + +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. */ + +static void +xtensa_reorg (void) +{ + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + df_analyze (); + + /* Doloop optimization. */ + xtensa_reorg_loops (); +} + #include "gt-xtensa.h" Cheers, Felix On Thu, Jan 9, 2014 at 12:49 AM, Sterling Augustine <augustine.sterl...@gmail.com> wrote: > On Wed, Jan 8, 2014 at 8:27 AM, Felix Yang <fei.yang0...@gmail.com> wrote: >> Hi Sterling, >> >> This patch implements zero-overhead looping for xtensa backend using >> hw-doloop facility. >> If OK for trunk, please apply it for me. Thanks. > > Hi Felix, > > I last worked on zero-overhead loops for Xtensa in the gcc 4.3 > timeframe, but when I did, I ran into several problems related to > later optimizations rearranging the code which I didn't have time to > address. > > I'm sure much of that experience is completely stale now, but I would > appreciate a detail of the testing you have done with this patch (in > particular, a description of the different xtensa configurations you > tested it against, especially the ones with and without loop > instructions) before I approve it. Please be sure the assembler can > relax the loops it generates as well. I don't see any particular > problem, but there are many, many gotchas when dealing with xtensa > loop instructions. > > It also appears that Tensilica has stopped posting test results for > Xtensa, which makes it difficult to evaluate the quality of this > patch. > > Thanks, > > Sterling
Index: gcc/ChangeLog =================================================================== --- gcc/ChangeLog (revision 206463) +++ gcc/ChangeLog (working copy) @@ -1,3 +1,18 @@ +2014-01-09 Felix Yang <fei.yang0...@gmail.com> + + * config/xtensa/xtensa.c (xtensa_reorg): New. + (xtensa_reorg_loops): New. + (xtensa_can_use_doloop_p): New. + (xtensa_invalid_within_doloop): New. + (hwloop_optimize): New. + (hwloop_fail): New. + (hwloop_pattern_reg): New. + (xtensa_emit_loop_end): Modified to emit the zero-overhead loop end label. + (xtensa_doloop_hooks): Define. + * config/xtensa/xtensa.md (doloop_end): New. + (zero_cost_loop_start): Rewritten. + (zero_cost_loop_end): Rewritten. + 2014-01-09 Richard Biener <rguent...@suse.de> PR tree-optimization/59715 Index: gcc/config/xtensa/xtensa.md =================================================================== --- gcc/config/xtensa/xtensa.md (revision 206463) +++ gcc/config/xtensa/xtensa.md (working copy) @@ -1,6 +1,7 @@ ;; GCC machine description for Tensilica's Xtensa architecture. ;; Copyright (C) 2001-2014 Free Software Foundation, Inc. ;; Contributed by Bob Wilson (bwil...@tensilica.com) at Tensilica. +;; Zero-overhead looping support by Felix Yang (fei.yang0...@gmail.com). ;; This file is part of GCC. @@ -35,6 +36,8 @@ (UNSPEC_TLS_CALL 9) (UNSPEC_TP 10) (UNSPEC_MEMW 11) + (UNSPEC_LSETUP_START 12) + (UNSPEC_LSETUP_END 13) (UNSPECV_SET_FP 1) (UNSPECV_ENTRY 2) @@ -1289,41 +1292,67 @@ (set_attr "length" "3")]) +;; Hardware loop support. + ;; Define the loop insns used by bct optimization to represent the -;; start and end of a zero-overhead loop (in loop.c). This start -;; template generates the loop insn; the end template doesn't generate -;; any instructions since loop end is handled in hardware. +;; start and end of a zero-overhead loop. This start template generates +;; the loop insn; the end template doesn't generate any instructions since +;; loop end is handled in hardware. (define_insn "zero_cost_loop_start" [(set (pc) - (if_then_else (eq (match_operand:SI 0 "register_operand" "a") - (const_int 0)) - (label_ref (match_operand 1 "" "")) - (pc))) - (set (reg:SI 19) - (plus:SI (match_dup 0) (const_int -1)))] + (if_then_else (ne (match_operand:SI 0 "register_operand" "a") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 2 "register_operand" "+a0") + (plus (match_dup 2) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] "" - "loopnez\t%0, %l1" + "loop\t%0, %l1_LEND" [(set_attr "type" "jump") (set_attr "mode" "none") (set_attr "length" "3")]) (define_insn "zero_cost_loop_end" [(set (pc) - (if_then_else (ne (reg:SI 19) (const_int 0)) - (label_ref (match_operand 0 "" "")) - (pc))) - (set (reg:SI 19) - (plus:SI (reg:SI 19) (const_int -1)))] + (if_then_else (ne (match_operand:SI 0 "register_operand" "a") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_operand:SI 2 "register_operand" "+a0") + (plus (match_dup 2) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] "" { - xtensa_emit_loop_end (insn, operands); - return ""; + xtensa_emit_loop_end (insn, operands); + return ""; } [(set_attr "type" "jump") (set_attr "mode" "none") (set_attr "length" "0")]) +; operand 0 is the loop count pseudo register +; operand 1 is the label to jump to at the top of the loop +(define_expand "doloop_end" + [(parallel [(set (pc) (if_then_else + (ne (match_operand:SI 0 "" "") + (const_int 1)) + (label_ref (match_operand 1 "" "")) + (pc))) + (set (match_dup 0) + (plus:SI (match_dup 0) + (const_int -1))) + (unspec [(const_int 0)] UNSPEC_LSETUP_END)])] + "" +{ + /* The loop optimizer doesn't check the predicates... */ + if (GET_MODE (operands[0]) != SImode) + FAIL; +}) + ;; Setting a register from a comparison. Index: gcc/config/xtensa/xtensa.c =================================================================== --- gcc/config/xtensa/xtensa.c (revision 206463) +++ gcc/config/xtensa/xtensa.c (working copy) @@ -1,6 +1,7 @@ /* Subroutines for insn-output.c for Tensilica's Xtensa architecture. Copyright (C) 2001-2014 Free Software Foundation, Inc. Contributed by Bob Wilson (bwil...@tensilica.com) at Tensilica. + Zero-overhead looping support by Felix Yang (fei.yang0...@gmail.com). This file is part of GCC. @@ -61,8 +62,9 @@ along with GCC; see the file COPYING3. If not see #include "gimple.h" #include "gimplify.h" #include "df.h" +#include "hw-doloop.h" +#include "dumpfile.h" - /* Enumeration for all of the relational tests, so that we can build arrays indexed by the test type, and not worry about the order of EQ, NE, etc. */ @@ -186,6 +188,10 @@ static reg_class_t xtensa_secondary_reload (bool, static bool constantpool_address_p (const_rtx addr); static bool xtensa_legitimate_constant_p (enum machine_mode, rtx); +static void xtensa_reorg (void); +static bool xtensa_can_use_doloop_p (double_int, double_int iterations_max, + unsigned int, bool); +static const char *xtensa_invalid_within_doloop (const_rtx); static bool xtensa_member_type_forces_blk (const_tree, enum machine_mode mode); @@ -312,6 +318,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE #undef TARGET_LEGITIMATE_CONSTANT_P #define TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg + +#undef TARGET_CAN_USE_DOLOOP_P +#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p + +#undef TARGET_INVALID_WITHIN_DOLOOP +#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop + struct gcc_target targetm = TARGET_INITIALIZER; @@ -1676,7 +1691,7 @@ xtensa_emit_loop_end (rtx insn, rtx *operands) } } - output_asm_insn ("# loop end for %0", operands); + output_asm_insn ("%1_LEND:", operands); } @@ -3709,4 +3724,224 @@ xtensa_legitimate_constant_p (enum machine_mode mo return !xtensa_tls_referenced_p (x); } +/* Implement TARGET_CAN_USE_DOLOOP_P. */ + +static bool +xtensa_can_use_doloop_p (double_int, double_int, + unsigned int level, bool entered_at_top) +{ + /* Considering limitations in the hardware, only use doloop for innermost loops + which must be entered from the top. */ + if (level != 1 || !entered_at_top) + return false; + + return true; +} + +/* NULL if INSN insn is valid within a low-overhead loop. + Otherwise return why doloop cannot be applied. */ + +static const char * +xtensa_invalid_within_doloop (const_rtx insn) +{ + if (CALL_P (insn)) + return "Function call in the loop."; + + return NULL; +} + +/* Optimize LOOP. */ + +static bool +hwloop_optimize (hwloop_info loop) +{ + int i; + edge entry_edge; + basic_block entry_bb; + rtx insn, seq, iter_reg, entry_after; + + if (loop->depth > 1) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not innermost\n", loop->loop_no); + return false; + } + + if (!loop->incoming_dest) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has more than one entry\n", loop->loop_no); + return false; + } + + if (loop->incoming_dest != loop->head) + { + if (dump_file) + fprintf (dump_file, ";; loop %d is not entered from head\n", loop->loop_no); + return false; + } + + if (loop->has_call || loop->has_asm) + { + if (dump_file) + fprintf (dump_file, ";; loop %d has invalid insn\n", loop->loop_no); + return false; + } + + /* Scan all the blocks to make sure they don't use iter_reg. */ + if (loop->iter_reg_used || loop->iter_reg_used_outside) + { + if (dump_file) + fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no); + return false; + } + + /* Check if start_label appears before doloop_end. */ + insn = loop->start_label; + while (insn && insn != loop->loop_end) + insn = NEXT_INSN (insn); + + if (!insn) + { + if (dump_file) + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", + loop->loop_no); + return false; + } + + /* Get the loop iteration register. */ + iter_reg = loop->iter_reg; + + gcc_assert (REG_P (iter_reg)); + + entry_edge = NULL; + + FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge) + if (entry_edge->flags & EDGE_FALLTHRU) + break; + + if (entry_edge == NULL) + return false; + + /* Place the zero_cost_loop_start instruction before the loop. */ + entry_bb = entry_edge->src; + + start_sequence (); + + insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg, + loop->start_label, + loop->iter_reg)); + + seq = get_insns (); + + if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1) + { + basic_block new_bb; + edge e; + edge_iterator ei; + + emit_insn_before (seq, BB_HEAD (loop->head)); + seq = emit_label_before (gen_label_rtx (), seq); + + new_bb = create_basic_block (seq, insn, entry_bb); + FOR_EACH_EDGE (e, ei, loop->incoming) + { + if (!(e->flags & EDGE_FALLTHRU)) + redirect_edge_and_branch_force (e, new_bb); + else + redirect_edge_succ (e, new_bb); + } + make_edge (new_bb, loop->head, 0); + } + else + { + entry_after = BB_END (entry_bb); + while (DEBUG_INSN_P (entry_after) + || (NOTE_P (entry_after) + && NOTE_KIND (entry_after) != NOTE_INSN_BASIC_BLOCK)) + entry_after = PREV_INSN (entry_after); + emit_insn_after (seq, entry_after); + } + + end_sequence (); + + return true; +} + +/* A callback for the hw-doloop pass. Called when a loop we have discovered + turns out not to be optimizable; we have to split the loop_end pattern into + a subtract and a test. */ + +static void +hwloop_fail (hwloop_info loop) +{ + rtx test, insn = loop->loop_end; + + emit_insn_before (gen_addsi3 (loop->iter_reg, + loop->iter_reg, + constm1_rtx), + loop->loop_end); + + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); + insn = emit_jump_insn_before (gen_cbranchsi4 (test, + loop->iter_reg, const0_rtx, + loop->start_label), + loop->loop_end); + + JUMP_LABEL (insn) = loop->start_label; + LABEL_NUSES (loop->start_label)++; + delete_insn (loop->loop_end); +} + +/* A callback for the hw-doloop pass. This function examines INSN; if + it is a doloop_end pattern we recognize, return the reg rtx for the + loop counter. Otherwise, return NULL_RTX. */ + +static rtx +hwloop_pattern_reg (rtx insn) +{ + rtx reg; + + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_zero_cost_loop_end) + return NULL_RTX; + + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); + if (!REG_P (reg)) + return NULL_RTX; + return reg; +} + + +static struct hw_doloop_hooks xtensa_doloop_hooks = +{ + hwloop_pattern_reg, + hwloop_optimize, + hwloop_fail +}; + +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns + and tries to rewrite the RTL of these loops so that proper Xtensa + hardware loops are generated. */ + +static void +xtensa_reorg_loops (void) +{ + reorg_loops (false, &xtensa_doloop_hooks); +} + +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. */ + +static void +xtensa_reorg (void) +{ + /* We are freeing block_for_insn in the toplev to keep compatibility + with old MDEP_REORGS that are not CFG based. Recompute it now. */ + compute_bb_for_insn (); + + df_analyze (); + + /* Doloop optimization. */ + xtensa_reorg_loops (); +} + #include "gt-xtensa.h"