PING?
> > Hi Sterling, > > I made some improvements to the patch. Two changes: > 1. TARGET_LOOPS is now used as a condition of the doloop-related > patterns, which is more elegant. > 2. As the trip count register of the zero-cost loop may potentially be > spilled, > we need to change the patterns in order to handle this issue. The solution is > similar to that adopted by the c6x backend. > We just turn the zero-cost loop into a regular loop, once reload > is completed, when that happens. > Attached please find version 4 of the patch. Make check regression tested > with xtensa-elf-gcc/simulator. > OK for trunk? > > Index: gcc/ChangeLog > ================================================================ > === > --- gcc/ChangeLog (revision 216079) > +++ gcc/ChangeLog (working copy) > @@ -1,3 +1,20 @@ > +2014-10-10 Felix Yang <felix.y...@huawei.com> > + > + * config/xtensa/xtensa.h (TARGET_LOOPS): New Macro. > + * config/xtensa/xtensa.c (xtensa_reorg): New. > + (xtensa_reorg_loops): New. > + (xtensa_can_use_doloop_p): New. > + (xtensa_invalid_within_doloop): New. > + (hwloop_optimize): New. > + (hwloop_fail): New. > + (hwloop_pattern_reg): New. > + (xtensa_emit_loop_end): Modified to emit the zero-overhead loop end > label. > + (xtensa_doloop_hooks): Define. > + * config/xtensa/xtensa.md (doloop_end): New. > + (loop_end): New > + (zero_cost_loop_start): Rewritten. > + (zero_cost_loop_end): Rewritten. > + > 2014-10-10 Kyrylo Tkachov <kyrylo.tkac...@arm.com> > > * configure.ac: Add --enable-fix-cortex-a53-835769 option. 
> Index: gcc/config/xtensa/xtensa.md > ================================================================ > === > --- gcc/config/xtensa/xtensa.md (revision 216079) > +++ gcc/config/xtensa/xtensa.md (working copy) > @@ -35,6 +35,8 @@ > (UNSPEC_TLS_CALL 9) > (UNSPEC_TP 10) > (UNSPEC_MEMW 11) > + (UNSPEC_LSETUP_START 12) > + (UNSPEC_LSETUP_END 13) > > (UNSPECV_SET_FP 1) > (UNSPECV_ENTRY 2) > @@ -1289,41 +1291,120 @@ > (set_attr "length" "3")]) > > > +;; Zero-overhead looping support. > + > ;; Define the loop insns used by bct optimization to represent the -;; start > and > end of a zero-overhead loop (in loop.c). This start -;; template generates > the > loop insn; the end template doesn't generate -;; any instructions since loop > end > is handled in hardware. > +;; start and end of a zero-overhead loop. This start template > +generates ;; the loop insn; the end template doesn't generate any > +instructions since ;; loop end is handled in hardware. > > (define_insn "zero_cost_loop_start" > [(set (pc) > - (if_then_else (eq (match_operand:SI 0 "register_operand" "a") > - (const_int 0)) > - (label_ref (match_operand 1 "" "")) > - (pc))) > - (set (reg:SI 19) > - (plus:SI (match_dup 0) (const_int -1)))] > - "" > - "loopnez\t%0, %l1" > + (if_then_else (ne (match_operand:SI 0 "register_operand" "2") > + (const_int 1)) > + (label_ref (match_operand 1 "" "")) > + (pc))) > + (set (match_operand:SI 2 "register_operand" "=a") > + (plus (match_dup 0) > + (const_int -1))) > + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] "TARGET_LOOPS && > + optimize" > + "loop\t%0, %l1_LEND" > [(set_attr "type" "jump") > (set_attr "mode" "none") > (set_attr "length" "3")]) > > (define_insn "zero_cost_loop_end" > [(set (pc) > - (if_then_else (ne (reg:SI 19) (const_int 0)) > - (label_ref (match_operand 0 "" "")) > - (pc))) > - (set (reg:SI 19) > - (plus:SI (reg:SI 19) (const_int -1)))] > - "" > + (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" > "2,2") > + (const_int 1)) > + (label_ref 
(match_operand 1 "" "")) > + (pc))) > + (set (match_operand:SI 2 "nonimmediate_operand" "=a,m") > + (plus (match_dup 0) > + (const_int -1))) > + (unspec [(const_int 0)] UNSPEC_LSETUP_END) > + (clobber (match_scratch:SI 3 "=X,&r"))] "TARGET_LOOPS && optimize" > + "#" > + [(set_attr "type" "jump") > + (set_attr "mode" "none") > + (set_attr "length" "0")]) > + > +(define_insn "loop_end" > + [(set (pc) > + (if_then_else (ne (match_operand:SI 0 "register_operand" "2") > + (const_int 1)) > + (label_ref (match_operand 1 "" "")) > + (pc))) > + (set (match_operand:SI 2 "register_operand" "=a") > + (plus (match_dup 0) > + (const_int -1))) > + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] > + "TARGET_LOOPS && optimize" > { > - xtensa_emit_loop_end (insn, operands); > - return ""; > + xtensa_emit_loop_end (insn, operands); return ""; > } > [(set_attr "type" "jump") > (set_attr "mode" "none") > (set_attr "length" "0")]) > > +(define_split > + [(set (pc) > + (if_then_else (ne (match_operand:SI 0 "nonimmediate_operand" "") > + (const_int 1)) > + (label_ref (match_operand 1 "" "")) > + (pc))) > + (set (match_operand:SI 2 "nonimmediate_operand" "") > + (plus:SI (match_dup 0) > + (const_int -1))) > + (unspec [(const_int 0)] UNSPEC_LSETUP_END) > + (clobber (match_scratch 3))] > + "TARGET_LOOPS && optimize && reload_completed" > + [(const_int 0)] > +{ > + if (!REG_P (operands[0])) > + { > + rtx test; > + > + /* Fallback into a normal conditional branch insn. 
*/ > + emit_move_insn (operands[3], operands[0]); > + emit_insn (gen_addsi3 (operands[3], operands[3], constm1_rtx)); > + emit_move_insn (operands[0], operands[3]); > + test = gen_rtx_NE (VOIDmode, operands[3], const0_rtx); > + emit_jump_insn (gen_cbranchsi4 (test, operands[3], > + const0_rtx, operands[1])); > + } > + else > + { > + emit_jump_insn (gen_loop_end (operands[0], operands[1], > operands[2])); > + } > + > + DONE; > +}) > + > +; operand 0 is the loop count pseudo register ; operand 1 is the label > +to jump to at the top of the loop (define_expand "doloop_end" > + [(parallel [(set (pc) (if_then_else > + (ne (match_operand:SI 0 "" "") > + (const_int 1)) > + (label_ref (match_operand 1 "" "")) > + (pc))) > + (set (match_dup 0) > + (plus:SI (match_dup 0) > + (const_int -1))) > + (unspec [(const_int 0)] UNSPEC_LSETUP_END) > + (clobber (match_dup 2))])] ; match_scratch > + "TARGET_LOOPS && optimize" > +{ > + /* The loop optimizer doesn't check the predicates... */ > + if (GET_MODE (operands[0]) != SImode) > + FAIL; > + operands[2] = gen_rtx_SCRATCH (SImode); > +}) > + > > ;; Setting a register from a comparison. > > Index: gcc/config/xtensa/xtensa.c > ================================================================ > === > --- gcc/config/xtensa/xtensa.c (revision 216079) > +++ gcc/config/xtensa/xtensa.c (working copy) > @@ -61,6 +61,8 @@ along with GCC; see the file COPYING3. 
If not see > #include "gimplify.h" > #include "df.h" > #include "builtins.h" > +#include "dumpfile.h" > +#include "hw-doloop.h" > > > /* Enumeration for all of the relational tests, so that we can build @@ > -186,6 > +188,10 @@ static reg_class_t xtensa_secondary_reload (bool, > > static bool constantpool_address_p (const_rtx addr); static bool > xtensa_legitimate_constant_p (enum machine_mode, rtx); > +static void xtensa_reorg (void); > +static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int > &, > + unsigned int, bool); static const > +char *xtensa_invalid_within_doloop (const rtx_insn *); > > static bool xtensa_member_type_forces_blk (const_tree, > enum machine_mode mode); @@ -312,6 > +318,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE #undef > TARGET_LEGITIMATE_CONSTANT_P #define > TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p > > +#undef TARGET_MACHINE_DEPENDENT_REORG > +#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg > + > +#undef TARGET_CAN_USE_DOLOOP_P > +#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p > + > +#undef TARGET_INVALID_WITHIN_DOLOOP > +#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop > + > struct gcc_target targetm = TARGET_INITIALIZER; > > > @@ -1676,7 +1691,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx *operand > } > } > > - output_asm_insn ("# loop end for %0", operands); > + output_asm_insn ("%1_LEND:", operands); > } > > > @@ -3712,4 +3727,236 @@ xtensa_legitimate_constant_p (enum > machine_mode mo > return !xtensa_tls_referenced_p (x); > } > > +/* Implement TARGET_CAN_USE_DOLOOP_P. */ > + > +static bool > +xtensa_can_use_doloop_p (const widest_int &, const widest_int &, > + unsigned int loop_depth, bool entered_at_top) > +{ > + /* Considering limitations in the hardware, only use doloop > + for innermost loops which must be entered from the top. 
*/ > + if (loop_depth > 1 || !entered_at_top) > + return false; > + > + return true; > +} > + > +/* NULL if INSN insn is valid within a low-overhead loop. > + Otherwise return why doloop cannot be applied. */ > + > +static const char * > +xtensa_invalid_within_doloop (const rtx_insn *insn) { > + if (CALL_P (insn)) > + return "Function call in the loop."; > + > + if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return) > + return "Return from a call instruction in the loop."; > + > + return NULL; > +} > + > +/* Optimize LOOP. */ > + > +static bool > +hwloop_optimize (hwloop_info loop) > +{ > + int i; > + edge entry_edge; > + basic_block entry_bb; > + rtx iter_reg; > + rtx_insn *insn, *seq, *entry_after; > + > + if (loop->depth > 1) > + { > + if (dump_file) > + fprintf (dump_file, ";; loop %d is not innermost\n", > + loop->loop_no); > + return false; > + } > + > + if (!loop->incoming_dest) > + { > + if (dump_file) > + fprintf (dump_file, ";; loop %d has more than one entry\n", > + loop->loop_no); > + return false; > + } > + > + if (loop->incoming_dest != loop->head) > + { > + if (dump_file) > + fprintf (dump_file, ";; loop %d is not entered from head\n", > + loop->loop_no); > + return false; > + } > + > + if (loop->has_call || loop->has_asm) > + { > + if (dump_file) > + fprintf (dump_file, ";; loop %d has invalid insn\n", > + loop->loop_no); > + return false; > + } > + > + /* Scan all the blocks to make sure they don't use iter_reg. */ if > + (loop->iter_reg_used || loop->iter_reg_used_outside) > + { > + if (dump_file) > + fprintf (dump_file, ";; loop %d uses iterator\n", > + loop->loop_no); > + return false; > + } > + > + /* Check if start_label appears before doloop_end. 
*/ insn = > + loop->start_label; while (insn && insn != loop->loop_end) > + insn = NEXT_INSN (insn); > + > + if (!insn) > + { > + if (dump_file) > + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", > + loop->loop_no); > + return false; > + } > + > + /* Get the loop iteration register. */ iter_reg = loop->iter_reg; > + > + gcc_assert (REG_P (iter_reg)); > + > + entry_edge = NULL; > + > + FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge) > + if (entry_edge->flags & EDGE_FALLTHRU) > + break; > + > + if (entry_edge == NULL) > + return false; > + > + /* Place the zero_cost_loop_start instruction before the loop. */ > + entry_bb = entry_edge->src; > + > + start_sequence (); > + > + insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg, > + loop->start_label, > + loop->iter_reg)); > + > + seq = get_insns (); > + > + if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1) > + { > + basic_block new_bb; > + edge e; > + edge_iterator ei; > + > + emit_insn_before (seq, BB_HEAD (loop->head)); > + seq = emit_label_before (gen_label_rtx (), seq); > + new_bb = create_basic_block (seq, insn, entry_bb); > + FOR_EACH_EDGE (e, ei, loop->incoming) > + { > + if (!(e->flags & EDGE_FALLTHRU)) > + redirect_edge_and_branch_force (e, new_bb); > + else > + redirect_edge_succ (e, new_bb); > + } > + > + make_edge (new_bb, loop->head, 0); > + } > + else > + { > + entry_after = BB_END (entry_bb); > + while (DEBUG_INSN_P (entry_after) > + || (NOTE_P (entry_after) > + && NOTE_KIND (entry_after) != > NOTE_INSN_BASIC_BLOCK)) > + entry_after = PREV_INSN (entry_after); > + > + emit_insn_after (seq, entry_after); > + } > + > + end_sequence (); > + > + return true; > +} > + > +/* A callback for the hw-doloop pass. Called when a loop we have discovered > + turns out not to be optimizable; we have to split the loop_end pattern > into > + a subtract and a test. 
*/ > + > +static void > +hwloop_fail (hwloop_info loop) > +{ > + rtx test; > + rtx_insn *insn = loop->loop_end; > + > + emit_insn_before (gen_addsi3 (loop->iter_reg, > + loop->iter_reg, > + constm1_rtx), > + loop->loop_end); > + > + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); insn = > + emit_jump_insn_before (gen_cbranchsi4 (test, > + loop->iter_reg, > const0_rtx, > + loop->start_label), > + loop->loop_end); > + > + JUMP_LABEL (insn) = loop->start_label; > + LABEL_NUSES (loop->start_label)++; > + delete_insn (loop->loop_end); > +} > + > +/* A callback for the hw-doloop pass. This function examines INSN; if > + it is a doloop_end pattern we recognize, return the reg rtx for the > + loop counter. Otherwise, return NULL_RTX. */ > + > +static rtx > +hwloop_pattern_reg (rtx_insn *insn) > +{ > + rtx reg; > + > + if (!JUMP_P (insn) || recog_memoized (insn) != CODE_FOR_loop_end) > + return NULL_RTX; > + > + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); if (!REG_P (reg)) > + return NULL_RTX; > + > + return reg; > +} > + > + > +static struct hw_doloop_hooks xtensa_doloop_hooks = { > + hwloop_pattern_reg, > + hwloop_optimize, > + hwloop_fail > +}; > + > +/* Run from machine_dependent_reorg, this pass looks for doloop_end insns > + and tries to rewrite the RTL of these loops so that proper Xtensa > + hardware loops are generated. */ > + > +static void > +xtensa_reorg_loops (void) > +{ > + reorg_loops (false, &xtensa_doloop_hooks); } > + > +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. */ > + > +static void > +xtensa_reorg (void) > +{ > + /* We are freeing block_for_insn in the toplev to keep compatibility > + with old MDEP_REORGS that are not CFG based. Recompute it now. > +*/ > + compute_bb_for_insn (); > + > + df_analyze (); > + > + /* Doloop optimization. 
*/ > + xtensa_reorg_loops (); > +} > + > #include "gt-xtensa.h" > Index: gcc/config/xtensa/xtensa.h > ================================================================ > === > --- gcc/config/xtensa/xtensa.h (revision 216079) > +++ gcc/config/xtensa/xtensa.h (working copy) > @@ -61,6 +61,7 @@ extern unsigned xtensa_current_frame_size; > #define TARGET_S32C1I XCHAL_HAVE_S32C1I > #define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS > #define TARGET_THREADPTR XCHAL_HAVE_THREADPTR > +#define TARGET_LOOPS XCHAL_HAVE_LOOPS > > #define TARGET_DEFAULT \ > ((XCHAL_HAVE_L32R ? 0 : MASK_CONST16) | \ > > Cheers, > Felix > > > On Thu, Oct 9, 2014 at 6:52 PM, Felix Yang <fei.yang0...@gmail.com> wrote: > > Hello Sterling, > > > > My paperwork with the FSF is finished and we can now move > > forward with this patch :-) > > I rebased the patch on the latest trunk. Attached please find > > version 3 of the patch. > > The enclosed patch also addresses the two points raised by > > you. Do you like it? > > Make check regression tested with xtensa-elf-gcc built from trunk > > with this patch. > > OK to apply? > > > > Index: gcc/ChangeLog > > > ================================================================ > === > > --- gcc/ChangeLog (revision 216036) > > +++ gcc/ChangeLog (working copy) > > @@ -1,3 +1,19 @@ > > +2014-10-09 Felix Yang <felix.y...@huawei.com> > > + > > + * config/xtensa/xtensa.h (TARGET_LOOPS): New Macro. > > + * config/xtensa/xtensa.c (xtensa_reorg): New. > > + (xtensa_reorg_loops): New. > > + (xtensa_can_use_doloop_p): New. > > + (xtensa_invalid_within_doloop): New. > > + (hwloop_optimize): New. > > + (hwloop_fail): New. > > + (hwloop_pattern_reg): New. > > + (xtensa_emit_loop_end): Modified to emit the zero-overhead loop end > label. > > + (xtensa_doloop_hooks): Define. > > + * config/xtensa/xtensa.md (doloop_end): New. > > + (zero_cost_loop_start): Rewritten. > > + (zero_cost_loop_end): Rewritten. 
> > + > > 2014-10-09 Joern Rennecke <joern.renne...@embecosm.com> > > > > * config/avr/avr.opt (mmcu=): Change to have a string value. > > Index: gcc/config/xtensa/xtensa.md > > > ================================================================ > === > > --- gcc/config/xtensa/xtensa.md (revision 216036) > > +++ gcc/config/xtensa/xtensa.md (working copy) > > @@ -35,6 +35,8 @@ > > (UNSPEC_TLS_CALL 9) > > (UNSPEC_TP 10) > > (UNSPEC_MEMW 11) > > + (UNSPEC_LSETUP_START 12) > > + (UNSPEC_LSETUP_END 13) > > > > (UNSPECV_SET_FP 1) > > (UNSPECV_ENTRY 2) > > @@ -1289,41 +1291,67 @@ > > (set_attr "length" "3")]) > > > > > > +;; Zero-overhead looping support. > > + > > ;; Define the loop insns used by bct optimization to represent the > > -;; start and end of a zero-overhead loop (in loop.c). This start -;; > > template generates the loop insn; the end template doesn't generate > > -;; any instructions since loop end is handled in hardware. > > +;; start and end of a zero-overhead loop. This start template > > +generates ;; the loop insn; the end template doesn't generate any > > +instructions since ;; loop end is handled in hardware. 
> > > > (define_insn "zero_cost_loop_start" > > [(set (pc) > > - (if_then_else (eq (match_operand:SI 0 "register_operand" "a") > > - (const_int 0)) > > - (label_ref (match_operand 1 "" "")) > > - (pc))) > > - (set (reg:SI 19) > > - (plus:SI (match_dup 0) (const_int -1)))] > > + (if_then_else (ne (match_operand:SI 0 "register_operand" "a") > > + (const_int 1)) > > + (label_ref (match_operand 1 "" "")) > > + (pc))) > > + (set (match_operand:SI 2 "register_operand" "+a0") > > + (plus (match_dup 2) > > + (const_int -1))) > > + (unspec [(const_int 0)] UNSPEC_LSETUP_START)] > > "" > > - "loopnez\t%0, %l1" > > + "loop\t%0, %l1_LEND" > > [(set_attr "type" "jump") > > (set_attr "mode" "none") > > (set_attr "length" "3")]) > > > > (define_insn "zero_cost_loop_end" > > [(set (pc) > > - (if_then_else (ne (reg:SI 19) (const_int 0)) > > - (label_ref (match_operand 0 "" "")) > > - (pc))) > > - (set (reg:SI 19) > > - (plus:SI (reg:SI 19) (const_int -1)))] > > + (if_then_else (ne (match_operand:SI 0 "register_operand" "a") > > + (const_int 1)) > > + (label_ref (match_operand 1 "" "")) > > + (pc))) > > + (set (match_operand:SI 2 "register_operand" "+a0") > > + (plus (match_dup 2) > > + (const_int -1))) > > + (unspec [(const_int 0)] UNSPEC_LSETUP_END)] > > "" > > { > > - xtensa_emit_loop_end (insn, operands); > > - return ""; > > + xtensa_emit_loop_end (insn, operands); return ""; > > } > > [(set_attr "type" "jump") > > (set_attr "mode" "none") > > (set_attr "length" "0")]) > > > > +; operand 0 is the loop count pseudo register ; operand 1 is the > > +label to jump to at the top of the loop (define_expand "doloop_end" > > + [(parallel [(set (pc) (if_then_else > > + (ne (match_operand:SI 0 "" "") > > + (const_int 1)) > > + (label_ref (match_operand 1 "" "")) > > + (pc))) > > + (set (match_dup 0) > > + (plus:SI (match_dup 0) > > + (const_int -1))) > > + (unspec [(const_int 0)] UNSPEC_LSETUP_END)])] > > + "" > > +{ > > + /* The loop optimizer doesn't check the predicates... 
*/ > > + if (GET_MODE (operands[0]) != SImode) > > + FAIL; > > +}) > > + > > > > ;; Setting a register from a comparison. > > > > Index: gcc/config/xtensa/xtensa.c > > > ================================================================ > === > > --- gcc/config/xtensa/xtensa.c (revision 216036) > > +++ gcc/config/xtensa/xtensa.c (working copy) > > @@ -61,6 +61,8 @@ along with GCC; see the file COPYING3. If not see > > #include "gimplify.h" > > #include "df.h" > > #include "builtins.h" > > +#include "dumpfile.h" > > +#include "hw-doloop.h" > > > > > > /* Enumeration for all of the relational tests, so that we can build > > @@ -186,6 +188,10 @@ static reg_class_t xtensa_secondary_reload (bool, > > > > static bool constantpool_address_p (const_rtx addr); > > static bool xtensa_legitimate_constant_p (enum machine_mode, rtx); > > +static void xtensa_reorg (void); > > +static bool xtensa_can_use_doloop_p (const widest_int &, const widest_int > &, > > + unsigned int, bool); > > +static const char *xtensa_invalid_within_doloop (const rtx_insn *); > > > > static bool xtensa_member_type_forces_blk (const_tree, > > enum machine_mode mode); > > @@ -312,6 +318,15 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE > > #undef TARGET_LEGITIMATE_CONSTANT_P > > #define TARGET_LEGITIMATE_CONSTANT_P xtensa_legitimate_constant_p > > > > +#undef TARGET_MACHINE_DEPENDENT_REORG > > +#define TARGET_MACHINE_DEPENDENT_REORG xtensa_reorg > > + > > +#undef TARGET_CAN_USE_DOLOOP_P > > +#define TARGET_CAN_USE_DOLOOP_P xtensa_can_use_doloop_p > > + > > +#undef TARGET_INVALID_WITHIN_DOLOOP > > +#define TARGET_INVALID_WITHIN_DOLOOP xtensa_invalid_within_doloop > > + > > struct gcc_target targetm = TARGET_INITIALIZER; > > > > > > @@ -1676,7 +1691,7 @@ xtensa_emit_loop_end (rtx_insn *insn, rtx > *operand > > } > > } > > > > - output_asm_insn ("# loop end for %0", operands); > > + output_asm_insn ("%1_LEND:", operands); > > } > > > > > > @@ -3712,4 +3727,239 @@ xtensa_legitimate_constant_p (enum > 
machine_mode mo > > return !xtensa_tls_referenced_p (x); > > } > > > > +/* Implement TARGET_CAN_USE_DOLOOP_P. */ > > + > > +static bool > > +xtensa_can_use_doloop_p (const widest_int &, const widest_int &, > > + unsigned int loop_depth, bool > entered_at_top) > > +{ > > + if (!TARGET_LOOPS) > > + return false; > > + > > + /* Considering limitations in the hardware, only use doloop > > + for innermost loops which must be entered from the top. */ > > + if (loop_depth > 1 || !entered_at_top) > > + return false; > > + > > + return true; > > +} > > + > > +/* NULL if INSN insn is valid within a low-overhead loop. > > + Otherwise return why doloop cannot be applied. */ > > + > > +static const char * > > +xtensa_invalid_within_doloop (const rtx_insn *insn) > > +{ > > + if (CALL_P (insn)) > > + return "Function call in the loop."; > > + > > + if (JUMP_P (insn) && INSN_CODE (insn) == CODE_FOR_return) > > + return "Return from a call instruction in the loop."; > > + > > + return NULL; > > +} > > + > > +/* Optimize LOOP. 
*/ > > + > > +static bool > > +hwloop_optimize (hwloop_info loop) > > +{ > > + int i; > > + edge entry_edge; > > + basic_block entry_bb; > > + rtx iter_reg; > > + rtx_insn *insn, *seq, *entry_after; > > + > > + if (loop->depth > 1) > > + { > > + if (dump_file) > > + fprintf (dump_file, ";; loop %d is not innermost\n", > > + loop->loop_no); > > + return false; > > + } > > + > > + if (!loop->incoming_dest) > > + { > > + if (dump_file) > > + fprintf (dump_file, ";; loop %d has more than one entry\n", > > + loop->loop_no); > > + return false; > > + } > > + > > + if (loop->incoming_dest != loop->head) > > + { > > + if (dump_file) > > + fprintf (dump_file, ";; loop %d is not entered from head\n", > > + loop->loop_no); > > + return false; > > + } > > + > > + if (loop->has_call || loop->has_asm) > > + { > > + if (dump_file) > > + fprintf (dump_file, ";; loop %d has invalid insn\n", > > + loop->loop_no); > > + return false; > > + } > > + > > + /* Scan all the blocks to make sure they don't use iter_reg. */ > > + if (loop->iter_reg_used || loop->iter_reg_used_outside) > > + { > > + if (dump_file) > > + fprintf (dump_file, ";; loop %d uses iterator\n", > > + loop->loop_no); > > + return false; > > + } > > + > > + /* Check if start_label appears before doloop_end. */ > > + insn = loop->start_label; > > + while (insn && insn != loop->loop_end) > > + insn = NEXT_INSN (insn); > > + > > + if (!insn) > > + { > > + if (dump_file) > > + fprintf (dump_file, ";; loop %d start_label not before loop_end\n", > > + loop->loop_no); > > + return false; > > + } > > + > > + /* Get the loop iteration register. */ > > + iter_reg = loop->iter_reg; > > + > > + gcc_assert (REG_P (iter_reg)); > > + > > + entry_edge = NULL; > > + > > + FOR_EACH_VEC_SAFE_ELT (loop->incoming, i, entry_edge) > > + if (entry_edge->flags & EDGE_FALLTHRU) > > + break; > > + > > + if (entry_edge == NULL) > > + return false; > > + > > + /* Place the zero_cost_loop_start instruction before the loop. 
*/ > > + entry_bb = entry_edge->src; > > + > > + start_sequence (); > > + > > + insn = emit_insn (gen_zero_cost_loop_start (loop->iter_reg, > > + loop->start_label, > > + loop->iter_reg)); > > + > > + seq = get_insns (); > > + > > + if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1) > > + { > > + basic_block new_bb; > > + edge e; > > + edge_iterator ei; > > + > > + emit_insn_before (seq, BB_HEAD (loop->head)); > > + seq = emit_label_before (gen_label_rtx (), seq); > > + new_bb = create_basic_block (seq, insn, entry_bb); > > + FOR_EACH_EDGE (e, ei, loop->incoming) > > + { > > + if (!(e->flags & EDGE_FALLTHRU)) > > + redirect_edge_and_branch_force (e, new_bb); > > + else > > + redirect_edge_succ (e, new_bb); > > + } > > + > > + make_edge (new_bb, loop->head, 0); > > + } > > + else > > + { > > + entry_after = BB_END (entry_bb); > > + while (DEBUG_INSN_P (entry_after) > > + || (NOTE_P (entry_after) > > + && NOTE_KIND (entry_after) != > NOTE_INSN_BASIC_BLOCK)) > > + entry_after = PREV_INSN (entry_after); > > + > > + emit_insn_after (seq, entry_after); > > + } > > + > > + end_sequence (); > > + > > + return true; > > +} > > + > > +/* A callback for the hw-doloop pass. Called when a loop we have > discovered > > + turns out not to be optimizable; we have to split the loop_end pattern > into > > + a subtract and a test. */ > > + > > +static void > > +hwloop_fail (hwloop_info loop) > > +{ > > + rtx test; > > + rtx_insn *insn = loop->loop_end; > > + > > + emit_insn_before (gen_addsi3 (loop->iter_reg, > > + loop->iter_reg, > > + constm1_rtx), > > + loop->loop_end); > > + > > + test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx); > > + insn = emit_jump_insn_before (gen_cbranchsi4 (test, > > + loop->iter_reg, > const0_rtx, > > + > loop->start_label), > > + loop->loop_end); > > + > > + JUMP_LABEL (insn) = loop->start_label; > > + LABEL_NUSES (loop->start_label)++; > > + delete_insn (loop->loop_end); > > +} > > + > > +/* A callback for the hw-doloop pass. 
This function examines INSN; if > > + it is a doloop_end pattern we recognize, return the reg rtx for the > > + loop counter. Otherwise, return NULL_RTX. */ > > + > > +static rtx > > +hwloop_pattern_reg (rtx_insn *insn) > > +{ > > + rtx reg; > > + > > + if (!JUMP_P (insn) || recog_memoized (insn) != > CODE_FOR_zero_cost_loop_end) > > + return NULL_RTX; > > + > > + reg = SET_DEST (XVECEXP (PATTERN (insn), 0, 1)); > > + if (!REG_P (reg)) > > + return NULL_RTX; > > + > > + return reg; > > +} > > + > > + > > +static struct hw_doloop_hooks xtensa_doloop_hooks = > > +{ > > + hwloop_pattern_reg, > > + hwloop_optimize, > > + hwloop_fail > > +}; > > + > > +/* Run from machine_dependent_reorg, this pass looks for doloop_end > insns > > + and tries to rewrite the RTL of these loops so that proper Xtensa > > + hardware loops are generated. */ > > + > > +static void > > +xtensa_reorg_loops (void) > > +{ > > + reorg_loops (false, &xtensa_doloop_hooks); > > +} > > + > > +/* Implement the TARGET_MACHINE_DEPENDENT_REORG pass. */ > > + > > +static void > > +xtensa_reorg (void) > > +{ > > + /* We are freeing block_for_insn in the toplev to keep compatibility > > + with old MDEP_REORGS that are not CFG based. Recompute it now. > */ > > + compute_bb_for_insn (); > > + > > + df_analyze (); > > + > > + /* Doloop optimization. */ > > + xtensa_reorg_loops (); > > +} > > + > > #include "gt-xtensa.h" > > Index: gcc/config/xtensa/xtensa.h > > > ================================================================ > === > > --- gcc/config/xtensa/xtensa.h (revision 216036) > > +++ gcc/config/xtensa/xtensa.h (working copy) > > @@ -61,6 +61,7 @@ extern unsigned xtensa_current_frame_size; > > #define TARGET_S32C1I XCHAL_HAVE_S32C1I > > #define TARGET_ABSOLUTE_LITERALS XSHAL_USE_ABSOLUTE_LITERALS > > #define TARGET_THREADPTR XCHAL_HAVE_THREADPTR > > +#define TARGET_LOOPS XCHAL_HAVE_LOOPS > > > > #define TARGET_DEFAULT \ > > ((XCHAL_HAVE_L32R ? 
0 : MASK_CONST16) | \ > > Cheers, > > Felix > > > > > > On Tue, Jan 14, 2014 at 1:23 AM, Sterling Augustine > > <augustine.sterl...@gmail.com> wrote: > >> On Thu, Jan 9, 2014 at 7:48 PM, Yangfei (Felix) <felix.y...@huawei.com> > wrote: > >>> And here is the xtensa configuration tested (include/xtensa-config.h): > >>> > >>> #define XCHAL_HAVE_BE 0 > >>> #define XCHAL_HAVE_LOOPS 1 > >> > >> > >> Hi Felix, > >> > >> I like this patch, and expect I will approve it. However, I would like > >> you to do two more things before I do: > >> > >> 1. Ensure it doesn't generate ZCLs (zero-cost loops) when: > >> > >> #define XCHAL_HAVE_LOOPS 0 > >> > >> 2. Ensure it doesn't produce loop bodies that contain ret, retw, > >> ret.n or retw.n as the last instruction. It might be easier to just > >> disallow them in loop bodies entirely, though. > >> > >> Thanks!