https://gcc.gnu.org/g:d833114c5f8816dcddfd84a9c64e87c0eed46fe2
commit r15-5800-gd833114c5f8816dcddfd84a9c64e87c0eed46fe2 Author: Georg-Johann Lay <a...@gjlay.de> Date: Fri Nov 29 18:08:20 2024 +0100 AVR: target/117726 - Better optimize shifts. This patch splits 2-byte and 3-byte shifts after reload into a 3-operand byte shift and a residual 2-operand shift. The "2op" shift insn alternatives are not needed and removed because all shift insn already have a "r,0,n" alternative that does the job. PR target/117726 gcc/ * config/avr/avr-passes.cc (avr_shift_is_3op, avr_emit_shift): Also handle 2-byte and 3-byte shifts. (avr_split_shift4, avr_split_shift3, avr_split_shift2): New local helper functions. (avr_split_shift): Use them. * config/avr/avr-passes.def (avr_pass_split_after_peephole2): Adjust comments. * config/avr/avr.cc (avr_out_ashlpsi3, avr_out_ashrpsi3) (avr_out_lshrpsi3): Support offset 15. (ashrhi3_out): Support offset 7 as 3-op. (ashrsi3_out): Support offset 15. (avr_rtx_costs_1): Adjust shift costs. * config/avr/avr.md (2op): Remove attribute value and all such insn alternatives. (ashlhi3, *ashlhi3, *ashlhi3_const): Add 3-op alternatives like C2l. (ashrhi3, *ashrhi3, *ashrhi3_const): Add 3-op alternatives like C2a. (lshrhi3, *lshrhi3, *lshrhi3_const): Add 3-op alternatives like C2r. (*ashlpsi3_split, *ashlpsi3): Add 3-op alternatives C15 and C3l. (*ashrpsi3_split, *ashrpsi3): Add 3-op alternatives C15 and C3r. (*lshrpsi3_split, *lshrpsi3): Add 3-op alternatives C15 and C3r. (ashlsi3, *ashlsi3, *ashlsi3_const): Remove "2op" alternative. (ashrsi3, *ashrsi3, *ashrsi3_const): Same. (lshrsi3, *lshrsi3, *lshrsi3_const): Same. (constr_split_suffix): Code attr morphed from constr_split_shift4. * config/avr/constraints.md (C2a, C2r, C2l) (C3a, C3r, C3l): New constraints. * doc/invoke.texi (AVR Options) <-msplit-bit-shift>: Adjust doc. Diff: --- gcc/config/avr/avr-passes.cc | 238 +++++++++++++++++++++++++++----- gcc/config/avr/avr-passes.def | 13 +- gcc/config/avr/avr.cc | 220 +++++++++++++++++++++--------- gcc/config/avr/avr.md | 310 +++++++++++++++++++++++------------------- gcc/config/avr/constraints.md | 30 ++++ gcc/doc/invoke.texi | 10 +- 6 files changed, 568 insertions(+), 253 deletions(-) diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index bd249b70e8d6..0d324727b470 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -4781,7 +4781,8 @@ avr_pass_fuse_add::execute1 (function *func) ////////////////////////////////////////////////////////////////////////////// -// Split insns after peephole2 / befor avr-fuse-move. +// Split shift insns after peephole2 / befor avr-fuse-move. + static const pass_data avr_pass_data_split_after_peephole2 = { RTL_PASS, // type @@ -4816,20 +4817,19 @@ public: } // anonymous namespace -/* Whether some shift insn alternatives are a 3-operand insn or a - 2-operand insn. This 3op alternatives allow the source and the - destination register of the shift to be different right from the - start, because the splitter will split the 3op shift into a 3op byte - shift and a 2op residual bit shift. - (When the residual shift has an offset of one less than the bitsize, - then the residual shift is also a 3op insn. */ +/* Whether some shift insn alternatives are a `3op' 3-operand insn. + This 3op alternatives allow the source and the destination register + of the shift to be different right from the start, because the splitter + will split the 3op shift into a 3-operand byte shift and a 2-operand + residual bit shift. 
(When the residual shift has an offset of one + less than the bitsize, then the residual shift is also a 3op insn.) */ bool avr_shift_is_3op () { // Don't split for OPTIMIZE_SIZE_MAX (-Oz). // For OPTIMIZE_SIZE_BALANCED (-Os), we still split because - // the size overhead (if exists at all) is marginal. + // the size overhead (if at all) is marginal. return (avropt_split_bit_shift && optimize > 0 @@ -4837,41 +4837,77 @@ avr_shift_is_3op () } -/* Implement constraints `C4a', `C4l' and `C4r'. +/* Implement constraints `C2a', `C2l', `C2r' ... `C4a', `C4l', `C4r'. Whether we split an N_BYTES shift of code CODE in { ASHIFTRT, LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift. */ bool avr_split_shift_p (int n_bytes, int offset, rtx_code code) { - gcc_assert (n_bytes == 4); + gcc_assert (n_bytes == 4 || n_bytes == 3 || n_bytes == 2); + + if (! avr_shift_is_3op () + || offset % 8 == 0) + return false; - if (avr_shift_is_3op () - && offset % 8 != 0) + if (n_bytes == 4) return select<bool>() - : code == ASHIFT ? IN_RANGE (offset, 17, 30) - : code == ASHIFTRT ? IN_RANGE (offset, 9, 29) + : code == ASHIFT ? IN_RANGE (offset, 9, 30) && offset != 15 + : code == ASHIFTRT ? IN_RANGE (offset, 9, 29) && offset != 15 : code == LSHIFTRT ? IN_RANGE (offset, 9, 30) && offset != 15 : bad_case<bool> (); + if (n_bytes == 3) + return select<bool>() + : code == ASHIFT ? IN_RANGE (offset, 9, 22) && offset != 15 + : code == ASHIFTRT ? IN_RANGE (offset, 9, 21) && offset != 15 + : code == LSHIFTRT ? IN_RANGE (offset, 9, 22) && offset != 15 + : bad_case<bool> (); + + if (n_bytes == 2) + return select<bool>() + : code == ASHIFT ? IN_RANGE (offset, 9, 14) + : code == ASHIFTRT ? IN_RANGE (offset, 9, 13) + : code == LSHIFTRT ? IN_RANGE (offset, 9, 14) + : bad_case<bool> (); + return false; } /* Emit a DEST = SRC <code> OFF shift of QImode, HImode or PSImode. - SCRATCH is a QImode d-register, scratch:QI, or NULL_RTX. */ + SCRATCH is a QImode d-register, scratch:QI, or NULL_RTX. + This function is used to emit shifts that have been split into + a byte shift and a residual bit shift that operates on a mode + strictly smaller than the original shift. */ static void avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch) { const machine_mode mode = GET_MODE (dest); + const int n_bits = GET_MODE_BITSIZE (mode); rtx xoff = GEN_INT (off); - bool is_3op = (off % 8 == 0 - || off == GET_MODE_BITSIZE (mode) - 1 - || (code == ASHIFTRT && off == GET_MODE_BITSIZE (mode) - 2) - || (mode == HImode - && (code == ASHIFT || code == LSHIFTRT) - && satisfies_constraint_C7c (xoff) /* 7...12 */)); + + // Work out which alternatives can handle 3 operands independent + // of options. + + const bool b16_is_3op = select<bool>() + : code == ASHIFT ? satisfies_constraint_C7c (xoff) // 7...12 + : code == LSHIFTRT ? satisfies_constraint_C7c (xoff) + : code == ASHIFTRT ? off == 7 + : bad_case<bool> (); + + const bool b24_is_3op = select<bool>() + : code == ASHIFT ? off == 15 + : code == LSHIFTRT ? off == 15 + : code == ASHIFTRT ? false + : bad_case<bool> (); + + const bool is_3op = (off % 8 == 0 + || off == n_bits - 1 + || (code == ASHIFTRT && off == n_bits - 2) + || (n_bits == 16 && b16_is_3op) + || (n_bits == 24 && b24_is_3op)); rtx shift; if (is_3op) @@ -4885,23 +4921,25 @@ avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch) shift = gen_rtx_fmt_ee (code, mode, dest, xoff); } + if (n_bits == 8) + // 8-bit shifts don't have a scratch operand. + scratch = NULL_RTX; + else if (! 
scratch && n_bits == 24) + // 24-bit shifts always have a scratch operand. + scratch = gen_rtx_SCRATCH (QImode); + emit_valid_move_clobbercc (dest, shift, scratch); } -/* Worker for define_split that runs when -msplit-bit-shift is on. - Split a shift of code CODE into a 3op byte shift and a residual bit shift. - Return 'true' when a split has been performed and insns have been emitted. - Otherwise, return 'false'. */ +/* Handle the 4-byte case of avr_split_shift below: + Split 4-byte shift DEST = SRC <code> IOFF into a 3-operand + byte shift and a residual shift in a smaller mode if possible. + SCRATCH is a QImode upper scratch register or NULL_RTX. */ -bool -avr_split_shift (rtx xop[], rtx scratch, rtx_code code) +static bool +avr_split_shift4 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code) { - scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX; - rtx dest = xop[0]; - rtx src = xop[1]; - int ioff = INTVAL (xop[2]); - gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4); if (code == ASHIFT) @@ -4923,6 +4961,8 @@ avr_split_shift (rtx xop[], rtx scratch, rtx_code code) emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx); return true; } + // ...the 9...14 cases are only handled by define_split because + // for now, we don't exploit that the low byte is zero. } else if (code == ASHIFTRT || code == LSHIFTRT) @@ -4965,11 +5005,9 @@ avr_split_shift (rtx xop[], rtx scratch, rtx_code code) } else if (IN_RANGE (ioff, 9, 15)) { - avr_emit_shift (code, dest, src, 8, NULL_RTX); + avr_emit_shift (code, dest, src, 8, scratch); rtx dst24 = avr_chunk (PSImode, dest, 0); rtx src24 = avr_chunk (PSImode, dest, 0); - if (! scratch) - scratch = gen_rtx_SCRATCH (QImode); avr_emit_shift (code, dst24, src24, ioff - 8, scratch); return true; } @@ -4981,6 +5019,134 @@ avr_split_shift (rtx xop[], rtx scratch, rtx_code code) } +/* Handle the 3-byte case of avr_split_shift below: + Split 3-byte shift DEST = SRC <code> IOFF into a 3-operand + byte shift and a residual shift in a smaller mode if possible. + SCRATCH is a QImode upper scratch register or NULL_RTX. */ + +static bool +avr_split_shift3 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code) +{ + gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 3); + + if (code == ASHIFT) + { + if (IN_RANGE (ioff, 17, 22)) + { + rtx dst8 = avr_byte (dest, 2); + rtx src8 = avr_byte (src, 0); + avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX); + emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx); + return true; + } + // ...the 9...14 cases are only handled by define_split because + // for now, we don't exploit that the low byte is zero. 
+ } + else if (code == ASHIFTRT + || code == LSHIFTRT) + { + if (IN_RANGE (ioff, 17, 22)) + { + rtx dst8 = avr_byte (dest, 0); + rtx src8 = avr_byte (src, 2); + avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX); + if (code == ASHIFTRT) + { + rtx signs = avr_byte (dest, 1); + avr_emit_shift (code, signs, src8, 7, NULL_RTX); + emit_valid_move_clobbercc (avr_byte (dest, 2), signs); + } + else + { + emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx); + emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx); + } + return true; + } + else if (IN_RANGE (ioff, 9, 15)) + { + avr_emit_shift (code, dest, src, 8, scratch); + rtx dst16 = avr_chunk (HImode, dest, 0); + rtx src16 = avr_chunk (HImode, dest, 0); + avr_emit_shift (code, dst16, src16, ioff - 8, scratch); + return true; + } + } + else + gcc_unreachable (); + + return false; +} + + +/* Handle the 2-byte case of avr_split_shift below: + Split 2-byte shift DEST = SRC <code> IOFF into a 3-operand + byte shift and a residual shift in a smaller mode if possible. + SCRATCH is a QImode upper scratch register or NULL_RTX. */ + +static bool +avr_split_shift2 (rtx dest, rtx src, int ioff, rtx /*scratch*/, rtx_code code) +{ + gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 2); + + if (code == ASHIFT) + { + if (IN_RANGE (ioff, 9, 14)) + { + rtx dst8 = avr_byte (dest, 1); + rtx src8 = avr_byte (src, 0); + avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX); + emit_valid_move_clobbercc (avr_byte (dest, 0), const0_rtx); + return true; + } + } + else if (code == ASHIFTRT + || code == LSHIFTRT) + { + if (IN_RANGE (ioff, 9, 14)) + { + rtx dst8 = avr_byte (dest, 0); + rtx src8 = avr_byte (src, 1); + rtx signs = const0_rtx; + avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX); + if (code == ASHIFTRT) + { + signs = avr_byte (dest, 1); + avr_emit_shift (code, signs, src8, 7, NULL_RTX); + } + emit_valid_move_clobbercc (avr_byte (dest, 1), signs); + return true; + } + } + else + gcc_unreachable (); + + return false; +} + + +/* Worker for a define_split that runs when -msplit-bit-shift is on. + Split a shift of code CODE into a 3op byte shift and a residual bit shift. + Return 'true' when a split has been performed and insns have been emitted. + Otherwise, return 'false'. */ + +bool +avr_split_shift (rtx xop[], rtx scratch, rtx_code code) +{ + scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX; + rtx dest = xop[0]; + rtx src = xop[1]; + int ioff = INTVAL (xop[2]); + int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); + + return select<bool>() + : n_bytes == 2 ? avr_split_shift2 (dest, src, ioff, scratch, code) + : n_bytes == 3 ? avr_split_shift3 (dest, src, ioff, scratch, code) + : n_bytes == 4 ? avr_split_shift4 (dest, src, ioff, scratch, code) + : bad_case<bool> (); +} + + namespace { diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def index be8278370b01..019839fb3c59 100644 --- a/gcc/config/avr/avr-passes.def +++ b/gcc/config/avr/avr-passes.def @@ -105,9 +105,14 @@ INSERT_PASS_BEFORE (pass_split_after_reload, 1, avr_pass_ifelse); INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_fuse_move); - /* Run an instance of post-reload split prior to avr-fuse-move. - Purpose is to split 3-operand shift insns into a 3-operand shift - with a byte offset, and a 2-operand residual shift after - RTL peepholes but prior to the avr-fuse-move pass. */ +/* Run an instance of post-reload split prior to avr-fuse-move. 
+ Purpose is to split the `3op' alternative (which allows 3 operands) + of shift insns into a 3-operand shift with a byte offset, and + a 2-operand residual shift. This additional split pass runs after + the 1st RTL peephole pass but prior to avr-fuse-move. + The respective define_split patterns have a `n_avr_fuse_add_executed' + condition (amongst others) so that split passes that run before + the 1st RTL peephole pass won't split them. Shifts with a constant + offset that is a multiple of 8 are split by avr-fuse-move. */ INSERT_PASS_AFTER (pass_peephole2, 1, avr_pass_split_after_peephole2); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 940ac3f0279e..fc9f17704209 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -7045,6 +7045,7 @@ avr_out_ashlpsi3 (rtx_insn *insn, rtx *op, int *plen) { int reg0 = REGNO (op[0]); int reg1 = REGNO (op[1]); + bool reg1_unused_after = reg_unused_after (insn, op[1]); switch (INTVAL (op[2])) { @@ -7063,6 +7064,20 @@ avr_out_ashlpsi3 (rtx_insn *insn, rtx *op, int *plen) : avr_asm_len ("clr %A0" CR_TAB "mov %B0,%A1" CR_TAB "mov %C0,%B1", op, plen, 3); + case 15: + avr_asm_len (reg1_unused_after + ? "lsr %B1" + : "bst %B1,0", op, plen, 1); + if (reg0 + 2 != reg1) + avr_asm_len ("mov %C0,%A1", op, plen, 1); + avr_asm_len ("clr %A0" CR_TAB + "clr %B0" CR_TAB + "ror %C0" CR_TAB + "ror %B0", op, plen, 5); + return reg1_unused_after + ? "" + : avr_asm_len ("bld %C0,7", op, plen, 1); + case 16: if (reg0 + 2 != reg1) avr_asm_len ("mov %C0,%A1", op, plen, 1); @@ -7094,7 +7109,7 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen) { int reg0 = true_regnum (operands[0]); int reg1 = true_regnum (operands[1]); - bool reg1_unused_after_p = reg_unused_after (insn, operands[1]); + bool reg1_unused_after = reg_unused_after (insn, operands[1]); if (plen) *plen = 0; @@ -7124,7 +7139,7 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen) "mov %C0,%B1" CR_TAB "mov %D0,%C1", operands, plen, 4); case 15: - avr_asm_len (reg1_unused_after_p + avr_asm_len (reg1_unused_after ? "lsr %C1" : "bst %C1,0", operands, plen, 1); if (reg0 + 2 != reg1) @@ -7135,18 +7150,15 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen) avr_asm_len ("mov %C0,%A1" CR_TAB "mov %D0,%B1", operands, plen, 2); } - return reg1_unused_after_p - ? avr_asm_len ("clr %A0" CR_TAB - "clr %B0" CR_TAB - "ror %D0" CR_TAB - "ror %C0" CR_TAB - "ror %B0", operands, plen, 5) - : avr_asm_len ("clr %A0" CR_TAB - "clr %B0" CR_TAB - "lsr %D0" CR_TAB - "ror %C0" CR_TAB - "ror %B0" CR_TAB - "bld %D0,7", operands, plen, 6); + avr_asm_len ("clr %A0" CR_TAB + "clr %B0" CR_TAB + "ror %D0" CR_TAB + "ror %C0" CR_TAB + "ror %B0", operands, plen, 5); + return reg1_unused_after + ? "" + : avr_asm_len ("bld %D0,7", operands, plen, 1); + case 16: if (reg0 + 2 == reg1) return avr_asm_len ("clr %B0" CR_TAB @@ -7275,10 +7287,16 @@ ashrhi3_out (rtx_insn *insn, rtx operands[], int *plen) "rol %A0" CR_TAB "rol %B0", operands, plen, 8); case 7: - return avr_asm_len ("lsl %A0" CR_TAB - "mov %A0,%B0" CR_TAB - "rol %A0" CR_TAB - "sbc %B0,%B0", operands, plen, 4); + return reg1_unused_after + ? 
avr_asm_len ("lsl %A1" CR_TAB + "mov %A0,%B1" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0", operands, plen, 4) + : avr_asm_len ("mov %A0,%A1" CR_TAB + "lsl %A0" CR_TAB + "mov %A0,%B1" CR_TAB + "rol %A0" CR_TAB + "sbc %B0,%B0", operands, plen, 5); case 8: { int reg0 = true_regnum (operands[0]); @@ -7422,13 +7440,30 @@ avr_out_ashrpsi3 (rtx_insn *insn, rtx *op, int *plen) "dec %C0" CR_TAB "mov %B0,%C1" CR_TAB "mov %A0,%B1", op, plen, 5); + case 15: + avr_asm_len (reg1_unused_after + ? "lsl %B1" + : "bst %B1,7", op, plen, 1); + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + avr_asm_len ("rol %A0" CR_TAB + "sbc %B0,%B0" CR_TAB + "sbc %C0,%C0", op, plen, 3); + return reg1_unused_after + ? "" + : avr_asm_len ("bld %A0,0", op, plen, 1); + case 16: if (dest != src + 2) avr_asm_len ("mov %A0,%C1", op, plen, 1); - return avr_asm_len ("clr %B0" CR_TAB - "sbrc %A0,7" CR_TAB - "com %B0" CR_TAB - "mov %C0,%B0", op, plen, 4); + return reg1_unused_after && dest != src + 2 + ? avr_asm_len ("rol %C1" CR_TAB + "sbc %B0,%B0" CR_TAB + "sbc %C0,%C0", op, plen, 3) + : avr_asm_len ("clr %B0" CR_TAB + "sbrc %A0,7" CR_TAB + "com %B0" CR_TAB + "mov %C0,%B0", op, plen, 4); case 22: { rtx xop[2] = { op[0], op[1] }; @@ -7504,31 +7539,58 @@ ashrsi3_out (rtx_insn *insn, rtx operands[], int *plen) "mov %C0,%D1" CR_TAB "mov %B0,%C1" CR_TAB "mov %A0,%B1", operands, plen, 6); + case 15: + avr_asm_len (reg1_unused_after + ? "lsl %B1" + : "bst %B1,7", operands, plen, 1); + if (reg0 != reg1 + 2) + { + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %A0,%C1", operands, plen, 1); + else + avr_asm_len ("mov %A0,%C1" CR_TAB + "mov %B0,%D1", operands, plen, 2); + } + avr_asm_len ("rol %A0" CR_TAB + "rol %B0" CR_TAB + "sbc %C0,%C0" CR_TAB + "sbc %D0,%D0", operands, plen, 4); + return reg1_unused_after + ? "" + : avr_asm_len ("bld %A0,0", operands, plen, 1); + case 16: if (reg0 == reg1 + 2) return avr_asm_len ("clr %D0" CR_TAB "sbrc %B0,7" CR_TAB "com %D0" CR_TAB "mov %C0,%D0", operands, plen, 4); - return AVR_HAVE_MOVW - ? avr_asm_len ("movw %A0,%C1" CR_TAB - "clr %D0" CR_TAB - "sbrc %B0,7" CR_TAB - "com %D0" CR_TAB + if (AVR_HAVE_MOVW) + avr_asm_len ("movw %A0,%C1", operands, plen, 1); + else + avr_asm_len ("mov %B0,%D1" CR_TAB + "mov %A0,%C1", operands, plen, 2); + return reg1_unused_after + ? avr_asm_len ("lsl %D1" CR_TAB + "sbc %D0,%D0" CR_TAB + "mov %C0,%D0", operands, plen, 3) + : avr_asm_len ("clr %D0" CR_TAB + "sbrc %B0,7" CR_TAB + "com %D0" CR_TAB + "mov %C0,%D0", operands, plen, 4); + case 24: + return reg1_unused_after + ? 
avr_asm_len ("mov %A0,%D1" CR_TAB + "lsl %D1" CR_TAB + "sbc %D0,%D0" CR_TAB + "mov %B0,%D0" CR_TAB "mov %C0,%D0", operands, plen, 5) - : avr_asm_len ("mov %B0,%D1" CR_TAB - "mov %A0,%C1" CR_TAB + : avr_asm_len ("mov %A0,%D1" CR_TAB "clr %D0" CR_TAB - "sbrc %B0,7" CR_TAB + "sbrc %A0,7" CR_TAB "com %D0" CR_TAB + "mov %B0,%D0" CR_TAB "mov %C0,%D0", operands, plen, 6); - case 24: - return avr_asm_len ("mov %A0,%D1" CR_TAB - "clr %D0" CR_TAB - "sbrc %A0,7" CR_TAB - "com %D0" CR_TAB - "mov %B0,%D0" CR_TAB - "mov %C0,%D0", operands, plen, 6); case 30: { rtx xop[2] = { operands[0], operands[1] }; @@ -7856,6 +7918,7 @@ avr_out_lshrpsi3 (rtx_insn *insn, rtx *op, int *plen) { int dest = REGNO (op[0]); int src = REGNO (op[1]); + bool src_unused_after_p = reg_unused_after (insn, op[1]); if (CONST_INT_P (op[2])) { @@ -7873,6 +7936,19 @@ avr_out_lshrpsi3 (rtx_insn *insn, rtx *op, int *plen) return avr_asm_len ("clr %C0" CR_TAB "mov %B0,%C1" CR_TAB "mov %A0,%B1", op, plen, 3); + case 15: + avr_asm_len (src_unused_after_p + ? "lsl %B1" + : "bst %B1,7", op, plen, 1); + if (dest != src + 2) + avr_asm_len ("mov %A0,%C1", op, plen, 1); + avr_asm_len ("clr %C0" CR_TAB + "clr %B0" CR_TAB + "rol %A0" CR_TAB + "rol %B0", op, plen, 4); + return src_unused_after_p + ? "" + : avr_asm_len ("bld %A0,0", op, plen, 1); case 16: if (dest != src + 2) @@ -7912,7 +7988,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen) { int reg0 = true_regnum (operands[0]); int reg1 = true_regnum (operands[1]); - bool reg1_unused_after_p = reg_unused_after (insn, operands[1]); + bool reg1_unused_after = reg_unused_after (insn, operands[1]); if (plen) *plen = 0; @@ -7942,7 +8018,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen) "mov %B0,%C1" CR_TAB "mov %A0,%B1", operands, plen, 4); case 15: - avr_asm_len (reg1_unused_after_p + avr_asm_len (reg1_unused_after ? "lsl %B1" : "bst %B1,7", operands, plen, 1); if (reg0 != reg1 + 2) @@ -7953,18 +8029,15 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen) avr_asm_len ("mov %A0,%C1" CR_TAB "mov %B0,%D1", operands, plen, 2); } - return reg1_unused_after_p - ? avr_asm_len ("clr %D0" CR_TAB - "clr %C0" CR_TAB - "rol %A0" CR_TAB - "rol %B0" CR_TAB - "rol %C0", operands, plen, 5) - : avr_asm_len ("clr %D0" CR_TAB - "clr %C0" CR_TAB - "lsl %A0" CR_TAB - "rol %B0" CR_TAB - "rol %C0" CR_TAB - "bld %A0,0", operands, plen, 6); + avr_asm_len ("clr %D0" CR_TAB + "clr %C0" CR_TAB + "rol %A0" CR_TAB + "rol %B0" CR_TAB + "rol %C0", operands, plen, 5); + return reg1_unused_after + ? "" + : avr_asm_len ("bld %A0,0", operands, plen, 1); + case 16: if (reg0 == reg1 + 2) return avr_asm_len ("clr %C0" CR_TAB @@ -12421,7 +12494,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case ASHIFT: switch (mode) { - case E_QImode: + case E_QImode: // ashlqi3 if (speed && XEXP (x, 0) == const1_rtx && GET_CODE (XEXP (x, 1)) == AND) @@ -12449,7 +12522,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_HImode: + case E_HImode: // ashlhi3 if (AVR_HAVE_MUL) { if (const_2_to_7_operand (XEXP (x, 1), HImode) @@ -12513,7 +12586,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_PSImode: + case E_PSImode: // ashlpsi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 
6 : 73); @@ -12529,6 +12602,10 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case 16: *total = COSTS_N_INSNS (3); break; + case 9: + case 15: + *total = COSTS_N_INSNS (6); + break; case 23: *total = COSTS_N_INSNS (5); break; @@ -12538,7 +12615,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_SImode: + case E_SImode: // ashlsi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 7 : 113); @@ -12585,7 +12662,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case ASHIFTRT: switch (mode) { - case E_QImode: + case E_QImode: // ashrqi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 4 : 17); @@ -12605,7 +12682,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_HImode: + case E_HImode: // ashrhi3 if (CONST_INT_P (XEXP (x, 0)) && INTVAL (XEXP (x, 0)) == 128 && GET_CODE (XEXP (x, 1)) == AND) @@ -12659,7 +12736,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_PSImode: + case E_PSImode: // ashrpsi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 6 : 73); @@ -12673,10 +12750,13 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case 1: *total = COSTS_N_INSNS (3); break; - case 16: case 8: + case 15: *total = COSTS_N_INSNS (5); break; + case 16: + *total = COSTS_N_INSNS (4); + break; case 22: *total = COSTS_N_INSNS (6); break; @@ -12689,7 +12769,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_SImode: + case E_SImode: // ashrsi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 7 : 113); @@ -12706,9 +12786,16 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, *total = COSTS_N_INSNS (4); break; case 8: + *total = COSTS_N_INSNS (6); + break; + case 15: + *total = COSTS_N_INSNS (6 - AVR_HAVE_MOVW); + break; case 16: + *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW); + break; case 24: - *total = COSTS_N_INSNS (6); + *total = COSTS_N_INSNS (5); break; case 2: *total = COSTS_N_INSNS (!speed ? 7 : 8); @@ -12740,7 +12827,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, switch (mode) { - case E_QImode: + case E_QImode: // lshrqi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 4 : 17); @@ -12758,7 +12845,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_HImode: + case E_HImode: // lshrhi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 5 : 41); @@ -12806,7 +12893,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_PSImode: + case E_PSImode: // lshrpsi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 6 : 73); @@ -12822,6 +12909,9 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, case 16: *total = COSTS_N_INSNS (3); break; + case 15: + *total = COSTS_N_INSNS (6); + break; case 23: *total = COSTS_N_INSNS (5); break; @@ -12831,7 +12921,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, } break; - case E_SImode: + case E_SImode: // lshrsi3 if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 7 : 113); diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 346847fa7d6e..e343fb23d079 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -187,15 +187,15 @@ ;; The following ISA attributes are actually not architecture specific, ;; but depend on (optimization) options. This is because the "enabled" ;; attribut can't depend on more than one other attribute. 
This means -;; that 2op and 3op must work for all ISAs, and hence a 'flat' attribue -;; scheme can be used (as opposed to a true cartesian product). +;; that 3op must work for all ISAs, and hence a 'flat' attribue scheme +;; can be used (as opposed to a true cartesian product). -;; 2op : insn is a 2-operand insn 3op : insn is a 3-operand insn +;; 3op : alternative is a 3-operand insn (define_attr "isa" "mov,movw, rjmp,jmp, ijmp,eijmp, lpm,lpmx, elpm,elpmx, no_xmega,xmega, no_adiw,adiw, - 2op,3op, + 3op, standard" (const_string "standard")) @@ -245,9 +245,6 @@ (and (eq_attr "isa" "no_adiw") (match_test "!AVR_HAVE_ADIW")) - (and (eq_attr "isa" "2op") - (match_test "!avr_shift_is_3op ()")) - (and (eq_attr "isa" "3op") (match_test "avr_shift_is_3op ()")) ) @@ -5152,30 +5149,33 @@ ;; "ashlhq3" "ashluhq3" ;; "ashlha3" "ashluha3" (define_insn_and_split "ashl<mode>3" - [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r,r") - (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,n,Qm")))] + [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,C2l,n,Qm")))] "" "#" "&& reload_completed" [(parallel [(set (match_dup 0) (ashift:ALL2 (match_dup 1) (match_dup 2))) - (clobber (reg:CC REG_CC))])]) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "isa" "*,*,*,3op,*,*")]) ;; "*ashlhi3" ;; "*ashlhq3" "*ashluhq3" ;; "*ashlha3" "*ashluha3" (define_insn "*ashl<mode>3" - [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r,r") - (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,n,Qm"))) + [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,C2l,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { return ashlhi3_out (insn, operands, NULL); } - [(set_attr "length" "10") + [(set_attr "isa" "*,*,*,3op,*,*") + (set_attr "length" "10") (set_attr "adjust_len" "ashlhi")]) @@ -5265,9 +5265,9 @@ ;; "ashlsq3" "ashlusq3" ;; "ashlsa3" "ashlusa3" (define_insn_and_split "ashl<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r") - (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm")))] + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,n,Qm")))] "" "#" "&& reload_completed" @@ -5276,20 +5276,20 @@ (match_dup 2))) (clobber (reg:CC REG_CC))])] "" - [(set_attr "isa" "*,*,*,2op,3op,*,*")]) + [(set_attr "isa" "*,*,*,3op,*,*")]) (define_insn "*ashl<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r") - (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,C4l,n,Qm"))) + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4l,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { return ashlsi3_out (insn, operands, NULL); } - [(set_attr 
"length" "12") - (set_attr "adjust_len" "ashlsi") - (set_attr "isa" "*,*,*,2op,3op,*,*")]) + [(set_attr "isa" "*,*,*,3op,*,*") + (set_attr "length" "12") + (set_attr "adjust_len" "ashlsi")]) ;; Optimize if a scratch register from LD_REGS happens to be available. @@ -5348,6 +5348,7 @@ operands[2] = avr_to_int_mode (operands[0]); }) +;; Endow 2-byte shift with a scratch if available. (define_peephole2 ; *ashlhi3_const *ashrhi3_const *lshrhi3_const [(match_scratch:QI 3 "d") (parallel [(set (match_operand:ALL2 0 "register_operand") @@ -5367,75 +5368,78 @@ ;; "*ashlhq3_const" "*ashluhq3_const" ;; "*ashlha3_const" "*ashluha3_const" (define_insn "*ashl<mode>3_const" - [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r") - (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,0") - (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,n"))) - (clobber (match_scratch:QI 3 "=X ,X ,&d")) + [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r ,r") + (ashift:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,C2l,n"))) + (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return ashlhi3_out (insn, operands, NULL); } - [(set_attr "length" "10") + [(set_attr "isa" "*,*,3op,*") + (set_attr "length" "10") (set_attr "adjust_len" "ashlhi")]) -(define_code_attr constr_split_shift4 - [(ashift "C4l") - (ashiftrt "C4a") - (lshiftrt "C4r")]) +(define_code_attr constr_split_suffix + [(ashift "l") + (ashiftrt "a") + (lshiftrt "r")]) ;; Split shift into a byte shift and a residual bit shift (without scratch) (define_split - [(parallel [(set (match_operand:ALL4 0 "register_operand") - (any_shift:ALL4 (match_operand:ALL4 1 "register_operand") - (match_operand:QI 2 "const_int_operand"))) + [(parallel [(set (match_operand:ALL234 0 "register_operand") + (any_shift:ALL234 (match_operand:ALL234 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) (clobber (reg:CC REG_CC))])] "avropt_split_bit_shift && n_avr_fuse_add_executed >= 1 - && satisfies_constraint_<constr_split_shift4> (operands[2])" + && satisfies_constraint_C<SIZE><constr_split_suffix> (operands[2])" [(parallel [(set (match_dup 0) - (any_shift:ALL4 (match_dup 1) - (match_dup 3))) + (any_shift:ALL234 (match_dup 1) + (match_dup 3))) + (clobber (scratch:QI)) (clobber (reg:CC REG_CC))]) (parallel [(set (match_dup 0) - (any_shift:ALL4 (match_dup 0) - (match_dup 4))) + (any_shift:ALL234 (match_dup 0) + (match_dup 4))) (clobber (reg:CC REG_CC))])] { + int offset = INTVAL (operands[2]); if (avr_split_shift (operands, NULL_RTX, <CODE>)) DONE; - else if (REGNO (operands[0]) == REGNO (operands[1])) + else if (offset <= 8) FAIL; - int offset = INTVAL (operands[2]); operands[3] = GEN_INT (offset & ~7); operands[4] = GEN_INT (offset & 7); }) ;; Split shift into a byte shift and a residual bit shift (with scratch) (define_split - [(parallel [(set (match_operand:ALL4 0 "register_operand") - (any_shift:ALL4 (match_operand:ALL4 1 "register_operand") - (match_operand:QI 2 "const_int_operand"))) + [(parallel [(set (match_operand:ALL234 0 "register_operand") + (any_shift:ALL234 (match_operand:ALL234 1 "register_operand") + (match_operand:QI 2 "const_int_operand"))) (clobber (match_operand:QI 3 "scratch_or_dreg_operand")) (clobber (reg:CC REG_CC))])] "avropt_split_bit_shift && n_avr_fuse_add_executed >= 1 - && satisfies_constraint_<constr_split_shift4> (operands[2])" + && satisfies_constraint_C<SIZE><constr_split_suffix> (operands[2])" [(parallel 
[(set (match_dup 0) - (any_shift:ALL4 (match_dup 1) - (match_dup 4))) + (any_shift:ALL234 (match_dup 1) + (match_dup 4))) + (clobber (scratch:QI)) (clobber (reg:CC REG_CC))]) (parallel [(set (match_dup 0) - (any_shift:ALL4 (match_dup 0) - (match_dup 5))) + (any_shift:ALL234 (match_dup 0) + (match_dup 5))) (clobber (match_dup 3)) (clobber (reg:CC REG_CC))])] { + int offset = INTVAL (operands[2]); if (avr_split_shift (operands, operands[3], <CODE>)) DONE; - else if (REGNO (operands[0]) == REGNO (operands[1])) + else if (offset <= 8) FAIL; - int offset = INTVAL (operands[2]); operands[4] = GEN_INT (offset & ~7); operands[5] = GEN_INT (offset & 7); }) @@ -5462,18 +5466,18 @@ ;; "*ashlsq3_const" "*ashlusq3_const" ;; "*ashlsa3_const" "*ashlusa3_const" (define_insn "*ashl<mode>3_const" - [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r") - (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0") - (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4l,C4l,n"))) - (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d ,&d")) + [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r") + (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4l,n"))) + (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return ashlsi3_out (insn, operands, NULL); } - [(set_attr "length" "10") - (set_attr "adjust_len" "ashlsi") - (set_attr "isa" "*,*,2op,3op,*")]) + [(set_attr "isa" "*,*,3op,*") + (set_attr "length" "10") + (set_attr "adjust_len" "ashlsi")]) (define_expand "ashlpsi3" [(parallel [(set (match_operand:PSI 0 "register_operand" "") @@ -5503,10 +5507,10 @@ }) (define_insn_and_split "*ashlpsi3_split" - [(set (match_operand:PSI 0 "register_operand" "=r,r,r ,r") - (ashift:PSI (match_operand:PSI 1 "register_operand" "0,0,r ,0") - (match_operand:QI 2 "nonmemory_operand" "r,P,O C23,n"))) - (clobber (match_scratch:QI 3 "=X,X,X ,&d"))] + [(set (match_operand:PSI 0 "register_operand" "=r,r,r ,r ,r") + (ashift:PSI (match_operand:PSI 1 "register_operand" "0,0,r ,r ,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,O C15 C23,C3l,n"))) + (clobber (match_scratch:QI 3 "=X,X,X ,&d ,&d"))] "" "#" "&& reload_completed" @@ -5514,19 +5518,23 @@ (ashift:PSI (match_dup 1) (match_dup 2))) (clobber (match_dup 3)) - (clobber (reg:CC REG_CC))])]) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "isa" "*,*,*,3op,*")]) (define_insn "*ashlpsi3" - [(set (match_operand:PSI 0 "register_operand" "=r,r,r ,r") - (ashift:PSI (match_operand:PSI 1 "register_operand" "0,0,r ,0") - (match_operand:QI 2 "nonmemory_operand" "r,P,O C23,n"))) - (clobber (match_scratch:QI 3 "=X,X,X ,&d")) + [(set (match_operand:PSI 0 "register_operand" "=r,r,r ,r ,r") + (ashift:PSI (match_operand:PSI 1 "register_operand" "0,0,r ,r ,0") + (match_operand:QI 2 "nonmemory_operand" "r,P,O C15 C23,C3l,n"))) + ; "X&d" since the insn may be a split of a 4-byte shift without scratch. 
+ (clobber (match_scratch:QI 3 "=X,X,X ,X&d,X&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return avr_out_ashlpsi3 (insn, operands, NULL); } - [(set_attr "adjust_len" "ashlpsi")]) + [(set_attr "isa" "*,*,*,3op,*") + (set_attr "adjust_len" "ashlpsi")]) ;; >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> >> ;; arithmetic shift right @@ -5561,37 +5569,40 @@ ;; "ashrhq3" "ashruhq3" ;; "ashrha3" "ashruha3" (define_insn_and_split "ashr<mode>3" - [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r,r") - (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C14 C15,n,Qm")))] + [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C07 C14 C15,C2a,n,Qm")))] "" "#" "&& reload_completed" [(parallel [(set (match_dup 0) (ashiftrt:ALL2 (match_dup 1) (match_dup 2))) - (clobber (reg:CC REG_CC))])]) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "isa" "*,*,*,3op,*,*")]) ;; "*ashrhi3" ;; "*ashrhq3" "*ashruhq3" ;; "*ashrha3" "*ashruha3" (define_insn "*ashr<mode>3" - [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r,r") - (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C14 C15,n,Qm"))) + [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C07 C14 C15,C2a,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { return ashrhi3_out (insn, operands, NULL); } - [(set_attr "length" "10") + [(set_attr "isa" "*,*,*,3op,*,*") + (set_attr "length" "10") (set_attr "adjust_len" "ashrhi")]) (define_insn_and_split "ashrpsi3" - [(set (match_operand:PSI 0 "register_operand" "=r,r ,r ,r") - (ashiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0 ,r ,0") - (match_operand:QI 2 "nonmemory_operand" "r,PK,O C22 C23,n"))) - (clobber (match_scratch:QI 3 "=X,X ,X ,&d"))] + [(set (match_operand:PSI 0 "register_operand" "=r,r ,r ,r ,r") + (ashiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0 ,r ,r ,0") + (match_operand:QI 2 "nonmemory_operand" "r,PK,O C15 C22 C23,C3a,n"))) + (clobber (match_scratch:QI 3 "=X,X ,X ,&d ,&d"))] "" "#" "&& reload_completed" @@ -5599,27 +5610,31 @@ (ashiftrt:PSI (match_dup 1) (match_dup 2))) (clobber (match_dup 3)) - (clobber (reg:CC REG_CC))])]) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "isa" "*,*,*,3op,*")]) (define_insn "*ashrpsi3" - [(set (match_operand:PSI 0 "register_operand" "=r,r ,r ,r") - (ashiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0 ,r ,0") - (match_operand:QI 2 "nonmemory_operand" "r,PK,O C22 C23,n"))) - (clobber (match_scratch:QI 3 "=X,X ,X ,&d")) + [(set (match_operand:PSI 0 "register_operand" "=r,r ,r ,r ,r") + (ashiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0 ,r ,r ,0") + (match_operand:QI 2 "nonmemory_operand" "r,PK,O C15 C22 C23,C3a,n"))) + ; "X&d" since the insn may be a split of a 4-byte shift without scratch. 
+ (clobber (match_scratch:QI 3 "=X,X ,X ,X&d,X&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return avr_out_ashrpsi3 (insn, operands, NULL); } - [(set_attr "adjust_len" "ashrpsi")]) + [(set_attr "isa" "*,*,*,3op,*") + (set_attr "adjust_len" "ashrpsi")]) ;; "ashrsi3" ;; "ashrsq3" "ashrusq3" ;; "ashrsa3" "ashrusa3" (define_insn_and_split "ashr<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r") - (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C30 C31,C4a,C4a,n,Qm")))] + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C30 C31,C4a,n,Qm")))] "" "#" "&& reload_completed" @@ -5628,18 +5643,18 @@ (match_dup 2))) (clobber (reg:CC REG_CC))])] "" - [(set_attr "isa" "*,*,*,2op,3op,*,*")]) + [(set_attr "isa" "*,*,*,3op,*,*")]) (define_insn "*ashr<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r") - (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C30 C31,C4a,C4a,n,Qm"))) + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C30 C31,C4a,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { return ashrsi3_out (insn, operands, NULL); } - [(set_attr "isa" "*,*,*,2op,3op,*,*") + [(set_attr "isa" "*,*,*,3op,*,*") (set_attr "length" "12") (set_attr "adjust_len" "ashrsi")]) @@ -5648,16 +5663,17 @@ ;; "*ashrhq3_const" "*ashruhq3_const" ;; "*ashrha3_const" "*ashruha3_const" (define_insn "*ashr<mode>3_const" - [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r") - (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,0") - (match_operand:QI 2 "const_int_operand" "LPK,O C14 C15,n"))) - (clobber (match_scratch:QI 3 "=X ,X ,&d")) + [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r ,r") + (ashiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LPK,O C07 C14 C15,C2a,n"))) + (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return ashrhi3_out (insn, operands, NULL); } - [(set_attr "length" "10") + [(set_attr "isa" "*,*,3op,*") + (set_attr "length" "10") (set_attr "adjust_len" "ashrhi")]) @@ -5665,16 +5681,16 @@ ;; "*ashrsq3_const" "*ashrusq3_const" ;; "*ashrsa3_const" "*ashrusa3_const" (define_insn "*ashr<mode>3_const" - [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r") - (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0") - (match_operand:QI 2 "const_int_operand" "LP,O C30 C31,C4a,C4a,n"))) - (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d ,&d")) + [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r") + (ashiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LP,O C15 C30 C31,C4a,n"))) + (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return ashrsi3_out (insn, operands, NULL); } - [(set_attr "isa" "*,*,2op,3op,*") + [(set_attr "isa" "*,*,3op,*") (set_attr "length" "10") (set_attr "adjust_len" "ashrsi")]) @@ -5757,33 +5773,36 @@ ;; "lshrhq3" "lshruhq3" ;; "lshrha3" "lshruha3" 
(define_insn_and_split "lshr<mode>3" - [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r,r") - (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,n,Qm")))] + [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r ,r,r") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,C2r,n,Qm")))] "" "#" "&& reload_completed" [(parallel [(set (match_dup 0) (lshiftrt:ALL2 (match_dup 1) (match_dup 2))) - (clobber (reg:CC REG_CC))])]) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "isa" "*,*,*,3op,*,*")]) (define_insn "*lshr<mode>3" - [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r,r") - (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,n,Qm"))) + [(set (match_operand:ALL2 0 "register_operand" "=r,r ,r ,r ,r,r") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C7c C15,C2r,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { return lshrhi3_out (insn, operands, NULL); } - [(set_attr "adjust_len" "lshrhi")]) + [(set_attr "isa" "*,*,*,3op,*,*") + (set_attr "adjust_len" "lshrhi")]) (define_insn_and_split "lshrpsi3" - [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r ,r,r") - (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0,r,r ,0,0") - (match_operand:QI 2 "nonmemory_operand" "r,P,O,C23,K,n"))) - (clobber (match_scratch:QI 3 "=X,X,X,X ,X,&d"))] + [(set (match_operand:PSI 0 "register_operand" "=r,r ,r ,r ,r") + (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0 ,r ,r ,0") + (match_operand:QI 2 "nonmemory_operand" "r,PK,O C15 C23,C3r,n"))) + (clobber (match_scratch:QI 3 "=X,X ,X ,&d ,&d"))] "" "#" "&& reload_completed" @@ -5791,27 +5810,31 @@ (lshiftrt:PSI (match_dup 1) (match_dup 2))) (clobber (match_dup 3)) - (clobber (reg:CC REG_CC))])]) + (clobber (reg:CC REG_CC))])] + "" + [(set_attr "isa" "*,*,*,3op,*")]) (define_insn "*lshrpsi3" - [(set (match_operand:PSI 0 "register_operand" "=r,r,r,r ,r,r") - (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0,r,r ,0,0") - (match_operand:QI 2 "nonmemory_operand" "r,P,O,C23,K,n"))) - (clobber (match_scratch:QI 3 "=X,X,X,X ,X,&d")) + [(set (match_operand:PSI 0 "register_operand" "=r,r ,r ,r ,r") + (lshiftrt:PSI (match_operand:PSI 1 "register_operand" "0,0 ,r ,r ,0") + (match_operand:QI 2 "nonmemory_operand" "r,PK,O C15 C23,C3r,n"))) + ; "X&d" since the insn may be a split of a 4-byte shift without scratch. 
+ (clobber (match_scratch:QI 3 "=X,X ,X ,X&d,X&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return avr_out_lshrpsi3 (insn, operands, NULL); } - [(set_attr "adjust_len" "lshrpsi")]) + [(set_attr "isa" "*,*,*,3op,*") + (set_attr "adjust_len" "lshrpsi")]) ;; "lshrsi3" ;; "lshrsq3" "lshrusq3" ;; "lshrsa3" "lshrusa3" (define_insn_and_split "lshr<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r") - (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,C4r,n,Qm")))] + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,n,Qm")))] "" "#" "&& reload_completed" @@ -5820,18 +5843,18 @@ (match_dup 2))) (clobber (reg:CC REG_CC))])] "" - [(set_attr "isa" "*,*,*,2op,3op,*,*")]) + [(set_attr "isa" "*,*,*,3op,*,*")]) (define_insn "*lshr<mode>3" - [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r ,r,r") - (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,0 ,r ,0,0") - (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,C4r,n,Qm"))) + [(set (match_operand:ALL4 0 "register_operand" "=r,r ,r ,r ,r,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0,0 ,r ,r ,0,0") + (match_operand:QI 2 "nop_general_operand" "r,LPK,O C15 C31,C4r,n,Qm"))) (clobber (reg:CC REG_CC))] "reload_completed" { return lshrsi3_out (insn, operands, NULL); } - [(set_attr "isa" "*,*,*,2op,3op,*,*") + [(set_attr "isa" "*,*,*,3op,*,*") (set_attr "adjust_len" "lshrsi")]) ;; Optimize if a scratch register from LD_REGS happens to be available. @@ -5896,32 +5919,33 @@ ;; "*lshrhq3_const" "*lshruhq3_const" ;; "*lshrha3_const" "*lshruha3_const" (define_insn "*lshr<mode>3_const" - [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r") - (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,0") - (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,n"))) - (clobber (match_scratch:QI 3 "=X ,X ,&d")) + [(set (match_operand:ALL2 0 "register_operand" "=r ,r ,r ,r") + (lshiftrt:ALL2 (match_operand:ALL2 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LPK,O C7c C15,C2r,n"))) + (clobber (match_scratch:QI 3 "=X ,X ,&d ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return lshrhi3_out (insn, operands, NULL); } - [(set_attr "adjust_len" "lshrhi")]) + [(set_attr "isa" "*,*,3op,*") + (set_attr "adjust_len" "lshrhi")]) ;; "*lshrsi3_const" ;; "*lshrsq3_const" "*lshrusq3_const" ;; "*lshrsa3_const" "*lshrusa3_const" (define_insn "*lshr<mode>3_const" - [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r ,r") - (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,0 ,r ,0") - (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4r,C4r,n"))) - (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d ,&d")) + [(set (match_operand:ALL4 0 "register_operand" "=r ,r ,r ,r") + (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r ,r ,0") + (match_operand:QI 2 "const_int_operand" "LP,O C15 C31,C4r,n"))) + (clobber (match_operand:QI 3 "scratch_or_dreg_operand" "=X ,X ,&d ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { return lshrsi3_out (insn, operands, NULL); } - [(set_attr "isa" "*,*,2op,3op,*") + [(set_attr "isa" "*,*,3op,*") (set_attr "adjust_len" "lshrsi")]) ;; abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) abs(x) diff --git 
a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md index 22bb4575089f..6ae0412d55ea 100644 --- a/gcc/config/avr/constraints.md +++ b/gcc/config/avr/constraints.md @@ -283,6 +283,36 @@ (and (match_code "const_int,symbol_ref,const") (match_test "const_0mod256_operand (op, HImode)"))) +(define_constraint "C2a" + "A constant integer shift offset for a 2-byte ASHIFTRT that's opt to being split." + (and (match_code "const_int") + (match_test "avr_split_shift_p (2, ival, ASHIFTRT)"))) + +(define_constraint "C2r" + "A constant integer shift offset for a 2-byte LSHIFTRT that's opt to being split." + (and (match_code "const_int") + (match_test "avr_split_shift_p (2, ival, LSHIFTRT)"))) + +(define_constraint "C2l" + "A constant integer shift offset for a 2-byte ASHIFT that's opt to being split." + (and (match_code "const_int") + (match_test "avr_split_shift_p (2, ival, ASHIFT)"))) + +(define_constraint "C3a" + "A constant integer shift offset for a 3-byte ASHIFTRT that's opt to being split." + (and (match_code "const_int") + (match_test "avr_split_shift_p (3, ival, ASHIFTRT)"))) + +(define_constraint "C3r" + "A constant integer shift offset for a 3-byte LSHIFTRT that's opt to being split." + (and (match_code "const_int") + (match_test "avr_split_shift_p (3, ival, LSHIFTRT)"))) + +(define_constraint "C3l" + "A constant integer shift offset for a 3-byte ASHIFT that's opt to being split." + (and (match_code "const_int") + (match_test "avr_split_shift_p (3, ival, ASHIFT)"))) + (define_constraint "C4a" "A constant integer shift offset for a 4-byte ASHIFTRT that's opt to being split." (and (match_code "const_int") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 6fe2be06054b..18a5754a0973 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -24320,11 +24320,11 @@ sbiw r26, const ; X -= const @opindex msplit-bit-shift @item -msplit-bit-shift -Split multi-byte shifts into a shift with a byte offset and a residual -shift with a non-byte offset. This optimization is turned on per default -for @option{-O2} and higher, including @option{-Os} but excluding -@option{-Oz}. To date, only 4-byte shifts with a shift offset of -at least 17 are split. Splitting of shifts with an offset that is +Split multi-byte shifts with a constant offset into a shift with +a byte offset and a residual shift with a non-byte offset. +This optimization is turned on per default for @option{-O2} and higher, +including @option{-Os} but excluding @option{-Oz}. +Splitting of shifts with a constant offset that is a multiple of 8 is controlled by @option{-mfuse-move}. @opindex mtiny-stack
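
For readers skimming the patch, here is a hedged illustration of the shift shape this change targets. It is not part of the commit; the function name and the exact instruction sequence are assumptions for illustration only. With -msplit-bit-shift enabled (the default at -O2 and -Os), a 2-byte shift by a constant offset that is not a multiple of 8, such as 10, may now be split after reload into a 3-operand byte shift plus a short residual bit shift, so destination and source no longer have to share a register:

    #include <stdint.h>

    /* Illustrative sketch only.  A 16-bit left shift by 10 falls in the
       9...14 range handled by the new 2-byte split (constraint C2l):
       conceptually the shift becomes a byte shift (<< 8) that can write
       directly into a different destination register, followed by a
       residual shift by 2 on the destination, rather than ten single-bit
       shifts on a copy of the source.  */
    uint16_t
    shl10 (uint16_t x)
    {
      return x << 10;
    }

Before this change, only 4-byte shifts with offsets of at least 17 were split in this way, which is why the -msplit-bit-shift documentation above drops that restriction.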