https://gcc.gnu.org/g:cca1229b85f2ad9422773fdb954d0924fa1cd350
commit r15-2064-gcca1229b85f2ad9422773fdb954d0924fa1cd350
Author: Georg-Johann Lay <a...@gjlay.de>
Date: Tue Jul 16 15:17:23 2024 +0200

    AVR: Allow more combinations of XOR / IOR with byte-shifts.

    This patch takes some existing patterns that have QImode as one
    input and uses a mode iterator to allow for more modes to match.
    These insns are split after reload into *xorqi3 resp. *iorqi3 insn(s).

    gcc/
            * config/avr/avr-protos.h (avr_emit_xior_with_shift): New proto.
            * config/avr/avr.cc (avr_emit_xior_with_shift): New function.
            * config/avr/avr.md (any_lshift): New code iterator.
            (*<xior:code><mode>.<any_lshift:code>): New insn-and-split.
            (<code><HISI:mode><QIPSI:mode>.0): Replaces...
            (*<code_stdname><mode>qi.byte0): ...this one.
            (*<xior:code><HISI:mode><QIPSI:mode>.<any_lshift:code>): Replaces...
            (*<code_stdname><mode>qi.byte1-3): ...this one.

Diff:
---
 gcc/config/avr/avr-protos.h |  1 +
 gcc/config/avr/avr.cc       | 52 ++++++++++++++++++++++++++++++
 gcc/config/avr/avr.md       | 78 +++++++++++++++++++++++++++++---------------
 3 files changed, 104 insertions(+), 27 deletions(-)

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 6e02161759ca..d3fa6c677232 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -88,6 +88,7 @@ extern rtx avr_to_int_mode (rtx);
 extern void avr_expand_prologue (void);
 extern void avr_expand_epilogue (bool);
 extern bool avr_emit_cpymemhi (rtx*);
+extern void avr_emit_xior_with_shift (rtx_insn*, rtx*, int);
 extern int avr_epilogue_uses (int regno);
 extern bool avr_split_tiny_move (rtx_insn *insn, rtx *operands);

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4a7cbd0e7bc6..d2a08c60c3ad 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -9754,6 +9754,58 @@ avr_out_bitop (rtx insn, rtx *xop, int *plen)
 }


+/* Emit code for
+
+     XOP[0] = XOP[0] <xior> (XOP[1] <shift> BITOFF)
+
+   where XOP[0] and XOP[1] are hard registers with integer mode,
+   <xior> is XOR or IOR, and <shift> is LSHIFTRT or ASHIFT with a
+   non-negative shift offset BITOFF. This function emits the operation
+   in terms of byte-wise operations in QImode. */
+
+void
+avr_emit_xior_with_shift (rtx_insn *insn, rtx *xop, int bitoff)
+{
+  rtx src = SET_SRC (single_set (insn));
+  RTX_CODE xior = GET_CODE (src);
+  gcc_assert (xior == XOR || xior == IOR);
+  gcc_assert (bitoff % 8 == 0);
+
+  // Work out the shift offset in bytes; negative for shift right.
+  RTX_CODE shift = GET_CODE (XEXP (src, 0));
+  int byteoff = 0?0
+    : shift == ASHIFT ? bitoff / 8
+    : shift == LSHIFTRT ? -bitoff / 8
+    // Not a shift but something like REG or ZERO_EXTEND:
+    // Use xop[1] as is, without shifting it.
+    : 0;
+
+  // Work out which hard REGNOs belong to the operands.
+  int size0 = GET_MODE_SIZE (GET_MODE (xop[0]));
+  int size1 = GET_MODE_SIZE (GET_MODE (xop[1]));
+  int regno0_lo = REGNO (xop[0]), regno0_hi = regno0_lo + size0 - 1;
+  int regno1_lo = REGNO (xop[1]), regno1_hi = regno1_lo + size1 - 1;
+  int regoff = regno0_lo - regno1_lo + byteoff;
+
+  // The order of insns matters in the rare case when xop[1] overlaps xop[0].
+  int beg = regoff > 0 ? regno1_hi : regno1_lo;
+  int end = regoff > 0 ? regno1_lo : regno1_hi;
+  int inc = regoff > 0 ? -1 : 1;
+
+  rtx (*gen)(rtx,rtx,rtx) = xior == XOR ? gen_xorqi3 : gen_iorqi3;
+
+  for (int i = beg; i != end + inc; i += inc)
+    {
+      if (IN_RANGE (i + regoff, regno0_lo, regno0_hi))
+        {
+          rtx reg0 = all_regs_rtx[i + regoff];
+          rtx reg1 = all_regs_rtx[i];
+          emit_insn (gen (reg0, reg0, reg1));
+        }
+    }
+}
+
+
 /* Output sign extension from XOP[1] to XOP[0] and return "".
    If PLEN == NULL, print assembler instructions to perform the operation;
    otherwise, set *PLEN to the length of the instruction sequence (in words)
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index e67284421b64..cf9541422a33 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -313,6 +313,7 @@
 (define_code_iterator any_extract [sign_extract zero_extract])
 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
 (define_code_iterator any_shift [lshiftrt ashiftrt ashift])
+(define_code_iterator any_lshift [lshiftrt ashift]) ; logic shift

 (define_code_iterator piaop [plus ior and])
 (define_code_iterator bitop [xor ior and])
@@ -9350,46 +9351,69 @@
   })


+;; "*iorsi.ashift" "*iorpsi.ashift" "*iorhi.ashift"
+;; "*xorsi.ashift" "*xorpsi.ashift" "*xorhi.ashift"
+;; "*iorsi.lshiftrt" "*iorpsi.lshiftrt" "*iorhi.lshiftrt"
+;; "*xorsi.lshiftrt" "*xorpsi.lshiftrt" "*xorhi.lshiftrt"
+(define_insn_and_split "*<xior:code><mode>.<any_lshift:code>"
+  [(set (match_operand:HISI 0 "register_operand" "=r")
+        (xior:HISI (any_lshift:HISI (match_operand:HISI 1 "register_operand" "r")
+                                    (match_operand:QI 3 "const_8_16_24_operand" "n"))
+                   (match_operand:HISI 2 "register_operand" "0")))]
+  "INTVAL (operands[3]) <= <MSB>"
+  "#"
+  "&& reload_completed"
+  [(scratch)]
+  {
+    avr_emit_xior_with_shift (curr_insn, operands, INTVAL (operands[3]));
+    DONE;
+  })
+
 ;; Some combine patterns that try to fix bad code when a value is composed
 ;; from byte parts like in PR27663.
 ;; The patterns give some release but the code still is not optimal,
 ;; in particular when subreg lowering (-fsplit-wide-types) is turned on.
 ;; That switch obfuscates things here and in many other places.

-;; "*iorhiqi.byte0" "*iorpsiqi.byte0" "*iorsiqi.byte0"
-;; "*xorhiqi.byte0" "*xorpsiqi.byte0" "*xorsiqi.byte0"
-(define_insn_and_split "*<code_stdname><mode>qi.byte0"
-  [(set (match_operand:HISI 0 "register_operand" "=r")
-        (xior:HISI
-         (zero_extend:HISI (match_operand:QI 1 "register_operand" "r"))
-         (match_operand:HISI 2 "register_operand" "0")))]
-  ""
+;; "*iorsiqi.0" "*iorpsiqi.0" "*iorhiqi.0"
+;; "*iorsihi.0" "*iorpsihi.0" "*iorsipsi.0"
+;; "*xorsiqi.0" "*xorpsiqi.0" "*xorhiqi.0"
+;; "*xorsihi.0" "*xorpsihi.0" "*xorsipsi.0"
+(define_insn_and_split "*<code><HISI:mode><QIPSI:mode>.0"
+  [(set (match_operand:HISI 0 "register_operand" "=r")
+        (xior:HISI (zero_extend:HISI (match_operand:QIPSI 1 "register_operand" "r"))
+                   (match_operand:HISI 2 "register_operand" "0")))]
+  "<HISI:SIZE> > <QIPSI:SIZE>"
   "#"
-  "reload_completed"
-  [(set (match_dup 3)
-        (xior:QI (match_dup 3)
-                 (match_dup 1)))]
+  "&& reload_completed"
+  [(scratch)]
   {
-    operands[3] = simplify_gen_subreg (QImode, operands[0], <MODE>mode, 0);
+    avr_emit_xior_with_shift (curr_insn, operands, 0);
+    DONE;
   })

-;; "*iorhiqi.byte1-3" "*iorpsiqi.byte1-3" "*iorsiqi.byte1-3"
-;; "*xorhiqi.byte1-3" "*xorpsiqi.byte1-3" "*xorsiqi.byte1-3"
-(define_insn_and_split "*<code_stdname><mode>qi.byte1-3"
-  [(set (match_operand:HISI 0 "register_operand" "=r")
-        (xior:HISI
-         (ashift:HISI (zero_extend:HISI (match_operand:QI 1 "register_operand" "r"))
-                      (match_operand:QI 2 "const_8_16_24_operand" "n"))
-         (match_operand:HISI 3 "register_operand" "0")))]
-  "INTVAL(operands[2]) <= <MSB>"
+
+;; "*iorsiqi.ashift" "*iorpsiqi.ashift" "*iorhiqi.ashift"
+;; "*iorsihi.ashift" "*iorpsihi.ashift" "*iorsipsi.ashift"
+;; "*xorsiqi.ashift" "*xorpsiqi.ashift" "*xorhiqi.ashift"
+;; "*xorsihi.ashift" "*xorpsihi.ashift" "*xorsipsi.ashift"
+;; "*iorsiqi.lshiftrt" "*iorpsiqi.lshiftrt" "*iorhiqi.lshiftrt"
+;; "*iorsihi.lshiftrt" "*iorpsihi.lshiftrt" "*iorsipsi.lshiftrt"
+;; "*xorsiqi.lshiftrt" "*xorpsiqi.lshiftrt" "*xorhiqi.lshiftrt"
+;; "*xorsihi.lshiftrt" "*xorpsihi.lshiftrt" "*xorsipsi.lshiftrt"
+(define_insn_and_split "*<xior:code><HISI:mode><QIPSI:mode>.<any_lshift:code>"
+  [(set (match_operand:HISI 0 "register_operand" "=r")
+        (xior:HISI (any_lshift:HISI (zero_extend:HISI (match_operand:QIPSI 1 "register_operand" "r"))
+                                    (match_operand:QI 3 "const_8_16_24_operand" "n"))
+                   (match_operand:HISI 2 "register_operand" "0")))]
+  "<HISI:SIZE> > <QIPSI:SIZE>
+   && INTVAL (operands[3]) <= <HISI:MSB>"
   "#"
   "&& reload_completed"
-  [(set (match_dup 4)
-        (xior:QI (match_dup 4)
-                 (match_dup 1)))]
+  [(scratch)]
   {
-    int byteno = INTVAL(operands[2]) / BITS_PER_UNIT;
-    operands[4] = simplify_gen_subreg (QImode, operands[0], <MODE>mode, byteno);
+    avr_emit_xior_with_shift (curr_insn, operands, INTVAL (operands[3]));
+    DONE;
   })

