https://gcc.gnu.org/g:9116490c1b03dac18f10e42df03731a3aed0b4e9
commit r15-2158-g9116490c1b03dac18f10e42df03731a3aed0b4e9 Author: Georg-Johann Lay <a...@gjlay.de> Date: Fri Jul 19 18:22:26 2024 +0200 AVR: Support new built-in function __builtin_avr_mask1. gcc/ * config/avr/builtins.def (MASK1): New DEF_BUILTIN. * config/avr/avr.cc (avr_rtx_costs_1): Handle rtx costs for expressions like __builtin_avr_mask1. (avr_init_builtins) <uintQI_ftype_uintQI_uintQI>: New tree type. (avr_expand_builtin) [AVR_BUILTIN_MASK1]: Diagnose unexpected forms. (avr_fold_builtin) [AVR_BUILTIN_MASK1]: Handle case. * config/avr/avr.md (gen_mask1): New expand helper. (mask1_0x01_split, mask1_0x80_split, mask1_0xfe_split): New insn-and-split. (*mask1_0x01, *mask1_0x80, *mask1_0xfe): New insns. * doc/extend.texi (AVR Built-in Functions) <__builtin_avr_mask1>: Document new built-in function. gcc/testsuite/ * gcc.target/avr/torture/builtin-mask1.c: New test. Diff: --- gcc/config/avr/avr.cc | 62 +++++++++ gcc/config/avr/avr.md | 138 +++++++++++++++++++++ gcc/config/avr/builtins.def | 1 + gcc/doc/extend.texi | 17 +++ .../gcc.target/avr/torture/builtin-mask1.c | 118 ++++++++++++++++++ 5 files changed, 336 insertions(+) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index b9064424ffec..e941730452e5 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -13135,6 +13135,16 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, switch (mode) { case E_QImode: + if (speed + && XEXP (x, 0) == const1_rtx + && GET_CODE (XEXP (x, 1)) == AND) + { + // "*mask1_0x01" + // Leave the space costs alone as they are smaller than 7 here. + *total = COSTS_N_INSNS (7); + return true; + } + if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 
4 : 17); @@ -13308,6 +13318,15 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, break; case E_HImode: + if (CONST_INT_P (XEXP (x, 0)) + && INTVAL (XEXP (x, 0)) == 128 + && GET_CODE (XEXP (x, 1)) == AND) + { + // "*mask1_0x80" + *total = COSTS_N_INSNS (7); + return true; + } + if (!CONST_INT_P (XEXP (x, 1))) { *total = COSTS_N_INSNS (!speed ? 5 : 41); @@ -15796,6 +15815,11 @@ avr_init_builtins (void) = build_function_type_list (unsigned_intQI_type_node, unsigned_intQI_type_node, NULL_TREE); + tree uintQI_ftype_uintQI_uintQI + = build_function_type_list (unsigned_intQI_type_node, + unsigned_intQI_type_node, + unsigned_intQI_type_node, + NULL_TREE); tree uintHI_ftype_uintQI_uintQI = build_function_type_list (unsigned_intHI_type_node, unsigned_intQI_type_node, @@ -16080,6 +16104,22 @@ avr_expand_builtin (tree exp, rtx target, rtx /*subtarget*/, return NULL_RTX; } + case AVR_BUILTIN_MASK1: + { + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL); + int ival = CONST_INT_P (op0) ? 
0xff & INTVAL (op0) : 0; + + if (ival != 0x01 && ival != 0x7f && ival != 0x80 && ival != 0xfe) + { + error ("%s expects a compile time integer constant of 0x01, " + "0x7f, 0x80 or 0xfe as first argument", bname); + return target; + } + + break; + } + case AVR_BUILTIN_INSERT_BITS: { arg0 = CALL_EXPR_ARG (exp, 0); @@ -16247,6 +16287,28 @@ avr_fold_builtin (tree fndecl, int /*n_args*/, tree *arg, bool /*ignore*/) return build1 (VIEW_CONVERT_EXPR, val_type, arg[0]); + case AVR_BUILTIN_MASK1: + { + tree tmask = arg[0]; + tree toffs = arg[1]; + + if (TREE_CODE (tmask) == INTEGER_CST + && TREE_CODE (toffs) == INTEGER_CST) + { + switch (0xff & TREE_INT_CST_LOW (tmask)) + { + case 0x01: + case 0xfe: + return fold_build2 (LROTATE_EXPR, val_type, arg[0], toffs); + + case 0x80: + case 0x7f: + return fold_build2 (RROTATE_EXPR, val_type, arg[0], toffs); + } + } + break; + } // AVR_BUILTIN_MASK1 + case AVR_BUILTIN_INSERT_BITS: { tree tbits = arg[1]; diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 16adb7b85d93..91a306f25228 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -8163,6 +8163,144 @@ [(set_attr "adjust_len" "insert_bits")]) +;; __builtin_avr_mask1 + +(define_expand "gen_mask1" + [(parallel [(match_operand:QI 0 "register_operand") + (match_operand 1 "const_int_operand") + (match_operand:QI 2 "register_operand")])] + "" + { + switch (INTVAL (operands[1]) & 0xff) + { + case 0x01: + emit (gen_mask1_0x01_split (operands[0], operands[2])); + break; + case 0x80: + emit (gen_mask1_0x80_split (operands[0], operands[2])); + break; + case 0xfe: + emit (gen_mask1_0xfe_split (operands[0], operands[2])); + break; + case 0x7f: + // Sequences like below don't work for 0x7f because + // there is no ASL instruction. 
+ emit (gen_mask1_0x80_split (operands[0], operands[2])); + emit (gen_one_cmplqi2 (operands[0], operands[0])); + break; + default: + gcc_unreachable(); + } + DONE; + }) + +(define_insn_and_split "mask1_0x01_split" + [(set (match_operand:QI 0 "register_operand" "=&d") + (ashift:QI (const_int 1) + (and:QI (match_operand:QI 1 "register_operand" "r") + (const_int 7))))] + "" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (rotate:QI (const_int 1) + (match_dup 1))) + (clobber (reg:CC REG_CC))])]) + +(define_insn "*mask1_0x01" + [(set (match_operand:QI 0 "register_operand" "=&d") + (rotate:QI (const_int 1) + (match_operand:QI 1 "register_operand" "r"))) + (clobber (reg:CC REG_CC))] + "reload_completed" + { + return "ldi %0,1" CR_TAB + "sbrc %1,1" CR_TAB + "ldi %0,4" CR_TAB + "sbrc %1,0" CR_TAB + "lsl %0" CR_TAB + "sbrc %1,2" CR_TAB + "swap %0"; + } + [(set_attr "length" "7")]) + + +;; Use a representation as chosen by insn combine. +(define_insn_and_split "mask1_0x80_split" + [(set (match_operand:QI 0 "register_operand" "=&d") + (subreg:QI (ashiftrt:HI (const_int 128) + (and:QI (match_operand:QI 1 "register_operand" "r") + (const_int 7))) + 0))] + "" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (rotatert:QI (const_int -128) + (match_dup 1))) + (clobber (reg:CC REG_CC))])]) + +(define_insn "*mask1_0x80" + [(set (match_operand:QI 0 "register_operand" "=&d") + (rotatert:QI (const_int -128) + (match_operand:QI 1 "register_operand" "r"))) + (clobber (reg:CC REG_CC))] + "reload_completed" + { + return "ldi %0,0x80" CR_TAB + "sbrc %1,1" CR_TAB + "ldi %0,0x20" CR_TAB + "sbrc %1,0" CR_TAB + "lsr %0" CR_TAB + "sbrc %1,2" CR_TAB + "swap %0"; + } + [(set_attr "length" "7")]) + +(define_insn_and_split "" + [(set (match_operand:QI 0 "register_operand" "=&d") + (not:QI (rotate:QI (const_int -2) + (and:QI (match_operand:QI 1 "register_operand" "r") + (const_int 7)))))] + "" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (rotate:QI 
(const_int 1) + (match_dup 1))) + (clobber (reg:CC REG_CC))])]) + +(define_insn_and_split "mask1_0xfe_split" + [(set (match_operand:QI 0 "register_operand" "=&d") + (rotate:QI (const_int -2) + (and:QI (match_operand:QI 1 "register_operand" "r") + (const_int 7))))] + "" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (rotate:QI (const_int -2) + (match_dup 1))) + (clobber (reg:CC REG_CC))])]) + +(define_insn "*mask1_0xfe" + [(set (match_operand:QI 0 "register_operand" "=&d") + (rotate:QI (const_int -2) + (match_operand:QI 1 "register_operand" "r"))) + (clobber (reg:CC REG_CC))] + "reload_completed" + { + return "ldi %0,0xfd" CR_TAB + "sbrc %1,1" CR_TAB + "ldi %0,0xf7" CR_TAB + "sbrs %1,0" CR_TAB + "asr %0" CR_TAB + "sbrc %1,2" CR_TAB + "swap %0"; + } + [(set_attr "length" "7")]) + + ;; __builtin_avr_flash_segment ;; Just a helper for the next "official" expander. diff --git a/gcc/config/avr/builtins.def b/gcc/config/avr/builtins.def index 316bdebe4980..fbe7572b6e10 100644 --- a/gcc/config/avr/builtins.def +++ b/gcc/config/avr/builtins.def @@ -52,6 +52,7 @@ DEF_BUILTIN (FMULSU, 2, intHI_ftype_intQI_uintQI, fmulsu, NULL) DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_uintSI, nothing, NULL) DEF_BUILTIN (NOPS, -1, void_ftype_uintSI, nothing, NULL) +DEF_BUILTIN (MASK1, 2, uintQI_ftype_uintQI_uintQI, gen_mask1, NULL) DEF_BUILTIN (INSERT_BITS, 3, uintQI_ftype_uintSI_uintQI_uintQI, insert_bits, NULL) DEF_BUILTIN (FLASH_SEGMENT, 1, intQI_ftype_const_memx_ptr, flash_segment, NULL) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 0b572afca720..4b77599380b5 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -16974,6 +16974,23 @@ __builtin_avr_insert_bits (0x01234567, bits, 0); @end smallexample @enddefbuiltin +@defbuiltin{uint8_t __builtin_avr_mask1 (uint8_t @var{mask}, uint8_t @var{offs})} +Rotate the 8-bit constant value @var{mask} by an offset of @var{offs}, +where @var{mask} is in @{ 0x01, 0xfe, 0x7f, 0x80 @}. 
+This built-in can be used as an alternative to 8-bit expressions like +@code{1 << offs} when their computation consumes too much +time, and @var{offs} is known to be in the range 0@dots{}7. +@example +__builtin_avr_mask1 (1, offs) // same as 1 << offs +__builtin_avr_mask1 (~1, offs) // same as ~(1 << offs) +__builtin_avr_mask1 (0x80, offs) // same as 0x80 >> offs +__builtin_avr_mask1 (~0x80, offs) // same as ~(0x80 >> offs) +@end example +The open coded C versions take at least @code{5 + 4 * @var{offs}} cycles +(and 5 instructions), whereas the built-in takes 7 cycles and instructions +(8 cycles and instructions in the case of @code{@var{mask} = 0x7f}). +@enddefbuiltin + @defbuiltin{void __builtin_avr_nops (uint16_t @var{count})} Insert @var{count} @code{NOP} instructions. The number of instructions must be a compile-time integer constant. diff --git a/gcc/testsuite/gcc.target/avr/torture/builtin-mask1.c b/gcc/testsuite/gcc.target/avr/torture/builtin-mask1.c new file mode 100644 index 000000000000..9eb4be180da1 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/torture/builtin-mask1.c @@ -0,0 +1,118 @@ +/* { dg-do run } */ + +typedef __UINT8_TYPE__ u8; +typedef __INT8_TYPE__ i8; + +#define NI __attribute__((noipa)) + +NI u8 mask_01_0 (void) { return __builtin_avr_mask1 (1, 0); } +NI u8 mask_01_1 (void) { return __builtin_avr_mask1 (1, 1); } +NI u8 mask_01_2 (void) { return __builtin_avr_mask1 (1, 2); } +NI u8 mask_01_3 (void) { return __builtin_avr_mask1 (1, 3); } +NI u8 mask_01_4 (void) { return __builtin_avr_mask1 (1, 4); } +NI u8 mask_01_5 (void) { return __builtin_avr_mask1 (1, 5); } +NI u8 mask_01_6 (void) { return __builtin_avr_mask1 (1, 6); } +NI u8 mask_01_7 (void) { return __builtin_avr_mask1 (1, 7); } +NI u8 mask_01_8 (void) { return __builtin_avr_mask1 (1, 8); } +NI u8 mask_01_9 (void) { return __builtin_avr_mask1 (1, 9); } + +NI u8 mask_7f_0 (void) { return __builtin_avr_mask1 (0x7f, 0); } +NI u8 mask_7f_1 (void) { return __builtin_avr_mask1 
(0x7f, 1); } +NI u8 mask_7f_2 (void) { return __builtin_avr_mask1 (0x7f, 2); } +NI u8 mask_7f_3 (void) { return __builtin_avr_mask1 (0x7f, 3); } +NI u8 mask_7f_4 (void) { return __builtin_avr_mask1 (0x7f, 4); } +NI u8 mask_7f_5 (void) { return __builtin_avr_mask1 (0x7f, 5); } +NI u8 mask_7f_6 (void) { return __builtin_avr_mask1 (0x7f, 6); } +NI u8 mask_7f_7 (void) { return __builtin_avr_mask1 (0x7f, 7); } +NI u8 mask_7f_8 (void) { return __builtin_avr_mask1 (0x7f, 8); } + +NI u8 mask_80_0 (void) { return __builtin_avr_mask1 (0x80, 0); } +NI u8 mask_80_1 (void) { return __builtin_avr_mask1 (0x80, 1); } +NI u8 mask_80_2 (void) { return __builtin_avr_mask1 (0x80, 2); } +NI u8 mask_80_3 (void) { return __builtin_avr_mask1 (0x80, 3); } +NI u8 mask_80_4 (void) { return __builtin_avr_mask1 (0x80, 4); } +NI u8 mask_80_5 (void) { return __builtin_avr_mask1 (0x80, 5); } +NI u8 mask_80_6 (void) { return __builtin_avr_mask1 (0x80, 6); } +NI u8 mask_80_7 (void) { return __builtin_avr_mask1 (0x80, 7); } +NI u8 mask_80_8 (void) { return __builtin_avr_mask1 (0x80, 8); } + +NI u8 mask_fe_0 (void) { return __builtin_avr_mask1 (0xfe, 0); } +NI u8 mask_fe_1 (void) { return __builtin_avr_mask1 (0xfe, 1); } +NI u8 mask_fe_2 (void) { return __builtin_avr_mask1 (0xfe, 2); } +NI u8 mask_fe_3 (void) { return __builtin_avr_mask1 (0xfe, 3); } +NI u8 mask_fe_4 (void) { return __builtin_avr_mask1 (0xfe, 4); } +NI u8 mask_fe_5 (void) { return __builtin_avr_mask1 (0xfe, 5); } +NI u8 mask_fe_6 (void) { return __builtin_avr_mask1 (0xfe, 6); } +NI u8 mask_fe_7 (void) { return __builtin_avr_mask1 (0xfe, 7); } +NI u8 mask_fe_8 (void) { return __builtin_avr_mask1 (0xfe, 8); } + +NI u8 mask_0x01 (u8 x) { return __builtin_avr_mask1 (0x01, x); } +NI u8 mask_0x80 (u8 x) { return __builtin_avr_mask1 (0x80, x); } +NI u8 mask_0x7f (u8 x) { return __builtin_avr_mask1 (0x7f, x); } +NI u8 mask_0xfe (u8 x) { return __builtin_avr_mask1 (0xfe, x); } + +NI u8 not_mask_0x01 (u8 x) { return ~ __builtin_avr_mask1 
(0x01, x); } +NI u8 not_mask_0xfe (u8 x) { return ~ __builtin_avr_mask1 (0xfe, x); } +NI u8 not_mask_0x80 (u8 x) { return ~ __builtin_avr_mask1 (0x80, x); } +NI u8 not_mask_0x7f (u8 x) { return ~ __builtin_avr_mask1 (0x7f, x); } + +NI u8 rotl (u8 x, u8 y) +{ + u8 i; + for (i = 0; i < (7 & y); ++i) + x = (x << 1) | (x >> 7); + return x; +} + +NI u8 rotr (u8 x, u8 y) +{ + u8 i; + for (i = 0; i < (7 & y); ++i) + x = (x >> 1) | (x << 7); + return x; +} + + +NI u8 fun_0x01 (u8 x) { return rotl (0x01, x); } +NI u8 fun_0x80 (u8 x) { return rotr (0x80, x); } +NI u8 fun_0x7f (u8 x) { return rotr (0x7f, x); } +NI u8 fun_0xfe (u8 x) { return rotl (0xfe, x); } + +NI u8 fun2r (u8 x) +{ + x &= 7; + return 0x80 >> x; +} + +NI u8 fun2l (u8 x) +{ + x &= 7; + return 1 << x; +} + +int main (void) +{ + i8 x; + for (x = -10; x < 10; ++x) + { + if (mask_0x01 (x) != fun_0x01 (x)) __builtin_exit (__LINE__); + if (mask_0xfe (x) != fun_0xfe (x)) __builtin_exit (__LINE__); + if (mask_0x80 (x) != fun_0x80 (x)) __builtin_exit (__LINE__); + if (mask_0x7f (x) != fun_0x7f (x)) __builtin_exit (__LINE__); + + if (not_mask_0x01 (x) != fun_0xfe (x)) __builtin_exit (__LINE__); + if (not_mask_0xfe (x) != fun_0x01 (x)) __builtin_exit (__LINE__); + if (not_mask_0x80 (x) != fun_0x7f (x)) __builtin_exit (__LINE__); + if (not_mask_0x7f (x) != fun_0x80 (x)) __builtin_exit (__LINE__); + + if (fun2r (x) != fun_0x80 (x)) __builtin_exit (__LINE__); + if (fun2l (x) != fun_0x01 (x)) __builtin_exit (__LINE__); + } + + if (mask_01_0 () != mask_01_8 ())__builtin_exit (__LINE__); + if (mask_fe_0 () != mask_fe_8 ())__builtin_exit (__LINE__); + if (mask_80_0 () != mask_80_8 ())__builtin_exit (__LINE__); + if (mask_7f_0 () != mask_7f_8 ())__builtin_exit (__LINE__); + + return 0; +}