On Tue, 2019-02-05 at 21:12 +0000, Wilco Dijkstra wrote:

> +bool
> +aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
> +                                   unsigned HOST_WIDE_INT mask1,
> +                                   unsigned HOST_WIDE_INT shft_amnt,
> +                                   unsigned HOST_WIDE_INT mask2)
> +{
> +  unsigned HOST_WIDE_INT t;
> +
> +  /* Verify that there is no overlap in what bits are set in the two masks.  */
> +  if ((mask1 + mask2) != HOST_WIDE_INT_M1U)
> +    return false;
> +  if ((mask1 & mask2) != 0)
> +    return false;
>
> Why not check mask1 == ~mask2? That's much simpler...
Yes, that would be simpler.  I made that change.

> +  /* Verify that the shift amount is less than the mode size.  */
> +  if (shft_amnt >= GET_MODE_BITSIZE (mode))
> +    return false;
>
> The md pattern already guarantees this (this could be an assert). We need
> to check that shift+width <= mode size. Given immediates are limited to
> the mode size, the easiest way is to check mask2 is not 0 or M1.

OK, I changed the if statement to a gcc_assert and added a check to make
sure mask2 is not 0 or M1.

> +  /* Verify that the mask being shifted is contiguous and would be in the
> +     least significant bits after shifting by creating a mask 't' based on
> +     the number of bits set in mask2 and the shift amount for mask2 and
> +     comparing that to the actual mask2.  */
> +  t = popcount_hwi (mask2);
> +  t = (HOST_WIDE_INT_1U << t) - 1;
> +  t = t << shft_amnt;
> +  if (t != mask2)
> +    return false;
> +
> +  return true;
>
> This would return true if mask2 == 0 or M1 (I think this can't happen with
> current md patterns, but this would emit an illegal bfi).
>
> After special cases you could do something like t = mask2 + (HWI_1U << shift);
> return t == (t & -t) to check for a valid bfi.

I am not sure I follow this logic, and my attempts to use it did not work, so
I kept my original code.  (A small standalone comparison of the two checks is
included after the ChangeLog entries below.)

> +  "bfi\t%<GPI:w>0, %<GPI:w>1, 0, %P2"
>
> This could emit a width that may be 32 too large in SImode if bit 31 is set
> (there is another use of %P in aarch64.md which may have the same
> issue).

I am not sure why having bit 31 set would be a problem.  Sign extension?

> Finally from some quick testing it seems one case is not yet
> supported:
>
> int f1(int x, int y)
> {
>   return (y & 0xfffffff) | (((x <<28) & 0xf0000000));
> }
>
> This just has a shift, rather than an AND plus a shift.

I added another instruction to handle this case and added a new test to
check for it.

Steve Ellcey
sell...@marvell.com


Here is the latest version of the patch.


2019-02-06  Steve Ellcey  <sell...@marvell.com>

	PR rtl-optimization/87763
	* config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p):
	New prototype.
	* config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
	New function.
	* config/aarch64/aarch64.md (*aarch64_bfi<GPI:mode>5_shift):
	New instruction.
	(*aarch64_bfi<GPI:mode>4_noand): Ditto.
	(*aarch64_bfi<GPI:mode>4_noshift): Ditto.
	(*aarch64_bfi<GPI:mode>4_noshift_alt): Ditto.


2019-02-06  Steve Ellcey  <sell...@marvell.com>

	PR rtl-optimization/87763
	* gcc.target/aarch64/combine_bfxil.c: Change some bfxil checks to bfi.
	* gcc.target/aarch64/combine_bfi_2.c: New test.

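For anyone following along, here is a small standalone sketch of the two
contiguity checks discussed above: the popcount-based test I kept in
aarch64_masks_and_shift_for_bfi_p, and my reading of the
"t = mask2 + (1 << shift); t == (t & -t)" form Wilco suggested.  This is
plain C for illustration only, not part of the patch; uint64_t and
__builtin_popcountll stand in for unsigned HOST_WIDE_INT and popcount_hwi,
and the helper names are made up.

#include <stdint.h>
#include <stdio.h>

/* Check kept in the patch: build a mask with popcount (mask2) low-order
   ones, shift it into place, and compare against mask2.  */
static int
contiguous_popcount (uint64_t mask2, unsigned shift)
{
  uint64_t t = ((uint64_t) 1 << __builtin_popcountll (mask2)) - 1;
  return (t << shift) == mask2;
}

/* Suggested check: once mask2 == 0 and mask2 == ~0 are ruled out, add the
   lowest bit of the field.  A contiguous field carries out to a single bit
   (or wraps to zero when the field reaches the top bit), so the sum is a
   power of two or zero, i.e. t == (t & -t).  */
static int
contiguous_carry (uint64_t mask2, unsigned shift)
{
  uint64_t t = mask2 + ((uint64_t) 1 << shift);
  return t == (t & -t);
}

int
main (void)
{
  struct { uint64_t mask2; unsigned shift; } tests[] = {
    { 0xf0000000u, 28 },            /* The f1/f2 case: contiguous, valid.  */
    { 0x0ff00000u, 20 },            /* Contiguous, valid.  */
    { 0x90000000u, 28 },            /* Hole in the field: not valid.  */
    { 0xfffffffffffffff0ull, 4 },   /* Field runs up to bit 63: valid.  */
  };

  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    printf ("mask2=%#llx shift=%u popcount-check=%d carry-check=%d\n",
            (unsigned long long) tests[i].mask2, tests[i].shift,
            contiguous_popcount (tests[i].mask2, tests[i].shift),
            contiguous_carry (tests[i].mask2, tests[i].shift));
  return 0;
}

For the f1/f2 functions in the new test, the inserted field is the 4 bits of
0xf0000000 at lsb 28, which matches the width the new
*aarch64_bfi<GPI:mode>4_noand pattern derives as GET_MODE_BITSIZE (<MODE>mode)
minus the shift amount, i.e. 32 - 28 = 4 for SImode.
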
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b035e35f33b..b6c0d0a8eb6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx);
 void aarch64_declare_function_name (FILE *, const char*, tree);
 bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
+bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT,
+                                        unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7c453cdad0..26b5ab47d6f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9330,6 +9330,42 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
 	  & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
 }
 
+/* Return true if the masks and a shift amount from an RTX of the form
+   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
+   a BFI instruction of mode MODE.  See *aarch64_bfi patterns.  */
+
+bool
+aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
+                                   unsigned HOST_WIDE_INT mask1,
+                                   unsigned HOST_WIDE_INT shft_amnt,
+                                   unsigned HOST_WIDE_INT mask2)
+{
+  unsigned HOST_WIDE_INT t;
+
+  /* Verify that there is no overlap in what bits are set in the two masks.  */
+  if (mask1 != ~mask2)
+    return false;
+
+  /* Verify that mask2 is not all zeros or ones.  */
+  if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
+    return false;
+
+  /* The shift amount should always be less than the mode size.  */
+  gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
+
+  /* Verify that the mask being shifted is contiguous and would be in the
+     least significant bits after shifting by creating a mask 't' based on
+     the number of bits set in mask2 and the shift amount for mask2 and
+     comparing that to the actual mask2.  */
+  t = popcount_hwi (mask2);
+  t = (HOST_WIDE_INT_1U << t) - 1;
+  t = t << shft_amnt;
+  if (t != mask2)
+    return false;
+
+  return true;
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b7f6fe0f135..2bbd3f1055c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5476,6 +5476,76 @@
   [(set_attr "type" "bfm")]
 )
 
+;; Match a bfi instruction where the shift of OP3 means that we are
+;; actually copying the least significant bits of OP3 into OP0 by way
+;; of the AND masks and the IOR instruction.  A similar instruction
+;; with the two parts of the IOR swapped around was never triggered
+;; in a bootstrap build and test of GCC so it was not included.
+
+(define_insn "*aarch64_bfi<GPI:mode>5_shift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (and:GPI (ashift:GPI
+			    (match_operand:GPI 3 "register_operand" "r")
+			    (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))
+			  (match_operand:GPI 5 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
+				      UINTVAL (operands[4]),
+				      UINTVAL (operands[5]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5"
+  [(set_attr "type" "bfm")]
+)
+
+;; Like *aarch64_bfi<GPI:mode>5_shift but with no and of the ashift because
+;; the shift is large enough to remove the need for an AND instruction.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noand"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (ashift:GPI
+		   (match_operand:GPI 3 "register_operand" "r")
+		   (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
+				      UINTVAL (operands[4]),
+				      HOST_WIDE_INT_M1U << UINTVAL (operands[4]))"
+{
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - UINTVAL (operands[4]));
+  return "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %5";
+}
+  [(set_attr "type" "bfm")]
+)
+
+;; Like *aarch64_bfi<GPI:mode>5_shift but with no shifting, we are just
+;; copying the least significant bits of OP3 to OP0.  In this case we do
+;; need two versions of the instruction to handle different checks on the
+;; constant values.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (and:GPI (match_operand:GPI 3 "register_operand" "r")
+			  (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0,
+				      UINTVAL (operands[4]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4"
+  [(set_attr "type" "bfm")]
+)
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (and:GPI (match_operand:GPI 3 "register_operand" "0")
+			  (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[4]), 0,
+				      UINTVAL (operands[2]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>1, 0, %P2"
+  [(set_attr "type" "bfm")]
+)
+
 (define_insn "*extr_insv_lower_reg<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
			   (match_operand 1 "const_int_operand" "n")
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c b/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c
index e69de29bb2d..145282d4d55 100644
--- a/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfi_2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int f1(int x, int y)
+{
+  return (y & 0xfffffff) | (((x <<28) & 0xf0000000));
+}
+
+
+int f2(int x, int y)
+{
+  return (((x <<28) & 0xf0000000)) | (y & 0xfffffff);
+}
+
+/* { dg-final { scan-assembler-times {\tbfi\t} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
index 109f989a2f0..c3b5fc58800 100644
--- a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
@@ -114,4 +114,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "bfxil\\t" 18 } } */
+/* { dg-final { scan-assembler-times "bfxil\\t" 3 } } */
+/* { dg-final { scan-assembler-times "bfi\\t" 15 } } */
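
P.S.  On the %P question above, my best guess at what bit 31 has to do with
it (and why I asked about sign extension) is this: a CONST_INT holding an
SImode value is stored sign-extended into the 64-bit HOST_WIDE_INT, so if the
%P output modifier simply counts the set bits of INTVAL, a mask such as
0xf0000000 arrives as 0xfffffffff0000000 and the printed width would be 36
instead of 4, i.e. 32 too large.  A trivial standalone illustration (plain C,
not GCC code; the popcount here is only my assumption about what %P does):

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* The SImode mask as written in the source...  */
  uint32_t simode_mask = 0xf0000000u;

  /* ...and the same value the way a CONST_INT would store it,
     sign-extended to the host-wide-int width.  */
  int64_t as_const_int = (int32_t) simode_mask;

  /* 4 set bits in the 32-bit view.  */
  printf ("popcount of SImode value:          %d\n",
          __builtin_popcountll ((uint64_t) simode_mask));

  /* 36 set bits once the sign extension is counted as well.  */
  printf ("popcount of sign-extended value:   %d\n",
          __builtin_popcountll ((uint64_t) as_const_int));

  /* Masking back to the mode width before counting recovers the
     expected width of 4.  */
  printf ("popcount after masking to 32 bits: %d\n",
          __builtin_popcountll ((uint64_t) as_const_int & 0xffffffffu));
  return 0;
}

If that is the issue, masking the constant to the mode width before counting
the bits would give the expected field width, but I will wait for
confirmation that this is actually what the comment refers to.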