On Sat, 2019-01-26 at 00:00 +0100, Jakub Jelinek wrote:
>
> > + /* Verify that there is no overlap in what bits are set in the two
> > masks. */
> > + if ((m1 + m2 + 1) != 0)
> > + return false;
>
> Wouldn't that be clearer to test
> if (m1 + m2 != HOST_WIDE_INT_1U)
> return false;
> ?
> You IMHO also should test
> if ((m1 & m2) != 0)
> return false;
I think you mean HOST_WIDE_INT_M1U, not HOST_WIDE_INT_1U. That does
seem clearer and I made that change as well as adding the second test.
> >
> > + t = popcount_hwi (m2);
> > + t = (1 << t) - 1;
>
> This should be (HOST_WIDE_INT_1U << t), otherwise if popcount of m2 is
> > = 32, there will be UB.
Fixed.
> As mentioned in rs6000.md, I believe you also need a similar pattern where
> the two ANDs are swapped, because they have the same priority.
I fixed the long lines in aarch64.md and I added a second pattern for
the *aarch64_bfi<GPI:mode>4_noshift pattern, swapping the order of the
IOR operands. I did not swap the AND operands, I assume the compiler
would ensure that the register was always before the constant or that
it would check both orderings.
I tried adding a second version of *aarch64_bfi<GPI:mode>5_shift as
well but when I tested it, it never got used during a bootstrap build
or a GCC test run so I decided it was not needed.
Tested on aarch64 with a bootstrap build and a GCC testsuite run.
OK to checkin?
Steve Ellcey
[email protected]
2019-01-28 Steve Ellcey <[email protected]>
PR rtl-optimization/87763
* config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p):
New prototype.
* config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
New function.
* config/aarch64/aarch64.md (*aarch64_bfi<GPI:mode>5_shift):
New instruction.
(*aarch64_bfi<GPI:mode>4_noshift): Ditto.
(*aarch64_bfi<GPI:mode>4_noshift_alt): Ditto.
2019-01-28 Steve Ellcey <[email protected]>
PR rtl-optimization/87763
* gcc.target/aarch64/combine_bfxil.c: Change some bfxil checks
to bfi.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b035e35f33b..b6c0d0a8eb6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx);
void aarch64_declare_function_name (FILE *, const char*, tree);
bool aarch64_legitimate_pic_operand_p (rtx);
bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
+bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT);
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7c453cdad0..9a3080b5db8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9330,6 +9330,41 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
& ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
}
+/* Return true if the masks and a shift amount from an RTX of the form
+   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
+   a BFI instruction of mode MODE.  See *aarch64_bfi patterns.  */
+
+bool
+aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
+				   unsigned HOST_WIDE_INT mask1,
+				   unsigned HOST_WIDE_INT shft_amnt,
+				   unsigned HOST_WIDE_INT mask2)
+{
+  unsigned HOST_WIDE_INT t;
+
+  /* Verify that there is no overlap in what bits are set in the two masks.
+     Together the checks require the masks to be mutually exclusive and
+     to cover every bit of the HOST_WIDE_INT between them.  */
+  if ((mask1 + mask2) != HOST_WIDE_INT_M1U)
+    return false;
+  if ((mask1 & mask2) != 0)
+    return false;
+
+  /* Verify that the shift amount is less than the mode size.  */
+  if (shft_amnt >= GET_MODE_BITSIZE (mode))
+    return false;
+
+  /* Verify that the mask being shifted is contiguous and would be in the
+     least significant bits after shifting by creating a mask 't' based on
+     the number of bits set in mask2 and the shift amount for mask2 and
+     comparing that to the actual mask2.  */
+  t = popcount_hwi (mask2);
+  t = (HOST_WIDE_INT_1U << t) - 1;
+  t = t << shft_amnt;
+  if (t != mask2)
+    return false;
+
+  return true;
+}
+
/* Calculate the cost of calculating X, storing it in *COST. Result
is true if the total cost of the operation has now been calculated. */
static bool
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b7f6fe0f135..6b5339092ba 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5476,6 +5476,56 @@
[(set_attr "type" "bfm")]
)
+;; Match a bfi instruction where the shift of OP3 means that we are
+;; actually copying the least significant bits of OP3 into OP0 by way
+;; of the AND masks and the IOR instruction.  A similar instruction
+;; with the two parts of the IOR swapped around was never triggered
+;; in a bootstrap build and test of GCC so it was not included.
+
+(define_insn "*aarch64_bfi<GPI:mode>5_shift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (and:GPI (ashift:GPI
+			   (match_operand:GPI 3 "register_operand" "r")
+			   (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))
+			  (match_operand:GPI 5 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
+				      UINTVAL (operands[4]),
+				      UINTVAL (operands[5]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5"
+  [(set_attr "type" "bfm")]
+)
+
+;; Like the above instruction but with no shifting, we are just copying
+;; the least significant bits of OP3 to OP0.  We need two versions of
+;; the pattern because the register tied to the output (constraint "0")
+;; can appear on either side of the IOR, and the mask arguments passed
+;; to the predicate check swap accordingly.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (and:GPI (match_operand:GPI 3 "register_operand" "r")
+			  (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0,
+				      UINTVAL (operands[4]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4"
+  [(set_attr "type" "bfm")]
+)
+
+;; As above, but with the IOR operands swapped: here the inserted
+;; register is OP1 and the tied input/output register is OP3.
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r")
+			  (match_operand:GPI 2 "const_int_operand" "n"))
+		 (and:GPI (match_operand:GPI 3 "register_operand" "0")
+			  (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[4]), 0,
+				      UINTVAL (operands[2]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>1, 0, %P2"
+  [(set_attr "type" "bfm")]
+)
+
(define_insn "*extr_insv_lower_reg<mode>"
[(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
(match_operand 1 "const_int_operand" "n")
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
index 109f989a2f0..c3b5fc58800 100644
--- a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
@@ -114,4 +114,5 @@ main (void)
return 0;
}
-/* { dg-final { scan-assembler-times "bfxil\\t" 18 } } */
+/* { dg-final { scan-assembler-times "bfxil\\t" 3 } } */
+/* { dg-final { scan-assembler-times "bfi\\t" 15 } } */