On Thu, Jan 24, 2019 at 11:17:45PM +0000, Steve Ellcey wrote:
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -9294,6 +9294,44 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
> & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
> }
>
> +/* Return true if the masks and a shift amount from an RTX of the form
> +   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
> +   a BFI instruction of mode MODE.  See *aarch64_bfi patterns.  */
> +
> +bool
> +aarch64_masks_and_shift_for_aarch64_bfi_p (scalar_int_mode mode, rtx mask1,
> +                                           rtx shft_amnt, rtx mask2)
> +{
> +  unsigned HOST_WIDE_INT m1, m2, s, t;
> +
> +  if (!CONST_INT_P (mask1) || !CONST_INT_P (mask2) || !CONST_INT_P (shft_amnt))
> +    return false;
> +
> +  m1 = UINTVAL (mask1);
> +  m2 = UINTVAL (mask2);
> +  s = UINTVAL (shft_amnt);
> +
> +  /* Verify that there is no overlap in what bits are set in the two masks.  */
> +  if ((m1 + m2 + 1) != 0)
> +    return false;
Wouldn't it be clearer to test
  if (m1 + m2 != HOST_WIDE_INT_M1U)
    return false;
?
IMHO you should also test
  if ((m1 & m2) != 0)
    return false;
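I.e. something like this for the two tests together (an untested
sketch):

  /* Verify that the masks are disjoint and together cover all bits
     of the mode.  */
  if ((m1 & m2) != 0 || m1 + m2 != HOST_WIDE_INT_M1U)
    return false;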
> +
> +  /* Verify that the shift amount is less than the mode size.  */
> +  if (s >= GET_MODE_BITSIZE (mode))
> +    return false;
> +
> +  /* Verify that the mask being shifted is contiguous and would be in the
> +     least significant bits after shifting by creating a mask 't' based on
> +     the number of bits set in mask2 and the shift amount for mask2 and
> +     comparing that to the actual mask2.  */
> +  t = popcount_hwi (m2);
> +  t = (1 << t) - 1;
This should be (HOST_WIDE_INT_1U << t), otherwise if popcount of m2 is
>= 32, there will be UB.
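I.e. just (a sketch of that one statement):

  t = (HOST_WIDE_INT_1U << t) - 1;

so that the shift is performed on unsigned HOST_WIDE_INT rather than
on int.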
> +  t = t << s;
> +  if (t != m2)
> +    return false;
> +
> +  return true;
> +}
> +
> +;; Match a bfi instruction where the shift of OP3 means that we are
> +;; actually copying the least significant bits of OP3 into OP0 by way
> +;; of the AND masks and the IOR instruction.
> +
> +(define_insn "*aarch64_bfi<GPI:mode>4_shift"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
> +                          (match_operand:GPI 2 "const_int_operand" "n"))
> +                 (and:GPI (ashift:GPI
> +                            (match_operand:GPI 3 "register_operand" "r")
> +                            (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))
> +                          (match_operand:GPI 5 "const_int_operand" "n"))))]
> +  "aarch64_masks_and_shift_for_aarch64_bfi_p (<MODE>mode, operands[2], operands[4], operands[5])"
Too long lines.
> +{
> +  return "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5";
> +}
> +  [(set_attr "type" "bfm")]
> +)
As I mentioned for the rs6000.md patterns, I believe you also need a
similar pattern where the two ANDs are swapped, because they have the
same priority.
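I.e. something along these lines (completely untested, the *_alt name
and the operand numbering are just for illustration):

(define_insn "*aarch64_bfi<GPI:mode>4_shift_alt"
  [(set (match_operand:GPI 0 "register_operand" "=r")
        (ior:GPI (and:GPI (ashift:GPI
                            (match_operand:GPI 1 "register_operand" "r")
                            (match_operand:GPI 2 "aarch64_simd_shift_imm_<mode>" "n"))
                          (match_operand:GPI 3 "const_int_operand" "n"))
                 (and:GPI (match_operand:GPI 4 "register_operand" "0")
                          (match_operand:GPI 5 "const_int_operand" "n"))))]
  "aarch64_masks_and_shift_for_aarch64_bfi_p (<MODE>mode, operands[5],
                                              operands[2], operands[3])"
  "bfi\t%<GPI:w>0, %<GPI:w>1, %2, %P3"
  [(set_attr "type" "bfm")]
)

The point is that the register tied to the output with "0" has to stay
the one whose AND keeps the destination bits.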
> +
> +;; Like the above instruction but with no shifting, we are just copying the
> +;; least significant bits of OP3 to OP0.
> +
> +(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
> +  [(set (match_operand:GPI 0 "register_operand" "=r")
> +        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
> +                          (match_operand:GPI 2 "const_int_operand" "n"))
> +                 (and:GPI (match_operand:GPI 3 "register_operand" "r")
> +                          (match_operand:GPI 4 "const_int_operand" "n"))))]
> +  "aarch64_masks_and_shift_for_aarch64_bfi_p (<MODE>mode, operands[2], const0_rtx, operands[4])"
Too long line.
I guess this one has a similar issue: you might need two patterns for
the two AND orderings (though the "0" constraint needs to be on the
right operand in each case).
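Again something like (completely untested, name just for illustration):

(define_insn "*aarch64_bfi<GPI:mode>4_noshift_alt"
  [(set (match_operand:GPI 0 "register_operand" "=r")
        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r")
                          (match_operand:GPI 2 "const_int_operand" "n"))
                 (and:GPI (match_operand:GPI 3 "register_operand" "0")
                          (match_operand:GPI 4 "const_int_operand" "n"))))]
  "aarch64_masks_and_shift_for_aarch64_bfi_p (<MODE>mode, operands[4],
                                              const0_rtx, operands[2])"
  "bfi\t%<GPI:w>0, %<GPI:w>1, 0, %P2"
  [(set_attr "type" "bfm")]
)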
Jakub