Pengxuan Zheng <[email protected]> writes:
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 15f08cebeb1..98ce85dfdae 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23621,6 +23621,36 @@ aarch64_simd_valid_and_imm (rtx op)
> return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_AND);
> }
>
> +/* Return true if OP is a valid SIMD and immediate which allows the and be
s/and be/and to be/
> + optimized as fmov. If ELT_SIZE is nonnull, it represents the size of the
> + register for fmov. */
Maybe rename this to ELT_BITSIZE (see below), and say:
If ELT_BITSIZE is nonnull, use it to return the number of bits to move.
> +bool
> +aarch64_simd_valid_and_imm_fmov (rtx op, unsigned int *elt_size)
> +{
> + machine_mode mode = GET_MODE (op);
> + gcc_assert (!aarch64_sve_mode_p (mode));
> +
> + auto_vec<target_unit, 16> buffer;
> + unsigned int n_bytes = GET_MODE_SIZE (mode).to_constant ();
> + buffer.reserve (n_bytes);
> +
> + bool ok = native_encode_rtx (mode, op, buffer, 0, n_bytes);
> + gcc_assert (ok);
> +
> + auto mask = native_decode_int (buffer, 0, n_bytes, n_bytes *
> BITS_PER_UNIT);
> + int set_bit = wi::exact_log2 (mask + 1);
> + if ((set_bit == 16 && TARGET_SIMD_F16INST)
> + || set_bit == 32
> + || set_bit == 64)
> + {
> + if (elt_size)
> + *elt_size = set_bit / BITS_PER_UNIT;
I didn't notice last time that the only consumer multiplies by BITS_PER_UNIT
again, so how about making this:
*elt_bitsize = set_bit;
and removing the later multiplication.
Please leave 24 hours for others to comment, but otherwise the patch is
ok with those changes, thanks.
Richard
> + return true;
> + }
> +
> + return false;
> +}
> +
> /* Return true if OP is a valid SIMD xor immediate for SVE. */
> bool
> aarch64_simd_valid_xor_imm (rtx op)
> @@ -25757,6 +25787,26 @@ aarch64_float_const_representable_p (rtx x)
> return aarch64_real_float_const_representable_p (r);
> }
>
> +/* Returns the string with the fmov instruction which is equivalent to an and
> + instruction with the SIMD immediate CONST_VECTOR. */
> +char*
> +aarch64_output_fmov (rtx const_vector)
> +{
> + bool is_valid;
> + static char templ[40];
> + char element_char;
> + unsigned int elt_size;
> +
> + is_valid = aarch64_simd_valid_and_imm_fmov (const_vector, &elt_size);
> + gcc_assert (is_valid);
> +
> + element_char = sizetochar (elt_size * BITS_PER_UNIT);
> + snprintf (templ, sizeof (templ), "fmov\t%%%c0, %%%c1",
> + element_char, element_char);
> +
> + return templ;
> +}
> +
> /* Returns the string with the instruction for the SIMD immediate
> * CONST_VECTOR of MODE and WIDTH. WHICH selects a move, and(bic) or orr.
> */
> char*
> diff --git a/gcc/config/aarch64/constraints.md
> b/gcc/config/aarch64/constraints.md
> index e8321c4d2fb..e9f69f823a6 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -466,6 +466,13 @@ (define_constraint "Do"
> (and (match_code "const_vector")
> (match_test "aarch64_simd_valid_orr_imm (op)")))
>
> +(define_constraint "Df"
> + "@internal
> + A constraint that matches a vector of immediates for and which can be
> + optimized as fmov."
> + (and (match_code "const_vector")
> + (match_test "aarch64_simd_valid_and_imm_fmov (op)")))
> +
> (define_constraint "Db"
> "@internal
> A constraint that matches vector of immediates for and/bic."
> diff --git a/gcc/config/aarch64/predicates.md
> b/gcc/config/aarch64/predicates.md
> index 1ab1c696c62..2c6af831eae 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -123,7 +123,8 @@ (define_predicate "aarch64_reg_or_orr_imm"
> (define_predicate "aarch64_reg_or_and_imm"
> (ior (match_operand 0 "register_operand")
> (and (match_code "const_vector")
> - (match_test "aarch64_simd_valid_and_imm (op)"))))
> + (ior (match_test "aarch64_simd_valid_and_imm (op)")
> + (match_test "aarch64_simd_valid_and_imm_fmov (op)")))))
>
> (define_predicate "aarch64_reg_or_xor_imm"
> (ior (match_operand 0 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c
> b/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c
> new file mode 100644
> index 00000000000..65dd4f52d09
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c
> @@ -0,0 +1,149 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbig-endian" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +typedef int v2si __attribute__ ((vector_size (8)));
> +typedef float v2sf __attribute__ ((vector_size (8)));
> +typedef short v4hi __attribute__ ((vector_size (8)));
> +typedef char v8qi __attribute__ ((vector_size (8)));
> +typedef long v2di __attribute__ ((vector_size (16)));
> +typedef double v2df __attribute__ ((vector_size (16)));
> +typedef int v4si __attribute__ ((vector_size (16)));
> +typedef float v4sf __attribute__ ((vector_size (16)));
> +typedef short v8hi __attribute__ ((vector_size (16)));
> +typedef char v16qi __attribute__ ((vector_size (16)));
> +
> +/*
> +** f_v4hi:
> +** fmov s0, s0
> +** ret
> +*/
> +v4hi
> +f_v4hi (v4hi x)
> +{
> + return x & (v4hi){ 0, 0, 0xffff, 0xffff };
> +}
> +
> +/*
> +** g_v4hi:
> +** movi d([0-9]+), 0xffff00000000ffff
> +** and v0.8b, v0.8b, v\1.8b
> +** ret
> +*/
> +v4hi
> +g_v4hi (v4hi x)
> +{
> + return x & (v4hi){ 0xffff, 0, 0, 0xffff };
> +}
> +
> +/*
> +** f_v8hi:
> +** fmov s0, s0
> +** ret
> +*/
> +v8hi
> +f_v8hi (v8hi x)
> +{
> + return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0xffff, 0xffff };
> +}
> +
> +/*
> +** g_v8hi:
> +** fmov d0, d0
> +** ret
> +*/
> +v8hi
> +g_v8hi (v8hi x)
> +{
> + return x & (v8hi){ 0, 0, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff };
> +}
> +
> +/*
> +** f_v2si:
> +** fmov s0, s0
> +** ret
> +*/
> +v2si
> +f_v2si (v2si x)
> +{
> + return x & (v2si){ 0, 0xffffffff };
> +}
> +
> +/*
> +** f_v2di:
> +** fmov d0, d0
> +** ret
> +*/
> +v2di
> +f_v2di (v2di x)
> +{
> + return x & (v2di){ 0, 0xffffffffffffffff };
> +}
> +
> +/*
> +** g_v2di:
> +** fmov s0, s0
> +** ret
> +*/
> +v2di
> +g_v2di (v2di x)
> +{
> + return x & (v2di){ 0, 0xffffffff };
> +}
> +
> +/*
> +** f_v4si:
> +** fmov s0, s0
> +** ret
> +*/
> +v4si
> +f_v4si (v4si x)
> +{
> + return x & (v4si){ 0, 0, 0, 0xffffffff };
> +}
> +
> +/*
> +** h_v4si:
> +** fmov d0, d0
> +** ret
> +*/
> +v4si
> +h_v4si (v4si x)
> +{
> + return x & (v4si){ 0, 0, 0xffffffff, 0xffffffff };
> +}
> +
> +/*
> +** f_v8qi:
> +** fmov s0, s0
> +** ret
> +*/
> +v8qi
> +f_v8qi (v8qi x)
> +{
> + return x & (v8qi){ 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff };
> +}
> +
> +/*
> +** f_v16qi:
> +** fmov d0, d0
> +** ret
> +*/
> +v16qi
> +f_v16qi (v16qi x)
> +{
> + return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0,
> + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
> +}
> +
> +/*
> +** g_v16qi:
> +** fmov s0, s0
> +** ret
> +*/
> +v16qi
> +g_v16qi (v16qi x)
> +{
> + return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff };
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c
> b/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c
> new file mode 100644
> index 00000000000..d969e2ab6b1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c
> @@ -0,0 +1,149 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mlittle-endian" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +typedef int v2si __attribute__ ((vector_size (8)));
> +typedef float v2sf __attribute__ ((vector_size (8)));
> +typedef short v4hi __attribute__ ((vector_size (8)));
> +typedef char v8qi __attribute__ ((vector_size (8)));
> +typedef long v2di __attribute__ ((vector_size (16)));
> +typedef double v2df __attribute__ ((vector_size (16)));
> +typedef int v4si __attribute__ ((vector_size (16)));
> +typedef float v4sf __attribute__ ((vector_size (16)));
> +typedef short v8hi __attribute__ ((vector_size (16)));
> +typedef char v16qi __attribute__ ((vector_size (16)));
> +
> +/*
> +** f_v4hi:
> +** fmov s0, s0
> +** ret
> +*/
> +v4hi
> +f_v4hi (v4hi x)
> +{
> + return x & (v4hi){ 0xffff, 0xffff, 0, 0 };
> +}
> +
> +/*
> +** g_v4hi:
> +** movi d([0-9]+), 0xffff00000000ffff
> +** and v0.8b, v0.8b, v\1.8b
> +** ret
> +*/
> +v4hi
> +g_v4hi (v4hi x)
> +{
> + return x & (v4hi){ 0xffff, 0, 0, 0xffff };
> +}
> +
> +/*
> +** f_v8hi:
> +** fmov s0, s0
> +** ret
> +*/
> +v8hi
> +f_v8hi (v8hi x)
> +{
> + return x & (v8hi){ 0xffff, 0xffff, 0, 0, 0, 0, 0, 0 };
> +}
> +
> +/*
> +** g_v8hi:
> +** fmov d0, d0
> +** ret
> +*/
> +v8hi
> +g_v8hi (v8hi x)
> +{
> + return x & (v8hi){ 0xffff, 0xffff, 0xffff, 0xffff, 0, 0, 0, 0 };
> +}
> +
> +/*
> +** f_v2si:
> +** fmov s0, s0
> +** ret
> +*/
> +v2si
> +f_v2si (v2si x)
> +{
> + return x & (v2si){ 0xffffffff, 0 };
> +}
> +
> +/*
> +** f_v2di:
> +** fmov d0, d0
> +** ret
> +*/
> +v2di
> +f_v2di (v2di x)
> +{
> + return x & (v2di){ 0xffffffffffffffff, 0 };
> +}
> +
> +/*
> +** g_v2di:
> +** fmov s0, s0
> +** ret
> +*/
> +v2di
> +g_v2di (v2di x)
> +{
> + return x & (v2di){ 0xffffffff, 0 };
> +}
> +
> +/*
> +** f_v4si:
> +** fmov s0, s0
> +** ret
> +*/
> +v4si
> +f_v4si (v4si x)
> +{
> + return x & (v4si){ 0xffffffff, 0, 0, 0 };
> +}
> +
> +/*
> +** h_v4si:
> +** fmov d0, d0
> +** ret
> +*/
> +v4si
> +h_v4si (v4si x)
> +{
> + return x & (v4si){ 0xffffffff, 0xffffffff, 0, 0 };
> +}
> +
> +/*
> +** f_v8qi:
> +** fmov s0, s0
> +** ret
> +*/
> +v8qi
> +f_v8qi (v8qi x)
> +{
> + return x & (v8qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0 };
> +}
> +
> +/*
> +** f_v16qi:
> +** fmov d0, d0
> +** ret
> +*/
> +v16qi
> +f_v16qi (v16qi x)
> +{
> + return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
> + 0, 0, 0, 0, 0, 0, 0, 0 };
> +}
> +
> +/*
> +** g_v16qi:
> +** fmov s0, s0
> +** ret
> +*/
> +v16qi
> +g_v16qi (v16qi x)
> +{
> + return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0,
> + 0, 0, 0, 0, 0, 0, 0, 0 };
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c
> b/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c
> new file mode 100644
> index 00000000000..1e38066b4cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c
> @@ -0,0 +1,90 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbig-endian" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +typedef int v2si __attribute__ ((vector_size (8)));
> +typedef short v4hi __attribute__ ((vector_size (8)));
> +typedef char v8qi __attribute__ ((vector_size (8)));
> +typedef long v2di __attribute__ ((vector_size (16)));
> +typedef int v4si __attribute__ ((vector_size (16)));
> +typedef short v8hi __attribute__ ((vector_size (16)));
> +typedef char v16qi __attribute__ ((vector_size (16)));
> +
> +/*
> +** f_v2di:
> +** fmov h0, h0
> +** ret
> +*/
> +v2di
> +f_v2di (v2di x)
> +{
> + return x & (v2di){ 0, 0xffff };
> +}
> +
> +/*
> +** f_v4si:
> +** fmov h0, h0
> +** ret
> +*/
> +v4si
> +f_v4si (v4si x)
> +{
> + return x & (v4si){ 0, 0, 0, 0xffff };
> +}
> +
> +/*
> +** f_v2si:
> +** fmov h0, h0
> +** ret
> +*/
> +v2si
> +f_v2si (v2si x)
> +{
> + return x & (v2si){ 0, 0xffff };
> +}
> +
> +/*
> +** f_v8hi:
> +** fmov h0, h0
> +** ret
> +*/
> +v8hi
> +f_v8hi (v8hi x)
> +{
> + return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0xffff };
> +}
> +
> +/*
> +** f_v4hi:
> +** fmov h0, h0
> +** ret
> +*/
> +v4hi
> +f_v4hi (v4hi x)
> +{
> + return x & (v4hi){ 0, 0, 0, 0xffff };
> +}
> +
> +/*
> +** f_v16qi:
> +** fmov h0, h0
> +** ret
> +*/
> +v16qi
> +f_v16qi (v16qi x)
> +{
> + return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
> +}
> +
> +/*
> +** f_v8qi:
> +** fmov h0, h0
> +** ret
> +*/
> +v8qi
> +f_v8qi (v8qi x)
> +{
> + return x & (v8qi){ 0, 0, 0, 0, 0, 0, 0xff, 0xff };
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c
> b/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c
> new file mode 100644
> index 00000000000..7627680a0b2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c
> @@ -0,0 +1,90 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mlittle-endian" } */
> +/* { dg-final { check-function-bodies "**" "" "" } } */
> +
> +#pragma GCC target ("arch=armv8.2-a+fp16")
> +
> +typedef int v2si __attribute__ ((vector_size (8)));
> +typedef short v4hi __attribute__ ((vector_size (8)));
> +typedef char v8qi __attribute__ ((vector_size (8)));
> +typedef long v2di __attribute__ ((vector_size (16)));
> +typedef int v4si __attribute__ ((vector_size (16)));
> +typedef short v8hi __attribute__ ((vector_size (16)));
> +typedef char v16qi __attribute__ ((vector_size (16)));
> +
> +/*
> +** f_v2di:
> +** fmov h0, h0
> +** ret
> +*/
> +v2di
> +f_v2di (v2di x)
> +{
> + return x & (v2di){ 0xffff, 0 };
> +}
> +
> +/*
> +** f_v4si:
> +** fmov h0, h0
> +** ret
> +*/
> +v4si
> +f_v4si (v4si x)
> +{
> + return x & (v4si){ 0xffff, 0, 0, 0 };
> +}
> +
> +/*
> +** f_v2si:
> +** fmov h0, h0
> +** ret
> +*/
> +v2si
> +f_v2si (v2si x)
> +{
> + return x & (v2si){ 0xffff, 0 };
> +}
> +
> +/*
> +** f_v8hi:
> +** fmov h0, h0
> +** ret
> +*/
> +v8hi
> +f_v8hi (v8hi x)
> +{
> + return x & (v8hi){ 0xffff, 0, 0, 0, 0, 0, 0, 0 };
> +}
> +
> +/*
> +** f_v4hi:
> +** fmov h0, h0
> +** ret
> +*/
> +v4hi
> +f_v4hi (v4hi x)
> +{
> + return x & (v4hi){ 0xffff, 0, 0, 0 };
> +}
> +
> +/*
> +** f_v16qi:
> +** fmov h0, h0
> +** ret
> +*/
> +v16qi
> +f_v16qi (v16qi x)
> +{
> + return x & (v16qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
> +}
> +
> +/*
> +** f_v8qi:
> +** fmov h0, h0
> +** ret
> +*/
> +v8qi
> +f_v8qi (v8qi x)
> +{
> + return x & (v8qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0 };
> +}