On Thu, Nov 2, 2023 at 4:15 AM <[email protected]> wrote:
>
> From: Pan Li <[email protected]>
>
> extract_low_bits only tries scalar modes when the bitsizes of mode
> and src_mode are not equal. When a vector mode is passed in from
> get_stored_val in DSE, it always fails and returns NULL_RTX.
>
> This patch allows vector modes in extract_low_bits if and only if
> the size of mode is less than or equal to the size of
> src_mode.
>
> Given the example code below, compiled with --param=riscv-autovec-preference=fixed-vlmax:
>
> vuint8m1_t test () {
> uint8_t arr[32] = {
> 1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
> 1, 2, 7, 1, 3, 4, 5, 3, 1, 0, 1, 2, 4, 4, 9, 9,
> };
>
> return __riscv_vle8_v_u8m1(arr, 32);
> }
>
> Before this patch:
>
> test:
> lui a5,%hi(.LANCHOR0)
> addi sp,sp,-32
> addi a5,a5,%lo(.LANCHOR0)
> li a3,32
> vl2re64.v v2,0(a5)
> vsetvli zero,a3,e8,m1,ta,ma
> vs2r.v v2,0(sp) <== Unnecessary store to stack
> vle8.v v1,0(sp) <== Ditto
> vs1r.v v1,0(a0)
> addi sp,sp,32
> jr ra
>
> After this patch:
>
> test:
> lui a5,%hi(.LANCHOR0)
> addi a5,a5,%lo(.LANCHOR0)
> li a4,32
> addi sp,sp,-32
> vsetvli zero,a4,e8,m1,ta,ma
> vle8.v v1,0(a5)
> vs1r.v v1,0(a0)
> addi sp,sp,32
> jr ra
>
> The following tests pass with this patch:
>
> * The x86 bootstrap and regression test.
> * The aarch64 regression test.
> * The risc-v regression test.
>
> PR target/111720
>
> gcc/ChangeLog:
>
> * expmed.cc (extract_low_bits): Allow vector mode if the
> mode size is less than or equal to src_mode.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/pr111720-0.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-1.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-10.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-2.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-3.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-4.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-5.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-6.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-7.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-8.c: New test.
> * gcc.target/riscv/rvv/base/pr111720-9.c: New test.
>
> Signed-off-by: Pan Li <[email protected]>
> ---
> gcc/expmed.cc | 44 ++++++++++++-------
> .../gcc.target/riscv/rvv/base/pr111720-0.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-1.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-10.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-2.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-3.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-4.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-5.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-6.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-7.c | 21 +++++++++
> .../gcc.target/riscv/rvv/base/pr111720-8.c | 18 ++++++++
> .../gcc.target/riscv/rvv/base/pr111720-9.c | 15 +++++++
> 12 files changed, 227 insertions(+), 15 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
>
> diff --git a/gcc/expmed.cc b/gcc/expmed.cc
> index b294eabb08d..5db83fe638c 100644
> --- a/gcc/expmed.cc
> +++ b/gcc/expmed.cc
> @@ -2403,8 +2403,6 @@ extract_split_bit_field (rtx op0, opt_scalar_int_mode
> op0_mode,
> rtx
> extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
> {
> - scalar_int_mode int_mode, src_int_mode;
> -
> if (mode == src_mode)
> return src;
>
> @@ -2437,22 +2435,38 @@ extract_low_bits (machine_mode mode, machine_mode
> src_mode, rtx src)
> return x;
> }
>
> - if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
> - || !int_mode_for_mode (mode).exists (&int_mode))
> - return NULL_RTX;
> + if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (src_mode))
When there are integer modes for the vector modes, you now take a different
path. A less regression-prone approach would be to write it as:
if (int_mode_for_mode (src_mode).exists (&src_int_mode)
&& int_mode_for_mode (mode).exists (&int_mode))
{
... old code ...
}
else if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (src_mode))
{
... new code ...
}
else
return NULL_RTX;
> + {
> + if (maybe_gt (GET_MODE_BITSIZE (mode), GET_MODE_BITSIZE (src_mode))
> + || !targetm.modes_tieable_p (mode, src_mode))
> + return NULL_RTX;
>
> - if (!targetm.modes_tieable_p (src_int_mode, src_mode))
> - return NULL_RTX;
> - if (!targetm.modes_tieable_p (int_mode, mode))
> - return NULL_RTX;
> + /* For vector mode, only the bitsize (mode) <= bitsize (src_mode) and
> + tieable is allowed here. */
> + src = gen_lowpart (mode, src);
So you're really expecting to generate a subreg here? Given that "vector
register layout" isn't very well defined, I fear it's going to be difficult
to guarantee the desired semantics of this function. IIRC powerpc64le has
big-endian lane order, for example.
> + }
> + else
> + {
> + scalar_int_mode int_mode, src_int_mode;
>
> - src = gen_lowpart (src_int_mode, src);
> - if (!validate_subreg (int_mode, src_int_mode, src,
> - subreg_lowpart_offset (int_mode, src_int_mode)))
> - return NULL_RTX;
> + if (!int_mode_for_mode (src_mode).exists (&src_int_mode)
> + || !int_mode_for_mode (mode).exists (&int_mode))
> + return NULL_RTX;
> +
> + if (!targetm.modes_tieable_p (src_int_mode, src_mode))
> + return NULL_RTX;
> + if (!targetm.modes_tieable_p (int_mode, mode))
> + return NULL_RTX;
> +
> + src = gen_lowpart (src_int_mode, src);
> + if (!validate_subreg (int_mode, src_int_mode, src,
> + subreg_lowpart_offset (int_mode, src_int_mode)))
> + return NULL_RTX;
> +
> + src = convert_modes (int_mode, src_int_mode, src, true);
> + src = gen_lowpart (mode, src);
> + }
>
> - src = convert_modes (int_mode, src_int_mode, src, true);
> - src = gen_lowpart (mode, src);
> return src;
> }
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
> new file mode 100644
> index 00000000000..a61e94a6d98
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-0.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8m1_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vle8_v_u8m1(arr, 32);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
> new file mode 100644
> index 00000000000..46efd7379ac
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8m2_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vle8_v_u8m2(arr, 32);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[09]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} }
> } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
> new file mode 100644
> index 00000000000..8bebac219a6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-10.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vbool4_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vlm_v_b4(arr, 32);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
> new file mode 100644
> index 00000000000..47e4243e02e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-2.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8m1_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vle8_v_u8m1(arr, 16);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
> new file mode 100644
> index 00000000000..5331e547ed3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-3.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8m2_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vle8_v_u8m2(arr, 8);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[09]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} }
> } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
> new file mode 100644
> index 00000000000..0c728f93514
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-4.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8mf2_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vle8_v_u8mf2(arr, 32);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
> new file mode 100644
> index 00000000000..ccfc40cd382
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-5.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8m2_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vle8_v_u8m2(arr, 4);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[09]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} }
> } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
> new file mode 100644
> index 00000000000..ce7ddbb99b2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-6.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8m8_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + return __riscv_vle8_v_u8m8(arr, 32);
> +}
> +
> +/* { dg-final { scan-assembler-times
> {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */
> +/* { dg-final { scan-assembler-times
> {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
> new file mode 100644
> index 00000000000..ac0100a1211
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-7.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vbool8_t test () {
> + uint8_t arr[32] = {
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + 1, 2, 7, 1, 3, 4, 5, 3,
> + 1, 0, 1, 2, 4, 4, 9, 9,
> + };
> +
> + vuint8m1_t varr = __riscv_vle8_v_u8m1(arr, 32);
> + vuint8m1_t vand_m = __riscv_vand_vx_u8m1(varr, 1, 32);
> +
> + return __riscv_vreinterpret_v_u8m1_b8(vand_m);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
> new file mode 100644
> index 00000000000..b7ebef80954
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-8.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vfloat32m1_t test () {
> + float arr[32] = {
> + 1.0, 2.2, 7.8, 1.2, 3.3, 4.7, 5.5, 3.3,
> + 1.0, 0.2, 1.8, 2.2, 4.3, 4.7, 9.5, 9.3,
> + 1.0, 2.2, 7.8, 1.2, 3.3, 4.7, 5.5, 3.3,
> + 1.0, 0.2, 1.8, 2.2, 4.3, 4.7, 9.5, 9.3,
> + };
> +
> + return __riscv_vle32_v_f32m1(arr, 32);
> +}
> +
> +/* { dg-final { scan-assembler-not {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> +/* { dg-final { scan-assembler-not {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)}
> } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
> new file mode 100644
> index 00000000000..21fed06d201
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr111720-9.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -ftree-vectorize
> --param=riscv-autovec-preference=fixed-vlmax -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +vfloat64m8_t test () {
> + double arr[8] = {
> + 1.0, 2.2, 7.8, 1.2, 3.3, 4.7, 5.5, 3.3,
> + };
> +
> + return __riscv_vle64_v_f64m8(arr, 4);
> +}
> +
> +/* { dg-final { scan-assembler-times
> {vle[0-9]+\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */
> +/* { dg-final { scan-assembler-times
> {vs[0-9]+r\.v\s+v[0-9]+,\s*[0-9]+\(sp\)} 1 } } */
> --
> 2.34.1
>