Hi, Richard.
>> I think it would be better to split out building a tree from VF from both
>> arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
I am trying to split out building the tree from both arms, as you suggested.
Could you take a look at the following code?
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8abc1937d74..24a86187d11 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -10315,19 +10315,47 @@ vectorizable_induction (loop_vec_info loop_vinfo,
/* iv_loop is the loop to be vectorized. Generate:
vec_step = [VF*S, VF*S, VF*S, VF*S] */
gimple_seq seq = NULL;
- if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
{
- expr = build_int_cst (integer_type_node, vf);
- expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
+ /* When we're using loop_len produced by SELEC_VL, the non-final
+ iterations are not always processing VF elements. So vectorize
+ induction variable instead of
+
+ _21 = vect_vec_iv_.6_22 + { VF, ... };
+
+ We should generate:
+
+ _35 = .SELECT_VL (ivtmp_33, VF);
+ vect_cst__22 = [vec_duplicate_expr] _35;
+ _21 = vect_vec_iv_.6_22 + vect_cst__22; */
+ vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ tree len
+ = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
+ expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
+ unshare_expr (len)),
+ &seq, true, NULL_TREE);
}
else
- expr = build_int_cst (TREE_TYPE (step_expr), vf);
+ {
+ bool float_p = SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr));
+ expr = build_int_cst (float_p ? integer_type_node
+ : TREE_TYPE (step_expr),
+ vf);
+ if (float_p)
+ expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
+ }
+
new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr),
expr, step_expr);
if (seq)
{
- new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
- gcc_assert (!new_bb);
+ if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
+ gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
+ else
+ {
+ new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ gcc_assert (!new_bb);
+ }
}
}
@@ -10335,9 +10363,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
gcc_assert (CONSTANT_CLASS_P (new_name)
|| TREE_CODE (new_name) == SSA_NAME);
new_vec = build_vector_from_val (step_vectype, t);
- vec_step = vect_init_vector (loop_vinfo, stmt_info,
- new_vec, step_vectype, NULL);
-
+ vec_step
+ = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
+ LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si : NULL);
Thanks.
[email protected]
From: Richard Biener
Date: 2023-11-09 20:16
To: Juzhe-Zhong
CC: gcc-patches; richard.sandiford; rguenther; kito.cheng; kito.cheng
Subject: Re: [PATCH] Middle-end: Fix bug of induction variable vectorization
for RVV
On Wed, Nov 8, 2023 at 11:53 AM Juzhe-Zhong <[email protected]> wrote:
>
> PR: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112438
>
> The SELECT_VL result is not necessarily always VF in non-final iterations.
>
> Current GIMPLE IR is wrong:
>
> # vect_vec_iv_.21_25 = PHI <_24(4), { 0, 1, 2, ... }(3)>
> ...
> _24 = vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... };
>
> After this patch, the GIMPLE IR is correct for SELECT_VL:
>
> # vect_vec_iv_.8_22 = PHI <_21(4), { 0, 1, 2, ... }(3)>
> ...
> _35 = .SELECT_VL (ivtmp_33, POLY_INT_CST [4, 4]);
> _21 = vect_vec_iv_.8_22 + { POLY_INT_CST [4, 4], ... };
>
> kito, could you give more explanation ?
>
> PR middle/112438
>
> gcc/ChangeLog:
>
> * tree-vect-loop.cc (vectorizable_induction): Fix bug.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/pr112438.c: New test.
>
> ---
> .../gcc.target/riscv/rvv/autovec/pr112438.c | 35 +++++++++++++++++
> gcc/tree-vect-loop.cc | 39 +++++++++++++++----
> 2 files changed, 67 insertions(+), 7 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> new file mode 100644
> index 00000000000..b326d56a52c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr112438.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -fno-vect-cost-model
> -ffast-math -fdump-tree-optimized-details" } */
> +
> +void
> +foo (int n, int *__restrict in, int *__restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo2 (int n, float * __restrict in,
> +float * __restrict out)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i;
> + }
> +}
> +
> +void
> +foo3 (int n, float * __restrict in,
> +float * __restrict out, float x)
> +{
> + for (int i = 0; i < n; i += 1)
> + {
> + out[i] = in[i] + i* i;
> + }
> +}
> +
> +/* We don't want to see vect_vec_iv_.21_25 + { POLY_INT_CST [4, 4], ... }.
> */
> +/* { dg-final { scan-tree-dump-not "\\+ \{ POLY_INT_CST" "optimized" } } */
> +
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index a544bc9b059..3e103946168 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10309,10 +10309,30 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> new_name = step_expr;
> else
> {
> + gimple_seq seq = NULL;
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + {
> + /* When we're using loop_len produced by SELEC_VL, the non-final
> + iterations are not always processing VF elements. So vectorize
> + induction variable instead of
> +
> + _21 = vect_vec_iv_.6_22 + { VF, ... };
> +
> + We should generate:
> +
> + _35 = .SELECT_VL (ivtmp_33, VF);
> + vect_cst__22 = [vec_duplicate_expr] _35;
> + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> + tree len
> + = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0);
> + expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr),
> + unshare_expr (len)),
> + &seq, true, NULL_TREE);
> + }
I think it would be better to split out building a tree from VF from both
arms and avoid using 'vf' when LOOP_VINFO_USING_SELECT_VL_P.
Btw, you are not patching the SLP path here which I believe has the same
problem but is currently exempt from non-constant VF at least.
Richard.
> /* iv_loop is the loop to be vectorized. Generate:
> vec_step = [VF*S, VF*S, VF*S, VF*S] */
> - gimple_seq seq = NULL;
> - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> + else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
> {
> expr = build_int_cst (integer_type_node, vf);
> expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr);
> @@ -10323,8 +10343,13 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> expr, step_expr);
> if (seq)
> {
> - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> - gcc_assert (!new_bb);
> + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> + gsi_insert_seq_before (&si, seq, GSI_SAME_STMT);
> + else
> + {
> + new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
> + gcc_assert (!new_bb);
> + }
> }
> }
>
> @@ -10332,9 +10357,9 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> gcc_assert (CONSTANT_CLASS_P (new_name)
> || TREE_CODE (new_name) == SSA_NAME);
> new_vec = build_vector_from_val (step_vectype, t);
> - vec_step = vect_init_vector (loop_vinfo, stmt_info,
> - new_vec, step_vectype, NULL);
> -
> + vec_step
> + = vect_init_vector (loop_vinfo, stmt_info, new_vec, step_vectype,
> + LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) ? &si :
> NULL);
>
> /* Create the following def-use cycle:
> loop prolog:
> --
> 2.36.3
>