The current dynamic LMUL cost model is not accurate for conversion codes.
This patch refines it; as a result, one existing test case changes from
choosing LMUL = 4 to choosing LMUL = 8.

Tested with no regressions; committed.

Before this patch (LMUL = 4):                  After this patch (LMUL = 8):  
        lw      a7,56(sp)                             lw        a7,56(sp)
        ld      t5,0(sp)                              ld        t5,0(sp)
        ld      t1,8(sp)                              ld        t1,8(sp)
        ld      t6,16(sp)                             ld        t6,16(sp)
        ld      t0,24(sp)                             ld        t0,24(sp)
        ld      t3,32(sp)                             ld        t3,32(sp)
        ld      t4,40(sp)                             ld        t4,40(sp)
        ble     a7,zero,.L5                           ble       a7,zero,.L5
.L3:                                               .L3:
        vsetvli a4,a7,e32,m2,ta,ma                    vsetvli   a4,a7,e32,m4,ta
        vle8.v  v1,0(a2)                              vle8.v    v3,0(a2)
        vle8.v  v4,0(a1)                              vle8.v    v16,0(t0)
        vsext.vf4       v8,v1                         vle8.v    v7,0(a1)
        vsext.vf4       v2,v4                         vle8.v    v12,0(t6)
        vsetvli zero,zero,e8,mf2,ta,ma                vle8.v    v2,0(a5)
        vadd.vv v4,v4,v1                              vle8.v    v1,0(t5)
        vsetvli zero,zero,e32,m2,ta,ma                vsext.vf4 v20,v3
        vle8.v  v5,0(t0)                              vsext.vf4 v8,v7
        vle8.v  v6,0(t6)                              vadd.vv   v8,v8,v20
        vadd.vv v2,v2,v8                              vadd.vv   v8,v8,v8
        vadd.vv v2,v2,v2                              vadd.vv   v8,v8,v20
        vadd.vv v2,v2,v8                              vsetvli   zero,zero,e8,m1
        vsetvli zero,zero,e8,mf2,ta,ma                vadd.vv   v15,v12,v16
        vadd.vv v6,v6,v5                              vsetvli   zero,zero,e32,m4
        vsetvli zero,zero,e32,m2,ta,ma                vsext.vf4 v12,v15
        vle8.v  v8,0(t5)                              vadd.vv   v8,v8,v12
        vle8.v  v9,0(a5)                              vsetvli   zero,zero,e8,m1
        vsext.vf4       v10,v4                        vadd.vv   v7,v7,v3
        vsext.vf4       v12,v6                        vsetvli   zero,zero,e32,m4
        vadd.vv v2,v2,v12                             vsext.vf4 v4,v7
        vadd.vv v2,v2,v10                             vadd.vv   v8,v8,v4
        vsetvli zero,zero,e16,m1,ta,ma                vsetvli   zero,zero,e16,m2
        vncvt.x.x.w     v4,v2                         vncvt.x.x.w       v4,v8
        vsetvli zero,zero,e32,m2,ta,ma                vsetvli   zero,zero,e8,m1
        vadd.vv v6,v2,v2                              vncvt.x.x.w       v4,v4
        vsetvli zero,zero,e8,mf2,ta,ma                vadd.vv   v15,v3,v4
        vncvt.x.x.w     v4,v4                         vadd.vv   v2,v2,v4
        vadd.vv v5,v5,v4                              vse8.v    v15,0(t4)
        vadd.vv v9,v9,v4                              vadd.vv   v3,v16,v4
        vadd.vv v1,v1,v4                              vse8.v    v2,0(a3)
        vadd.vv v4,v8,v4                              vadd.vv   v1,v1,v4
        vse8.v  v1,0(t4)                              vse8.v    v1,0(a6)
        vse8.v  v9,0(a3)                              vse8.v    v3,0(t1)
        vsetvli zero,zero,e32,m2,ta,ma                vsetvli   zero,zero,e32,m4
        vse8.v  v4,0(a6)                              vsext.vf4 v4,v3
        vsext.vf4       v8,v5                         vadd.vv   v4,v4,v8
        vse8.v  v5,0(t1)                              vsetvli   zero,zero,e64,m8
        vadd.vv v2,v8,v2                              vsext.vf2 v16,v4
        vsetvli zero,zero,e64,m4,ta,ma                vse64.v   v16,0(t3)
        vsext.vf2       v8,v2                         vsetvli   zero,zero,e32,m4
        vsetvli zero,zero,e32,m2,ta,ma                vadd.vv   v8,v8,v8
        slli    t2,a4,3                               vsext.vf4 v4,v15
        vse64.v v8,0(t3)                              slli      t2,a4,3
        vsext.vf4       v2,v1                         vadd.vv   v4,v8,v4
        sub     a7,a7,a4                              sub       a7,a7,a4
        vadd.vv v2,v6,v2                              vsetvli   zero,zero,e64,m8
        vsetvli zero,zero,e64,m4,ta,ma                vsext.vf2 v8,v4
        vsext.vf2       v4,v2                         vse64.v   v8,0(a0)
        vse64.v v4,0(a0)                              add       a1,a1,a4
        add     a2,a2,a4                              add       a2,a2,a4
        add     a1,a1,a4                              add       a5,a5,a4
        add     t6,t6,a4                              add       t5,t5,a4
        add     t0,t0,a4                              add       t6,t6,a4
        add     a5,a5,a4                              add       t0,t0,a4
        add     t5,t5,a4                              add       t4,t4,a4
        add     t4,t4,a4                              add       a3,a3,a4
        add     a3,a3,a4                              add       a6,a6,a4
        add     a6,a6,a4                              add       t1,t1,a4
        add     t1,t1,a4                              add       t3,t3,t2
        add     t3,t3,t2                              add       a0,a0,t2
        add     a0,a0,t2                              bne       a7,zero,.L3
        bne     a7,zero,.L3                         .L5:
.L5:                                                  ret
        ret

gcc/ChangeLog:

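        * config/riscv/riscv-vector-costs.cc (is_gimple_assign_or_call): Change
        interface.
        (get_live_range): New function.
        (compute_local_program_points): Adapt for interface change.
        (compute_local_live_ranges): Refine live ranges for conversion codes.
        (update_local_live_ranges): Adapt for interface change.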

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c: Adapt test.
        * gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c: Ditto.

---
 gcc/config/riscv/riscv-vector-costs.cc        | 72 ++++++++++++++++---
 .../costmodel/riscv/rvv/dynamic-lmul4-3.c     |  5 +-
 .../costmodel/riscv/rvv/dynamic-lmul4-5.c     |  2 +
 3 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index df3c0b0d93a..0c485dc4f29 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -89,9 +89,9 @@ namespace riscv_vector {
 */
 
 static bool
-is_gimple_assign_or_call (gimple_stmt_iterator si)
+is_gimple_assign_or_call (gimple *stmt)
 {
-  return is_gimple_assign (gsi_stmt (si)) || is_gimple_call (gsi_stmt (si));
+  return is_gimple_assign (stmt) || is_gimple_call (stmt);
 }
 
 /* Return the program point of 1st vectorized lanes statement.  */
@@ -121,6 +121,42 @@ get_last_lane_point (const vec<stmt_point> program_points,
   return max_point;
 }
 
+/* Return the last variable that is in the live range list.  */
+static pair *
+get_live_range (hash_map<tree, pair> *live_ranges, tree arg)
+{
+  auto *r = live_ranges->get (arg);
+  if (r)
+    return r;
+  else
+    {
+      tree t = arg;
+      gimple *def_stmt = NULL;
+      while (t && TREE_CODE (t) == SSA_NAME && !r
+            && (def_stmt = SSA_NAME_DEF_STMT (t)))
+       {
+         if (gimple_assign_cast_p (def_stmt))
+           {
+             t = gimple_assign_rhs1 (def_stmt);
+             r = live_ranges->get (t);
+             def_stmt = NULL;
+           }
+         else
+           /* FIXME: Currently we don't see any fold for
+              non-conversion statements.  */
+           t = NULL_TREE;
+       }
+      if (r)
+       return r;
+      else
+       {
+         bool insert_p = live_ranges->put (arg, pair (0, 0));
+         gcc_assert (!insert_p);
+         return live_ranges->get (arg);
+       }
+    }
+}
+
 /* Collect all STMTs that are vectorized and compute their program points.
    Note that we don't care about the STMTs that are not vectorized and
    we only build the local graph (within a block) of program points.
@@ -163,9 +199,9 @@ compute_local_program_points (
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Compute local program points for bb %d:\n",
                             bb->index);
-         for (si = gsi_start_bb (bbs[i]); !gsi_end_p (si); gsi_next (&si))
+         for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
-             if (!is_gimple_assign_or_call (si))
+             if (!is_gimple_assign_or_call (gsi_stmt (si)))
                continue;
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
              enum stmt_vec_info_type type
@@ -282,13 +318,33 @@ compute_local_live_ranges (
                          == VMAT_LOAD_STORE_LANES)
                        point = get_last_lane_point (program_points,
                                                     program_point.stmt_info);
+                     else if (existed_p)
+                       point = MAX (live_range.second, point);
                      if (existed_p)
                        /* We will grow the live range for each use.  */
                        live_range = pair (live_range.first, point);
                      else
-                       /* We assume the variable is live from the start of
-                          this block.  */
-                       live_range = pair (0, point);
+                       {
+                         gimple *def_stmt;
+                         if (TREE_CODE (var) == SSA_NAME
+                             && (def_stmt = SSA_NAME_DEF_STMT (var))
+                             && gimple_bb (def_stmt) == bb
+                             && is_gimple_assign_or_call (def_stmt))
+                           {
+                             live_ranges->remove (var);
+                             for (unsigned int j = 0;
+                                  j < gimple_num_args (def_stmt); j++)
+                               {
+                                 tree arg = gimple_arg (def_stmt, j);
+                                 auto *r = get_live_range (live_ranges, arg);
+                                 gcc_assert (r);
+                                 (*r).second = MAX (point, (*r).second);
+                               }
+                           }
+                         else
+                           /* The splat vector lives the whole block.  */
+                           live_range = pair (0, program_points.length ());
+                       }
                    }
                }
            }
@@ -589,7 +645,7 @@ update_local_live_ranges (
        }
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
-         if (!is_gimple_assign_or_call (si))
+         if (!is_gimple_assign_or_call (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
          enum stmt_vec_info_type type
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c
index cd476cb80ca..9af91b0b863 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-3.c
@@ -39,9 +39,10 @@ void foo2 (int64_t *__restrict a,
     }
 }
 
-/* { dg-final { scan-assembler {e64,m4} } } */
+/* { dg-final { scan-assembler {e32,m4} } } */
+/* { dg-final { scan-assembler {e64,m8} } } */
 /* { dg-final { scan-assembler-not {csrr} } } */
-/* { dg-final { scan-tree-dump-times "Preferring smaller LMUL loop because it has unexpected spills" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Preferring smaller LMUL loop because it has unexpected spills" "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c
index d48a37666ae..2a881da0b01 100644
--- a/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/riscv/rvv/dynamic-lmul4-5.c
@@ -45,3 +45,5 @@ void foo2 (int16_t *__restrict a,
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 8" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 4" 1 "vect" } } */
 /* { dg-final { scan-tree-dump-times "Maximum lmul = 2" 1 "vect" } } */
+/* { dg-final { scan-tree-dump "start = 8, end = 10" "vect" } } */
+/* { dg-final { scan-tree-dump "2: type = unsigned short, start = 0, end = 34" "vect" } } */
-- 
2.36.3

Reply via email to