Hi,
When loop versioning is required in vectorization, we can merge niter check for 
vect
peeling with the check for loop versioning, thus save one check/branch for 
vectorized
loop.
Is it OK?

Thanks,
bin
2017-04-11  Bin Cheng  <bin.ch...@arm.com>

        * tree-vect-loop-manip.c (vect_do_peeling): Don't skip vector loop
        if versioning is required.
        * tree-vect-loop.c (vect_analyze_loop_2): Merge niter check for loop
        peeling with the check for versioning.
From bd54e2524a4047328ba4847ad013db2bbe5850fe Mon Sep 17 00:00:00 2001
From: Bin Cheng <binch...@e108451-lin.cambridge.arm.com>
Date: Thu, 16 Mar 2017 16:40:50 +0000
Subject: [PATCH 32/33] save-vect_peeling-niters-check-20170225.txt

---
 gcc/tree-vect-loop-manip.c |  8 +++++---
 gcc/tree-vect-loop.c       | 30 ++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 0fc8cd3..0ff474d 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1686,9 +1686,11 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
 
   /* Prolog loop may be skipped.  */
   bool skip_prolog = (prolog_peeling != 0);
-  /* Skip to epilog if scalar loop may be preferred.  It's only used when
-     we peel for epilog loop.  */
-  bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
+  /* Skip to epilog if scalar loop may be preferred.  It's only needed
+     when we peel for epilog loop and when it hasn't been checked with
+     loop versioning.  */
+  bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+                     && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
   /* Epilog loop must be executed if the number of iterations for epilog
      loop is known at compile time, otherwise we need to add a check at
      the end of vector loop and skip to the end of epilog loop.  */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index af874e7..98caa5e 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2214,6 +2214,36 @@ start_over:
         }
     }
 
+  /* During peeling, we need to check if number of loop iterations is
+     enough for both peeled prolog loop and vector loop.  This check
+     can be merged along with threshold check of loop versioning, so
+     increase threshold for this case if necessary.  */
+  if (LOOP_REQUIRES_VERSIONING (loop_vinfo)
+      && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+         || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+    {
+      unsigned niters_th;
+
+      /* Niters for peeled prolog loop.  */
+      if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
+       {
+         struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
+         tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
+
+         niters_th = TYPE_VECTOR_SUBPARTS (vectype) - 1;
+       }
+      else
+       niters_th = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+
+      /* Niters for at least one iteration of vectorized loop.  */
+      niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+      /* One additional iteration because of peeling for gap.  */
+      if (!LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+       niters_th++;
+      if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th)
+       LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th;
+    }
+
   gcc_assert (vectorization_factor
              == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
 
-- 
1.9.1

Reply via email to