Hi,

It looks like it's enough to implement
targetm.vectorize.autovectorize_vector_sizes for NEON in order to enable
initial auto-detection of the vector size. With the attached patch and the
-mvectorize-with-neon-quad flag, the vectorizer first tries to vectorize
for 128 bits, and if this fails, it tries to vectorize for 64 bits. For
example, in the attached testcase the number of iterations is too small for
128 bits (the first 2 iterations have to be peeled in order to align the
array accesses), but is sufficient for 64 bits (the accesses are aligned
here).

I'd appreciate your comments on the patch, and I also have a few questions:
1. Why is the default vector size 64?
2. Where should NEON vectorization tests be placed? I found NEON tests with
intrinsics in gcc.target/arm; is that the right place?
3. According to gcc.dg/vect/vect.exp, the only flag that is used for NEON
(in addition to the target-independent flags) is -ffast-math. Is that enough?

Thanks,
Ira

ChangeLog:

        * config/arm/arm.c (arm_autovectorize_vector_sizes): New
        function.
        (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 166032)
+++ config/arm/arm.c    (working copy)
@@ -246,6 +246,7 @@ static bool arm_builtin_support_vector_misalignmen
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
+static unsigned int arm_autovectorize_vector_sizes (void);


 /* Table of machine attributes.  */
@@ -391,6 +392,9 @@ static const struct default_options arm_option_opt
 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+  arm_autovectorize_vector_sizes

 #undef  TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
@@ -23223,6 +23227,12 @@ arm_expand_sync (enum machine_mode mode,
     }
 }

+static unsigned int
+arm_autovectorize_vector_sizes (void)
+{
+  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+}
+
 static bool
 arm_vector_alignment_reachable (const_tree type, bool is_packed)
 {


test:

#define N 5

unsigned int ub[N+2] = {1,1,6,39,12,18,14};
unsigned int uc[N+2] = {2,3,4,11,6,7,1};

void main1 ()
{
  int i;
  unsigned int udiff = 2;
  unsigned int umax = 10;

  for (i = 0; i < N; i++)
    {
      /* Summation.  */
      udiff += (ub[i+2] - uc[i]);

      /* Maximum.  */
      umax = umax < uc[i+2] ? uc[i+2] : umax;
    }
}


_______________________________________________
linaro-toolchain mailing list
linaro-toolchain@lists.linaro.org
http://lists.linaro.org/mailman/listinfo/linaro-toolchain

Reply via email to