On 24/03/11 09:06, Ira Rosen wrote:
Hi,

This patch implements TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES for ARM NEON.

Regtested on arm-linux-gnueabi.
OK for trunk?


This is OK for trunk if there are no regressions.

cheers
Ramana

Thanks,
Ira

ChangeLog:

        * config/arm/arm.c (arm_autovectorize_vector_sizes): New
        function.
        (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.

testsuite/ChangeLog:

        * gcc.dg/vect/vect-outer-5.c: Reduce the distance between data
        accesses to preserve the meaning of the test for doubleword vectors.
        * gcc.dg/vect/no-vfa-pr29145.c: Likewise.
        * gcc.dg/vect/slp-3.c: Reduce the loop bound for the same reason.

Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 171339)
+++ config/arm/arm.c    (working copy)
@@ -252,6 +252,7 @@ static bool arm_builtin_support_vector_misalignmen
                                                      bool is_packed);
  static void arm_conditional_register_usage (void);
  static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
+static unsigned int arm_autovectorize_vector_sizes (void);

  ^L
  /* Table of machine attributes.  */
@@ -404,6 +405,9 @@ static const struct default_options arm_option_opt
  #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
  #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
  #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
+  arm_autovectorize_vector_sizes

  #undef  TARGET_MACHINE_DEPENDENT_REORG
  #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
@@ -23528,6 +23532,12 @@ arm_expand_sync (enum machine_mode mode,
      }
  }

+static unsigned int
+arm_autovectorize_vector_sizes (void)
+{
+  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+}
+
  static bool
  arm_vector_alignment_reachable (const_tree type, bool is_packed)
  {
Index: testsuite/gcc.dg/vect/vect-outer-5.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-5.c        (revision 171339)
+++ testsuite/gcc.dg/vect/vect-outer-5.c        (working copy)
@@ -17,7 +17,7 @@ int main1 ()
    float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
    float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
    float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
-  float E[4] = {0,1,2,480};
+  float E[4] = {0,480,960,1440};
    float s;

    int i, j;
@@ -55,7 +55,7 @@ int main1 ()
        s = 0;
        for (j=0; j<N; j+=4)
         s += C[j];
-      B[i+3] = B[i] + s;
+      B[i+1] = B[i] + s;
      }

    /* check results:  */
Index: testsuite/gcc.dg/vect/slp-3.c
===================================================================
--- testsuite/gcc.dg/vect/slp-3.c       (revision 171339)
+++ testsuite/gcc.dg/vect/slp-3.c       (working copy)
@@ -101,7 +101,7 @@ main1 ()
      }

    /* SLP with unrolling by 8.  */
-  for (i = 0; i<  N/2; i++)
+  for (i = 0; i<  N/4; i++)
      {
        out[i*9] = in[i*9];
        out[i*9 + 1] = in[i*9 + 1];
@@ -115,7 +115,7 @@ main1 ()
      }

    /* check results:  */
-  for (i = 0; i<  N/2; i++)
+  for (i = 0; i<  N/4; i++)
      {
        if (out[i*9] !=  in[i*9]
           || out[i*9 + 1] != in[i*9 + 1]
Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-pr29145.c      (revision 171339)
+++ testsuite/gcc.dg/vect/no-vfa-pr29145.c      (working copy)
@@ -8,7 +8,7 @@ __attribute__ ((noinline))
  void with_restrict(int * __restrict p)
  {
    int i;
-  int *q = p - 2;
+  int *q = p - 1;

    for (i = 0; i<  1000; ++i) {
      p[i] = q[i];
@@ -19,7 +19,7 @@ __attribute__ ((noinline))
  void without_restrict(int * p)
  {
    int i;
-  int *q = p - 2;
+  int *q = p - 1;

    for (i = 0; i<  1000; ++i) {
      p[i] = q[i];
@@ -38,8 +38,8 @@ int main(void)
      a[i] = b[i] = i;
    }

-  with_restrict(a + 2);
-  without_restrict(b + 2);
+  with_restrict(a + 1);
+  without_restrict(b + 1);

    for (i = 0; i<  1002; ++i) {
      if (a[i] != b[i])

Reply via email to