Jakub Jelinek <ja...@redhat.com> wrote:
>Hi!
>
>My recent patch introduced a regression, as shown by the attached
>testcases. There was nothing actually checking the type of the
>induction, but because we allowed only INTEGER_CST steps, that implied
>integral/pointer types only. With -ffast-math I don't see a reason why
>we can't handle floating-point types the same, so the patch adds type
>checks to vect_is_simple_iv_evolution so that we won't be surprised by
>fixed-point, vector (and whatever else comes later) inductions, and
>handles the REAL_TYPE steps in get_initial_def_for_induction.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
Ok. Thanks, Richard. >2013-06-28 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/57741 > * tree-vect-loop.c (vect_is_simple_iv_evolution): Disallow > non-INTEGRAL_TYPE_P non-SCALAR_FLOAT_TYPE_P SSA_NAME step_exprs, > or SCALAR_FLOAT_TYPE_P SSA_NAMEs if !flag_associative_math. > Allow REAL_CST step_exprs if flag_associative_math. > (get_initial_def_for_induction): Handle SCALAR_FLOAT_TYPE_P step_expr. > > * gcc.dg/vect/pr57741-1.c: New test. > * gcc.dg/vect/pr57741-2.c: New test. > * gcc.dg/vect/pr57741-3.c: New test. > >--- gcc/tree-vect-loop.c.jj 2013-06-25 14:34:33.000000000 +0200 >+++ gcc/tree-vect-loop.c 2013-06-28 01:07:42.524553908 +0200 >@@ -538,7 +538,12 @@ vect_is_simple_iv_evolution (unsigned lo > if (TREE_CODE (step_expr) != INTEGER_CST > && (TREE_CODE (step_expr) != SSA_NAME > || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr))) >- && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb)))) >+ && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb)) >+ || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr)) >+ && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)) >+ || !flag_associative_math))) >+ && (TREE_CODE (step_expr) != REAL_CST >+ || !flag_associative_math)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >@@ -3276,7 +3281,13 @@ get_initial_def_for_induction (gimple iv > { > /* iv_loop is the loop to be vectorized. 
Generate: > vec_step = [VF*S, VF*S, VF*S, VF*S] */ >- expr = build_int_cst (TREE_TYPE (step_expr), vf); >+ if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) >+ { >+ expr = build_int_cst (integer_type_node, vf); >+ expr = fold_convert (TREE_TYPE (step_expr), expr); >+ } >+ else >+ expr = build_int_cst (TREE_TYPE (step_expr), vf); > new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), > expr, step_expr); > if (TREE_CODE (step_expr) == SSA_NAME) >@@ -3339,7 +3350,13 @@ get_initial_def_for_induction (gimple iv > gcc_assert (!nested_in_vect_loop); > > /* Create the vector that holds the step of the induction. */ >- expr = build_int_cst (TREE_TYPE (step_expr), nunits); >+ if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) >+ { >+ expr = build_int_cst (integer_type_node, nunits); >+ expr = fold_convert (TREE_TYPE (step_expr), expr); >+ } >+ else >+ expr = build_int_cst (TREE_TYPE (step_expr), nunits); > new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), > expr, step_expr); > if (TREE_CODE (step_expr) == SSA_NAME) >--- gcc/testsuite/gcc.dg/vect/pr57741-1.c.jj 2013-06-27 >23:29:07.322359740 +0200 >+++ gcc/testsuite/gcc.dg/vect/pr57741-1.c 2013-06-27 23:54:18.216533899 >+0200 >@@ -0,0 +1,21 @@ >+/* PR tree-optimization/57741 */ >+/* { dg-do compile } */ >+ >+void >+foo (float *p, float *q, float x) >+{ >+ int i; >+ float f = 1.0f, g = 2.0f; >+ for (i = 0; i < 1024; i++) >+ { >+ *p++ = f; >+ f += x; >+ } >+ for (i = 0; i < 1024; i++) >+ { >+ *q++ = g; >+ g += 0.5f; >+ } >+} >+ >+/* { dg-final { cleanup-tree-dump "vect" } } */ >--- gcc/testsuite/gcc.dg/vect/pr57741-2.c.jj 2013-06-27 >23:44:40.846538237 +0200 >+++ gcc/testsuite/gcc.dg/vect/pr57741-2.c 2013-06-27 23:50:23.552498840 >+0200 >@@ -0,0 +1,44 @@ >+/* PR tree-optimization/57741 */ >+/* { dg-do run } */ >+/* { dg-additional-options "-ffast-math" } */ >+ >+#include "tree-vect.h" >+ >+extern void abort (void); >+ >+__attribute__((noinline, noclone)) void >+foo (float *p, float *q, float x) >+{ >+ int i; >+ p = 
(float *) __builtin_assume_aligned (p, 32); >+ q = (float *) __builtin_assume_aligned (q, 32); >+ float f = 1.0f, g = 2.0f; >+ for (i = 0; i < 1024; i++) >+ { >+ *p++ = f; >+ f += x; >+ } >+ for (i = 0; i < 1024; i++) >+ { >+ *q++ = g; >+ g += 0.5f; >+ } >+} >+ >+float p[1024] __attribute__((aligned (32))) = { 17.0f }; >+float q[1024] __attribute__((aligned (32))) = { 17.0f }; >+ >+int >+main () >+{ >+ int i; >+ check_vect (); >+ foo (p, q, 1.5f); >+ for (i = 0; i < 1024; i++) >+ if (p[i] != 1.0f + i * 1.5f || q[i] != 2.0f + i * 0.5f) >+ abort (); >+ return 0; >+} >+ >+/* { dg-final { scan-tree-dump-times "vectorized 2 loop" 1 "vect" } } >*/ >+/* { dg-final { cleanup-tree-dump "vect" } } */ >--- gcc/testsuite/gcc.dg/vect/pr57741-3.c.jj 2013-06-28 >01:08:29.530334797 +0200 >+++ gcc/testsuite/gcc.dg/vect/pr57741-3.c 2013-06-28 01:11:31.365141459 >+0200 >@@ -0,0 +1,42 @@ >+/* PR tree-optimization/57741 */ >+/* { dg-do run } */ >+/* { dg-additional-options "-ffast-math" } */ >+ >+#include "tree-vect.h" >+ >+extern void abort (void); >+ >+float p[1024] __attribute__((aligned (32))) = { 17.0f }; >+float q[1024] __attribute__((aligned (32))) = { 17.0f }; >+char r[1024] __attribute__((aligned (32))) = { 1 }; >+ >+__attribute__((noinline, noclone)) void >+foo (float x) >+{ >+ int i; >+ float f = 1.0f, g = 2.0f; >+ for (i = 0; i < 1024; i++) >+ { >+ p[i] = f; >+ f += x; >+ q[i] = g; >+ g += 0.5f; >+ r[i]++; >+ } >+} >+ >+int >+main () >+{ >+ int i; >+ check_vect (); >+ r[0] = 0; >+ foo (1.5f); >+ for (i = 0; i < 1024; i++) >+ if (p[i] != 1.0f + i * 1.5f || q[i] != 2.0f + i * 0.5f || r[i] != >1) >+ abort (); >+ return 0; >+} >+ >+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } >*/ >+/* { dg-final { cleanup-tree-dump "vect" } } */ > > Jakub