On 3 November 2015 at 11:35, Richard Biener <[email protected]> wrote:
>
> I think this should simply re-write A << B to (type) (unsigned-type) A
> * (1U << B).
>
> Does that then still vectorize the signed case?
I didn't realize our representation of chrecs could express that.
Yes, it does - thanks! (And the avx512ifma- test is compiled without warnings.)
Patch attached. I've added a platform-independent version of the failing AVX512
test too.
--Alan
gcc/ChangeLog:

	PR tree-optimization/65963
	* tree-scalar-evolution.c (interpret_rhs_expr): Try to handle
	LSHIFT_EXPRs as equivalent unsigned MULT_EXPRs.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr68112.c: New.
	* gcc.dg/vect/vect-strided-shift-1.c: New.
---
gcc/testsuite/gcc.dg/pr68112.c | 11 ++++++++
gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c | 33 ++++++++++++++++++++++++
gcc/tree-scalar-evolution.c | 17 ++++++++++++
3 files changed, 61 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr68112.c
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
diff --git a/gcc/testsuite/gcc.dg/pr68112.c b/gcc/testsuite/gcc.dg/pr68112.c
new file mode 100644
index 0000000..0a45b03
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr68112.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Waggressive-loop-optimizations" } */
+
+int *a;
+
+void
+foo ()
+{
+ for (int i = 0; i < 65536; i++)
+ *a = i << 24;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
new file mode 100644
index 0000000..b1ce2ec
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
@@ -0,0 +1,33 @@
+/* PR tree-optimization/65963. */
+#include "tree-vect.h"
+
+#define N 512
+
+int in[2*N], out[N];
+
+__attribute__ ((noinline)) void
+loop (void)
+{
+ for (int i = 0; i < N; i++)
+ out[i] = in[i << 1] + 7;
+}
+
+int
+main (int argc, char **argv)
+{
+ check_vect ();
+ for (int i = 0; i < 2*N; i++)
+ {
+ in[i] = i;
+ __asm__ volatile ("" : : : "memory");
+ }
+ loop ();
+ __asm__ volatile ("" : : : "memory");
+ for (int i = 0; i < N; i++)
+ {
+ if (out[i] != i*2 + 7)
+ abort ();
+ }
+ return 0;
+}
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { vect_strided2 } } } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 0753bf3..60d515d 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -1840,6 +1840,23 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt,
res = chrec_fold_multiply (type, chrec1, chrec2);
break;
+ case LSHIFT_EXPR:
+ {
+ /* Handle A<<B as A * (1<<B). */
+ tree uns = unsigned_type_for (type);
+ chrec1 = analyze_scalar_evolution (loop, rhs1);
+ chrec2 = analyze_scalar_evolution (loop, rhs2);
+ chrec1 = chrec_convert (uns, chrec1, at_stmt);
+ chrec1 = instantiate_parameters (loop, chrec1);
+ chrec2 = instantiate_parameters (loop, chrec2);
+
+ tree one = build_int_cst (unsigned_type_for (TREE_TYPE (rhs1)), 1);
+ chrec2 = fold_build2 (LSHIFT_EXPR, uns, one, chrec2);
+ res = chrec_fold_multiply (uns, chrec1, chrec2);
+ res = chrec_convert (type, res, at_stmt);
+ }
+ break;
+
CASE_CONVERT:
/* In case we have a truncation of a widened operation that in
the truncated type has undefined overflow behavior analyze
--
1.9.1