On Sun, Oct 30, 2011 at 12:38:32AM -0400, David Miller wrote: > gcc.dg/pr48616.c segfaults on sparc as of a day or two ago > > vectorizable_shift() crashes because op1_vectype is NULL and > we hit this code path: > > /* Vector shifted by vector. */ > if (!scalar_shift_arg) > { > optab = optab_for_tree_code (code, vectype, optab_vector); > if (vect_print_dump_info (REPORT_DETAILS)) > fprintf (vect_dump, "vector/vector shift/rotate found."); > => if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) > > dt[1] is vect_external_def and slp_node is non-NULL. > > Indeed, when the 'dt' arg to vect_is_simple_use_1() is > vect_external_def *vectype will be set to NULL.
Here is a fix for that (and other issues that show up on these testcases with -O3 -mxop if I disable all vector/scalar shift expanders in sse.md). For SLP it currently gives up more often than for loop vectorization, I assume we could handle all dt[1] == vect_constant_def and dt[2] == vect_external_def cases for SLP (and at least the former even if the constants differ between nodes) by building the vectors by hand, though the current vect_get_vec_defs/vect_get_vec_defs_for_stmt_copy can't be used for that as is. 2011-10-28 Jakub Jelinek <ja...@redhat.com> * tree-vect-stmts.c (vectorizable_shift): If op1 is vect_external_def in a loop and has different type from op0, cast it to op0's type before the loop first. For slp give up. Don't crash if op1_vectype is NULL. * gcc.dg/vshift-3.c: New test. * gcc.dg/vshift-4.c: New test. * gcc.dg/vshift-5.c: New test. --- gcc/tree-vect-stmts.c.jj 2011-10-28 16:21:06.000000000 +0200 +++ gcc/tree-vect-stmts.c 2011-10-31 10:27:57.000000000 +0100 @@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_ optab = optab_for_tree_code (code, vectype, optab_vector); if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "vector/vector shift/rotate found."); - if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) + if (!op1_vectype) + op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out); + if (op1_vectype == NULL_TREE + || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) { if (vect_print_dump_info (REPORT_DETAILS)) fprintf (vect_dump, "unusable type for last operand in" @@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_ /* Unlike the other binary operators, shifts/rotates have the rhs being int, instead of the same type as the lhs, so make sure the scalar is the right type if we are - dealing with vectors of short/char. */ + dealing with vectors of long long/long/short/char. */ if (dt[1] == vect_constant_def) op1 = fold_convert (TREE_TYPE (vectype), op1); + else if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (op1))) + { + if (slp_node + && TYPE_MODE (TREE_TYPE (vectype)) + != TYPE_MODE (TREE_TYPE (op1))) + { + if (vect_print_dump_info (REPORT_DETAILS)) + fprintf (vect_dump, "unusable type for last operand in" + " vector/vector shift/rotate."); + return false; + } + if (vec_stmt && !slp_node) + { + op1 = fold_convert (TREE_TYPE (vectype), op1); + op1 = vect_init_vector (stmt, op1, + TREE_TYPE (vectype), NULL); + } + } } } } --- gcc/testsuite/gcc.dg/vshift-3.c.jj 2011-10-31 10:00:57.000000000 +0100 +++ gcc/testsuite/gcc.dg/vshift-3.c 2011-10-31 10:00:42.000000000 +0100 @@ -0,0 +1,136 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include <stdlib.h> + +#define N 64 + +#ifndef TYPE1 +#define TYPE1 int +#define TYPE2 long long +#endif + +signed TYPE1 a[N], b, g[N]; +unsigned TYPE1 c[N], h[N]; +signed TYPE2 d[N], e, j[N]; +unsigned TYPE2 f[N], k[N]; + +#ifndef S +#define S(x) x +#endif + +__attribute__((noinline)) void +f1 (void) +{ + int i; + for (i = 0; i < N; i++) + g[i] = a[i] << S (b); +} + +__attribute__((noinline)) void +f2 (void) +{ + int i; + for (i = 0; i < N; i++) + g[i] = a[i] >> S (b); +} + +__attribute__((noinline)) void +f3 (void) +{ + int i; + for (i = 0; i < N; i++) + h[i] = c[i] >> S (b); +} + +__attribute__((noinline)) void +f4 (void) +{ + int i; + for (i = 0; i < N; i++) + j[i] = d[i] << S (e); +} + +__attribute__((noinline)) void +f5 (void) +{ + int i; + for (i = 0; i < N; i++) + j[i] = d[i] >> S (e); +} + +__attribute__((noinline)) void +f6 (void) +{ + int i; + for (i = 0; i < N; i++) + k[i] = f[i] >> S (e); +} + +__attribute__((noinline)) void +f7 (void) +{ + int i; + for (i = 0; i < N; i++) + j[i] = d[i] << S (b); +} + +__attribute__((noinline)) void +f8 (void) +{ + int i; + for (i = 0; i < N; i++) + j[i] = d[i] >> S (b); +} + +__attribute__((noinline)) void +f9 (void) +{ + int i; + for (i = 0; i < N; i++) + k[i] = f[i] >> S (b); +} + +int +main () +{ + int i; + b = 7; + e = 12; + for (i = 0; i < N; i++) + { + asm (""); + c[i] = (random () << 1) | (random () & 1); + a[i] = c[i]; + d[i] = (random () << 1) | (random () & 1); + d[i] |= (unsigned long long) c[i] << 32; + f[i] = d[i]; + } + f1 (); + f3 (); + f4 (); + f6 (); + for (i = 0; i < N; i++) + if (g[i] != (signed TYPE1) (a[i] << S (b)) + || h[i] != (unsigned TYPE1) (c[i] >> S (b)) + || j[i] != (signed TYPE2) (d[i] << S (e)) + || k[i] != (unsigned TYPE2) (f[i] >> S (e))) + abort (); + f2 (); + f5 (); + f9 (); + for (i = 0; i < N; i++) + if (g[i] != (signed TYPE1) (a[i] >> S (b)) + || j[i] != (signed TYPE2) (d[i] >> S (e)) + || k[i] != (unsigned TYPE2) (f[i] >> S (b))) + abort (); + f7 (); + for (i = 0; i < N; i++) + if (j[i] != (signed TYPE2) (d[i] << S (b))) + abort (); + f8 (); + for (i = 0; i < N; i++) + if (j[i] != (signed TYPE2) (d[i] >> S (b))) + abort (); + return 0; +} --- gcc/testsuite/gcc.dg/vshift-4.c.jj 2011-10-31 10:01:08.000000000 +0100 +++ gcc/testsuite/gcc.dg/vshift-4.c 2011-10-31 10:01:22.000000000 +0100 @@ -0,0 +1,6 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#define S(x) 3 + +#include "vshift-3.c" --- gcc/testsuite/gcc.dg/vshift-5.c.jj 2011-10-31 10:33:09.000000000 +0100 +++ gcc/testsuite/gcc.dg/vshift-5.c 2011-10-31 10:32:57.000000000 +0100 @@ -0,0 +1,80 @@ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +extern void abort (void); +long long a[16]; + +__attribute__((noinline, noclone)) void +f1 (void) +{ + long long a0, a1, a2, a3; + a0 = a[0]; + a1 = a[1]; + a2 = a[2]; + a3 = a[3]; + a0 = a0 << 2; + a1 = a1 << 3; + a2 = a2 << 4; + a3 = a3 << 5; + a[0] = a0; + a[1] = a1; + a[2] = a2; + a[3] = a3; +} + +__attribute__((noinline, noclone)) void +f2 (void) +{ + long long a0, a1, a2, a3; + a0 = a[0]; + a1 = a[1]; + a2 = a[2]; + a3 = a[3]; + a0 = a0 << 2; + a1 = a1 << 2; + a2 = a2 << 2; + a3 = a3 << 2; + a[0] = a0; + a[1] = a1; + a[2] = a2; + a[3] = a3; +} + +__attribute__((noinline, noclone)) void +f3 (int x) +{ + long long a0, a1, a2, a3; + a0 = a[0]; + a1 = a[1]; + a2 = a[2]; + a3 = a[3]; + a0 = a0 << x; + a1 = a1 << x; + a2 = a2 << x; + a3 = a3 << x; + a[0] = a0; + a[1] = a1; + a[2] = a2; + a[3] = a3; +} + +int +main () +{ + a[0] = 4LL; + a[1] = 3LL; + a[2] = 2LL; + a[3] = 1LL; + f1 (); + if (a[0] != (4LL << 2) || a[1] != (3LL << 3) + || a[2] != (2LL << 4) || a[3] != (1LL << 5)) + abort (); + f2 (); + if (a[0] != (4LL << 4) || a[1] != (3LL << 5) + || a[2] != (2LL << 6) || a[3] != (1LL << 7)) + abort (); + f3 (3); + if (a[0] != (4LL << 7) || a[1] != (3LL << 8) + || a[2] != (2LL << 9) || a[3] != (1LL << 10)) + abort (); +} Jakub