Hi!

This patch adds folding of whole-vector shifts (v >> and v <<, i.e. VEC_RSHIFT_EXPR and VEC_LSHIFT_EXPR) with constant arguments, which helps to optimize the testcase from the PR back into a constant store after the vectorized loop is unrolled.
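As an illustration of what the new folding computes, here is a minimal
standalone sketch (not GCC code; the function name, the 4 x 32-bit vector
shape and the element values are made up, and the shift count is assumed
to be an already validated multiple of the element bitsize):

  #include <stdio.h>

  #define COUNT 4	/* number of vector elements */

  /* Mirror of the element-level loop in the patch: move whole
     elements by OFFSET and fill the vacated lanes with zero.  */
  static void
  fold_whole_vector_shift (const int *src, int *dst,
			   unsigned int shiftc, unsigned int innerc,
			   int is_lshift)
  {
    int offset = shiftc / innerc;
    if (is_lshift)
      offset = -offset;
    for (int i = 0; i < COUNT; i++)
      dst[i] = (i + offset < 0 || i + offset >= COUNT
		? 0 : src[i + offset]);
  }

  int
  main (void)
  {
    int v[COUNT] = { 11, 22, 33, 44 }, r[COUNT];
    /* Whole-vector shift right by 32 bits, i.e. one 32-bit element.  */
    fold_whole_vector_shift (v, r, 32, 32, 0);
    for (int i = 0; i < COUNT; i++)
      printf ("%d ", r[i]);	/* prints: 22 33 44 0 */
    return 0;
  }

So a VEC_RSHIFT_EXPR of { 11, 22, 33, 44 } by 32 folds to { 22, 33, 44, 0 },
and the corresponding left shift gives { 0, 11, 22, 33 }; a shift count that
is not a multiple of the element bitsize keeps returning NULL_TREE, exactly
as in the const_binop change below.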
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2013-04-25  Jakub Jelinek  <ja...@redhat.com>

	PR tree-optimization/57051
	* fold-const.c (const_binop): Handle VEC_LSHIFT_EXPR
	and VEC_RSHIFT_EXPR if shift count is a multiple of element
	bitsize.

--- gcc/fold-const.c.jj	2013-04-12 10:16:25.000000000 +0200
+++ gcc/fold-const.c	2013-04-24 12:37:11.789122719 +0200
@@ -1380,17 +1380,42 @@ const_binop (enum tree_code code, tree a
       int count = TYPE_VECTOR_SUBPARTS (type), i;
       tree *elts = XALLOCAVEC (tree, count);
 
-      for (i = 0; i < count; i++)
+      if (code == VEC_LSHIFT_EXPR
+	  || code == VEC_RSHIFT_EXPR)
 	{
-	  tree elem1 = VECTOR_CST_ELT (arg1, i);
-
-	  elts[i] = const_binop (code, elem1, arg2);
+	  if (!host_integerp (arg2, 1))
+	    return NULL_TREE;
 
-	  /* It is possible that const_binop cannot handle the given
-	     code and return NULL_TREE */
-	  if (elts[i] == NULL_TREE)
+	  unsigned HOST_WIDE_INT shiftc = tree_low_cst (arg2, 1);
+	  unsigned HOST_WIDE_INT outerc = tree_low_cst (TYPE_SIZE (type), 1);
+	  unsigned HOST_WIDE_INT innerc
+	    = tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
+	  if (shiftc >= outerc || (shiftc % innerc) != 0)
 	    return NULL_TREE;
+	  int offset = shiftc / innerc;
+	  if (code == VEC_LSHIFT_EXPR)
+	    offset = -offset;
+	  tree zero = build_zero_cst (TREE_TYPE (type));
+	  for (i = 0; i < count; i++)
+	    {
+	      if (i + offset < 0 || i + offset >= count)
+		elts[i] = zero;
+	      else
+		elts[i] = VECTOR_CST_ELT (arg1, i + offset);
+	    }
 	}
+      else
+	for (i = 0; i < count; i++)
+	  {
+	    tree elem1 = VECTOR_CST_ELT (arg1, i);
+
+	    elts[i] = const_binop (code, elem1, arg2);
+
+	    /* It is possible that const_binop cannot handle the given
+	       code and return NULL_TREE */
+	    if (elts[i] == NULL_TREE)
+	      return NULL_TREE;
+	  }
 
       return build_vector (type, elts);
     }

	Jakub