Hi!

As can be seen in the testcase, for the optimization of (x >> c) << c into x & (-1<<c) we don't really care whether the right shift is arithmetic or logical, because the affected bits are shifted away anyway. Furthermore, while match.pd can handle ((unsigned long long)(unsigned)(x >> 32))<<32 for unsigned long long x - we figure out that after the logical right shift the upper 32 bits are already zero and optimize away those two casts - we don't handle that for an arithmetic shift, nor e.g. for ((unsigned long long)(int)(x >> 32))<<32. Still, the upper 32 bits of the intermediate value don't matter for the result and can be anything.
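To make the claimed equivalence concrete, here is a standalone check (just an illustration, not part of the patch; it assumes 64-bit unsigned long long, 32-bit int and the usual GCC behavior of reducing modulo 2^32 on the narrowing conversion):

#include <assert.h>

/* (x >> 32) << 32 with an unsigned intermediate type.  */
static unsigned long long
via_unsigned (unsigned long long x)
{
  return ((unsigned long long) (unsigned) (x >> 32)) << 32;
}

/* The same with a signed intermediate type; the sign extension only
   touches bits that the final shift discards again.  */
static unsigned long long
via_int (unsigned long long x)
{
  return ((unsigned long long) (int) (x >> 32)) << 32;
}

int
main (void)
{
  unsigned long long tests[]
    = { 0, 1, 0xffffffffULL, 0xffffffff00000000ULL,
	0x8000000000000000ULL, 0xdeadbeefcafebabeULL };
  for (int i = 0; i < 6; i++)
    {
      unsigned long long masked = tests[i] & (-1ULL << 32);
      assert (via_unsigned (tests[i]) == masked);
      assert (via_int (tests[i]) == masked);
    }
  return 0;
}

Whatever the intermediate conversion does to the upper 32 bits, sign extension included, is discarded by the final << 32, which is exactly why the simplifier may ignore them.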
Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-01-03  Jakub Jelinek  <ja...@redhat.com>

	PR tree-optimization/93118
	* match.pd ((x >> c) << c -> x & (-1<<c)): Add nop_convert?.
	Add new simplifier with two intermediate conversions.

	* gcc.dg/tree-ssa/pr93118.c: New test.

--- gcc/match.pd.jj	2020-01-01 12:15:50.000000000 +0100
+++ gcc/match.pd	2020-01-02 10:00:49.213022408 +0100
@@ -2738,9 +2738,26 @@ (define_operator_list COND_TERNARY
 
 /* Optimize (x >> c) << c into x & (-1<<c).  */
 (simplify
- (lshift (rshift @0 INTEGER_CST@1) @1)
+ (lshift (nop_convert? (rshift @0 INTEGER_CST@1)) @1)
  (if (wi::ltu_p (wi::to_wide (@1), element_precision (type)))
-  (bit_and @0 (lshift { build_minus_one_cst (type); } @1))))
+  /* It doesn't matter if the right shift is arithmetic or logical.  */
+  (bit_and (view_convert @0) (lshift { build_minus_one_cst (type); } @1))))
+
+(simplify
+ (lshift (convert (convert@2 (rshift @0 INTEGER_CST@1))) @1)
+ (if (wi::ltu_p (wi::to_wide (@1), element_precision (type))
+      /* Allow intermediate conversion to integral type with whatever sign, as
+	 long as the low TYPE_PRECISION (type)
+	 - TYPE_PRECISION (TREE_TYPE (@2)) bits are preserved.  */
+      && INTEGRAL_TYPE_P (type)
+      && INTEGRAL_TYPE_P (TREE_TYPE (@2))
+      && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+      && TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (@0))
+      && (TYPE_PRECISION (TREE_TYPE (@2)) >= TYPE_PRECISION (type)
+	  || wi::geu_p (wi::to_wide (@1),
+			TYPE_PRECISION (type)
+			- TYPE_PRECISION (TREE_TYPE (@2)))))
+  (bit_and (convert @0) (lshift { build_minus_one_cst (type); } @1))))
 
 /* Optimize (x << c) >> c into x & ((unsigned)-1 >> c) for unsigned
    types.  */
--- gcc/testsuite/gcc.dg/tree-ssa/pr93118.c.jj	2020-01-02 09:58:21.186274254 +0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr93118.c	2020-01-02 09:57:44.959825348 +0100
@@ -0,0 +1,45 @@
+/* PR tree-optimization/93118 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not ">>" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */
+
+#if __SIZEOF_LONG_LONG__ == 8 && __SIZEOF_INT__ == 4 && __CHAR_BIT__ == 8
+unsigned long long
+foo (unsigned long long a)
+{
+  unsigned long long b = a >> 32;
+  int c = b;
+  unsigned long long d = c;
+  return d << 32;
+}
+
+unsigned long long
+bar (unsigned long long a)
+{
+  unsigned long long b = a >> 32;
+  unsigned c = b;
+  unsigned long long d = c;
+  return d << 32;
+}
+
+unsigned long long
+baz (long long a)
+{
+  long long b = a >> 32;
+  unsigned long long c = b;
+  return c << 32;
+}
+
+typedef unsigned V __attribute__((vector_size (2 * sizeof (int))));
+typedef int W __attribute__((vector_size (2 * sizeof (int))));
+
+void
+quux (W *w, V *v)
+{
+  W a = (W) (*v >> 16);
+  *w = a << 16;
+}
+#else
+int i;
+#endif

	Jakub
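P.S. For anyone wondering why the second simplifier needs the wi::geu_p guard on the shift count, here is a standalone sketch (not part of the patch; the 16-bit short intermediate is purely for illustration, and it again assumes GCC's modulo semantics for the narrowing conversion). With TYPE_PRECISION (type) == 64 and a 16-bit intermediate type, folding is only valid for shift counts of at least 64 - 16 = 48:

#include <stdio.h>

int
main (void)
{
  unsigned long long x = 0xdeadbeef00000000ULL;

  /* c == 32 < 48: truncation to short destroys bits 48..63 of the
     would-be result, so this must not fold to x & (-1ULL << 32).  */
  unsigned long long c32 = ((unsigned long long) (short) (x >> 32)) << 32;
  printf ("%d\n", c32 == (x & (-1ULL << 32)));	/* Prints 0.  */

  /* c == 48 >= 64 - 16: every bit that survives the mask also survives
     the truncation, so folding to x & (-1ULL << 48) is valid.  */
  unsigned long long c48 = ((unsigned long long) (short) (x >> 48)) << 48;
  printf ("%d\n", c48 == (x & (-1ULL << 48)));	/* Prints 1.  */

  return 0;
}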