Hi! The 4 testcases below weren't vectorized because, while tree-vect-data-refs.c now allows more forms of simd lane access, scan_operand_equal_p didn't allow those forms to be combined.
Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk. 2019-07-08 Jakub Jelinek <ja...@redhat.com> * tree-vect-stmts.c (scan_operand_equal_p): Look through MEM_REF with SSA_NAME address of POINTER_PLUS_EXPR. Handle MULT_EXPR and casts in offset when different, both through gimple stmts and through trees. Rewritten using loops to minimize code duplication for each operand. * g++.dg/vect/simd-6.cc: Replace xfail with target x86. * g++.dg/vect/simd-9.cc: Likewise. * testsuite/libgomp.c++/scan-13.C: Replace xfail with target x86. * testsuite/libgomp.c++/scan-16.C: Likewise. --- gcc/tree-vect-stmts.c.jj 2019-07-04 09:24:28.595303590 +0200 +++ gcc/tree-vect-stmts.c 2019-07-08 20:59:52.376285636 +0200 @@ -6334,30 +6334,88 @@ get_group_alias_ptr_type (stmt_vec_info static bool scan_operand_equal_p (tree ref1, tree ref2) { - machine_mode mode1, mode2; - poly_int64 bitsize1, bitsize2, bitpos1, bitpos2; - tree offset1, offset2; - int unsignedp1, unsignedp2, reversep1, reversep2; - int volatilep1 = 0, volatilep2 = 0; - tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1, - &mode1, &unsignedp1, &reversep1, - &volatilep1); - tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2, - &mode2, &unsignedp2, &reversep2, - &volatilep2); - if (reversep1 || reversep2 || volatilep1 || volatilep2) - return false; - if (!operand_equal_p (base1, base2, 0)) - return false; - if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0)) - return false; - if (maybe_ne (bitsize1, bitsize2)) + tree ref[2] = { ref1, ref2 }; + poly_int64 bitsize[2], bitpos[2]; + tree offset[2], base[2]; + for (int i = 0; i < 2; ++i) + { + machine_mode mode; + int unsignedp, reversep, volatilep = 0; + base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i], + &offset[i], &mode, &unsignedp, + &reversep, &volatilep); + if (reversep || volatilep || maybe_ne (bitpos[i], 0)) + return false; + if (TREE_CODE (base[i]) == MEM_REF + && offset[i] == NULL_TREE + 
&& TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0)); + if (is_gimple_assign (def_stmt) + && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR + && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR + && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME) + { + if (maybe_ne (mem_ref_offset (base[i]), 0)) + return false; + base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0); + offset[i] = gimple_assign_rhs2 (def_stmt); + } + } + } + + if (!operand_equal_p (base[0], base[1], 0)) return false; - if (offset1 != offset2 - && (!offset1 - || !offset2 - || !operand_equal_p (offset1, offset2, 0))) + if (maybe_ne (bitsize[0], bitsize[1])) return false; + if (offset[0] != offset[1]) + { + if (!offset[0] || !offset[1]) + return false; + if (!operand_equal_p (offset[0], offset[1], 0)) + { + tree step[2]; + for (int i = 0; i < 2; ++i) + { + step[i] = integer_one_node; + if (TREE_CODE (offset[i]) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); + if (is_gimple_assign (def_stmt) + && gimple_assign_rhs_code (def_stmt) == MULT_EXPR + && (TREE_CODE (gimple_assign_rhs2 (def_stmt)) + == INTEGER_CST)) + { + step[i] = gimple_assign_rhs2 (def_stmt); + offset[i] = gimple_assign_rhs1 (def_stmt); + } + } + else if (TREE_CODE (offset[i]) == MULT_EXPR) + { + step[i] = TREE_OPERAND (offset[i], 1); + offset[i] = TREE_OPERAND (offset[i], 0); + } + tree rhs1 = NULL_TREE; + if (TREE_CODE (offset[i]) == SSA_NAME) + { + gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]); + if (gimple_assign_cast_p (def_stmt)) + rhs1 = gimple_assign_rhs1 (def_stmt); + } + else if (CONVERT_EXPR_P (offset[i])) + rhs1 = TREE_OPERAND (offset[i], 0); + if (rhs1 + && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) + && INTEGRAL_TYPE_P (TREE_TYPE (offset[i])) + && (TYPE_PRECISION (TREE_TYPE (offset[i])) + >= TYPE_PRECISION (TREE_TYPE (rhs1)))) + offset[i] = rhs1; + } + if (!operand_equal_p (offset[0], offset[1], 0) + || 
!operand_equal_p (step[0], step[1], 0)) + return false; + } + } return true; } --- gcc/testsuite/g++.dg/vect/simd-6.cc.jj 2019-06-21 08:47:04.176673236 +0200 +++ gcc/testsuite/g++.dg/vect/simd-6.cc 2019-07-08 20:42:18.599409663 +0200 @@ -1,7 +1,7 @@ // { dg-require-effective-target size32plus } // { dg-additional-options "-fopenmp-simd" } // { dg-additional-options "-mavx" { target avx_runtime } } -// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } #include "../../gcc.dg/vect/tree-vect.h" --- gcc/testsuite/g++.dg/vect/simd-9.cc.jj 2019-06-21 08:47:04.176673236 +0200 +++ gcc/testsuite/g++.dg/vect/simd-9.cc 2019-07-08 20:42:33.378169789 +0200 @@ -1,7 +1,7 @@ // { dg-require-effective-target size32plus } // { dg-additional-options "-fopenmp-simd" } // { dg-additional-options "-mavx" { target avx_runtime } } -// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } } +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } #include "../../gcc.dg/vect/tree-vect.h" --- libgomp/testsuite/libgomp.c++/scan-13.C.jj 2019-07-06 09:51:48.405289370 +0200 +++ libgomp/testsuite/libgomp.c++/scan-13.C 2019-07-08 20:45:43.957076490 +0200 @@ -1,7 +1,7 @@ // { dg-require-effective-target size32plus } // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } // { dg-additional-options "-mavx" { target avx_runtime } } -// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { xfail *-*-* } } } +// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } extern "C" void abort (); --- libgomp/testsuite/libgomp.c++/scan-16.C.jj 2019-07-06 09:51:48.406289354 +0200 +++ libgomp/testsuite/libgomp.c++/scan-16.C 2019-07-08 20:45:56.709869498 +0200 @@ -1,7 +1,7 @@ // { dg-require-effective-target 
size32plus } // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } // { dg-additional-options "-mavx" { target avx_runtime } } -// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { xfail *-*-* } } } +// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } extern "C" void abort (); Jakub