On Tue, Jul 9, 2019 at 12:13 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> Hi!
>
> The 4 testcases below weren't vectorized, because while
> tree-vect-data-refs.c now allows more forms of simd lane access,
> scan_operand_equal_p didn't allow combining them together.
>
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
> committed to trunk.
>
> 2019-07-08  Jakub Jelinek  <ja...@redhat.com>
>
>         * tree-vect-stmts.c (scan_operand_equal_p): Look through MEM_REF
>         with SSA_NAME address of POINTER_PLUS_EXPR.  Handle MULT_EXPR
>         and casts in offset when different, both through gimple stmts
>         and through trees.  Rewritten using loops to minimize code
>         duplication for each operand.
>
>         * g++.dg/vect/simd-6.cc: Replace xfail with target x86.
>         * g++.dg/vect/simd-9.cc: Likewise.
>
>         * testsuite/libgomp.c++/scan-13.C: Replace xfail with target x86.
>         * testsuite/libgomp.c++/scan-16.C: Likewise.
>
> --- gcc/tree-vect-stmts.c.jj    2019-07-04 09:24:28.595303590 +0200
> +++ gcc/tree-vect-stmts.c       2019-07-08 20:59:52.376285636 +0200
> @@ -6334,30 +6334,88 @@ get_group_alias_ptr_type (stmt_vec_info
>  static bool
>  scan_operand_equal_p (tree ref1, tree ref2)
>  {
> -  machine_mode mode1, mode2;
> -  poly_int64 bitsize1, bitsize2, bitpos1, bitpos2;
> -  tree offset1, offset2;
> -  int unsignedp1, unsignedp2, reversep1, reversep2;
> -  int volatilep1 = 0, volatilep2 = 0;
> -  tree base1 = get_inner_reference (ref1, &bitsize1, &bitpos1, &offset1,
> -                                    &mode1, &unsignedp1, &reversep1,
> -                                    &volatilep1);
> -  tree base2 = get_inner_reference (ref2, &bitsize2, &bitpos2, &offset2,
> -                                    &mode2, &unsignedp2, &reversep2,
> -                                    &volatilep2);
> -  if (reversep1 || reversep2 || volatilep1 || volatilep2)
> -    return false;
> -  if (!operand_equal_p (base1, base2, 0))
> -    return false;
> -  if (maybe_ne (bitpos1, 0) || maybe_ne (bitpos2, 0))
> -    return false;
> -  if (maybe_ne (bitsize1, bitsize2))
> +  tree ref[2] = { ref1, ref2 };
> +  poly_int64 bitsize[2], bitpos[2];
> +  tree offset[2], base[2];
> +  for (int i = 0; i < 2; ++i)
> +    {
> +      machine_mode mode;
> +      int unsignedp, reversep, volatilep = 0;
> +      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
> +                                     &offset[i], &mode, &unsignedp,
> +                                     &reversep, &volatilep);
> +      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
> +        return false;
> +      if (TREE_CODE (base[i]) == MEM_REF
> +          && offset[i] == NULL_TREE
> +          && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
> +        {
> +          gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
> +          if (is_gimple_assign (def_stmt)
> +              && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
> +              && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
> +              && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
> +            {
> +              if (maybe_ne (mem_ref_offset (base[i]), 0))
> +                return false;
> +              base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
> +              offset[i] = gimple_assign_rhs2 (def_stmt);
> +            }
> +        }
> +    }
> +
> +  if (!operand_equal_p (base[0], base[1], 0))
>      return false;
> -  if (offset1 != offset2
> -      && (!offset1
> -          || !offset2
> -          || !operand_equal_p (offset1, offset2, 0)))
> +  if (maybe_ne (bitsize[0], bitsize[1]))
>      return false;
> +  if (offset[0] != offset[1])
> +    {
> +      if (!offset[0] || !offset[1])
> +        return false;
> +      if (!operand_equal_p (offset[0], offset[1], 0))
> +        {
> +          tree step[2];
> +          for (int i = 0; i < 2; ++i)
> +            {
> +              step[i] = integer_one_node;
> +              if (TREE_CODE (offset[i]) == SSA_NAME)
> +                {
> +                  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
> +                  if (is_gimple_assign (def_stmt)
> +                      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
> +                      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
> +                          == INTEGER_CST))
> +                    {
> +                      step[i] = gimple_assign_rhs2 (def_stmt);
> +                      offset[i] = gimple_assign_rhs1 (def_stmt);
> +                    }
> +                }
> +              else if (TREE_CODE (offset[i]) == MULT_EXPR)
> +                {
> +                  step[i] = TREE_OPERAND (offset[i], 1);
> +                  offset[i] = TREE_OPERAND (offset[i], 0);
> +                }
> +              tree rhs1 = NULL_TREE;
> +              if (TREE_CODE (offset[i]) == SSA_NAME)
> +                {
> +                  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
> +                  if (gimple_assign_cast_p (def_stmt))
> +                    rhs1 = gimple_assign_rhs1 (def_stmt);
> +                }
> +              else if (CONVERT_EXPR_P (offset[i]))
> +                rhs1 = TREE_OPERAND (offset[i], 0);
> +              if (rhs1
> +                  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
> +                  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
> +                  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
> +                      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
> +                offset[i] = rhs1;
> +            }
> +          if (!operand_equal_p (offset[0], offset[1], 0)
> +              || !operand_equal_p (step[0], step[1], 0))
> +            return false;
Seeing all this, it might be easier to use

  tree_to_aff_combination_expand (ref1, TREE_TYPE (ref1), &aff1...);
  tree_to_aff_combination_expand (ref2, TREE_TYPE (ref2), &aff2...);
  aff_combination_scale (&aff2, -1);
  aff_combination_add (&aff1, &aff2);
  return aff_combination_zero_p (&aff1);

where you probably need to feed it the ADDR_EXPR of ref1/ref2 (or add an
address_of_tree_to_aff_combination helper doing that).  Conversions are
where that might fail, though ...

Richard.

> +        }
> +    }
>    return true;
>  }
>
> --- gcc/testsuite/g++.dg/vect/simd-6.cc.jj      2019-06-21 08:47:04.176673236 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-6.cc 2019-07-08 20:42:18.599409663 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-fopenmp-simd" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  #include "../../gcc.dg/vect/tree-vect.h"
>
> --- gcc/testsuite/g++.dg/vect/simd-9.cc.jj      2019-06-21 08:47:04.176673236 +0200
> +++ gcc/testsuite/g++.dg/vect/simd-9.cc 2019-07-08 20:42:33.378169789 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-fopenmp-simd" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  #include "../../gcc.dg/vect/tree-vect.h"
>
> --- libgomp/testsuite/libgomp.c++/scan-13.C.jj  2019-07-06 09:51:48.405289370 +0200
> +++ libgomp/testsuite/libgomp.c++/scan-13.C     2019-07-08 20:45:43.957076490 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  extern "C" void abort ();
>
> --- libgomp/testsuite/libgomp.c++/scan-16.C.jj  2019-07-06 09:51:48.406289354 +0200
> +++ libgomp/testsuite/libgomp.c++/scan-16.C     2019-07-08 20:45:56.709869498 +0200
> @@ -1,7 +1,7 @@
>  // { dg-require-effective-target size32plus }
>  // { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" }
>  // { dg-additional-options "-mavx" { target avx_runtime } }
> -// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { xfail *-*-* } } }
> +// { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
>
>  extern "C" void abort ();
>
>
>         Jakub
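
For concreteness, a minimal sketch of what the tree-affine approach
suggested above might look like, using the tree-affine.h entry points
(this assumes tree-affine.h and fold-const.h are included; the function
name scan_operand_equal_p_affine is hypothetical, and the bitsize and
volatile checks done by the committed version are elided here):

  /* Sketch only, not committed code: compare REF1 and REF2 by
     expanding their addresses into affine combinations and testing
     that the difference is zero.  */

  static bool
  scan_operand_equal_p_affine (tree ref1, tree ref2)
  {
    /* Feed the addresses rather than the references themselves.  */
    tree addr1 = build_fold_addr_expr (ref1);
    tree addr2 = build_fold_addr_expr (ref2);

    /* Expansion cache for SSA names, freed once both sides are
       expanded.  */
    hash_map<tree, name_expansion *> *cache = NULL;
    aff_tree aff1, aff2;
    tree_to_aff_combination_expand (addr1, TREE_TYPE (addr1), &aff1, &cache);
    tree_to_aff_combination_expand (addr2, TREE_TYPE (addr2), &aff2, &cache);
    free_affine_expand_cache (&cache);

    /* Compute aff1 - aff2; it is zero iff the two addresses are
       provably equal.  */
    aff_combination_scale (&aff2, -1);
    aff_combination_add (&aff1, &aff2);
    return aff_combination_zero_p (&aff1);
  }

As the reply above notes, conversions in the offset computation may keep
the two expansions from canceling even when the addresses are equal at
run time, which is where this simpler approach could still lose cases
that the committed pattern matching handles.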