On Wed, 3 Jul 2019, Jakub Jelinek wrote: > Hi! > > As mentioned in the PR, I'm afraid we can't easily move the scatter/gather > verification from vect_analyze_data_refs to vectorizable_{load,store}, > because we need to process_use in between on the gsinfo.offset to determine > what statements need to be vectorized and that can be only determined with > the successful scatter/gather detection.
Hmm. OK, so I guess the only way we could do this would be to support open-coded gather/scatter code generation and simply fall back to that (mark the stmt for gather/scatter but also set the strided flag for example, triggering index element extraction and then scalar loads + vector build togehter with appropriate costing of course). Something that should be done anyway I think (it's in the line of supporting partial loop vectorization, leaving some stmts unvectorized). > The following patch just makes sure that we don't mark failures to handle > scatter/gather as fatal, which means if there are multiple vectorization > factors, if there is a scatter/gather failure (but some scatter/gather is > supported, no scatter/gather support altogether is fatal), we keep trying > finding another suitable vectorization factors. > > This matters e.g. on AVX512F without AVX512VL, where we can scatter only > when using 512-bit vectors but not other sizes; if we try say 256-bit > vectors first without simdlen, it would mean we don't vectorize even if we > could (with 512-bit vectors), if we try 512-bit vectors first with simdlen, > vectorization succeeds for that, but gets fatal when retrying with 256-bit > or 128-bit vectors to see if that doesn't match the simdlen and we have > asserts to make sure that the fatal failures are either for all vector sizes > or none. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? OK for now. Thanks, Richard. > 2019-07-03 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/91033 > * tree-vectorizer.h (vect_mark_stmts_to_be_vectorized, > vect_analyze_data_refs): Add bool * arguments. > * tree-vect-data-refs.c (vect_analyze_data_refs): Add fatal argument, > if failure is due to scatter/gather, set *fatal to false if non-NULL. > * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Likewise. > * tree-vect-loop.c (vect_analyze_loop_2): Adjust > vect_mark_stmts_to_be_vectorized and vect_analyze_data_refs callers. > * tree-vect-slp.c (vect_slp_analyze_bb_1): Adjust > vect_analyze_data_refs caller. > > * gcc.target/i386/pr91033.c: New test. > > --- gcc/tree-vectorizer.h.jj 2019-06-21 08:47:04.169673346 +0200 > +++ gcc/tree-vectorizer.h 2019-07-02 18:35:45.759141160 +0200 > @@ -1501,7 +1501,7 @@ extern unsigned record_stmt_cost (stmt_v > extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *); > extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *, > gimple_stmt_iterator *); > -extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info); > +extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *); > extern tree vect_get_store_rhs (stmt_vec_info); > extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum > vect_def_type); > extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL); > @@ -1559,7 +1559,7 @@ extern bool vect_check_gather_scatter (s > gather_scatter_info *); > extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, > vec<data_reference_p> *); > -extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *); > +extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); > extern void vect_record_base_alignments (vec_info *); > extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, > tree, > tree *, gimple_stmt_iterator *, > --- gcc/tree-vect-data-refs.c.jj 2019-06-21 23:37:57.002962811 +0200 > +++ gcc/tree-vect-data-refs.c 2019-07-02 18:34:03.225722070 +0200 > @@ -4160,7 +4160,7 @@ vect_find_stmt_data_reference (loop_p lo > */ > > opt_result > -vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) > +vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal) > { > struct loop *loop = NULL; > unsigned int i; > @@ -4386,12 +4386,16 @@ vect_analyze_data_refs (vec_info *vinfo, > as_a <loop_vec_info> (vinfo), > &gs_info) > || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset))) > - return opt_result::failure_at > - (stmt_info->stmt, > - (gatherscatter == GATHER) ? > - "not vectorized: not suitable for gather load %G" : > - "not vectorized: not suitable for scatter store %G", > - stmt_info->stmt); > + { > + if (fatal) > + *fatal = false; > + return opt_result::failure_at > + (stmt_info->stmt, > + (gatherscatter == GATHER) > + ? "not vectorized: not suitable for gather load %G" > + : "not vectorized: not suitable for scatter store %G", > + stmt_info->stmt); > + } > STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter; > } > } > --- gcc/tree-vect-stmts.c.jj 2019-06-27 23:22:40.801471237 +0200 > +++ gcc/tree-vect-stmts.c 2019-07-02 18:35:35.244303277 +0200 > @@ -608,7 +608,7 @@ process_use (stmt_vec_info stmt_vinfo, t > This pass detects such stmts. */ > > opt_result > -vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) > +vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) > { > struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); > @@ -778,7 +778,11 @@ vect_mark_stmts_to_be_vectorized (loop_v > = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant, > &worklist, true); > if (!res) > - return res; > + { > + if (fatal) > + *fatal = false; > + return res; > + } > } > } /* while worklist */ > > --- gcc/tree-vect-loop.c.jj 2019-06-27 23:22:14.380884386 +0200 > +++ gcc/tree-vect-loop.c 2019-07-02 18:36:41.581280458 +0200 > @@ -1901,7 +1901,7 @@ vect_analyze_loop_2 (loop_vec_info loop_ > /* Analyze the data references and also adjust the minimal > vectorization factor according to the loads and stores. */ > > - ok = vect_analyze_data_refs (loop_vinfo, &min_vf); > + ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); > if (!ok) > { > if (dump_enabled_p ()) > @@ -1932,7 +1932,7 @@ vect_analyze_loop_2 (loop_vec_info loop_ > > /* Data-flow analysis to detect stmts that do not need to be vectorized. > */ > > - ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); > + ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); > if (!ok) > { > if (dump_enabled_p ()) > --- gcc/tree-vect-slp.c.jj 2019-06-05 09:34:02.393372307 +0200 > +++ gcc/tree-vect-slp.c 2019-07-02 18:37:15.150762867 +0200 > @@ -2861,7 +2861,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_itera > > /* Analyze the data references. */ > > - if (!vect_analyze_data_refs (bb_vinfo, &min_vf)) > + if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > --- gcc/testsuite/gcc.target/i386/pr91033.c.jj 2019-07-02 > 19:49:02.300365110 +0200 > +++ gcc/testsuite/gcc.target/i386/pr91033.c 2019-07-02 19:48:56.219458810 > +0200 > @@ -0,0 +1,15 @@ > +/* PR tree-optimization/91033 */ > +/* { dg-do compile { target pthread } } */ > +/* { dg-options "-march=knl -O2 -fopenmp-simd -ftree-parallelize-loops=2" } > */ > + > +#define N 1024 > +int a[N]; > + > +void > +foo (void) > +{ > + int i; > + #pragma omp simd simdlen (4) > + for (i = 0; i < N; ++i) > + a[i] = a[i] + 1; > +} > > Jakub > -- Richard Biener <rguent...@suse.de> SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)