On Wed, 19 Oct 2011, Jakub Jelinek wrote:

> Hi!
>
> Similarly to casts of bool to integer, stores into bool arrays can be
> handled as well.  We just need to ensure tree-vect-data-refs.c doesn't
> reject vectorization before tree-vect-patterns.c has a chance to
> optimize it.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
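For reference, a minimal sketch of the kind of loop this is about (my
reading of the patch, not literal vectorizer output):

  #define N 1024
  float a[N], b[N];
  _Bool k[N];

  void
  foo (void)
  {
    int i;
    for (i = 0; i < N; ++i)
      k[i] = a[i] < b[i];  /* store into bool memory */
  }

The pattern recognizer then replaces the bool store by a store of an
integral type with the same mode through a VIEW_CONVERT_EXPR, roughly
VIEW_CONVERT_EXPR<unsigned char>(k[i]) = patt_tmp, which the rest of
the vectorizer can handle.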
Ok with ...

> 2011-10-19  Jakub Jelinek  <ja...@redhat.com>
>
> 	PR tree-optimization/50596
> 	* tree-vect-stmts.c (vect_mark_relevant): Only use
> 	FOR_EACH_IMM_USE_FAST if lhs is SSA_NAME.
> 	(vectorizable_store): If is_pattern_stmt_p look through
> 	VIEW_CONVERT_EXPR on lhs.
> 	* tree-vect-patterns.c (vect_recog_bool_pattern): Optimize
> 	also stores into bool memory in addition to casts from bool
> 	to integral types.
> 	(vect_mark_pattern_stmts): If pattern_stmt already has vinfo
> 	created, don't create it again.
> 	* tree-vect-data-refs.c (vect_analyze_data_refs): For stores
> 	into bool memory use vectype for integral type corresponding
> 	to bool's mode.
> 	* tree-vect-loop.c (vect_determine_vectorization_factor): Give up
> 	if a store into bool memory hasn't been replaced by the pattern
> 	recognizer.
>
> 	* gcc.dg/vect/vect-cond-10.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj	2011-10-18 23:52:07.000000000 +0200
> +++ gcc/tree-vect-stmts.c	2011-10-19 14:19:00.000000000 +0200
> @@ -159,19 +159,20 @@ vect_mark_relevant (VEC(gimple,heap) **w
>         /* This use is out of pattern use, if LHS has other uses that are
>            pattern uses, we should mark the stmt itself, and not the pattern
>            stmt.  */
> -      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> -	{
> -	  if (is_gimple_debug (USE_STMT (use_p)))
> -	    continue;
> -	  use_stmt = USE_STMT (use_p);
> +      if (TREE_CODE (lhs) == SSA_NAME)
> +	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
> +	  {
> +	    if (is_gimple_debug (USE_STMT (use_p)))
> +	      continue;
> +	    use_stmt = USE_STMT (use_p);
>
> -	  if (vinfo_for_stmt (use_stmt)
> -	      && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> -	    {
> -	      found = true;
> -	      break;
> -	    }
> -	}
> +	    if (vinfo_for_stmt (use_stmt)
> +		&& STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
> +	      {
> +		found = true;
> +		break;
> +	      }
> +	  }
>      }
>
>    if (!found)
> @@ -3656,6 +3657,9 @@ vectorizable_store (gimple stmt, gimple_
>      return false;
>
>    scalar_dest = gimple_assign_lhs (stmt);
> +  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
> +      && is_pattern_stmt_p (stmt_info))
> +    scalar_dest = TREE_OPERAND (scalar_dest, 0);
>    if (TREE_CODE (scalar_dest) != ARRAY_REF
>        && TREE_CODE (scalar_dest) != INDIRECT_REF
>        && TREE_CODE (scalar_dest) != COMPONENT_REF

Just change the if () stmt to

  if (!handled_component_p (scalar_dest)
      && TREE_CODE (scalar_dest) != MEM_REF)
    return false;

> --- gcc/tree-vect-patterns.c.jj	2011-10-18 23:52:05.000000000 +0200
> +++ gcc/tree-vect-patterns.c	2011-10-19 13:55:27.000000000 +0200
> @@ -1933,6 +1933,50 @@ vect_recog_bool_pattern (VEC (gimple, he
>        VEC_safe_push (gimple, heap, *stmts, last_stmt);
>        return pattern_stmt;
>      }
> +  else if (rhs_code == SSA_NAME
> +	   && STMT_VINFO_DATA_REF (stmt_vinfo))
> +    {
> +      stmt_vec_info pattern_stmt_info;
> +      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
> +      gcc_assert (vectype != NULL_TREE);
> +      if (!check_bool_pattern (var, loop_vinfo))
> +	return NULL;
> +
> +      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
> +      if (TREE_CODE (lhs) == MEM_REF || TREE_CODE (lhs) == TARGET_MEM_REF)
> +	{
> +	  lhs = copy_node (lhs);

We don't handle TARGET_MEM_REF in vectorizable_store, so no need to do
it here.  In fact, just unconditionally do ...

> +	  TREE_TYPE (lhs) = TREE_TYPE (vectype);
> +	}
> +      else
> +	lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);

... this (wrap it in a V_C_E).  No need to special-case any MEM_REFs.

> +      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))

This should never be false, so you can as well unconditionally build
the conversion stmt.
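I.e. simply (sketch, untested):

  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  gimple cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
  rhs = rhs2;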
> +	{
> +	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> +	  gimple cast_stmt
> +	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
> +	  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = cast_stmt;
> +	  rhs = rhs2;
> +	}
> +      pattern_stmt
> +	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +      STMT_VINFO_DATA_REF (pattern_stmt_info)
> +	= STMT_VINFO_DATA_REF (stmt_vinfo);
> +      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
> +	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
> +      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
> +      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
> +	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
> +      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
> +      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
> +	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
> +      *type_out = vectype;
> +      *type_in = vectype;
> +      VEC_safe_push (gimple, heap, *stmts, last_stmt);
> +      return pattern_stmt;
> +    }
>    else
>      return NULL;
>  }
> @@ -1949,19 +1993,22 @@ vect_mark_pattern_stmts (gimple orig_stm
>    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
>    gimple def_stmt;
>
> -  set_vinfo_for_stmt (pattern_stmt,
> -		      new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
> -  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>    pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
> +  if (pattern_stmt_info == NULL)
> +    {
> +      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +    }
> +  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
>
>    STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
>    STMT_VINFO_DEF_TYPE (pattern_stmt_info)
> -    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
> +    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
>    STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
>    STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
>    STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
>    STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
> -    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
> +    = STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
>    if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
>      {
>        def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
> --- gcc/tree-vect-data-refs.c.jj	2011-09-20 21:43:07.000000000 +0200
> +++ gcc/tree-vect-data-refs.c	2011-10-19 14:37:44.000000000 +0200
> @@ -2752,8 +2752,23 @@ vect_analyze_data_refs (loop_vec_info lo
>
>        /* Set vectype for STMT.  */
>        scalar_type = TREE_TYPE (DR_REF (dr));
> -      STMT_VINFO_VECTYPE (stmt_info) =
> -        get_vectype_for_scalar_type (scalar_type);
> +      STMT_VINFO_VECTYPE (stmt_info)
> +	= get_vectype_for_scalar_type (scalar_type);
> +      if (!STMT_VINFO_VECTYPE (stmt_info)
> +	  && ((TYPE_PRECISION (scalar_type) == 1
> +	       && TYPE_UNSIGNED (scalar_type))
> +	      || TREE_CODE (scalar_type) == BOOLEAN_TYPE)
> +	  && DR_IS_WRITE (dr)
> +	  && loop_vinfo)
> +	{
> +	  /* For bool stores use integral type with the same
> +	     TYPE_MODE, but bigger precision.  vect_recog_bool_pattern
> +	     can transform those into something vectorizable.  */
> +	  unsigned int modesize = GET_MODE_BITSIZE (TYPE_MODE (scalar_type));
> +	  scalar_type = build_nonstandard_integer_type (modesize, 1);
> +	  STMT_VINFO_VECTYPE (stmt_info)
> +	    = get_vectype_for_scalar_type (scalar_type);
> +	}
>        if (!STMT_VINFO_VECTYPE (stmt_info))
>          {
>            if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> --- gcc/tree-vect-loop.c.jj	2011-09-26 14:06:52.000000000 +0200
> +++ gcc/tree-vect-loop.c	2011-10-19 14:49:18.000000000 +0200
> @@ -1,5 +1,5 @@
>  /* Loop Vectorization
> -   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
> +   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
>     Free Software Foundation, Inc.
>     Contributed by Dorit Naishlos <do...@il.ibm.com> and
>     Ira Rosen <i...@il.ibm.com>
> @@ -347,6 +347,28 @@ vect_determine_vectorization_factor (loo
>  	      gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
>  			  || is_pattern_stmt_p (stmt_info));
>  	      vectype = STMT_VINFO_VECTYPE (stmt_info);
> +	      if (STMT_VINFO_DATA_REF (stmt_info))
> +		{
> +		  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> +		  tree scalar_type = TREE_TYPE (DR_REF (dr));
> +		  /* vect_analyze_data_refs will allow bool writes through,
> +		     in order to allow vect_recog_bool_pattern to transform
> +		     those.  If they couldn't be transformed, give up now.  */
> +		  if (((TYPE_PRECISION (scalar_type) == 1
> +			&& TYPE_UNSIGNED (scalar_type))
> +		       || TREE_CODE (scalar_type) == BOOLEAN_TYPE)

Shouldn't it be always possible to vectorize those?  For loads we can
assume the memory contains only 1 or 0 (we assume that for scalar
loads), and for stores we can mask out all other bits explicitly if you
add support for truncating conversions to non-mode precision (in fact,
we could support non-mode-precision vectorization that way, though not
bitfield loads or extending conversions).

So maybe that obsoletes my conditional approval ;)  Can you investigate
whether the above would work?

Thanks,
Richard.
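P.S.: a scalar model of the store-side masking I have in mind
(illustrative only, function name made up):

  /* Truncate a wider value to a 1-bit bool location: mask out all bits
     but the lowest explicitly, rather than assuming the value is
     already 0 or 1.  */
  unsigned char
  bool_store_value (unsigned int wide)
  {
    return (unsigned char) (wide & 1);
  }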
> +		      && DR_IS_WRITE (dr)
> +		      && !is_pattern_stmt_p (stmt_info))
> +		    {
> +		      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
> +			{
> +			  fprintf (vect_dump,
> +				   "not vectorized: unsupported data-type ");
> +			  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
> +			}
> +		      return false;
> +		    }
> +		}
>  	    }
>  	  else
>  	    {
> --- gcc/testsuite/gcc.dg/vect/vect-cond-10.c.jj	2011-10-19 15:54:42.000000000 +0200
> +++ gcc/testsuite/gcc.dg/vect/vect-cond-10.c	2011-10-19 16:00:22.000000000 +0200
> @@ -0,0 +1,165 @@
> +/* { dg-require-effective-target vect_cond_mixed } */
> +
> +#include "tree-vect.h"
> +
> +#define N 1024
> +float a[N], b[N], c[N], d[N];
> +_Bool k[N];
> +
> +__attribute__((noinline, noclone)) void
> +f1 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f2 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f3 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      k[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f4 (void)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f5 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x & y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f6 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) & (c[i] < d[i]);
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f7 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    {
> +      int x = a[i] < b[i];
> +      int y = c[i] < d[i];
> +      p[i] = x | y;
> +    }
> +}
> +
> +__attribute__((noinline, noclone)) void
> +f8 (_Bool *p)
> +{
> +  int i;
> +  for (i = 0; i < N; ++i)
> +    p[i] = (a[i] < b[i]) | (c[i] < d[i]);
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i = 0; i < N; i++)
> +    {
> +      switch (i % 9)
> +	{
> +	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
> +	case 1: a[i] = 0; b[i] = 0; break;
> +	case 2: a[i] = i + 1; b[i] = - i - 1; break;
> +	case 3: a[i] = i; b[i] = i + 7; break;
> +	case 4: a[i] = i; b[i] = i; break;
> +	case 5: a[i] = i + 16; b[i] = i + 3; break;
> +	case 6: a[i] = - i - 5; b[i] = - i; break;
> +	case 7: a[i] = - i; b[i] = - i; break;
> +	case 8: a[i] = - i; b[i] = - i - 7; break;
> +	}
> +    }
> +  for (i = 0; i < N; i++)
> +    {
> +      switch ((i / 9) % 3)
> +	{
> +	case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
> +	case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
> +	case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
> +	}
> +    }
> +  f1 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f2 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f3 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f4 ();
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f5 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f6 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f7 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +  f8 (k);
> +  for (i = 0; i < N; i++)
> +    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
> +      abort ();
> +  __builtin_memset (k, 0, sizeof (k));
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 8 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
>
> 	Jakub

-- 
Richard Guenther <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer