On 09/28/2011 11:53 AM, Richard Guenther wrote: > On Wed, Sep 28, 2011 at 11:34 AM, Tom de Vries <tom_devr...@mentor.com> wrote: >> Richard, >> >> I got a patch for PR50527. >> >> The patch prevents the alignment of vla-related allocas to be set to >> BIGGEST_ALIGNMENT in ccp. The alignment may turn out smaller after folding >> the alloca. >> >> Bootstrapped and regtested on x86_64. >> >> OK for trunk? > > Hmm. As gfortran with -fstack-arrays uses VLAs it's probably bad that > the vectorizer then will no longer see that the arrays are properly aligned. > > I'm not sure what the best thing to do is here, other than trying to record > the alignment requirement of the VLA somewhere. > > Forcing the alignment of the alloca replacement decl to BIGGEST_ALIGNMENT > has the issue that it will force stack-realignment which isn't free (and the > point was to make the decl cheaper than the alloca). But that might > possibly be the better choice. > > Any other thoughts?
How about the approach in this (untested) patch? Using the DECL_ALIGN of the vla for the new array prevents stack realignment for folded vla-allocas, also for large vlas. This will not help in vectorizing large folded vla-allocas, but I think it's not reasonable to expect BIGGEST_ALIGNMENT when writing a vla (although that has been the case up until we started to fold). If you want to trigger vectorization for a vla, you can still use the aligned attribute on the declaration. Still, the unfolded vla-allocas will have BIGGEST_ALIGNMENT, also without using an attribute on the decl. This patch exploits this by setting it at the end of the 3rd pass_ccp, renamed to pass_ccp_last. This is not very effective in propagation though, because although the ptr_info of the lhs is propagated via copy_prop afterwards, it's not propagated anymore via ccp. Another way to do this would be to set BIGGEST_ALIGNMENT at the end of ccp2 and not fold during ccp3. Thanks, - Tom
Index: gcc/tree-pass.h =================================================================== --- gcc/tree-pass.h (revision 179043) +++ gcc/tree-pass.h (working copy) @@ -389,6 +389,7 @@ extern struct gimple_opt_pass pass_iv_op extern struct gimple_opt_pass pass_tree_loop_done; extern struct gimple_opt_pass pass_ch; extern struct gimple_opt_pass pass_ccp; +extern struct gimple_opt_pass pass_ccp_last; extern struct gimple_opt_pass pass_phi_only_cprop; extern struct gimple_opt_pass pass_build_ssa; extern struct gimple_opt_pass pass_build_alias; Index: gcc/tree-ssa-ccp.c =================================================================== --- gcc/tree-ssa-ccp.c (revision 179210) +++ gcc/tree-ssa-ccp.c (working copy) @@ -167,8 +167,12 @@ typedef struct prop_value_d prop_value_t doing the store). */ static prop_value_t *const_val; +/* Indicates whether we're in pass_ccp_last. */ +static bool ccp_last; + static void canonicalize_float_value (prop_value_t *); static bool ccp_fold_stmt (gimple_stmt_iterator *); +static bool fold_builtin_alloca_for_var_p (gimple); /* Dump constant propagation value VAL to file OUTF prefixed by PREFIX. */ @@ -813,6 +817,15 @@ ccp_finalize (void) || !POINTER_TYPE_P (TREE_TYPE (name))) continue; + if (ccp_last && !SSA_NAME_IS_DEFAULT_DEF (name) + && gimple_call_alloca_for_var_p (SSA_NAME_DEF_STMT (name)) + && !fold_builtin_alloca_for_var_p (SSA_NAME_DEF_STMT (name))) + { + pi = get_ptr_info (name); + pi->align = BIGGEST_ALIGNMENT / BITS_PER_UNIT; + continue; + } + val = get_value (name); if (val->lattice_val != CONSTANT || TREE_CODE (val->value) != INTEGER_CST) @@ -1492,6 +1505,7 @@ evaluate_stmt (gimple stmt) tree simplified = NULL_TREE; ccp_lattice_t likelyvalue = likely_value (stmt); bool is_constant = false; + unsigned int align; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1632,10 +1646,13 @@ evaluate_stmt (gimple stmt) break; case BUILT_IN_ALLOCA: + align = (gimple_call_alloca_for_var_p (stmt) + ? TYPE_ALIGN (TREE_TYPE (gimple_get_lhs (stmt))) + : BIGGEST_ALIGNMENT); val.lattice_val = CONSTANT; val.value = build_int_cst (TREE_TYPE (gimple_get_lhs (stmt)), 0); val.mask = shwi_to_double_int - (~(((HOST_WIDE_INT) BIGGEST_ALIGNMENT) + (~(((HOST_WIDE_INT) align) / BITS_PER_UNIT - 1)); break; @@ -1685,27 +1702,25 @@ evaluate_stmt (gimple stmt) return val; } -/* Detects a vla-related alloca with a constant argument. Declares fixed-size - array and return the address, if found, otherwise returns NULL_TREE. */ - -static tree -fold_builtin_alloca_for_var (gimple stmt) +static bool +fold_builtin_alloca_for_var_p (gimple stmt) { - unsigned HOST_WIDE_INT size, threshold, n_elem; - tree lhs, arg, block, var, elem_type, array_type; - unsigned int align; + unsigned HOST_WIDE_INT size, threshold; + tree lhs, arg, block; + + gcc_checking_assert (gimple_call_alloca_for_var_p (stmt)); /* Get lhs. */ lhs = gimple_call_lhs (stmt); if (lhs == NULL_TREE) - return NULL_TREE; + return false; /* Detect constant argument. */ arg = get_constant_value (gimple_call_arg (stmt, 0)); if (arg == NULL_TREE || TREE_CODE (arg) != INTEGER_CST || !host_integerp (arg, 1)) - return NULL_TREE; + return false; size = TREE_INT_CST_LOW (arg); @@ -1718,17 +1733,33 @@ fold_builtin_alloca_for_var (gimple stmt && TREE_CODE (BLOCK_SUPERCONTEXT (block)) == FUNCTION_DECL)) threshold /= 10; if (size > threshold) - return NULL_TREE; + return false; + + return true; +} + +/* Detects a vla-related alloca with a constant argument. Declares fixed-size + array and return the address, if found, otherwise returns NULL_TREE. */ + +static tree +fold_builtin_alloca_for_var (gimple stmt) +{ + unsigned HOST_WIDE_INT size, n_elem; + tree lhs, arg, var, elem_type, array_type; + + if (!fold_builtin_alloca_for_var_p (stmt)) + return NULL; + + lhs = gimple_call_lhs (stmt); + arg = get_constant_value (gimple_call_arg (stmt, 0)); + size = TREE_INT_CST_LOW (arg); /* Declare array. */ elem_type = build_nonstandard_integer_type (BITS_PER_UNIT, 1); n_elem = size * 8 / BITS_PER_UNIT; - align = MIN (size * 8, BIGGEST_ALIGNMENT); - if (align < BITS_PER_UNIT) - align = BITS_PER_UNIT; array_type = build_array_type_nelts (elem_type, n_elem); var = create_tmp_var (array_type, NULL); - DECL_ALIGN (var) = align; + DECL_ALIGN (var) = TYPE_ALIGN (TREE_TYPE (TREE_TYPE (SSA_NAME_VAR (lhs)))); { struct ptr_info_def *pi = SSA_NAME_PTR_INFO (lhs); if (pi != NULL && !pi->pt.anything) @@ -2030,6 +2061,19 @@ do_ssa_ccp (void) return 0; } +/* Last invocation of SSA Conditional Constant Propagation. + At the end of the last invocation, we know which vla-allocas will remain + allocas, and we can assume BIGGEST_ALIGNMENT for those. */ + +static unsigned int +do_ssa_ccp_last (void) +{ + unsigned int res; + ccp_last = true; + res = do_ssa_ccp (); + ccp_last = false; + return res; +} static bool gate_ccp (void) @@ -2058,6 +2102,25 @@ struct gimple_opt_pass pass_ccp = } }; +struct gimple_opt_pass pass_ccp_last = +{ + { + GIMPLE_PASS, + "ccp3", /* name */ + gate_ccp, /* gate */ + do_ssa_ccp_last, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_TREE_CCP, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_ssa + | TODO_verify_stmts | TODO_ggc_collect/* todo_flags_finish */ + } +}; /* Try to optimize out __builtin_stack_restore. Optimize it out Index: gcc/gimplify.c =================================================================== --- gcc/gimplify.c (revision 179043) +++ gcc/gimplify.c (working copy) @@ -1321,7 +1321,8 @@ gimplify_vla_decl (tree decl, gimple_seq things: First, it lets the rest of the gimplifier know what replacement to use. Second, it lets the debug info know where to find the value. */ - ptr_type = build_pointer_type (TREE_TYPE (decl)); + ptr_type = build_pointer_type (build_aligned_type (TREE_TYPE (decl), + DECL_ALIGN (decl))); addr = create_tmp_var (ptr_type, get_name (decl)); DECL_IGNORED_P (addr) = 0; t = build_fold_indirect_ref (addr); Index: gcc/passes.c =================================================================== --- gcc/passes.c (revision 179043) +++ gcc/passes.c (working copy) @@ -1321,7 +1321,7 @@ init_optimization_passes (void) NEXT_PASS (pass_forwprop); NEXT_PASS (pass_phiopt); NEXT_PASS (pass_object_sizes); - NEXT_PASS (pass_ccp); + NEXT_PASS (pass_ccp_last); NEXT_PASS (pass_copy_prop); NEXT_PASS (pass_cse_sincos); NEXT_PASS (pass_optimize_bswap);