Hi, I previously committed two patches lowering complex address expressions for IVOPT at http://gcc.gnu.org/ml/gcc-patches/2013-11/msg00546.html and http://gcc.gnu.org/ml/gcc-patches/2013-11/msg01103.html. When bootstrapping GCC, I found there were some peculiar cases like &MEM[ptr+CST] + xxxx, which should be handled too. This patch consists of the two changes below:
1) change in alloc_iv: The original code lowers only top-level complex address expressions like &MEM[ptr+off]. The patch relaxes the check condition in order to also lower expressions like &MEM[ptr+off] + xxx, such as the BASE in the dump below: use 2 generic in statement _595 = &MEM[(void *)&this_prg + 36B] + _594; at position type struct gcov_bucket_type * base (struct gcov_bucket_type *) &MEM[(void *)&this_prg + 36B] + (sizetype) ((unsigned int) (src_i_683 + -1) * 20) step 4294967276 base object (void *) &this_prg related candidates 2) change in tree_to_aff_combination: The function get_inner_reference returns "&MEM[ptr+off]" as the core for input like the memory ADDRESS in the dump below: use 2 address in statement _59 = MEM[(const struct gcov_ctr_summary *)summary_22(D) + 4B].histogram[h_ix_111].min_value; at position MEM[(const struct gcov_ctr_summary *)summary_22(D) + 4B].histogram[h_ix_111].min_value type const gcov_type * base (const gcov_type *) &MEM[(const struct gcov_ctr_summary *)summary_22(D) + 4B] + 36 step 20 base object (void *) summary_22(D) related candidates This base can be further reduced into something like "summary_22(D) + 40B". This change is necessary for the first one, because I am now using tree_to_aff_combination rather than get_inner_reference_aff. Bootstrapped and tested on x86/x86_64/arm. Is it OK? Thanks. bin 2013-11-25 Bin Cheng <bin.ch...@arm.com> * tree-ssa-loop-ivopts.c (contain_complex_addr_expr): New. (alloc_iv): Lower more cases by calling contain_complex_addr_expr and tree_to_aff_combination. * tree-affine.c (tree_to_aff_combination): Handle &MEM[ptr+CST] in core part of complex reference. gcc/testsuite/ChangeLog 2013-11-25 Bin Cheng <bin.ch...@arm.com> * gcc.dg/tree-ssa/ivopts-lower_base.c: New test.
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lower_base.c (revision 0) @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +struct tag1 +{ + int x[100]; + int y; +}; + +struct tag2 +{ + int a; + struct tag1 t1[100]; + int b; +}; + +int +foo (struct tag2 *t2, int len) +{ + int i = 0; + for (i = 0; i < len; i++) + { + (*(struct tag2*)((char *)t2+4)).t1[i].x[len] = len; + } + + return 0; +} +/* { dg-final { scan-tree-dump-not "base .*&MEM\\\[" "ivopts" } } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ Index: gcc/tree-ssa-loop-ivopts.c =================================================================== --- gcc/tree-ssa-loop-ivopts.c (revision 205087) +++ gcc/tree-ssa-loop-ivopts.c (working copy) @@ -924,13 +924,40 @@ determine_base_object (tree expr) } } +/* Return true if complex address expression appears in EXPR. */ + +static bool +contain_complex_addr_expr (tree expr) +{ + bool res = false; + + STRIP_NOPS (expr); + switch (TREE_CODE (expr)) + { + case POINTER_PLUS_EXPR: + case PLUS_EXPR: + case MINUS_EXPR: + res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0)); + res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1)); + break; + + case ADDR_EXPR: + return (!DECL_P (TREE_OPERAND (expr, 0))); + + default: + return false; + } + + return res; +} + /* Allocates an induction variable with given initial value BASE and step STEP for loop LOOP. */ static struct iv * alloc_iv (tree base, tree step) { - tree base_object = base; + tree expr = base; struct iv *iv = XCNEW (struct iv); gcc_assert (step != NULL_TREE); @@ -939,21 +966,17 @@ alloc_iv (tree base, tree step) 1) More accurate cost can be computed for address expressions; 2) Duplicate candidates won't be created for bases in different forms, like &a[0] and &a. 
*/ - STRIP_NOPS (base_object); - if (TREE_CODE (base_object) == ADDR_EXPR - && !DECL_P (TREE_OPERAND (base_object, 0))) + STRIP_NOPS (expr); + if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0))) + || contain_complex_addr_expr (expr)) { aff_tree comb; - double_int size; - base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0), - &comb, &size); - gcc_assert (base_object != NULL_TREE); - base_object = build_fold_addr_expr (base_object); + tree_to_aff_combination (expr, TREE_TYPE (base), &comb); base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb)); } iv->base = base; - iv->base_object = determine_base_object (base_object); + iv->base_object = determine_base_object (base); iv->step = step; iv->biv_p = false; iv->have_use_for = false; Index: gcc/tree-affine.c =================================================================== --- gcc/tree-affine.c (revision 205087) +++ gcc/tree-affine.c (working copy) @@ -328,7 +328,19 @@ tree_to_aff_combination (tree expr, tree type, aff double_int::from_uhwi (bitpos / BITS_PER_UNIT)); core = build_fold_addr_expr (core); if (TREE_CODE (core) == ADDR_EXPR) - aff_combination_add_elt (comb, core, double_int_one); + { + /* Handle &MEM[ptr + CST] in core part of complex reference. */ + if (TREE_CODE (TREE_OPERAND (core, 0)) == MEM_REF) + { + core = TREE_OPERAND (core, 0); + tree_to_aff_combination (TREE_OPERAND (core, 0), type, &tmp); + aff_combination_add (comb, &tmp); + tree_to_aff_combination (TREE_OPERAND (core, 1), sizetype, &tmp); + aff_combination_add (comb, &tmp); + } + else + aff_combination_add_elt (comb, core, double_int_one); + } else { tree_to_aff_combination (core, type, &tmp);