On Thu, 4 Aug 2011, Richard Guenther wrote: > On Wed, 3 Aug 2011, Mikael Morin wrote: > > > Hello, > > > > On Wednesday 03 August 2011 15:47:37 Richard Guenther wrote: > > > Comments? Any idea why reversing the loop would break? > > > > Yes, the list of scalarized expressions has to be created in the same order > > it > > is consumed. Here the scalarized expressions are array indexes to be > > precomputed out of the loop. > > The attached patch seems to work (the interesting part is in > > gfc_walk_variable_expr). > > Ah, thanks. I'll work from there to revise the patch.
The following is almost identical to your proposal. Bootstrapped and tested on x86_64-unknown-linux-gnu, ok for trunk? Thanks, Richard. 2011-08-03 Richard Guenther <rguent...@suse.de> PR fortran/49957 * trans-array.c (add_to_offset): New function. (gfc_conv_array_ref): Build the array index expression in optimally associated order. (gfc_walk_variable_expr): Adjust for the backward walk. * gfortran.dg/vect/O3-pr49957.f: New testcase. Index: gcc/fortran/trans-array.c =================================================================== --- gcc/fortran/trans-array.c (revision 177357) +++ gcc/fortran/trans-array.c (working copy) @@ -2622,6 +2622,22 @@ gfc_conv_tmp_array_ref (gfc_se * se) gfc_advance_se_ss_chain (se); } +/* Add T to the offset pair *OFFSET, *CST_OFFSET. */ + +static void +add_to_offset (tree *cst_offset, tree *offset, tree t) +{ + if (TREE_CODE (t) == INTEGER_CST) + *cst_offset = int_const_binop (PLUS_EXPR, *cst_offset, t); + else + { + if (!integer_zerop (*offset)) + *offset = fold_build2_loc (input_location, PLUS_EXPR, + gfc_array_index_type, *offset, t); + else + *offset = t; + } +} /* Build an array reference. se->expr already holds the array descriptor. This should be either a variable, indirect variable reference or component @@ -2634,7 +2650,7 @@ gfc_conv_array_ref (gfc_se * se, gfc_arr locus * where) { int n; - tree index; + tree offset, cst_offset; tree tmp; tree stride; gfc_se indexse; @@ -2669,10 +2685,12 @@ gfc_conv_array_ref (gfc_se * se, gfc_arr return; } - index = gfc_index_zero_node; + cst_offset = offset = gfc_index_zero_node; + add_to_offset (&cst_offset, &offset, gfc_conv_array_offset (se->expr)); - /* Calculate the offsets from all the dimensions. */ - for (n = 0; n < ar->dimen; n++) + /* Calculate the offsets from all the dimensions. Make sure to associate + the final offset so that we form a chain of loop invariant summands. */ + for (n = ar->dimen - 1; n >= 0; n--) { /* Calculate the index for this dimension. */ gfc_init_se (&indexse, se); @@ -2741,19 +2759,17 @@ gfc_conv_array_ref (gfc_se * se, gfc_arr indexse.expr, stride); /* And add it to the total. */ - index = fold_build2_loc (input_location, PLUS_EXPR, - gfc_array_index_type, index, tmp); + add_to_offset (&cst_offset, &offset, tmp); } - tmp = gfc_conv_array_offset (se->expr); - if (!integer_zerop (tmp)) - index = fold_build2_loc (input_location, PLUS_EXPR, - gfc_array_index_type, index, tmp); + if (!integer_zerop (cst_offset)) + offset = fold_build2_loc (input_location, PLUS_EXPR, + gfc_array_index_type, offset, cst_offset); /* Access the calculated element. */ tmp = gfc_conv_array_data (se->expr); tmp = build_fold_indirect_ref (tmp); - se->expr = gfc_build_array_ref (tmp, index, sym->backend_decl); + se->expr = gfc_build_array_ref (tmp, offset, sym->backend_decl); } @@ -7575,7 +7591,7 @@ gfc_walk_variable_expr (gfc_ss * ss, gfc switch (ar->type) { case AR_ELEMENT: - for (n = 0; n < ar->dimen + ar->codimen; n++) + for (n = ar->dimen + ar->codimen - 1; n >= 0; n--) { newss = gfc_get_ss (); newss->type = GFC_SS_SCALAR; Index: gcc/testsuite/gfortran.dg/vect/O3-pr49957.f =================================================================== --- gcc/testsuite/gfortran.dg/vect/O3-pr49957.f (revision 0) +++ gcc/testsuite/gfortran.dg/vect/O3-pr49957.f (revision 0) @@ -0,0 +1,16 @@ +! { dg-do compile } +! { dg-require-effective-target vect_double } + subroutine shell(nx,ny,nz,q,dq) + implicit none + integer i,j,k,l,nx,ny,nz + real*8 q(5,nx,ny),dq(5,nx,ny) + do j=1,ny + do i=1,nx + do l=1,5 + q(l,i,j)=q(l,i,j)+dq(l,i,j) + enddo + enddo + enddo + return + end +! { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail vect_no_align } } }