The following fixes two testcases in PR62283 which are not vectorized because vectorizing them is deemed not profitable. Indeed it is not profitable if we employ peeling for alignment, but it is (according to the cost model and my naive thinking) if we do not peel.
And it's trivial to see that peeling a loop running 4 times for alignment isn't a very bright idea if the vectorization factor is 4 as well... Thus the following patch adjusts the heuristics deciding whether to do peeling, to avoid peeling if it surely (or likely) results in a vector loop with zero iterations. For the likely case I made sure that we always end up with at least one vectorized iteration for peeling to be considered profitable (which means at least 2 * VF - 1 iterations, since when peeling for unknown alignment we also need an epilogue loop for the remaining iterations). Unsurprisingly, this requires some fiddling with existing testcases that we now vectorize. Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Richard. 2014-08-28 Richard Biener <rguent...@suse.de> PR tree-optimization/62283 * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not peel loops for alignment where the vector loop likely doesn't run at least VF times. * gfortran.dg/vect/pr62283.f: New testcase. * gcc.dg/tree-ssa/cunroll-5.c: Adjust. * gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise. * gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise. * gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise. * gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise. * gcc.dg/vect/vect-33.c: Likewise. Index: gcc/tree-vect-data-refs.c =================================================================== *** gcc/tree-vect-data-refs.c.orig 2014-08-28 14:31:22.991260098 +0200 --- gcc/tree-vect-data-refs.c 2014-08-28 14:31:31.943259482 +0200 *************** vect_enhance_data_refs_alignment (loop_v *** 1537,1546 **** || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) do_peeling = false; ! if (do_peeling && all_misalignments_unknown && vect_supportable_dr_alignment (dr0, false)) { - /* Check if the target requires to prefer stores over loads, i.e., if misaligned stores are more expensive than misaligned loads (taking drs with same alignment into account). 
*/ --- 1537,1556 ---- || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) do_peeling = false; ! /* If we don't know how many times the peeling loop will run ! assume it will run VF-1 times and disable peeling if the remaining ! iters are less than the vectorization factor. */ ! if (do_peeling ! && all_misalignments_unknown ! && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ! && (LOOP_VINFO_INT_NITERS (loop_vinfo) ! < 2 * (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1)) ! do_peeling = false; ! ! if (do_peeling ! && all_misalignments_unknown && vect_supportable_dr_alignment (dr0, false)) { /* Check if the target requires to prefer stores over loads, i.e., if misaligned stores are more expensive than misaligned loads (taking drs with same alignment into account). */ *************** vect_enhance_data_refs_alignment (loop_v *** 1627,1632 **** --- 1637,1650 ---- &body_cost_vec); if (!dr0 || !npeel) do_peeling = false; + + /* If peeling by npeel will result in a remaining loop not iterating + enough to be vectorized then do not peel. 
*/ + if (do_peeling + && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && (LOOP_VINFO_INT_NITERS (loop_vinfo) + < LOOP_VINFO_VECT_FACTOR (loop_vinfo) + npeel)) + do_peeling = false; } if (do_peeling) Index: gcc/testsuite/gfortran.dg/vect/pr62283.f =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gfortran.dg/vect/pr62283.f 2014-08-28 14:31:31.968259480 +0200 *************** *** 0 **** --- 1,17 ---- + C { dg-do compile } + C { dg-additional-options "-fvect-cost-model=dynamic" } + subroutine test2(x,y) + real x(4),y(4) + beta=3.141593 + do i=1,4 + y(i)=y(i)+beta*x(i) + end do + end + + subroutine test3(x,y) + real x(4),y(4) + beta=3.141593 + y=y+beta*x + end + C { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_hw_misalign } } } } + C { dg-final { cleanup-tree-dump "vect" } } Index: gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c =================================================================== *** gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c.orig 2013-08-30 09:55:26.721775507 +0200 --- gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c 2014-08-28 14:44:39.436205264 +0200 *************** *** 1,5 **** /* { dg-do compile } */ ! /* { dg-options "-O3 -fdump-tree-cunroll-details" } */ int *a; test(int c) { --- 1,5 ---- /* { dg-do compile } */ ! /* { dg-options "-O3 -fdump-tree-cunroll-details -fno-tree-vectorize" } */ int *a; test(int c) { Index: gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c =================================================================== *** gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c.orig 2013-06-11 09:32:59.057850490 +0200 --- gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-31.c 2014-08-28 14:31:44.536258615 +0200 *************** int main (void) *** 86,91 **** return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ ! 
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- 86,90 ---- return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c =================================================================== *** gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c.orig 2009-10-27 14:29:29.000000000 +0100 --- gcc/testsuite/gcc.dg/vect/costmodel/i386/costmodel-vect-33.c 2014-08-28 14:32:30.985255417 +0200 *************** int main (void) *** 36,40 **** return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- 36,40 ---- return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c =================================================================== *** gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c.orig 2013-06-11 09:32:59.139851514 +0200 --- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c 2014-08-28 14:34:25.878247506 +0200 *************** int main (void) *** 86,91 **** return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ ! /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- 86,90 ---- return main1 (); } ! 
/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c =================================================================== *** gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c.orig 2009-10-27 14:29:29.000000000 +0100 --- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c 2014-08-28 14:34:49.881245854 +0200 *************** int main (void) *** 36,40 **** return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- 36,40 ---- return main1 (); } ! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ Index: gcc/testsuite/gcc.dg/vect/vect-33.c =================================================================== *** gcc/testsuite/gcc.dg/vect/vect-33.c.orig 2011-12-21 17:08:33.000000000 +0100 --- gcc/testsuite/gcc.dg/vect/vect-33.c 2014-08-28 14:42:25.311214498 +0200 *************** int main (void) *** 38,44 **** /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ! /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 "vect" } } */ ! /* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" { target vector_alignment_reachable } } } */ ! /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { {! vector_alignment_reachable} && {! vect_hw_misalign} } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */ --- 38,44 ---- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ! /* { dg-final { scan-tree-dump "Vectorizing an unaligned access" "vect" { target { vect_hw_misalign && { ! vect64 } } } } } */ ! 
/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" { target { vector_alignment_reachable && vect64 } } } } */ ! /* { dg-final { scan-tree-dump-times "Alignment of access forced using versioning" 1 "vect" { target { { {! vector_alignment_reachable} || {! vect64} } && {! vect_hw_misalign} } } } } */ /* { dg-final { cleanup-tree-dump "vect" } } */