https://gcc.gnu.org/g:1bc5b47f5b06dc4e8d2e7b622a7100b40b8e6b27
commit r16-924-g1bc5b47f5b06dc4e8d2e7b622a7100b40b8e6b27
Author: liuhongt <hongtao....@intel.com>
Date:   Tue Mar 11 18:40:07 2025 -0700

    For datarefs with big gap, split them into different groups.

    The patch addresses a missed vectorization in the case below.

    void foo (int* a, int* restrict b)
    {
      b[0] = a[0] * a[64];
      b[1] = a[65] * a[1];
      b[2] = a[2] * a[66];
      b[3] = a[67] * a[3];
      b[4] = a[68] * a[4];
      b[5] = a[69] * a[5];
      b[6] = a[6] * a[70];
      b[7] = a[7] * a[71];
    }

    In vect_analyze_data_ref_accesses, a[0], a[1], ... a[7], a[64], ...,
    a[71] all end up in the same group, with a size of 71.  That makes
    vectorization unprofitable.

    gcc/ChangeLog:

            PR tree-optimization/119181
            * tree-vect-data-refs.cc (vect_analyze_data_ref_accesses):
            Split datarefs when there's a gap bigger than
            MAX_BITSIZE_MODE_ANY_MODE.

    gcc/testsuite/ChangeLog:

            * gcc.dg/vect/bb-slp-pr119181.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c | 15 +++++++++++++++
 gcc/tree-vect-data-refs.cc                  |  7 +++++++
 2 files changed, 22 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c
new file mode 100644
index 000000000000..b0d3e5a3cb8b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr119181.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+void
+foo (int* a, int* restrict b)
+{
+  b[0] = a[0] * a[64];
+  b[1] = a[65] * a[1];
+  b[2] = a[2] * a[66];
+  b[3] = a[67] * a[3];
+  b[4] = a[68] * a[4];
+  b[5] = a[69] * a[5];
+  b[6] = a[6] * a[70];
+  b[7] = a[7] * a[71];
+}
+
+/* { dg-final { scan-tree-dump-times "optimized: basic block" 1 "slp2" { target vect_int_mult } } } */

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 9fd1ef296506..f2deb751ed92 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3682,6 +3682,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo,
 		   != type_size_a))
 	    break;
 
+	  /* For datarefs with a big gap, it's better to split them into
+	     different groups,
+	     i.e. a[0], a[1], a[2], ... a[7], a[100], a[101], ..., a[107].  */
+	  if ((unsigned HOST_WIDE_INT)(init_b - init_prev) * tree_to_uhwi (szb)
+	      > MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT)
+	    break;
+
 	  /* If the step (if not zero or non-constant) is smaller than the
 	     difference between data-refs' inits this splits groups into
 	     suitable sizes.  */
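
For illustration, below is a minimal standalone sketch (not GCC code) of the
heuristic the patch adds: two consecutive sorted datarefs are placed in
separate groups when the byte gap between their offsets exceeds what the
largest machine mode could span.  MAX_GROUP_GAP_BYTES is a hypothetical
stand-in for MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT, whose value is
target-dependent; 64 bytes (a 512-bit mode) and a 4-byte int are assumed
here purely for the worked numbers.

/* Standalone sketch of the gap heuristic; not GCC code.
   MAX_GROUP_GAP_BYTES is a hypothetical stand-in for the target-dependent
   MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT (assumed 64 bytes, i.e. a
   512-bit mode).  */
#include <stdio.h>

#define MAX_GROUP_GAP_BYTES 64	/* assumed; target-dependent in GCC  */

/* Return 1 when the byte gap between two consecutive datarefs is too
   large for any single vector mode to span, so keeping them in one
   group cannot be profitable and the group should be split.  */
static int
split_group_p (unsigned long init_prev, unsigned long init_b)
{
  return init_b - init_prev > MAX_GROUP_GAP_BYTES;
}

int
main (void)
{
  /* In the testcase a[7] and a[64] are adjacent in the sorted dataref
     list: byte offsets 28 and 256, a gap of 228 bytes -> split.  */
  printf ("a[7] -> a[64]: %s\n",
	  split_group_p (7 * sizeof (int), 64 * sizeof (int))
	  ? "split" : "keep");

  /* a[6] and a[7] are only 4 bytes apart -> stay in the same group.  */
  printf ("a[6] -> a[7]:  %s\n",
	  split_group_p (6 * sizeof (int), 7 * sizeof (int))
	  ? "split" : "keep");
  return 0;
}

With the split in place, a[0]...a[7] and a[64]...a[71] become two separate
8-element groups of 32 contiguous bytes each, which a single vector load can
cover, so SLP vectorization of the basic block becomes profitable; the
scan-tree-dump directive in the new testcase checks exactly that.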