https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121737
Bug ID: 121737
Summary: vectorizer vectorizes part of the loop but not all of it
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Keywords: missed-optimization
Severity: normal
Priority: P3
Component: tree-optimization
Assignee: unassigned at gcc dot gnu.org
Reporter: pinskia at gcc dot gnu.org
Target Milestone: ---
Take the following testcase, compiled at -O3:
```
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
// A pair of single-precision coordinates (x, y).
typedef struct {
float x;
float y;
} pt_t;
// A count plus fixed-size storage for 8 points.
typedef struct
{
int len; // used by process_poly as the number of entries of v to visit
pt_t v[8];
} poly_t;
extern void print_pt(pt_t p);
// Computes the minimum and maximum of the x and y coordinates over the
// first poly->len entries of poly->v, then passes the per-axis extent
// (max - min) to print_pt as a pt_t.
// Asserts that poly->len > 0; len is not checked against the size of v here.
void process_poly(const poly_t* poly)
{
// Two uninitialized arrays.
// In each array, index 0 holds the running minimum and index 1 the
// running maximum.
float rngx[2];
float rngy[2];
const int len = poly->len;
assert(len > 0);
for (int i=0; i<len; ++i)
{
// The `i==0 ||` test short-circuits on the first iteration, so each bound
// is stored unconditionally before it is ever compared against.
if (i==0 || poly->v[i].x < rngx[0]) rngx[0] = poly->v[i].x;
if (i==0 || poly->v[i].x > rngx[1]) rngx[1] = poly->v[i].x;
if (i==0 || poly->v[i].y < rngy[0]) rngy[0] = poly->v[i].y;
if (i==0 || poly->v[i].y > rngy[1]) rngy[1] = poly->v[i].y;
}
// q.x is the x extent, q.y is the y extent.
const pt_t q = { rngx[1] - rngx[0], rngy[1] - rngy[0] };
// compiler warning on uninitialized use. (But only when compiling -O2)
print_pt(q);
}
```
We get:
```
<bb 6> [local count: 118111599]:
_73 = {_23, _31};
goto <bb 8>; [100.00%]
<bb 7> [local count: 611297548]:
# rngx$1_65 = PHI <_75(5), _23(7)>
# rngy$1_63 = PHI <_74(5), _31(7)>
# vect_rngx$0_66.19_86 = PHI <vect_poly__v_I_x_lsm0.14_76(5),
vect__61.21_88(7)>
# ivtmp.31_18 = PHI <ivtmp.31_83(5), ivtmp.31_17(7)>
_44 = (void *) ivtmp.31_18;
vect__62.18_93 = MEM <const vector(2) float> [(float *)_44];
mask__24.20_87 = vect_rngx$0_66.19_86 > vect__62.18_93;
vect__61.21_88 = .VCOND_MASK (mask__24.20_87, vect__62.18_93,
vect_rngx$0_66.19_86);
_85 = BIT_FIELD_REF <vect__62.18_93, 32, 32>;
_94 = BIT_FIELD_REF <vect__62.18_93, 32, 0>;
_6 = rngx$1_65 < _94;
_23 = _6 ? _94 : rngx$1_65;
_71 = rngy$1_63 < _85;
_31 = _71 ? _85 : rngy$1_63;
ivtmp.31_17 = ivtmp.31_18 + 8;
if (ivtmp.31_17 != _40)
goto <bb 7>; [80.68%]
else
goto <bb 6>; [19.32%]
```
If we manually peel the loop, SLP can vectorize the loop fully at -O2, and we
get:
```
<bb 6> [local count: 939524096]:
# vect_rngx$1_37.14_29 = PHI <vect__10.16_43(6), vect__1.13_30(5)>
# vect_rngx$0_35.19_46 = PHI <vect__28.21_48(6), vect__1.13_30(5)>
# ivtmp.31_17 = PHI <ivtmp.31_16(6), ivtmp.31_53(5)>
_55 = (void *) ivtmp.31_17;
vect__3.10_32 = MEM <const vector(2) float> [(float *)_55];
mask__11.20_47 = vect__3.10_32 < vect_rngx$0_35.19_46;
vect__28.21_48 = .VCOND_MASK (mask__11.20_47, vect__3.10_32,
vect_rngx$0_35.19_46);
mask__9.15_33 = vect_rngx$1_37.14_29 < vect__3.10_32;
vect__10.16_43 = .VCOND_MASK (mask__9.15_33, vect__3.10_32,
vect_rngx$1_37.14_29);
ivtmp.31_16 = ivtmp.31_17 + 8;
if (ivtmp.31_16 != _63)
goto <bb 6>; [87.50%]
else
goto <bb 7>; [12.50%]
```