https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121737

            Bug ID: 121737
           Summary: vectorizer vectorizes part of the loop but not all of it
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: pinskia at gcc dot gnu.org
  Target Milestone: ---

Take the following testcase, compiled at -O3:
```
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

// A pair of float coordinates (x, y).
typedef struct {
        float x;
        float y;
} pt_t;

// A list of points: process_poly reads the first `len` entries of `v`,
// which has room for 8 points.
typedef struct
{
        int  len;
        pt_t v[8];
} poly_t;

// Declared only; no definition is part of this test case.
extern void print_pt(pt_t p);

// Computes the x and y extents (maximum minus minimum) over the first
// poly->len points of poly->v and passes them to print_pt as a pt_t.
// len is only asserted to be positive; it is not checked against the
// 8-element size of v.
void process_poly(const poly_t* poly)
{
        // Two uninitialized arrays.
        // Index 0 holds the running minimum, index 1 the running maximum.
        float rngx[2];
        float rngy[2];

        const int len = poly->len;
        assert(len > 0);

        for (int i=0; i<len; ++i)
        {
                // `i==0 ||` short-circuits, so on the first iteration each
                // slot is written from v[0] without being read first.
                if (i==0 || poly->v[i].x < rngx[0]) rngx[0] = poly->v[i].x;
                if (i==0 || poly->v[i].x > rngx[1]) rngx[1] = poly->v[i].x;
                if (i==0 || poly->v[i].y < rngy[0]) rngy[0] = poly->v[i].y;
                if (i==0 || poly->v[i].y > rngy[1]) rngy[1] = poly->v[i].y;
        }

        // q.x = max x - min x, q.y = max y - min y.
        const pt_t q = { rngx[1] - rngx[0], rngy[1] - rngy[0] };
        // compiler warning on uninitialized use. (But only when compiling -O2)
        print_pt(q);
}
```

We get:
```
  <bb 6> [local count: 118111599]:
  _73 = {_23, _31};
  goto <bb 8>; [100.00%]

  <bb 7> [local count: 611297548]:
  # rngx$1_65 = PHI <_75(5), _23(7)>
  # rngy$1_63 = PHI <_74(5), _31(7)>
  # vect_rngx$0_66.19_86 = PHI <vect_poly__v_I_x_lsm0.14_76(5),
vect__61.21_88(7)>
  # ivtmp.31_18 = PHI <ivtmp.31_83(5), ivtmp.31_17(7)>
  _44 = (void *) ivtmp.31_18;
  vect__62.18_93 = MEM <const vector(2) float> [(float *)_44];
  mask__24.20_87 = vect_rngx$0_66.19_86 > vect__62.18_93;
  vect__61.21_88 = .VCOND_MASK (mask__24.20_87, vect__62.18_93,
vect_rngx$0_66.19_86);
  _85 = BIT_FIELD_REF <vect__62.18_93, 32, 32>;
  _94 = BIT_FIELD_REF <vect__62.18_93, 32, 0>;
  _6 = rngx$1_65 < _94;
  _23 = _6 ? _94 : rngx$1_65;
  _71 = rngy$1_63 < _85;
  _31 = _71 ? _85 : rngy$1_63;
  ivtmp.31_17 = ivtmp.31_18 + 8;
  if (ivtmp.31_17 != _40)
    goto <bb 7>; [80.68%]
  else
    goto <bb 6>; [19.32%]
```

If we manually peel the loop, SLP can fully vectorize it at -O2, and we
get:
```
  <bb 6> [local count: 939524096]:
  # vect_rngx$1_37.14_29 = PHI <vect__10.16_43(6), vect__1.13_30(5)>
  # vect_rngx$0_35.19_46 = PHI <vect__28.21_48(6), vect__1.13_30(5)>
  # ivtmp.31_17 = PHI <ivtmp.31_16(6), ivtmp.31_53(5)>
  _55 = (void *) ivtmp.31_17;
  vect__3.10_32 = MEM <const vector(2) float> [(float *)_55];
  mask__11.20_47 = vect__3.10_32 < vect_rngx$0_35.19_46;
  vect__28.21_48 = .VCOND_MASK (mask__11.20_47, vect__3.10_32,
vect_rngx$0_35.19_46);
  mask__9.15_33 = vect_rngx$1_37.14_29 < vect__3.10_32;
  vect__10.16_43 = .VCOND_MASK (mask__9.15_33, vect__3.10_32,
vect_rngx$1_37.14_29);
  ivtmp.31_16 = ivtmp.31_17 + 8;
  if (ivtmp.31_16 != _63)
    goto <bb 6>; [87.50%]
  else
    goto <bb 7>; [12.50%]
```

Reply via email to