https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111846
--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
We are dividing 4 by 8. We get here with basic-block vectorization and
a group size of 4, but we let vectorizable_simd_clone_call choose its
own vector type (it chooses 8 lanes).
While we properly constrain with

  if (!constant_multiple_p (vf * group_size,
                            n->simdclone->simdlen, &num_calls)
      || n->simdclone->nargs != nargs)
    continue;

we seem to cache things in STMT_VINFO_SIMD_CLONE_INFO, and this info gets
re-used across a group_size 8 -> 4 transition here. This is because
we have multiple SLP instances, each sharing some of the calls (in the end,
costs would make vectorization not profitable), vectorizing the CTORs in:
<bb 2> [local count: 9759239]:
_14 = eq_set_bands_real_adj[0];
_15 = powf (0.0, _14);
_16 = _15 - 1.0e+0;
_21 = eq_set_bands_real_adj[1];
_22 = powf (0.0, _21);
_23 = _22 - 1.0e+0;
_28 = eq_set_bands_real_adj[2];
_29 = powf (0.0, _28);
_30 = _29 - 1.0e+0;
_35 = eq_set_bands_real_adj[3];
_36 = powf (0.0, _35);
_37 = _36 - 1.0e+0;
_42 = eq_set_bands_real_adj[4];
_43 = powf (0.0, _42);
_44 = _43 - 1.0e+0;
_49 = eq_set_bands_real_adj[5];
_50 = powf (0.0, _49);
_51 = _50 - 1.0e+0;
_56 = eq_set_bands_real_adj[6];
_57 = powf (0.0, _56);
_58 = _57 - 1.0e+0;
_63 = eq_set_bands_real_adj[7];
_64 = powf (0.0, _63);
_65 = _64 - 1.0e+0;
_70 = eq_set_bands_real_adj[8];
_71 = powf (0.0, _70);
_72 = _71 - 1.0e+0;
_77 = eq_set_bands_real_adj[9];
_78 = powf (0.0, _77);
_79 = _78 - 1.0e+0;
_19 = {_30, _37, _44, _51, _58, _65, _72, _79};
_25 = {_44, _51, _58, _65, _72, _79, _16, _23};
_26 = {_58, _65, _72, _79, _16, _23, _30, _37};
_32 = {_72, _79, _16, _23, _30, _37, _44, _51};
_33 = {_16, _23, _30, _37, _44, _51, _58, _65};
MEM <vector(8) float> [(float *)&gv] = _33;
MEM <vector(8) float> [(float *)&gv + 32B] = _32;
MEM <vector(8) float> [(float *)&gv + 64B] = _26;
MEM <vector(8) float> [(float *)&gv + 96B] = _25;
MEM <vector(8) float> [(float *)&gv + 128B] = _19;
MEM <vector(8) float> [(float *)&gv + 160B] = _33;
MEM <vector(8) float> [(float *)&gv + 192B] = _32;
MEM <vector(8) float> [(float *)&gv + 224B] = _26;
MEM <vector(8) float> [(float *)&gv + 256B] = _25;
MEM <vector(8) float> [(float *)&gv + 288B] = _19;
_82 = {_58, _65, _72, _79};
_83 = {_30, _37, _44, _51};
_84 = {_72, _79, _16, _23};
_85 = {_44, _51, _58, _65};
_86 = {_16, _23, _30, _37};
MEM <vector(4) float> [(float *)&gv + 320B] = _86;
MEM <vector(4) float> [(float *)&gv + 336B] = _85;
MEM <vector(4) float> [(float *)&gv + 352B] = _84;
MEM <vector(4) float> [(float *)&gv + 368B] = _83;
MEM <vector(4) float> [(float *)&gv + 384B] = _82;
return;