https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103144
--- Comment #2 from Hongtao.liu <crazylht at gmail dot com> ---
Another issue is with SLP: when the trip count is small and the loop is
completely unrolled, SLP fails to generate vlshr_optab.
#include<stdint.h>
/* Reproducer for the SLP issue described above.
   For i = 0..3, stores psrc[i] + shift into pdst[i], then shifts `shift`
   right by one bit, so element i is computed with the original shift >> i.
   pdst: destination array, at least 4 elements are written (__restrict).
   psrc: source array, at least 4 elements are read.
   shift: initial addend; modified only as a local copy.  */
void
foo (uint64_t* __restrict pdst, uint64_t* psrc, uint64_t shift)
{
/* Constant trip count of 4; per the report this loop is completely unrolled
   before SLP runs.  */
for (int64_t i = 0; i != 4; i++)
{
pdst[i] = psrc[i] + shift;
/* shift is unsigned, so this is a zero-filling (logical) right shift.  */
shift >>= 1;
}
}
.175t.slp1 dump:
_8 = *psrc_12(D);
_28 = _8 + shift_10(D);
shift_30 = shift_10(D) >> 1;
_39 = psrc_12(D) + 8;
_40 = *_39;
_41 = pdst_13(D) + 8;
_42 = shift_30 + _40;
shift_44 = shift_30 >> 1;
_53 = psrc_12(D) + 16;
_54 = *_53;
_55 = pdst_13(D) + 16;
_56 = shift_44 + _54;
shift_58 = shift_44 >> 1;
_3 = psrc_12(D) + 24;
_4 = *_3;
_5 = pdst_13(D) + 24;
_6 = _4 + shift_58;
_15 = {_28, _42, _56, _6};
vectp.8_18 = pdst_13(D);
MEM <vector(4) long unsigned int> [(uint64_t *)vectp.8_18] = _15;