https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103144
--- Comment #2 from Hongtao.liu <crazylht at gmail dot com> ---
Another issue is with SLP: when the trip count is small and the loop is completely unrolled, SLP fails to generate vlshr_optab.

#include<stdint.h>

void foo (uint64_t* __restrict pdst, uint64_t* psrc, uint64_t shift)
{
  for (int64_t i = 0; i != 4; i++)
    {
      pdst[i] = psrc[i] + shift;
      shift >>= 1;
    }
}

.175t.slp1 dump:

  _8 = *psrc_12(D);
  _28 = _8 + shift_10(D);
  shift_30 = shift_10(D) >> 1;
  _39 = psrc_12(D) + 8;
  _40 = *_39;
  _41 = pdst_13(D) + 8;
  _42 = shift_30 + _40;
  shift_44 = shift_30 >> 1;
  _53 = psrc_12(D) + 16;
  _54 = *_53;
  _55 = pdst_13(D) + 16;
  _56 = shift_44 + _54;
  shift_58 = shift_44 >> 1;
  _3 = psrc_12(D) + 24;
  _4 = *_3;
  _5 = pdst_13(D) + 24;
  _6 = _4 + shift_58;
  _15 = {_28, _42, _56, _6};
  vectp.8_18 = pdst_13(D);
  MEM <vector(4) long unsigned int> [(uint64_t *)vectp.8_18] = _15;