On 8/30/23 01:48, Song Gao wrote:
 void HELPER(vssrlni_d_q)(void *vd, void *vj, uint64_t imm, uint32_t desc)
 {
-    Int128 shft_res1, shft_res2, mask;
+    int i, j;
+    Int128 shft_res[4], mask;
     VReg *Vd = (VReg *)vd;
     VReg *Vj = (VReg *)vj;
+    int oprsz = simd_oprsz(desc);
 
-    if (imm == 0) {
-        shft_res1 = Vj->Q(0);
-        shft_res2 = Vd->Q(0);
-    } else {
-        shft_res1 = int128_urshift(Vj->Q(0), imm);
-        shft_res2 = int128_urshift(Vd->Q(0), imm);
-    }
     mask = int128_sub(int128_lshift(int128_one(), 63), int128_one());
 
-    if (int128_ult(mask, shft_res1)) {
-        Vd->D(0) = int128_getlo(mask);
-    }else {
-        Vd->D(0) = int128_getlo(shft_res1);
-    }
-
-    if (int128_ult(mask, shft_res2)) {
-        Vd->D(1) = int128_getlo(mask);
-    }else {
-        Vd->D(1) = int128_getlo(shft_res2);
+    for (i = 0; i < oprsz / 16; i++) {
+        if (imm == 0) {
+            shft_res[2 * i] = Vj->Q(i);
+            shft_res[2 * i + 1] = Vd->Q(i);
+        } else {
+            shft_res[2 * i] = int128_urshift(Vj->Q(i), imm);
+            shft_res[2 * i + 1] = int128_urshift(Vd->Q(i), imm);
+        }
+        for (j = 2 * i; j <= 2 * i + 1; j++) {
+            if (int128_ult(mask, shft_res[j])) {
+                Vd->D(j) = int128_getlo(mask);
+            }else {
+                Vd->D(j) = int128_getlo(shft_res[j]);
+            }
+        }
     }
 }
This does not require an array of shft_res. In fact, I encourage you to split out a helper.

r~
