From: "Lucas Mateus Castro (alqotel)" <[email protected]>
Implement the following PowerISA v3.1 instructions: xvi4ger8: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) xvi4ger8pp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) Positive multiply, Positive accumulate xvi8ger4: VSX Vector 4-bit Signed Integer GER (rank-8 update) xvi8ger4pp: VSX Vector 4-bit Signed Integer GER (rank-8 update) Positive multiply, Positive accumulate xvi8ger4spp: VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with Saturate Positive multiply, Positive accumulate xvi16ger2: VSX Vector 16-bit Signed Integer GER (rank-2 update) xvi16ger2pp: VSX Vector 16-bit Signed Integer GER (rank-2 update) Positive multiply, Positive accumulate xvi16ger2s: VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation xvi16ger2spp: VSX Vector 16-bit Signed Integer GER (rank-2 update) with Saturation Positive multiply, Positive accumulate Signed-off-by: Lucas Mateus Castro (alqotel) <[email protected]> --- target/ppc/cpu.h | 5 ++ target/ppc/helper.h | 3 + target/ppc/insn32.decode | 15 +++++ target/ppc/int_helper.c | 85 +++++++++++++++++++++++++++++ target/ppc/internal.h | 28 ++++++++++ target/ppc/translate/vsx-impl.c.inc | 50 +++++++++++++++++ 6 files changed, 186 insertions(+) diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index c2b6c987c0..ee55c6cfa2 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -2688,6 +2688,11 @@ static inline uint64_t *cpu_vsrl_ptr(CPUPPCState *env, int i) return (uint64_t *)((uintptr_t)env + vsr64_offset(i, false)); } +static inline ppc_vsr_t *cpu_vsr_ptr(CPUPPCState *env, int i) +{ + return (ppc_vsr_t *)((uintptr_t)env + vsr_full_offset(i)); +} + static inline long avr64_offset(int i, bool high) { return vsr64_offset(i + 32, high); diff --git a/target/ppc/helper.h b/target/ppc/helper.h index aa6773c4a5..06553517de 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -537,6 +537,9 @@ DEF_HELPER_5(XXBLENDVB, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_5(XXBLENDVH, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_5(XXBLENDVW, void, vsr, vsr, vsr, vsr, i32) DEF_HELPER_5(XXBLENDVD, void, vsr, vsr, vsr, vsr, i32) +DEF_HELPER_6(XVI4GER8, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(XVI8GER4, void, env, i32, i32, i32, i32, i32) +DEF_HELPER_6(XVI16GER2, void, env, i32, i32, i32, i32, i32) DEF_HELPER_2(efscfsi, i32, env, i32) DEF_HELPER_2(efscfui, i32, env, i32) diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index 7a76bedfa6..653f50db93 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -170,6 +170,9 @@ &XX3 xt xa xb @XX3 ...... ..... ..... ..... ........ ... &XX3 xt=%xx_xt xa=%xx_xa xb=%xx_xb +%xx_at 23:3 !function=times_4 +@XX3_at ...... ... .. ..... ..... ........ ... &XX3 xt=%xx_at xb=%xx_xb + &XX3_dm xt xa xb dm @XX3_dm ...... ..... ..... ..... . dm:2 ..... ... &XX3_dm xt=%xx_xt xa=%xx_xa xb=%xx_xb @@ -719,3 +722,15 @@ RFEBB 010011-------------- . 0010010010 - @XL_s XXMFACC 011111 ... -- 00000 ----- 0010110001 - @X_a XXMTACC 011111 ... -- 00001 ----- 0010110001 - @X_a XXSETACCZ 011111 ... -- 00011 ----- 0010110001 - @X_a + +## Vector GER instruction + +XVI4GER8 111011 ... -- ..... ..... 00100011 ..- @XX3_at xa=%xx_xa +XVI4GER8PP 111011 ... -- ..... ..... 00100010 ..- @XX3_at xa=%xx_xa +XVI8GER4 111011 ... -- ..... ..... 00000011 ..- @XX3_at xa=%xx_xa +XVI8GER4PP 111011 ... -- ..... ..... 00000010 ..- @XX3_at xa=%xx_xa +XVI16GER2 111011 ... -- ..... ..... 01001011 ..- @XX3_at xa=%xx_xa +XVI16GER2PP 111011 ... -- ..... ..... 01101011 ..- @XX3_at xa=%xx_xa +XVI8GER4SPP 111011 ... -- ..... ..... 01100011 ..- @XX3_at xa=%xx_xa +XVI16GER2S 111011 ... -- ..... ..... 00101011 ..- @XX3_at xa=%xx_xa +XVI16GER2SPP 111011 ... -- ..... ..... 00101010 ..- @XX3_at xa=%xx_xa diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 8c1674510b..bd2f1a7c2a 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -782,6 +782,91 @@ VCT(uxs, cvtsduw, u32) VCT(sxs, cvtsdsw, s32) #undef VCT +/* + * Packed VSX Integer GER Flags + * 00 - no accumulation no saturation + * 01 - accumulate but no saturation + * 10 - no accumulation but with saturation + * 11 - accumulate with saturation + */ +static inline bool get_sat(uint32_t flags) +{ + return flags & 0x2; +} + +static inline bool get_acc(uint32_t flags) +{ + return flags & 0x1; +} + +#define GET_VsrN(a, i) (extract32(a->VsrB((i) / 2), (i) % 2 ? 4 : 0, 4)) +#define GET_VsrB(a, i) a->VsrB(i) +#define GET_VsrH(a, i) a->VsrH(i) + +#define GET_VsrSN(a, i) (sextract32(a->VsrSB((i) / 2), (i) % 2 ? 4 : 0, 4)) +#define GET_VsrSB(a, i) a->VsrSB(i) +#define GET_VsrSH(a, i) a->VsrSH(i) + +#define XVIGER(NAME, RANK, EL) \ + void NAME(CPUPPCState *env, uint32_t a_r, uint32_t b_r, \ + uint32_t at_r, uint32_t mask, uint32_t packed_flags) \ + { \ + ppc_vsr_t *a = cpu_vsr_ptr(env, a_r), *b = cpu_vsr_ptr(env, b_r), *at; \ + bool sat = get_sat(packed_flags), acc = get_acc(packed_flags); \ + uint8_t pmsk = ger_get_pmsk(mask), xmsk = ger_get_xmsk(mask), \ + ymsk = ger_get_ymsk(mask); \ + uint8_t pmsk_bit, xmsk_bit, ymsk_bit; \ + int64_t psum; \ + int32_t va, vb; \ + int i, j, k; \ + for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) { \ + at = cpu_vsr_ptr(env, at_r + i); \ + for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) { \ + if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) { \ + psum = 0; \ + for (k = 0, pmsk_bit = 1 << (RANK - 1); k < RANK; \ + k++, pmsk_bit >>= 1) { \ + if (pmsk_bit & pmsk) { \ + va = (int32_t)GET_VsrS##EL(a, RANK * i + k); \ + vb = (int32_t) ((RANK == 4) ? \ + GET_Vsr##EL(b, RANK * j + k) : \ + GET_VsrS##EL(b, RANK * j + k));\ + psum += va * vb; \ + } \ + } \ + if (acc) { \ + psum += at->VsrSW(j); \ + } \ + if (sat && psum > INT32_MAX) { \ + set_vscr_sat(env); \ + at->VsrSW(j) = INT32_MAX; \ + } else if (sat && psum < INT32_MIN) { \ + set_vscr_sat(env); \ + at->VsrSW(j) = INT32_MIN; \ + } else { \ + at->VsrSW(j) = (int32_t) psum; \ + } \ + } else { \ + at->VsrSW(j) = 0; \ + } \ + } \ + } \ + } + +XVIGER(helper_XVI4GER8, 8, N) +XVIGER(helper_XVI8GER4, 4, B) +XVIGER(helper_XVI16GER2, 2, H) + +#undef GER_MULT +#undef XVIGER_NAME +#undef XVIGER +#undef GET_VsrN +#undef GET_VsrB +#undef GET_VsrH +#undef GET_VsrSN +#undef GET_VsrSB +#undef GET_VsrSH + target_ulong helper_vclzlsbb(ppc_avr_t *r) { target_ulong count = 0; diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 8094e0b033..a994d98238 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -291,4 +291,32 @@ G_NORETURN void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr addr, uintptr_t retaddr); #endif +/* + * Auxiliary functions to pack/unpack masks for GER instructions. + * + * Packed format: + * Bits 0-3: xmsk + * Bits 4-7: ymsk + * Bits 8-15: pmsk + */ +static inline uint8_t ger_get_xmsk(uint32_t packed_masks) +{ + return packed_masks & 0xF; +} + +static inline uint8_t ger_get_ymsk(uint32_t packed_masks) +{ + return (packed_masks >> 4) & 0xF; +} + +static inline uint8_t ger_get_pmsk(uint32_t packed_masks) +{ + return (packed_masks >> 8) & 0xFF; +} + +static inline int ger_pack_masks(int pmsk, int ymsk, int xmsk) +{ + return (pmsk & 0xFF) << 8 | (ymsk & 0xF) << 4 | (xmsk & 0xF); +} + #endif /* PPC_INTERNAL_H */ diff --git a/target/ppc/translate/vsx-impl.c.inc b/target/ppc/translate/vsx-impl.c.inc index 919b889c40..1eb68c7081 100644 --- a/target/ppc/translate/vsx-impl.c.inc +++ b/target/ppc/translate/vsx-impl.c.inc @@ -2823,6 +2823,56 @@ static bool trans_XXSETACCZ(DisasContext *ctx, arg_X_a *a) return true; } +/* + * Packed VSX Integer GER Flags + * 00 - no accumulation no saturation + * 01 - accumulate but no saturation + * 10 - no accumulation but with saturation + * 11 - accumulate with saturation + */ +static uint32_t pack_flags_xvi(int acc, int sat) +{ + return (sat << 1) | acc; +} + +static bool do_ger_XX3(DisasContext *ctx, arg_XX3 *a, uint32_t op, + void (*helper)(TCGv_env, TCGv_i32, TCGv_i32, + TCGv_i32, TCGv_i32, TCGv_i32)) +{ + uint32_t mask; + REQUIRE_INSNS_FLAGS2(ctx, ISA310); + REQUIRE_VSX(ctx); + if (unlikely((a->xa / 4 == a->xt / 4) || (a->xb / 4 == a->xt / 4))) { + gen_invalid(ctx); + return true; + } + + mask = 0xFFFFFFFF; + helper(cpu_env, tcg_constant_i32(a->xa), tcg_constant_i32(a->xb), + tcg_constant_i32(a->xt), tcg_constant_i32(mask), + tcg_constant_i32(op)); + return true; +} + +/* Used to keep line length < 80 */ +#define GER_NOP pack_flags_xvi(0, 0) +#define GER_PP pack_flags_xvi(1, 0) +#define GER_SAT pack_flags_xvi(0, 1) +#define GER_SPP pack_flags_xvi(1, 1) +TRANS(XVI4GER8, do_ger_XX3, GER_NOP, gen_helper_XVI4GER8) +TRANS(XVI4GER8PP, do_ger_XX3, GER_PP, gen_helper_XVI4GER8) +TRANS(XVI8GER4, do_ger_XX3, GER_NOP, gen_helper_XVI8GER4) +TRANS(XVI8GER4PP, do_ger_XX3, GER_PP, gen_helper_XVI8GER4) +TRANS(XVI8GER4SPP, do_ger_XX3, GER_SPP, gen_helper_XVI8GER4) +TRANS(XVI16GER2, do_ger_XX3, GER_NOP, gen_helper_XVI16GER2) +TRANS(XVI16GER2PP, do_ger_XX3, GER_PP, gen_helper_XVI16GER2) +TRANS(XVI16GER2S, do_ger_XX3, GER_SAT, gen_helper_XVI16GER2) +TRANS(XVI16GER2SPP, do_ger_XX3, GER_SPP, gen_helper_XVI16GER2) +#undef GER_NOP +#undef GER_PP +#undef GER_SAT +#undef GER_SPP + #undef GEN_XX2FORM #undef GEN_XX3FORM #undef GEN_XX2IFORM -- 2.31.1
