On 9/4/24 07:27, LIU Zhiwei wrote:
@@ -698,6 +704,21 @@ static bool tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
case TCG_TYPE_I64:
tcg_out_opc_imm(s, OPC_ADDI, ret, arg, 0);
break;
+ case TCG_TYPE_V64:
+ case TCG_TYPE_V128:
+ case TCG_TYPE_V256:
+ {
+ int nf = get_vec_type_bytes(type) / riscv_vlenb;
+
+ if (nf != 0) {
+ tcg_debug_assert(is_power_of_2(nf) && nf <= 8);
+ tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1, true);
+ } else {
+ riscv_set_vec_config_vl(s, type);
+ tcg_out_opc_vv(s, OPC_VMV_V_V, ret, TCG_REG_V0, arg, true);
+ }
+ }
+ break;
Perhaps
int lmul = type - riscv_lg2_vlenb;
int nf = 1 << MAX(lmul, 0);
tcg_out_opc_vi(s, OPC_VMVNR_V, ret, arg, nf - 1);
Is there a reason to prefer vmv.v.v over vmvnr.v?
Seems like we can always move one vector reg...
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t arg)
+{
+ if (arg < 16 && arg >= -16) {
+ riscv_set_vec_config_vl_vece(s, type, vece);
+ tcg_out_opc_vi(s, OPC_VMV_V_I, dst, TCG_REG_V0, arg, true);
+ return;
+ }
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, arg);
+ tcg_out_dup_vec(s, type, vece, dst, TCG_REG_TMP0);
+}
I'll note that 0 and -1 do not require SEW change. I don't know how often that will come
up, since in my testing with aarch64, we usually needed to swap to TCG_TYPE_V256 anyway.
r~