https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115725
--- Comment #8 from JuzheZhong <juzhe.zhong at rivai dot ai> ---
I think we should treat operands[0] as the "merge/maskoff" operand, which the
instruction needs to depend on, and use the TU (tail-undisturbed) policy for the
vec_set pattern.
Take ARM for example:
;; Expander for the vec_set<mode> pattern: insert the scalar in operand 1
;; into the vector in operand 0 at the lane index given by the immediate in
;; operand 2.  It forwards to aarch64_simd_vec_set<mode>, passing operands[0]
;; both as the destination and as the trailing "merge" input, so the lanes
;; that are not written come from the old value of the destination.
(define_expand "vec_set<mode>"
[(match_operand:VALL_F16 0 "register_operand")
(match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
(match_operand:SI 2 "immediate_operand")]
"TARGET_SIMD"
{
/* Turn the lane index into a one-bit mask (1 << index), which is the form
   passed to the insn as its selector operand.  */
HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
/* operands[0] appears twice: as the result and as the vector being merged
   into.  */
emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
GEN_INT (elem), operands[0]));
DONE;
}
)
;; Single-lane insert.  The result is a vec_merge of a broadcast of the
;; scalar operand 1 with the existing vector operand 3, selected by the
;; immediate mask in operand 2.  Operand 3 carries the matching constraint
;; "0" in every alternative, so it is tied to the destination register and
;; the instruction has an explicit input dependency on the old vector value.
(define_insn "aarch64_simd_vec_set<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
(vec_merge:VALL_F16
(vec_duplicate:VALL_F16
(match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand"
"w,?r,Utv"))
(match_operand:VALL_F16 3 "register_operand" "0,0,0")
(match_operand:SI 2 "immediate_operand" "i,i,i")))]
;; Only match when exact_log2 of the mask is non-negative (the expander
;; above always builds the mask as 1 << index).
"TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
{
/* Recover the lane number from the mask and pass it through ENDIAN_LANE_N
   (presumably an endianness-dependent lane remapping -- confirm against its
   definition), then rebuild the mask for the remapped lane.  */
int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
/* One output template per constraint alternative of operand 1.
   NOTE(review): %p2 presumably prints the lane index (log2 of the mask) --
   confirm against the operand-printing code.  */
switch (which_alternative)
{
case 0:
/* Alternative "w": INS from lane 0 of another vector register.  */
return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
case 1:
/* Alternative "?r": INS from a general-purpose register.  */
return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
case 2:
/* Alternative "Utv": LD1 of a single lane from operand 1.  */
return "ld1\\t{%0.<Vetype>}[%p2], %1";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
Once we specify the "TU" policy for "vmv.s.x", the "avl_prop" pass won't
propagate "avl=1" from "vmv.s.x" to "vle16.v".
So I think the correct assembly, after we make vec_set use "TU", should be:
# Expected code for vec_set_vnx8hi_0 once vec_set uses the TU policy:
# load a vector from 0(a1), overwrite one element with a2, store to 0(a0).
vec_set_vnx8hi_0:
# AVL = 8, e16/mf4, tail-agnostic: configuration for the full-vector load.
vsetivli zero,8,e16,mf4,ta,ma
vle16.v v1,0(a1)
# AVL = 1 with "tu" (tail-undisturbed): the elements of v1 past the first
# keep the values just loaded while vmv.s.x writes the scalar from a2.
vsetivli zero,1,e16,mf4,tu,ma
vmv.s.x v1,a2
# Back to AVL = 8 so the store covers the same elements as the load.
vsetivli zero,8,e16,mf4,ta,ma
vse16.v v1,0(a0)
ret
Robin, could you check whether the assembly looks like the above after this
"TU" fix?
If so, I think you can send a patch to fix it and backport it to GCC 14.