> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 6753b01db59..866aaf1e8a0 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -1580,8 +1580,27 @@ (define_insn_and_split "*vec_duplicate<mode>"
>    "&& 1"
>    [(const_int 0)]
>  {
> -    riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
> -                                   riscv_vector::UNARY_OP, operands);
> +    if (!strided_load_broadcast_p ()
> +        && TARGET_ZVFHMIN && !TARGET_ZVFH && <VEL>mode == HFmode)
> +      {
> +        /* For Float16, load, convert to float, then broadcast and
> +           truncate.  */
> +        rtx tmpsf = gen_reg_rtx (SFmode);
> +        emit_insn (gen_extendhfsf2 (tmpsf, operands[1]));
> +        poly_uint64 nunits = GET_MODE_NUNITS (<MODE>mode);

This could be HF -> HI (bitcast), then an HI pred_broadcast, then a bitcast
back to an HF vector again; that would avoid introducing the trunc here.  I
would prefer to improve this, since the RVA23 profile only mandates Zvfhmin,
not Zvfh.  (A rough sketch of that bitcast variant is appended after the
quoted patch below.)

> +        machine_mode vmodesf
> +          = riscv_vector::get_vector_mode (SFmode, nunits).require ();
> +        rtx tmp = gen_reg_rtx (vmodesf);
> +        rtx ops[] = {tmp, tmpsf};
> +        riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (vmodesf),
> +                                       riscv_vector::UNARY_OP, ops);
> +        rtx ops2[] = {operands[0], tmp};
> +        riscv_vector::emit_vlmax_insn (code_for_pred_trunc (vmodesf),
> +                                       riscv_vector::UNARY_OP_FRM_DYN, ops2);
> +      }
> +    else
> +      riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
> +                                     riscv_vector::UNARY_OP, operands);
>    DONE;
>  }
>    [(set_attr "type" "vector")]
> @@ -2171,7 +2190,7 @@ (define_expand "@pred_broadcast<mode>"
>         }
>      }
>    else if (GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)
> -           && (immediate_operand (operands[3], Pmode)
> +           && (immediate_operand (operands[3], Pmode)
>                || (CONST_POLY_INT_P (operands[3])
>                    && known_ge (rtx_to_poly_int64 (operands[3]), 0U)
>                    && known_le (rtx_to_poly_int64 (operands[3]),
>                                 GET_MODE_SIZE (<MODE>mode)))))
> @@ -2224,12 +2243,7 @@ (define_insn_and_split "*pred_broadcast<mode>"
>    "(register_operand (operands[3], <VEL>mode)
>      || CONST_POLY_INT_P (operands[3]))
>    && GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)"
> -  [(set (match_dup 0)
> -        (if_then_else:V_VLSI (unspec:<VM> [(match_dup 1) (match_dup 4)
> -             (match_dup 5) (match_dup 6) (match_dup 7)
> -             (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> -          (vec_duplicate:V_VLSI (match_dup 3))
> -          (match_dup 2)))]
> +  [(const_int 0)]
>  {
>    gcc_assert (can_create_pseudo_p ());
>    if (CONST_POLY_INT_P (operands[3]))
> @@ -2238,12 +2252,6 @@ (define_insn_and_split "*pred_broadcast<mode>"
>        emit_move_insn (tmp, operands[3]);
>        operands[3] = tmp;
>      }
> -  rtx m = assign_stack_local (<VEL>mode, GET_MODE_SIZE (<VEL>mode),
> -                              GET_MODE_ALIGNMENT (<VEL>mode));
> -  m = validize_mem (m);
> -  emit_move_insn (m, operands[3]);
> -  m = gen_rtx_MEM (<VEL>mode, force_reg (Pmode, XEXP (m, 0)));
> -  operands[3] = m;
> 
>    /* For SEW = 64 in RV32 system, we expand vmv.s.x:
>         andi a2,a2,1
> @@ -2254,6 +2262,35 @@ (define_insn_and_split "*pred_broadcast<mode>"
>        operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]);
>        operands[1] = CONSTM1_RTX (<VM>mode);
>      }
> +
> +  /* If the target doesn't want a strided-load broadcast we go with a regular
> +     V1DImode load and a broadcast gather.  */
> +  if (strided_load_broadcast_p ())
> +    {
> +      rtx mem = assign_stack_local (<VEL>mode, GET_MODE_SIZE (<VEL>mode),
> +                                    GET_MODE_ALIGNMENT (<VEL>mode));
> +      mem = validize_mem (mem);
> +      emit_move_insn (mem, operands[3]);
> +      mem = gen_rtx_MEM (<VEL>mode, force_reg (Pmode, XEXP (mem, 0)));
> +
> +      emit_insn
> +        (gen_pred_broadcast<mode>
> +         (operands[0], operands[1], operands[2], mem,
> +          operands[4], operands[5], operands[6], operands[7]));
> +    }
> +  else
> +    {
> +      rtx tmp = gen_reg_rtx (V1DImode);
> +      emit_move_insn (tmp, lowpart_subreg (V1DImode, operands[3],
> +                                           <VEL>mode));
> +      tmp = lowpart_subreg (<MODE>mode, tmp, V1DImode);
> +
> +      emit_insn
> +        (gen_pred_gather<mode>_scalar
> +         (operands[0], operands[1], operands[2], tmp, CONST0_RTX (Pmode),
> +          operands[4], operands[5], operands[6], operands[7]));
> +    }
> +  DONE;
>  }
>    [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
>     (set_attr "mode" "<MODE>")])
> @@ -2293,9 +2330,9 @@ (define_insn "*pred_broadcast<mode>_zvfhmin"
>         (reg:SI VL_REGNUM)
>         (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
>      (vec_duplicate:V_VLSF_ZVFHMIN
> -      (match_operand:<VEL> 3 "direct_broadcast_operand" "Wdm, Wdm, Wdm, Wdm"))
> +      (match_operand:<VEL> 3 "direct_broadcast_operand" " A, A, A, A"))
>      (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))]
> -  "TARGET_VECTOR"
> +  "TARGET_VECTOR && strided_load_broadcast_p ()"
>    "@
>     vlse<sew>.v\t%0,%3,zero,%1.t
>     vlse<sew>.v\t%0,%3,zero,%1.t
> -- 
> 2.50.0
> 
> 
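For reference, here is a rough, untested sketch of the bitcast variant I mean,
written against the same *vec_duplicate<mode> split body as the hunk above.
The names tmphi/vmodehi are just illustrative, and it assumes the equal-size
HImode vector mode is always available when the HFmode one is and that the
integer pred_broadcast path accepts the reinterpreted scalar:

  /* Sketch only: reinterpret the HFmode scalar as HImode, broadcast in the
     equal-size integer vector mode, then view the result as the original
     HFmode vector, so no FP widen/trunc pair is needed.  */
  rtx tmphi = lowpart_subreg (HImode, force_reg (HFmode, operands[1]), HFmode);
  machine_mode vmodehi
    = riscv_vector::get_vector_mode (HImode,
                                     GET_MODE_NUNITS (<MODE>mode)).require ();
  rtx tmpvec = gen_reg_rtx (vmodehi);
  rtx ops[] = {tmpvec, tmphi};
  riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (vmodehi),
                                 riscv_vector::UNARY_OP, ops);
  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, tmpvec, vmodehi));

That would fall back to a single integer vmv.v.x plus a subreg instead of a
widening broadcast followed by a narrowing convert, though the validity of the
same-size vector subreg for the VLA modes would still need checking.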