On Wed, 2025-02-05 at 16:47 +0800, Li Wei wrote: /* snip */
> +/* Try to use the [x]vbitsel.v insn to optimize the vector shuffle, which > + can reduce one copy insn in the loop compared to [x]vshuff. */ > +static bool > +loongarch_expand_vec_perm_bitsel (struct expand_vec_perm_d *d) > +{ > + if (!ISA_HAS_LSX && !ISA_HAS_LASX) > + return false; I think this check is redundant; if this function is called when !ISA_HAS_LSX && !ISA_HAS_LASX, something must have gone seriously wrong and we cannot salvage it here anyway. > + if (!loongarch_is_bitsel_pattern (d)) > + return false; > + > + if (d->testing_p) > + return true; > + > + int i, val; > + rtx tmp, tmp2, sel, op0, op1, target; > + rtx rperm[MAX_VECT_LEN]; > + > + for (i = 0; i < d->nelt; i += 1) > + { > + /* Here -1 means that all bits of the corresponding type are 1 > + (including the sign bit). */ > + val = d->perm[i] >= d->nelt ? -1 : 0; > + rperm[i] = GEN_INT (val); > + } > + > + tmp2 = gen_reg_rtx (d->vmode); > + switch (d->vmode) > + { Perhaps something like machine_mode vimode = mode_for_vector (int_mode_for_size (GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode))).require (), d->nelt); (I haven't even tried to compile this; it was just written in the mail client to demonstrate the idea.) > + case E_V4DFmode: > + sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, > + rperm)); > + /* Because the [x]vbitsel.v insn pattern requires that all src > + operands and dest operands are of the same type, they need to > + be type-converted. 
*/ > + tmp = simplify_gen_subreg (E_V4DImode, tmp2, d->vmode, 0); > + emit_move_insn (tmp, sel); > + break; > + case E_V2DFmode: > + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, > + rperm)); > + tmp = simplify_gen_subreg (E_V2DImode, tmp2, d->vmode, 0); > + emit_move_insn (tmp, sel); > + break; > + case E_V8SFmode: > + sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, > + rperm)); > + tmp = simplify_gen_subreg (E_V8SImode, tmp2, d->vmode, 0); > + emit_move_insn (tmp, sel); > + break; > + case E_V4SFmode: > + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, > + rperm)); > + tmp = simplify_gen_subreg (E_V4SImode, tmp2, d->vmode, 0); > + emit_move_insn (tmp, sel); > + break; > + default: > + sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, > + rperm)); > + emit_move_insn (tmp2, sel); > + break; > + } > + > + target = d->target; > + op0 = d->op0; > + op1 = d->one_vector_p ? d->op0 : d->op1; > + > + if (GET_MODE_SIZE (d->vmode) == 16) > + { > + switch (d->vmode) We can refactor the .md files a little (see the patch below), and then simply use gen_simd_vbitsel (d->vmode, target, op0, op1, tmp2); instead of the 12 different cases here. 
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index c31aefa892a..1b95ac70220 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -1151,18 +1151,6 @@ (define_insn "lasx_xvbitrevi_<lasxfmt>" [(set_attr "type" "simd_bit") (set_attr "mode" "<MODE>")]) -(define_insn "lasx_xvbitsel_<lasxfmt_f>" - [(set (match_operand:LASX 0 "register_operand" "=f") - (ior:LASX (and:LASX (not:LASX - (match_operand:LASX 3 "register_operand" "f")) - (match_operand:LASX 1 "register_operand" "f")) - (and:LASX (match_dup 3) - (match_operand:LASX 2 "register_operand" "f"))))] - "ISA_HAS_LASX" - "xvbitsel.v\t%u0,%u1,%u2,%u3" - [(set_attr "type" "simd_bitmov") - (set_attr "mode" "<MODE>")]) - (define_insn "lasx_xvbitseli_b" [(set (match_operand:V32QI 0 "register_operand" "=f") (ior:V32QI (and:V32QI (not:V32QI diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc index 92248518cd5..d124c584e8d 100644 --- a/gcc/config/loongarch/loongarch-builtins.cc +++ b/gcc/config/loongarch/loongarch-builtins.cc @@ -247,7 +247,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) #define CODE_FOR_lsx_vandi_b CODE_FOR_andv16qi3 #define CODE_FOR_lsx_bnz_v CODE_FOR_lsx_bnz_v_b #define CODE_FOR_lsx_bz_v CODE_FOR_lsx_bz_v_b -#define CODE_FOR_lsx_vbitsel_v CODE_FOR_lsx_vbitsel_b +#define CODE_FOR_lsx_vbitsel_v CODE_FOR_simd_vbitselv16qi #define CODE_FOR_lsx_vseqi_b CODE_FOR_lsx_vseq_b #define CODE_FOR_lsx_vseqi_h CODE_FOR_lsx_vseq_h #define CODE_FOR_lsx_vseqi_w CODE_FOR_lsx_vseq_w @@ -568,7 +568,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE) #define CODE_FOR_lasx_xvaddi_du CODE_FOR_addv4di3 #define CODE_FOR_lasx_xvand_v CODE_FOR_andv32qi3 #define CODE_FOR_lasx_xvandi_b CODE_FOR_andv32qi3 -#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_lasx_xvbitsel_b +#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_simd_vbitselv32qi #define CODE_FOR_lasx_xvseqi_b CODE_FOR_lasx_xvseq_b #define CODE_FOR_lasx_xvseqi_h 
CODE_FOR_lasx_xvseq_h #define CODE_FOR_lasx_xvseqi_w CODE_FOR_lasx_xvseq_w diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index 9dc89ae8fe6..8fd24a27cfd 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -1011,18 +1011,6 @@ (define_insn "lsx_vbitrevi_<lsxfmt>" [(set_attr "type" "simd_bit") (set_attr "mode" "<MODE>")]) -(define_insn "lsx_vbitsel_<lsxfmt>" - [(set (match_operand:ILSX 0 "register_operand" "=f") - (ior:ILSX (and:ILSX (not:ILSX - (match_operand:ILSX 3 "register_operand" "f")) - (match_operand:ILSX 1 "register_operand" "f")) - (and:ILSX (match_dup 3) - (match_operand:ILSX 2 "register_operand" "f"))))] - "ISA_HAS_LSX" - "vbitsel.v\t%w0,%w1,%w2,%w3" - [(set_attr "type" "simd_bitmov") - (set_attr "mode" "<MODE>")]) - (define_insn "lsx_vbitseli_b" [(set (match_operand:V16QI 0 "register_operand" "=f") (ior:V16QI (and:V16QI (not:V16QI diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md index 611d1f87dd2..49070e829ca 100644 --- a/gcc/config/loongarch/simd.md +++ b/gcc/config/loongarch/simd.md @@ -950,6 +950,19 @@ (define_expand "<simd_isa>_maddw<ev_od>_q_du_d_punned" DONE; }) +(define_insn "@simd_vbitsel<mode>" + [(set (match_operand:ALLVEC 0 "register_operand" "=f") + (ior:ALLVEC + (and:ALLVEC + (not:ALLVEC (match_operand:ALLVEC 3 "register_operand" "f")) + (match_operand:ALLVEC 1 "register_operand" "f")) + (and:ALLVEC (match_dup 3) + (match_operand:ALLVEC 2 "register_operand" "f"))))] + "" + "<x>vbitsel.v\t%<wu>0,%<wu>1,%<wu>2,%<wu>3" + [(set_attr "type" "simd_bitmov") + (set_attr "mode" "<MODE>")]) + ; The LoongArch SX Instructions. (include "lsx.md") -- Xi Ruoyao <xry...@xry111.site> School of Aerospace Science and Technology, Xidian University