On Wed, 2025-02-05 at 16:47 +0800, Li Wei wrote:
/* snip */
> +/* Try to use the [x]vbitsel.v insn to optimize the vector shuffle, which
> + can reduce one copy insn in the loop compared to [x]vshuff. */
> +static bool
> +loongarch_expand_vec_perm_bitsel (struct expand_vec_perm_d *d)
> +{
> + if (!ISA_HAS_LSX && !ISA_HAS_LASX)
> + return false;
I think this check is redundant: if this function is called when
!ISA_HAS_LSX && !ISA_HAS_LASX, something must have gone seriously wrong
already, and we cannot salvage it here anyway.
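If you prefer to keep some guard here, a checking assert would document
the assumption without costing anything in release builds (my untested
suggestion):

  gcc_checking_assert (ISA_HAS_LSX || ISA_HAS_LASX);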
> + if (!loongarch_is_bitsel_pattern (d))
> + return false;
> +
> + if (d->testing_p)
> + return true;
> +
> + int i, val;
> + rtx tmp, tmp2, sel, op0, op1, target;
> + rtx rperm[MAX_VECT_LEN];
> +
> + for (i = 0; i < d->nelt; i += 1)
> + {
> + /* Here -1 means that all bits of the corresponding type are 1
> + (including the sign bit). */
> + val = d->perm[i] >= d->nelt ? -1 : 0;
> + rperm[i] = GEN_INT (val);
> + }
> +
> + tmp2 = gen_reg_rtx (d->vmode);
> + switch (d->vmode)
> + {
Perhaps something like

  machine_mode vimode
    = mode_for_vector (int_mode_for_size (GET_MODE_BITSIZE
					  (GET_MODE_INNER (d->vmode)),
					  0).require (),
		       d->nelt).require ();

(Not even tried to compile, just written in the mail client to
demonstrate the idea.)
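With vimode in hand, the whole switch should then collapse to something
like (equally untested):

  sel = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (d->nelt, rperm));
  tmp = simplify_gen_subreg (vimode, tmp2, d->vmode, 0);
  emit_move_insn (tmp, sel);

For the integer vector modes vimode == d->vmode and simplify_gen_subreg
just returns tmp2, so the default case falls out as well.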
> + case E_V4DFmode:
> + sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt,
> + rperm));
> + /* Because the [x]vbitsel.v insn pattern requires that all src
> + operands and dest operands are of the same type, they need to
> + be type-converted. */
> + tmp = simplify_gen_subreg (E_V4DImode, tmp2, d->vmode, 0);
> + emit_move_insn (tmp, sel);
> + break;
> + case E_V2DFmode:
> + sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt,
> + rperm));
> + tmp = simplify_gen_subreg (E_V2DImode, tmp2, d->vmode, 0);
> + emit_move_insn (tmp, sel);
> + break;
> + case E_V8SFmode:
> + sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt,
> + rperm));
> + tmp = simplify_gen_subreg (E_V8SImode, tmp2, d->vmode, 0);
> + emit_move_insn (tmp, sel);
> + break;
> + case E_V4SFmode:
> + sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt,
> + rperm));
> + tmp = simplify_gen_subreg (E_V4SImode, tmp2, d->vmode, 0);
> + emit_move_insn (tmp, sel);
> + break;
> + default:
> + sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt,
> + rperm));
> + emit_move_insn (tmp2, sel);
> + break;
> + }
> +
> + target = d->target;
> + op0 = d->op0;
> + op1 = d->one_vector_p ? d->op0 : d->op1;
> +
> + if (GET_MODE_SIZE (d->vmode) == 16)
> + {
> + switch (d->vmode)
We can refactor the .md files a little (see the patch below), and then
simply use

  emit_insn (gen_simd_vbitsel (d->vmode, target, op0, op1, tmp2));

instead of the 12 different cases here.
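(The leading "@" in the @simd_vbitsel<mode> name below makes genemit
produce a mode-parameterized gen_simd_vbitsel taking the machine_mode
as its first argument; see "Parameterized Names" in the internals
manual.)  With that, the tail of the function should shrink to roughly
this untested sketch:

  target = d->target;
  op0 = d->op0;
  op1 = d->one_vector_p ? d->op0 : d->op1;
  emit_insn (gen_simd_vbitsel (d->vmode, target, op0, op1, tmp2));
  return true;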
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index c31aefa892a..1b95ac70220 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -1151,18 +1151,6 @@ (define_insn "lasx_xvbitrevi_<lasxfmt>"
[(set_attr "type" "simd_bit")
(set_attr "mode" "<MODE>")])
 
-(define_insn "lasx_xvbitsel_<lasxfmt_f>"
- [(set (match_operand:LASX 0 "register_operand" "=f")
- (ior:LASX (and:LASX (not:LASX
- (match_operand:LASX 3 "register_operand" "f"))
- (match_operand:LASX 1 "register_operand" "f"))
- (and:LASX (match_dup 3)
- (match_operand:LASX 2 "register_operand" "f"))))]
- "ISA_HAS_LASX"
- "xvbitsel.v\t%u0,%u1,%u2,%u3"
- [(set_attr "type" "simd_bitmov")
- (set_attr "mode" "<MODE>")])
-
(define_insn "lasx_xvbitseli_b"
[(set (match_operand:V32QI 0 "register_operand" "=f")
(ior:V32QI (and:V32QI (not:V32QI
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
index 92248518cd5..d124c584e8d 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -247,7 +247,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
#define CODE_FOR_lsx_vandi_b CODE_FOR_andv16qi3
#define CODE_FOR_lsx_bnz_v CODE_FOR_lsx_bnz_v_b
#define CODE_FOR_lsx_bz_v CODE_FOR_lsx_bz_v_b
-#define CODE_FOR_lsx_vbitsel_v CODE_FOR_lsx_vbitsel_b
+#define CODE_FOR_lsx_vbitsel_v CODE_FOR_simd_vbitselv16qi
#define CODE_FOR_lsx_vseqi_b CODE_FOR_lsx_vseq_b
#define CODE_FOR_lsx_vseqi_h CODE_FOR_lsx_vseq_h
#define CODE_FOR_lsx_vseqi_w CODE_FOR_lsx_vseq_w
@@ -568,7 +568,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
#define CODE_FOR_lasx_xvaddi_du CODE_FOR_addv4di3
#define CODE_FOR_lasx_xvand_v CODE_FOR_andv32qi3
#define CODE_FOR_lasx_xvandi_b CODE_FOR_andv32qi3
-#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_lasx_xvbitsel_b
+#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_simd_vbitselv32qi
#define CODE_FOR_lasx_xvseqi_b CODE_FOR_lasx_xvseq_b
#define CODE_FOR_lasx_xvseqi_h CODE_FOR_lasx_xvseq_h
#define CODE_FOR_lasx_xvseqi_w CODE_FOR_lasx_xvseq_w
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 9dc89ae8fe6..8fd24a27cfd 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -1011,18 +1011,6 @@ (define_insn "lsx_vbitrevi_<lsxfmt>"
[(set_attr "type" "simd_bit")
(set_attr "mode" "<MODE>")])
 
-(define_insn "lsx_vbitsel_<lsxfmt>"
- [(set (match_operand:ILSX 0 "register_operand" "=f")
- (ior:ILSX (and:ILSX (not:ILSX
- (match_operand:ILSX 3 "register_operand" "f"))
- (match_operand:ILSX 1 "register_operand" "f"))
- (and:ILSX (match_dup 3)
- (match_operand:ILSX 2 "register_operand" "f"))))]
- "ISA_HAS_LSX"
- "vbitsel.v\t%w0,%w1,%w2,%w3"
- [(set_attr "type" "simd_bitmov")
- (set_attr "mode" "<MODE>")])
-
(define_insn "lsx_vbitseli_b"
[(set (match_operand:V16QI 0 "register_operand" "=f")
(ior:V16QI (and:V16QI (not:V16QI
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 611d1f87dd2..49070e829ca 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -950,6 +950,19 @@ (define_expand "<simd_isa>_maddw<ev_od>_q_du_d_punned"
DONE;
})
 
+(define_insn "@simd_vbitsel<mode>"
+ [(set (match_operand:ALLVEC 0 "register_operand" "=f")
+ (ior:ALLVEC
+ (and:ALLVEC
+ (not:ALLVEC (match_operand:ALLVEC 3 "register_operand" "f"))
+ (match_operand:ALLVEC 1 "register_operand" "f"))
+ (and:ALLVEC (match_dup 3)
+ (match_operand:ALLVEC 2 "register_operand" "f"))))]
+ ""
+ "<x>vbitsel.v\t%<wu>0,%<wu>1,%<wu>2,%<wu>3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "<MODE>")])
+
; The LoongArch SX Instructions.
 
(include "lsx.md")
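(For completeness: the <x> and <wu> mode attributes make this single
pattern emit vbitsel.v with %w operands for the 128-bit modes and
xvbitsel.v with %u operands for the 256-bit ones, matching the two
removed patterns.)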
--
Xi Ruoyao <[email protected]>
School of Aerospace Science and Technology, Xidian University