Currently, a shuffle that selects each element from one of two vectors at
the corresponding position is implemented on LoongArch with the [x]vshuf
instruction, which requires additional copies of the index vector. In this
case, the [x]vbitsel.v instruction can be used instead.
gcc/ChangeLog:
* config/loongarch/lasx.md (lasx_xvbitsel_<lasxfmt_f>): Remove.
* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vbitsel_v):
Adjust.
(CODE_FOR_lasx_xvbitsel_v): Ditto.
* config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const):
Ditto.
(loongarch_is_bitsel_pattern): New check function.
(loongarch_expand_vec_perm_bitsel): New function implementing the
[x]vbitsel.v expansion.
(loongarch_expand_lsx_shuffle): Adjust.
(loongarch_expand_vec_perm_const): Add new optimization case.
* config/loongarch/lsx.md (lsx_vbitsel_<lsxfmt>): Remove.
* config/loongarch/simd.md (@simd_vbitsel<mode>): New
define_insn template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vec_perm-xvshuf.c: Move to...
* gcc.target/loongarch/vec_perm-xvbitsel.c: ...here.
* gcc.target/loongarch/vec_perm-vbitsel.c: New test.
---
gcc/config/loongarch/lasx.md | 12 ---
gcc/config/loongarch/loongarch-builtins.cc | 4 +-
gcc/config/loongarch/loongarch.cc | 89 ++++++++++++++++++-
gcc/config/loongarch/lsx.md | 12 ---
gcc/config/loongarch/simd.md | 13 +++
.../gcc.target/loongarch/vec_perm-vbitsel.c | 17 ++++
...{vec_perm-xvshuf.c => vec_perm-xvbitsel.c} | 4 +-
7 files changed, 121 insertions(+), 30 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vec_perm-vbitsel.c
rename gcc/testsuite/gcc.target/loongarch/{vec_perm-xvshuf.c =>
vec_perm-xvbitsel.c} (77%)
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index a37c85a25a4..c1049e319db 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -1217,18 +1217,6 @@ (define_insn "lasx_xvbitrevi_<lasxfmt>"
[(set_attr "type" "simd_bit")
(set_attr "mode" "<MODE>")])
-(define_insn "lasx_xvbitsel_<lasxfmt_f>"
- [(set (match_operand:LASX 0 "register_operand" "=f")
- (ior:LASX (and:LASX (not:LASX
- (match_operand:LASX 3 "register_operand" "f"))
- (match_operand:LASX 1 "register_operand" "f"))
- (and:LASX (match_dup 3)
- (match_operand:LASX 2 "register_operand" "f"))))]
- "ISA_HAS_LASX"
- "xvbitsel.v\t%u0,%u1,%u2,%u3"
- [(set_attr "type" "simd_bitmov")
- (set_attr "mode" "<MODE>")])
-
(define_insn "lasx_xvbitseli_b"
[(set (match_operand:V32QI 0 "register_operand" "=f")
(ior:V32QI (and:V32QI (not:V32QI
diff --git a/gcc/config/loongarch/loongarch-builtins.cc
b/gcc/config/loongarch/loongarch-builtins.cc
index 92d995a916a..0682bc6baf9 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -247,7 +247,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
#define CODE_FOR_lsx_vandi_b CODE_FOR_andv16qi3
#define CODE_FOR_lsx_bnz_v CODE_FOR_lsx_bnz_v_b
#define CODE_FOR_lsx_bz_v CODE_FOR_lsx_bz_v_b
-#define CODE_FOR_lsx_vbitsel_v CODE_FOR_lsx_vbitsel_b
+#define CODE_FOR_lsx_vbitsel_v CODE_FOR_simd_vbitselv16qi
#define CODE_FOR_lsx_vseqi_b CODE_FOR_lsx_vseq_b
#define CODE_FOR_lsx_vseqi_h CODE_FOR_lsx_vseq_h
#define CODE_FOR_lsx_vseqi_w CODE_FOR_lsx_vseq_w
@@ -538,7 +538,7 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && ISA_HAS_FRECIPE)
#define CODE_FOR_lasx_xvaddi_du CODE_FOR_addv4di3
#define CODE_FOR_lasx_xvand_v CODE_FOR_andv32qi3
#define CODE_FOR_lasx_xvandi_b CODE_FOR_andv32qi3
-#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_lasx_xvbitsel_b
+#define CODE_FOR_lasx_xvbitsel_v CODE_FOR_simd_vbitselv32qi
#define CODE_FOR_lasx_xvseqi_b CODE_FOR_lasx_xvseq_b
#define CODE_FOR_lasx_xvseqi_h CODE_FOR_lasx_xvseq_h
#define CODE_FOR_lasx_xvseqi_w CODE_FOR_lasx_xvseq_w
diff --git a/gcc/config/loongarch/loongarch.cc
b/gcc/config/loongarch/loongarch.cc
index 3ac6a74f15b..2de3110383a 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -8372,7 +8372,10 @@ loongarch_try_expand_lsx_vshuf_const (struct
expand_vec_perm_d *d)
else
{
sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
- emit_move_insn (d->target, sel);
+ /* Weakening dependencies by copying indices (for vshuf). */
+ tmp = gen_reg_rtx (d->vmode);
+ emit_move_insn (tmp, sel);
+ emit_move_insn (d->target, tmp);
}
switch (d->vmode)
@@ -8444,9 +8447,31 @@ loongarch_is_imm_set_shuffle (struct expand_vec_perm_d
*d)
return true;
}
+/* Check whether d->perm suits [x]vbitsel.v: perm[i] % nelt == i for all i. */
+static bool
+loongarch_is_bitsel_pattern (struct expand_vec_perm_d *d)
+{
+ bool result = true;
+
+ for (int i = 0; i < d->nelt; i++)
+ {
+ unsigned char buf = d->perm[i];
+ if ((buf % d->nelt) != i) /* Index does not map back to position i. */
+ {
+ result = false;
+ break;
+ }
+ }
+
+ return result;
+}
+
static bool
loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *);
+static bool
+loongarch_expand_vec_perm_bitsel (struct expand_vec_perm_d *);
+
/* Try to match and expand all kinds of 128-bit const vector permutation
cases. */
@@ -8462,6 +8487,9 @@ loongarch_expand_lsx_shuffle (struct expand_vec_perm_d *d)
if (loongarch_expand_vec_perm_even_odd (d))
return true;
+ if (loongarch_expand_vec_perm_bitsel (d))
+ return true;
+
return loongarch_try_expand_lsx_vshuf_const (d);
}
@@ -9122,6 +9150,57 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1,
rtx sel)
}
}
+/* Try to expand the vector shuffle D with the [x]vbitsel.v insn, which
+ can save one copy insn in the loop compared to [x]vshuf. */
+static bool
+loongarch_expand_vec_perm_bitsel (struct expand_vec_perm_d *d)
+{
+ gcc_assert (ISA_HAS_LSX || ISA_HAS_LASX);
+
+ if (!loongarch_is_bitsel_pattern (d))
+ return false;
+
+ if (d->testing_p) /* Report success without emitting any insn. */
+ return true;
+
+ int i, val;
+ rtx tmp, tmp2, sel, op0, op1, target;
+ rtx rperm[MAX_VECT_LEN];
+
+ for (i = 0; i < d->nelt; i += 1)
+ {
+ /* Build the select mask: -1 (all bits set, including the sign bit)
+ takes the element from op1, 0 takes it from op0. */
+ val = d->perm[i] >= d->nelt ? -1 : 0;
+ rperm[i] = GEN_INT (val);
+ }
+
+ tmp2 = gen_reg_rtx (d->vmode); /* Register that will hold the mask. */
+ machine_mode vimode = mode_for_vector
+ (int_mode_for_size (GET_MODE_BITSIZE
+ (GET_MODE_INNER
+ (d->vmode)), 0).require (), d->nelt).require ();
+
+ sel = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (d->nelt, rperm));
+ if (GET_MODE_CLASS (d->vmode) == MODE_VECTOR_FLOAT)
+ {
+ /* The [x]vbitsel.v insn pattern requires all source and dest
+ operands to have the same mode, so the integer mask is stored
+ into the float-mode register through an integer-mode subreg. */
+ tmp = simplify_gen_subreg (vimode, tmp2, d->vmode, 0);
+ emit_move_insn (tmp, sel);
+ }
+ else
+ emit_move_insn (tmp2, sel);
+
+ target = d->target;
+ op0 = d->op0;
+ op1 = d->one_vector_p ? d->op0 : d->op1;
+
+ emit_insn (gen_simd_vbitsel (d->vmode, target, op0, op1, tmp2));
+ return true;
+}
+
/* Following are the assist function for const vector permutation support. */
static bool
loongarch_is_quad_duplicate (struct expand_vec_perm_d *d)
@@ -9598,6 +9677,9 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d
*d)
return true;
}
+ if (loongarch_expand_vec_perm_bitsel (d))
+ return true;
+
if (loongarch_if_match_xvshuffle (d))
{
if (d->testing_p)
@@ -9666,7 +9748,10 @@ expand_perm_const_end:
default:
sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt,
rperm));
- emit_move_insn (d->target, sel);
+ /* Weakening dependencies by copying indices (for xvshuf). */
+ tmp = gen_reg_rtx (d->vmode);
+ emit_move_insn (tmp, sel);
+ emit_move_insn (d->target, tmp);
break;
}
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index ca0066a21ed..0d7a8588819 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -1073,18 +1073,6 @@ (define_insn "lsx_vbitrevi_<lsxfmt>"
[(set_attr "type" "simd_bit")
(set_attr "mode" "<MODE>")])
-(define_insn "lsx_vbitsel_<lsxfmt>"
- [(set (match_operand:ILSX 0 "register_operand" "=f")
- (ior:ILSX (and:ILSX (not:ILSX
- (match_operand:ILSX 3 "register_operand" "f"))
- (match_operand:ILSX 1 "register_operand" "f"))
- (and:ILSX (match_dup 3)
- (match_operand:ILSX 2 "register_operand" "f"))))]
- "ISA_HAS_LSX"
- "vbitsel.v\t%w0,%w1,%w2,%w3"
- [(set_attr "type" "simd_bitmov")
- (set_attr "mode" "<MODE>")])
-
(define_insn "lsx_vbitseli_b"
[(set (match_operand:V16QI 0 "register_operand" "=f")
(ior:V16QI (and:V16QI (not:V16QI
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 7605b17d21e..4df19b06727 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -546,6 +546,19 @@ (define_expand "cbranch<mode>4"
DONE;
})
+(define_insn "@simd_vbitsel<mode>"
+ [(set (match_operand:ALLVEC 0 "register_operand" "=f")
+ (ior:ALLVEC
+ (and:ALLVEC
+ (not:ALLVEC (match_operand:ALLVEC 3 "register_operand" "f"))
+ (match_operand:ALLVEC 1 "register_operand" "f"))
+ (and:ALLVEC (match_dup 3)
+ (match_operand:ALLVEC 2 "register_operand" "f"))))]
+ ""
+ "<x>vbitsel.v\t%<wu>0,%<wu>1,%<wu>2,%<wu>3"
+ [(set_attr "type" "simd_bitmov")
+ (set_attr "mode" "<MODE>")])
+
; The LoongArch SX Instructions.
(include "lsx.md")
diff --git a/gcc/testsuite/gcc.target/loongarch/vec_perm-vbitsel.c
b/gcc/testsuite/gcc.target/loongarch/vec_perm-vbitsel.c
new file mode 100644
index 00000000000..7a5118273c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vec_perm-vbitsel.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlsx" } */
+/* { dg-final { scan-assembler-not "vshuf.w" } } */
+/* { dg-final { scan-assembler-not "vori.b" } } */
+/* { dg-final { scan-assembler "vbitsel.v" } } */
+
+void
+foo (int a[], int b[], int c[])
+{
+ for (int i = 0; i < 100; i += 4)
+ {
+ c[i + 0] = a[i + 0] + b[i + 0];
+ c[i + 1] = a[i + 1] - b[i + 1];
+ c[i + 2] = a[i + 2] - b[i + 2];
+ c[i + 3] = a[i + 3] + b[i + 3];
+ }
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vec_perm-xvshuf.c
b/gcc/testsuite/gcc.target/loongarch/vec_perm-xvbitsel.c
similarity index 77%
rename from gcc/testsuite/gcc.target/loongarch/vec_perm-xvshuf.c
rename to gcc/testsuite/gcc.target/loongarch/vec_perm-xvbitsel.c
index 6b19c2c2fd8..b3808b550e5 100644
--- a/gcc/testsuite/gcc.target/loongarch/vec_perm-xvshuf.c
+++ b/gcc/testsuite/gcc.target/loongarch/vec_perm-xvbitsel.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mlasx" } */
-/* { dg-final { scan-assembler "xvshuf.w" } } */
+/* { dg-final { scan-assembler-not "xvshuf.w" } } */
/* { dg-final { scan-assembler-not "xvperm.w" } } */
-/* { dg-final { scan-assembler-not "xvbitsel.v" } } */
+/* { dg-final { scan-assembler "xvbitsel.v" } } */
void
foo (int a[], int b[], int c[])
--
2.38.1