The AArch64 FEAT_LUT extension is optional from Armv9.2-A and mandatory
from Armv9.5-A.  It introduces instructions for lookup table reads with
2-bit and 4-bit packed indices.

This patch adds the AdvSIMD LUT intrinsics for LUTI2, supporting table
lookups with 2-bit packed indices.  The following intrinsics are added
(a short usage sketch follows the list):
* vluti2{q}_lane{q}_u8
* vluti2{q}_lane{q}_s8
* vluti2{q}_lane{q}_p8
* vluti2{q}_lane{q}_u16
* vluti2{q}_lane{q}_s16
* vluti2{q}_lane{q}_p16
* vluti2{q}_lane{q}_f16
* vluti2{q}_lane{q}_bf16
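For illustration, a hypothetical helper using the q/laneq u8 form could
look as follows (build with -march=armv9-a+lut); the lane argument
selects which segment of packed 2-bit indices in the index vector is
consumed, as described in the Arm ARM:

  #include <arm_neon.h>

  /* Each 2-bit field of the selected index segment picks an element of
     table; the lane value 0 here is purely illustrative.  */
  uint8x16_t
  expand_indices (uint8x16_t table, uint8x16_t idx)
  {
    return vluti2q_laneq_u8 (table, idx, 0);
  }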
gcc/ChangeLog:
* config/aarch64/aarch64-builtins.cc (aarch64_builtin_signatures):
Add binary_lane.
(aarch64_fntype): Handle the binary_lane shape.
(aarch64_expand_pragma_builtin): Handle binary_lane intrinsics and
expand their lane operand.
* config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION):
Add LUT feature flag.
* config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_LANE):
New macro for lane-based intrinsics.
(ENTRY_VLANEIU): New macro for LUTI lanes (unsigned).
(ENTRY_VLANEIS): New macro for LUTI lanes (signed).
(ENTRY_VLANEP): New macro for LUTI lanes (poly).
(ENTRY_VLANEF): New macro for LUTI lanes (float).
(ENTRY_VLANEBF): New macro for LUTI lanes (bfloat).
(REQUIRED_EXTENSIONS): Require AARCH64_FL_LUT for the vluti2 entries.
* config/aarch64/aarch64-simd.md
(@aarch64_<vluti_uns_op><VLUT1:mode><VLUT2:mode>):
Add instruction pattern for LUTI2 instructions.
* config/aarch64/aarch64.h (TARGET_LUT): New macro.
* config/aarch64/iterators.md (UNSPEC_SIMD_LUTI2, UNSPEC_SIMD_LUTI4):
New unspecs.
(VCONQ): Handle V4BF and V8BF.
(Vconq): New mode attribute.
(VLUT1, VLUT2): New mode iterators.
(VLUT_UNS): New int iterator.
(vluti_uns_op): New int attribute.

gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/vluti-builtins.c: New test.
---
gcc/config/aarch64/aarch64-builtins.cc | 22 +-
.../aarch64/aarch64-option-extensions.def | 2 +
.../aarch64/aarch64-simd-pragma-builtins.def | 61 ++++
gcc/config/aarch64/aarch64-simd.md | 10 +
gcc/config/aarch64/aarch64.h | 4 +
gcc/config/aarch64/iterators.md | 25 ++
.../gcc.target/aarch64/simd/vluti-builtins.c | 329 ++++++++++++++++++
7 files changed, 452 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index ad82c680c6a..65813091f23 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1591,6 +1591,7 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)
enum class aarch64_builtin_signatures
{
binary,
+ binary_lane,
};
namespace {
@@ -1656,6 +1657,7 @@ static tree
aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
{
tree type0, type1, type2;
+ tree immtype = aarch64_simd_builtin_type (SImode, qualifier_lane_index);
switch (builtin_data.signature)
{
@@ -1668,6 +1670,16 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
builtin_data.types[2].qualifiers);
return build_function_type_list (type0, type1, type2, NULL_TREE);
+ case aarch64_builtin_signatures::binary_lane:
+ type0 = aarch64_simd_builtin_type (builtin_data.types[0].mode,
+ builtin_data.types[0].qualifiers);
+ type1 = aarch64_simd_builtin_type (builtin_data.types[1].mode,
+ builtin_data.types[1].qualifiers);
+ type2 = aarch64_simd_builtin_type (builtin_data.types[2].mode,
+ builtin_data.types[2].qualifiers);
+ return build_function_type_list (type0, type1, type2,
+ immtype, NULL_TREE);
+
default:
gcc_unreachable ();
}
@@ -3383,7 +3395,7 @@ static rtx
aarch64_expand_pragma_builtin (tree exp, rtx target,
const aarch64_pragma_builtins_data *builtin_data)
{
- expand_operand ops[3];
+ expand_operand ops[4];
auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
create_output_operand (&ops[0], target, builtin_data->types[0].mode);
@@ -3399,6 +3411,14 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
icode = code_for_aarch64 (unspec, builtin_data->types[0].mode);
expand_insn (icode, 3, ops);
break;
+ case aarch64_builtin_signatures::binary_lane:
+ rtx op3;
+ op3 = expand_normal (CALL_EXPR_ARG (exp, 2));
+ create_input_operand (&ops[3], op3, SImode);
+ icode = code_for_aarch64 (unspec,
+ builtin_data->types[1].mode, builtin_data->types[2].mode);
+ expand_insn (icode, 4, ops);
+ break;
default:
gcc_unreachable();
}
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 8279f5a76ea..e3cafd90df9 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -236,6 +236,8 @@ AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")
AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
+AARCH64_OPT_EXTENSION("lut", LUT, (SIMD), (), (), "lut")
+
#undef AARCH64_OPT_FMV_EXTENSION
#undef AARCH64_OPT_EXTENSION
#undef AARCH64_FMV_FEATURE
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index c669919fa04..5ae36673292 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -31,8 +31,69 @@
ENTRY_BINARY (NAME##q_f32, SIGNATURE, f32q, f32q, f32q, UNSPEC) \
ENTRY_BINARY (NAME##q_f64, SIGNATURE, f64q, f64q, f64q, UNSPEC)
+#undef ENTRY_LANE
+#define ENTRY_LANE(N, S, T0, T1, T2, U) \
+ ENTRY (N, S, T0, T1, T2, u8, U)
+
+#undef ENTRY_VLANEIU
+#define ENTRY_VLANEIU(N, S, U) \
+ ENTRY_LANE (N##_lane_u8, S, u8q, u8, u8, U) \
+ ENTRY_LANE (N##_laneq_u8, S, u8q, u8, u8q, U) \
+ ENTRY_LANE (N##q_lane_u8, S, u8q, u8q, u8, U) \
+ ENTRY_LANE (N##q_laneq_u8, S, u8q, u8q, u8q, U) \
+ ENTRY_LANE (N##_lane_u16, S, u16q, u16, u8, U) \
+ ENTRY_LANE (N##_laneq_u16, S, u16q, u16, u8q, U) \
+ ENTRY_LANE (N##q_lane_u16, S, u16q, u16q, u8, U) \
+ ENTRY_LANE (N##q_laneq_u16, S, u16q, u16q, u8q, U)
+
+#undef ENTRY_VLANEIS
+#define ENTRY_VLANEIS(N, S, U) \
+ ENTRY_LANE (N##_lane_s8, S, s8q, s8, u8, U) \
+ ENTRY_LANE (N##_laneq_s8, S, s8q, s8, u8q, U) \
+ ENTRY_LANE (N##q_lane_s8, S, s8q, s8q, u8, U) \
+ ENTRY_LANE (N##q_laneq_s8, S, s8q, s8q, u8q, U) \
+ ENTRY_LANE (N##_lane_s16, S, s16q, s16, u8, U) \
+ ENTRY_LANE (N##_laneq_s16, S, s16q, s16, u8q, U) \
+ ENTRY_LANE (N##q_lane_s16, S, s16q, s16q, u8, U) \
+ ENTRY_LANE (N##q_laneq_s16, S, s16q, s16q, u8q, U)
+
+#undef ENTRY_VLANEP
+#define ENTRY_VLANEP(N, S, U) \
+ ENTRY_LANE (N##_lane_p8, S, p8q, p8, u8, U) \
+ ENTRY_LANE (N##_laneq_p8, S, p8q, p8, u8q, U) \
+ ENTRY_LANE (N##q_lane_p8, S, p8q, p8q, u8, U) \
+ ENTRY_LANE (N##q_laneq_p8, S, p8q, p8q, u8q, U) \
+ ENTRY_LANE (N##_lane_p16, S, p16q, p16, u8, U) \
+ ENTRY_LANE (N##_laneq_p16, S, p16q, p16, u8q, U) \
+ ENTRY_LANE (N##q_lane_p16, S, p16q, p16q, u8, U) \
+ ENTRY_LANE (N##q_laneq_p16, S, p16q, p16q, u8q, U)
+
+
+#undef ENTRY_VLANEF
+#define ENTRY_VLANEF(N, S, U) \
+ ENTRY_LANE (N##_lane_f16, S, f16q, f16, u8, U) \
+ ENTRY_LANE (N##_laneq_f16, S, f16q, f16, u8q, U) \
+ ENTRY_LANE (N##q_lane_f16, S, f16q, f16q, u8, U) \
+ ENTRY_LANE (N##q_laneq_f16, S, f16q, f16q, u8q, U)
+
+#undef ENTRY_VLANEBF
+#define ENTRY_VLANEBF(N, S, U) \
+ ENTRY_LANE (N##_lane_bf16, S, bf16q, bf16, u8, U) \
+ ENTRY_LANE (N##_laneq_bf16, S, bf16q, bf16, u8q, U) \
+ ENTRY_LANE (N##q_lane_bf16, S, bf16q, bf16q, u8, U) \
+ ENTRY_LANE (N##q_laneq_bf16, S, bf16q, bf16q, u8q, U)
+
// faminmax
#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX)
ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN)
#undef REQUIRED_EXTENSIONS
+
+// lut
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_LUT)
+ENTRY_VLANEIS (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEIU (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEP (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEF (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+ENTRY_VLANEBF (vluti2, binary_lane, UNSPEC_SIMD_LUTI2)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e456f693d2f..73834966d21 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -9965,3 +9965,13 @@
"TARGET_FAMINMAX"
"<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
)
+
+(define_insn "@aarch64_<vluti_uns_op><VLUT1:mode><VLUT2:mode>"
+ [(set (match_operand:<VLUT1:VCONQ> 0 "register_operand" "=w")
+ (unspec:<VLUT1:VCONQ> [(match_operand:VLUT1 1 "register_operand" "w")
+ (match_operand:VLUT2 2 "register_operand" "w")
+ (match_operand:SI 3 "const_int_operand")]
+ VLUT_UNS))]
+  "TARGET_LUT"
+  "<vluti_uns_op>\t%0<VLUT1:Vmtype>, %1<VLUT1:Vmtype>, %2[%3]"
+)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 593319fd472..86471717a49 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -474,6 +474,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
#define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX)
#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX)
+/* Lookup table (LUTI) extension instructions are
+ enabled through +lut. */
+#define TARGET_LUT AARCH64_HAVE_ISA (LUT)
+
/* Prefer different predicate registers for the output of a predicated
operation over re-using an existing input predicate. */
#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0bc98315bb6..76cf44516ec 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1064,6 +1064,8 @@
UNSPEC_FCVTXN ; Used in aarch64-simd.md.
UNSPEC_FAMAX ; Used in aarch64-simd.md.
UNSPEC_FAMIN ; Used in aarch64-simd.md.
+ UNSPEC_SIMD_LUTI2 ; Used in aarch64-simd.md.
+ UNSPEC_SIMD_LUTI4 ; Used in aarch64-simd.md.
;; All used in aarch64-sve2.md
UNSPEC_FCVTN
@@ -1603,11 +1605,24 @@
(V2SI "V4SI") (V4SI "V4SI")
(DI "V2DI") (V2DI "V2DI")
(V4HF "V8HF") (V8HF "V8HF")
+ (V4BF "V8BF") (V8BF "V8BF")
(V2SF "V4SF") (V4SF "V4SF")
(V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")
(SF "V4SF") (DF "V2DF")])
+;; 128-bit container modes for the inner or scalar source mode, in lower-case.
+(define_mode_attr Vconq [(V8QI "v16qi") (V16QI "v16qi")
+ (V4HI "v8hi") (V8HI "v8hi")
+ (V2SI "v4si") (V4SI "v4si")
+ (DI "v2di") (V2DI "v2di")
+ (V4HF "v8hf") (V8HF "v8hf")
+ (V4BF "v8bf") (V8BF "v8bf")
+ (V2SF "v4sf") (V4SF "v4sf")
+ (V2DF "v2df") (SI "v4si")
+ (HI "v8hi") (QI "v16qi")
+ (SF "v4sf") (DF "v2df")])
+
;; Half modes of all vector modes.
(define_mode_attr VHALF [(V8QI "V4QI") (V16QI "V8QI")
(V4HI "V2HI") (V8HI "V4HI")
@@ -4531,3 +4546,13 @@
(define_code_attr faminmax_op
[(smax "famax") (smin "famin")])
+
+;; Iterators and attributes for lut
+
+(define_mode_iterator VLUT1 [V16QI V8QI V8HI V4HI V8HF V4HF V8BF V4BF])
+
+(define_mode_iterator VLUT2 [V8QI V16QI])
+(define_int_iterator VLUT_UNS [UNSPEC_SIMD_LUTI2 UNSPEC_SIMD_LUTI4])
+
+(define_int_attr vluti_uns_op
+ [(UNSPEC_SIMD_LUTI2 "luti2") (UNSPEC_SIMD_LUTI4 "luti4")])
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c b/gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c
new file mode 100644
index 00000000000..142657ba2ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vluti-builtins.c
@@ -0,0 +1,329 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+lut" } */
+/* { dg-final { check-function-bodies "**" ""} } */
+
+#include "arm_neon.h"
+
+/*
+** test_vluti2_lane_u8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2_lane_u8(uint8x8_t a, uint8x8_t b)
+{
+ return vluti2_lane_u8(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_u8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2q_lane_u8(uint8x16_t a, uint8x8_t b)
+{
+ return vluti2q_lane_u8(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_u8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2_laneq_u8(uint8x8_t a, uint8x16_t b)
+{
+ return vluti2_laneq_u8(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_u8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+uint8x16_t
+test_vluti2q_laneq_u8(uint8x16_t a, uint8x16_t b)
+{
+ return vluti2q_laneq_u8(a, b, 0);
+}
+
+/*
+** test_vluti2_lane_s8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2_lane_s8(int8x8_t a, uint8x8_t b)
+{
+ return vluti2_lane_s8(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_s8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2q_lane_s8(int8x16_t a, uint8x8_t b)
+{
+ return vluti2q_lane_s8(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_s8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2_laneq_s8(int8x8_t a, uint8x16_t b)
+{
+ return vluti2_laneq_s8(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_s8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+
+int8x16_t
+test_vluti2q_laneq_s8(int8x16_t a, uint8x16_t b)
+{
+ return vluti2q_laneq_s8(a, b, 0);
+}
+
+/*
+** test_vluti2_lane_u16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2_lane_u16(uint16x4_t a, uint8x8_t b)
+{
+ return vluti2_lane_u16(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_u16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2q_lane_u16(uint16x8_t a, uint8x8_t b)
+{
+ return vluti2q_lane_u16(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_u16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2_laneq_u16(uint16x4_t a, uint8x16_t b)
+{
+ return vluti2_laneq_u16(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_u16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+uint16x8_t
+test_vluti2q_laneq_u16(uint16x8_t a, uint8x16_t b)
+{
+ return vluti2q_laneq_u16(a, b, 0);
+}
+
+/*
+** test_vluti2q_lane_s16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+int16x8_t
+test_vluti2q_lane_s16(int16x8_t a, uint8x8_t b)
+{
+ return vluti2q_lane_s16(a, b, 0);
+}
+
+/*
+** test_vluti2_laneq_s16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+
+int16x8_t
+test_vluti2_laneq_s16(int16x4_t a, uint8x16_t b)
+{
+ return vluti2_laneq_s16(a, b, 0);
+}
+
+/*
+** test_vluti2q_laneq_s16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+
+int16x8_t
+test_vluti2q_laneq_s16(int16x8_t a, uint8x16_t b)
+{
+ return vluti2q_laneq_s16(a, b, 0);
+}
+
+/*
+** test_vluti2_lane_p8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
+ return vluti2_lane_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_p8:
+** luti2 v0\.8b, v0\.8b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
+ return vluti2_laneq_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_p8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
+ return vluti2q_lane_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_p8:
+** luti2 v0\.16b, v0\.16b, v1\[0\]
+** ret
+*/
+poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
+ return vluti2q_laneq_p8(vn, vm, 0);
+}
+
+/*
+** test_vluti2_lane_f16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) {
+ return vluti2_lane_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_f16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) {
+ return vluti2_laneq_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_f16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) {
+ return vluti2q_lane_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_f16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) {
+ return vluti2q_laneq_f16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_lane_bf16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) {
+ return vluti2_lane_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_bf16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) {
+ return vluti2_laneq_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_bf16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) {
+ return vluti2q_lane_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_bf16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) {
+ return vluti2q_laneq_bf16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_lane_p16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm) {
+ return vluti2_lane_p16(vn, vm, 0);
+}
+
+/*
+** test_vluti2_laneq_p16:
+** luti2 v0\.4h, v0\.4h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm) {
+ return vluti2_laneq_p16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_lane_p16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm) {
+ return vluti2q_lane_p16(vn, vm, 0);
+}
+
+/*
+** test_vluti2q_laneq_p16:
+** luti2 v0\.8h, v0\.8h, v1\[0\]
+** ret
+*/
+poly16x8_t test_vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm) {
+ return vluti2q_laneq_p16(vn, vm, 0);
+}