https://gcc.gnu.org/g:a07a2b8c9e7c2d123f0178875c9110eaf9770b7a
commit r15-5884-ga07a2b8c9e7c2d123f0178875c9110eaf9770b7a Author: Saurabh Jha <saurabh....@arm.com> Date: Tue Dec 3 09:54:01 2024 +0000 aarch64: Add support for AdvSIMD lut The AArch64 FEAT_LUT extension is optional from Armv9.2-A and mandatory from Armv9.5-A. It introduces instructions for lookup table reads with bit indices. This patch adds support for AdvSIMD lut intrinsics. The intrinsics for this extension are implemented as the following builtin functions: * vluti2{q}_lane{q}_{u8|s8|p8} * vluti2{q}_lane{q}_{u16|s16|p16|f16|bf16} * vluti4q_lane{q}_{u8|s8|p8} * vluti4q_lane{q}_{u16|s16|p16|f16|bf16}_x2 We also introduced a new approach to do lane checks for AdvSIMD. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (aarch64_builtin_signatures): Add binary_lane. (aarch64_fntype): Handle it. (simd_types): Add 16-bit x2 types. (aarch64_pragma_builtins_checker): New class. (aarch64_general_check_builtin_call): Use it. (aarch64_expand_pragma_builtin): Add support for lut unspecs. * config/aarch64/aarch64-option-extensions.def (AARCH64_OPT_EXTENSION): Add lut option. * config/aarch64/aarch64-simd-pragma-builtins.def (ENTRY_BINARY_LANE): Modify to use new ENTRY macro. (ENTRY_TERNARY_VLUT8): Macro to declare lut intrinsics. (ENTRY_TERNARY_VLUT16): Macro to declare lut intrinsics. (REQUIRED_EXTENSIONS): Declare lut intrinsics. * config/aarch64/aarch64-simd.md (@aarch64_lut<VLUT:mode><VB:mode>): Instruction pattern for luti2 and luti4 intrinsics. (@aarch64_lut<VLUTx2:mode><VB:mode>): Instruction pattern for luti4x2 intrinsics. * config/aarch64/aarch64.h (TARGET_LUT): lut flag. * config/aarch64/iterators.md: Iterators and attributes for lut. * doc/invoke.texi: Document extension in AArch64 Options. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/lut-incorrect-range.c: New test. * gcc.target/aarch64/simd/lut-no-flag.c: New test. * gcc.target/aarch64/simd/lut.c: New test. 
Co-authored-by: Vladimir Miloserdov <vladimir.miloser...@arm.com> Co-authored-by: Richard Sandiford <richard.sandif...@arm.com> Diff: --- gcc/config/aarch64/aarch64-builtins.cc | 132 +++- gcc/config/aarch64/aarch64-option-extensions.def | 2 + .../aarch64/aarch64-simd-pragma-builtins.def | 38 + gcc/config/aarch64/aarch64-simd.md | 25 + gcc/config/aarch64/aarch64.h | 3 + gcc/config/aarch64/iterators.md | 14 + gcc/doc/invoke.texi | 2 + .../gcc.target/aarch64/simd/lut-incorrect-range.c | 221 ++++++ .../gcc.target/aarch64/simd/lut-no-flag.c | 10 + gcc/testsuite/gcc.target/aarch64/simd/lut.c | 849 +++++++++++++++++++++ 10 files changed, 1294 insertions(+), 2 deletions(-) diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index 8984f0c59b97..f8c8a2721388 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -50,6 +50,8 @@ #include "builtins.h" #include "aarch64-builtins.h" +using namespace aarch64; + #define v8qi_UP E_V8QImode #define v8di_UP E_V8DImode #define v4hi_UP E_V4HImode @@ -1600,6 +1602,7 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma) enum class aarch64_builtin_signatures { binary, + binary_lane, }; namespace { @@ -1623,15 +1626,20 @@ namespace simd_types { constexpr simd_type f16 { V4HFmode, qualifier_none }; constexpr simd_type f16q { V8HFmode, qualifier_none }; + constexpr simd_type f16qx2 { V2x8HFmode, qualifier_none }; constexpr simd_type p16 { V4HImode, qualifier_poly }; constexpr simd_type p16q { V8HImode, qualifier_poly }; + constexpr simd_type p16qx2 { V2x8HImode, qualifier_poly }; constexpr simd_type s16 { V4HImode, qualifier_none }; constexpr simd_type s16q { V8HImode, qualifier_none }; + constexpr simd_type s16qx2 { V2x8HImode, qualifier_none }; constexpr simd_type u16 { V4HImode, qualifier_unsigned }; constexpr simd_type u16q { V8HImode, qualifier_unsigned }; + constexpr simd_type u16qx2 { V2x8HImode, qualifier_unsigned }; constexpr simd_type 
bf16 { V4BFmode, qualifier_none }; constexpr simd_type bf16q { V8BFmode, qualifier_none }; + constexpr simd_type bf16qx2 { V2x8BFmode, qualifier_none }; constexpr simd_type f32 { V2SFmode, qualifier_none }; constexpr simd_type f32q { V4SFmode, qualifier_none }; @@ -1671,11 +1679,21 @@ aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data) switch (builtin_data.signature) { case aarch64_builtin_signatures::binary: + case aarch64_builtin_signatures::binary_lane: return_type = builtin_data.types[0].type (); for (int i = 1; i <= 2; ++i) arg_types.quick_push (builtin_data.types[i].type ()); break; } + switch (builtin_data.signature) + { + case aarch64_builtin_signatures::binary_lane: + arg_types.quick_push (integer_type_node); + break; + + default: + break; + } return build_function_type_array (return_type, arg_types.length (), arg_types.address ()); } @@ -2522,17 +2540,109 @@ aarch64_general_required_extensions (unsigned int code) return ext::streaming_compatible (0); } +/* Checks calls to intrinsics that are defined using + aarch64-simd-pragma-builtins.def. */ +struct aarch64_pragma_builtins_checker +{ + aarch64_pragma_builtins_checker (location_t, tree, unsigned int, tree *, + const aarch64_pragma_builtins_data &); + + bool require_immediate_range (unsigned int, HOST_WIDE_INT, + HOST_WIDE_INT); + + bool check (); + + location_t location; + tree fndecl; + unsigned int nargs; + array_slice<tree> args; + const aarch64_pragma_builtins_data &builtin_data; +}; + +/* LOCATION is the location of the call; FNDECL is the FUNCTION_DECL + that is being called; NARGS is the number of arguments to the call, + which are in a vector starting at FIRST_ARG; and BUILTIN_DATA describes + the intrinsic. 
*/ +aarch64_pragma_builtins_checker:: +aarch64_pragma_builtins_checker (location_t location, tree fndecl, + unsigned int nargs, tree *first_arg, + const aarch64_pragma_builtins_data + &builtin_data) + : location (location), fndecl (fndecl), nargs (nargs), + args (first_arg, nargs), builtin_data (builtin_data) +{ +} + +/* Require argument ARGNO to be an integer constant expression in the + range [MIN, MAX]. Return true if it was. */ +bool +aarch64_pragma_builtins_checker:: +require_immediate_range (unsigned int argno, HOST_WIDE_INT min, + HOST_WIDE_INT max) +{ + if (!tree_fits_shwi_p (args[argno])) + { + report_non_ice (location, fndecl, argno); + return false; + } + + HOST_WIDE_INT actual = tree_to_shwi (args[argno]); + if (actual < min || actual > max) + { + report_out_of_range (location, fndecl, argno, actual, min, max); + return false; + } + + return true; +} + +/* Check the arguments to the intrinsic call and return true if they + are valid. */ +bool +aarch64_pragma_builtins_checker::check () +{ + switch (builtin_data.unspec) + { + case UNSPEC_LUTI2: + case UNSPEC_LUTI4: + { + auto vector_to_index_mode = builtin_data.types[nargs - 1].mode; + int vector_to_index_nunits + = GET_MODE_NUNITS (vector_to_index_mode).to_constant (); + int output_mode_nunits + = GET_MODE_NUNITS (builtin_data.types[0].mode).to_constant (); + + int high; + if (builtin_data.unspec == UNSPEC_LUTI2) + high = (4 * vector_to_index_nunits / output_mode_nunits) - 1; + else + high = (2 * vector_to_index_nunits / output_mode_nunits) - 1; + + return require_immediate_range (nargs - 1, 0, high); + } + + default: + return true; + } +} + bool aarch64_general_check_builtin_call (location_t location, vec<location_t>, unsigned int code, tree fndecl, - unsigned int nargs ATTRIBUTE_UNUSED, - tree *args) + unsigned int nargs, tree *args) { tree decl = aarch64_builtin_decls[code]; auto required_extensions = aarch64_general_required_extensions (code); if (!aarch64_check_required_extensions (location, decl, 
required_extensions)) return false; + if (auto builtin_data = aarch64_get_pragma_builtin (code)) + { + aarch64_pragma_builtins_checker checker (location, fndecl, nargs, args, + *builtin_data); + return checker.check (); + } + switch (code) { case AARCH64_RSR: @@ -3442,6 +3552,16 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, TYPE_MODE (TREE_TYPE (arg))); } + /* LUTI2 treats the first argument as a vector of 4 elements. The forms + with 128-bit inputs are only provided as a convenience; the upper halves + don't actually matter. */ + if (builtin_data.unspec == UNSPEC_LUTI2 + && known_eq (GET_MODE_BITSIZE (ops[1].mode), 128u)) + { + ops[1].mode = aarch64_v64_mode (GET_MODE_INNER (ops[1].mode)).require (); + ops[1].value = gen_lowpart (ops[1].mode, ops[1].value); + } + insn_code icode; switch (builtin_data.unspec) { @@ -3450,6 +3570,14 @@ aarch64_expand_pragma_builtin (tree exp, rtx target, icode = code_for_aarch64 (builtin_data.unspec, builtin_data.types[0].mode); break; + + case UNSPEC_LUTI2: + case UNSPEC_LUTI4: + create_integer_operand (ops.safe_push ({}), + builtin_data.unspec == UNSPEC_LUTI2 ? 
2 : 4); + icode = code_for_aarch64_lut (ops[1].mode, ops[2].mode); + break; + default: gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 90abb1c5edd9..0a61b4858b17 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -259,6 +259,8 @@ AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (FP8DOT4), (), (), "fp8dot2") AARCH64_OPT_EXTENSION("ssve-fp8dot2", SSVE_FP8DOT2, (SSVE_FP8DOT4), (), (), "ssve-fp8dot2") +AARCH64_OPT_EXTENSION("lut", LUT, (SIMD), (), (), "lut") + #undef AARCH64_OPT_FMV_EXTENSION #undef AARCH64_OPT_EXTENSION #undef AARCH64_FMV_FEATURE diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def index e49db23cbd18..db40745e9e34 100644 --- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def @@ -22,6 +22,10 @@ #define ENTRY_BINARY(N, T0, T1, T2, U) \ ENTRY (N, binary, T0, T1, T2, U) +#undef ENTRY_BINARY_LANE +#define ENTRY_BINARY_LANE(N, T0, T1, T2, U) \ + ENTRY (N, binary_lane, T0, T1, T2, U) + #undef ENTRY_BINARY_VHSDF #define ENTRY_BINARY_VHSDF(NAME, UNSPEC) \ ENTRY_BINARY (NAME##_f16, f16, f16, f16, UNSPEC) \ @@ -30,8 +34,42 @@ ENTRY_BINARY (NAME##q_f32, f32q, f32q, f32q, UNSPEC) \ ENTRY_BINARY (NAME##q_f64, f64q, f64q, f64q, UNSPEC) +#undef ENTRY_TERNARY_VLUT8 +#define ENTRY_TERNARY_VLUT8(T) \ + ENTRY_BINARY_LANE (vluti2_lane_##T##8, T##8q, T##8, u8, UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti2_laneq_##T##8, T##8q, T##8, u8q, UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti2q_lane_##T##8, T##8q, T##8q, u8, UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti2q_laneq_##T##8, T##8q, T##8q, u8q, UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti4q_lane_##T##8, T##8q, T##8q, u8, UNSPEC_LUTI4) \ + ENTRY_BINARY_LANE (vluti4q_laneq_##T##8, T##8q, T##8q, u8q, UNSPEC_LUTI4) + +#undef ENTRY_TERNARY_VLUT16 +#define 
ENTRY_TERNARY_VLUT16(T) \ + ENTRY_BINARY_LANE (vluti2_lane_##T##16, T##16q, T##16, u8, UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti2_laneq_##T##16, T##16q, T##16, u8q, UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti2q_lane_##T##16, T##16q, T##16q, u8, UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti2q_laneq_##T##16, T##16q, T##16q, u8q, \ + UNSPEC_LUTI2) \ + ENTRY_BINARY_LANE (vluti4q_lane_##T##16_x2, T##16q, T##16qx2, u8, \ + UNSPEC_LUTI4) \ + ENTRY_BINARY_LANE (vluti4q_laneq_##T##16_x2, T##16q, T##16qx2, u8q, \ + UNSPEC_LUTI4) + // faminmax #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX) ENTRY_BINARY_VHSDF (vamax, UNSPEC_FAMAX) ENTRY_BINARY_VHSDF (vamin, UNSPEC_FAMIN) #undef REQUIRED_EXTENSIONS + +// lut +#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_LUT) +ENTRY_TERNARY_VLUT8 (p) +ENTRY_TERNARY_VLUT8 (s) +ENTRY_TERNARY_VLUT8 (u) + +ENTRY_TERNARY_VLUT16 (bf) +ENTRY_TERNARY_VLUT16 (f) +ENTRY_TERNARY_VLUT16 (p) +ENTRY_TERNARY_VLUT16 (s) +ENTRY_TERNARY_VLUT16 (u) +#undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index cfe95bd4c316..05cbd38372d3 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -9999,3 +9999,28 @@ "TARGET_FAMINMAX" "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" ) + +(define_insn "@aarch64_lut<VLUT:mode><VB:mode>" + [(set (match_operand:<VLUT:VCONQ> 0 "register_operand" "=w") + (unspec:<VLUT:VCONQ> + [(match_operand:VLUT 1 "register_operand" "w") + (match_operand:VB 2 "register_operand" "w") + (match_operand:SI 3 "const_int_operand") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_LUTI))] + "TARGET_LUT && INTVAL (operands[4]) <= exact_log2 (<VLUT:nunits>)" + "luti%4\t%0<VLUT:Vconqtype>, {%1<VLUT:Vconqtype>}, %2[%3]" +) + +;; lutx2 +(define_insn "@aarch64_lut<VLUTx2:mode><VB:mode>" + [(set (match_operand:<VSTRUCT_ELT> 0 "register_operand" "=w") + (unspec:<VSTRUCT_ELT> + [(match_operand:VLUTx2 1 "register_operand" "w") + 
(match_operand:VB 2 "register_operand" "w") + (match_operand:SI 3 "const_int_operand") + (match_operand:SI 4 "const_int_operand")] + UNSPEC_LUTI))] + "TARGET_LUT && INTVAL (operands[4]) == 4" + "luti%4\t%0.8h, {%S1.8h, %T1.8h}, %2[%3]" +) diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index c5dcbe176dfd..b1c694e143f2 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -487,6 +487,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED #define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX) #define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX) +/* Lookup table (LUTI) extension instructions are enabled through +lut. */ +#define TARGET_LUT AARCH64_HAVE_ISA (LUT) + /* Prefer different predicate registers for the output of a predicated operation over re-using an existing input predicate. */ #define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \ diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 720d79db8e43..90725c7faebc 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -426,6 +426,10 @@ (V8HF "TARGET_SIMD_F16INST") V2SF V4SF]) +;; Modes available for Advanced SIMD lut operations. +(define_mode_iterator VLUT [V8QI V16QI V4HI V4HF V4BF]) +(define_mode_iterator VLUTx2 [V2x8HI V2x8HF V2x8BF]) + ;; Iterators for single modes, for "@" patterns. (define_mode_iterator VNx16QI_ONLY [VNx16QI]) (define_mode_iterator VNx16SI_ONLY [VNx16SI]) @@ -1109,6 +1113,9 @@ UNSPEC_FCVTXN ; Used in aarch64-simd.md. UNSPEC_FAMAX ; Used in aarch64-simd.md. UNSPEC_FAMIN ; Used in aarch64-simd.md. + UNSPEC_LUTI ; Used in aarch64-simd.md. + UNSPEC_LUTI2 ; Used in aarch64-simd.md. + UNSPEC_LUTI4 ; Used in aarch64-simd.md. ;; All used in aarch64-sve2.md UNSPEC_ADDQV @@ -1555,6 +1562,12 @@ (QI "8b") (HI "8b") (V4BF "8b") (V8BF "16b")]) +;; Mode to double type mapping. 
+(define_mode_attr Vconqtype [(V8QI ".16b") (V16QI ".16b") + (V4HI ".8h") (V8HI ".8h") + (V4HF ".8h") (V8HF ".8h") + (V4BF ".8h") (V8BF ".8h")]) + ;; Advanced SIMD vector structure to element modes. (define_mode_attr VSTRUCT_ELT [(V2x8QI "V8QI") (V2x4HI "V4HI") (V2x2SI "V2SI") (V2x1DI "DI") @@ -1685,6 +1698,7 @@ (V2SI "V4SI") (V4SI "V4SI") (DI "V2DI") (V2DI "V2DI") (V4HF "V8HF") (V8HF "V8HF") + (V4BF "V8BF") (V8BF "V8BF") (V2SF "V4SF") (V4SF "V4SF") (V2DF "V2DF") (SI "V4SI") (HI "V8HI") (QI "V16QI") diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index e27a92c270af..e3c2adc25077 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -22007,6 +22007,8 @@ Enable the fp8 (8-bit floating point) to half-precision 2-way dot product extension in streaming mode. @item faminmax Enable the Floating Point Absolute Maximum/Minimum extension. +@item lut +Enable the Lookup Table extension. @item sve-b16b16 Enable the SVE non-widening brain floating-point (@code{bf16}) extension. This only has an effect when @code{sve2} or @code{sme2} are also enabled. 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/lut-incorrect-range.c b/gcc/testsuite/gcc.target/aarch64/simd/lut-incorrect-range.c new file mode 100644 index 000000000000..24e5d46d353b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/lut-incorrect-range.c @@ -0,0 +1,221 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv9-a+lut" } */ + +#include "arm_neon.h" + +void +test_var(uint8x16_t a, uint8x8_t b, uint8x16_t c, int x) +{ + vluti2q_lane_u8(a, b, x); /* { dg-error {argument 3 of 'vluti2q_lane_u8' must be an integer constant expression} } */ + vluti2q_laneq_u8(a, c, x); /* { dg-error {argument 3 of 'vluti2q_laneq_u8' must be an integer constant expression} } */ + vluti4q_lane_u8(a, b, x); /* { dg-error {argument 3 of 'vluti4q_lane_u8' must be an integer constant expression} } */ + vluti4q_laneq_u8(a, c, x); /* { dg-error {argument 3 of 'vluti4q_laneq_u8' must be an integer constant expression} } */ +} + +void +test_vluti2_laneu8(uint8x8_t a, uint8x8_t b, uint8x16_t c, uint8x16_t d) +{ + vluti2_lane_u8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti2_lane_u8(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti2_laneq_u8(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_laneq_u8(a, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_lane_u8(c, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti2q_lane_u8(c, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti2q_laneq_u8(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_laneq_u8(c, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, 
which expects a value in the range \[0, 3\]} } */ +} + +void +test_vluti2_lanes8(int8x8_t a, uint8x8_t b, int8x16_t c, uint8x16_t d) +{ + vluti2_lane_s8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti2_lane_s8(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti2_laneq_s8(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_laneq_s8(a, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_lane_s8(c, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti2q_lane_s8(c, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti2q_laneq_s8(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_laneq_s8(c, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ +} + +void +test_vluti2_lanep8(poly8x8_t a, uint8x8_t b, poly8x16_t c, uint8x16_t d) +{ + vluti2_lane_p8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti2_lane_p8(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti2_laneq_p8(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_laneq_p8(a, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_lane_p8(c, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti2q_lane_p8(c, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + 
vluti2q_laneq_p8(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_laneq_p8(c, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ +} + +void +test_vluti2_laneu16(uint16x4_t a, uint8x8_t b, uint16x8_t c, uint8x16_t d) +{ + vluti2_lane_u16(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_lane_u16(a, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2_laneq_u16(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2_laneq_u16(a, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + + vluti2q_lane_u16(c, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_lane_u16(c, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_laneq_u16(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2q_laneq_u16(c, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ +} + +void +test_vluti2_lanes16(int16x4_t a, uint8x8_t b, int16x8_t c, uint8x16_t d) +{ + vluti2_lane_s16(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_lane_s16(a, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2_laneq_s16(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2_laneq_s16(a, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + + vluti2q_lane_s16(c, b, -1); /* { 
dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_lane_s16(c, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_laneq_s16(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2q_laneq_s16(c, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ +} + +void +test_vluti2_lanep16(poly16x4_t a, uint8x8_t b, poly16x8_t c, uint8x16_t d) +{ + vluti2_lane_p16(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_lane_p16(a, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2_laneq_p16(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2_laneq_p16(a, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + + vluti2q_lane_p16(c, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_lane_p16(c, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_laneq_p16(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2q_laneq_p16(c, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ +} + +void +test_vluti2_lanef16(float16x4_t a, uint8x8_t b, float16x8_t c, uint8x16_t d) +{ + vluti2_lane_f16(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_lane_f16(a, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2_laneq_f16(a, d, -1); /* { dg-error {passing -1 to 
argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2_laneq_f16(a, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + + vluti2q_lane_f16(c, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_lane_f16(c, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_laneq_f16(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2q_laneq_f16(c, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ +} + +void +test_vluti2_lanebf16(bfloat16x4_t a, uint8x8_t b, bfloat16x8_t c, uint8x16_t d) +{ + vluti2_lane_bf16(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2_lane_bf16(a, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2_laneq_bf16(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2_laneq_bf16(a, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + + vluti2q_lane_bf16(c, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti2q_lane_bf16(c, b, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + + vluti2q_laneq_bf16(c, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ + vluti2q_laneq_bf16(c, d, 8); /* { dg-error {passing 8 to argument 3 [^\n]*, which expects a value in the range \[0, 7\]} } */ +} + +void +test_vluti4q_laneu8(uint8x16_t a, uint8x8_t b, uint8x16_t d) +{ + vluti4q_lane_u8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which 
expects the value 0} } */ + vluti4q_lane_u8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects the value 0} } */ + + vluti4q_laneq_u8(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_laneq_u8(a, d, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ +} + +void +test_vluti4q_lanes8(int8x16_t a, uint8x8_t b, uint8x16_t d) +{ + vluti4q_lane_s8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects the value 0} } */ + vluti4q_lane_s8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects the value 0} } */ + + vluti4q_laneq_s8(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_laneq_s8(a, d, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ +} + +void +test_vluti4q_lanep8(poly8x16_t a, uint8x8_t b, uint8x16_t d) +{ + vluti4q_lane_p8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects the value 0} } */ + vluti4q_lane_p8(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects the value 0} } */ + + vluti4q_laneq_p8(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_laneq_p8(a, d, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ +} + +void +test_vluti4q_laneu16_x2(uint16x8x2_t a, uint8x8_t b, uint8x16_t d) +{ + vluti4q_lane_u16_x2(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_lane_u16_x2(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti4q_laneq_u16_x2(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti4q_laneq_u16_x2(a, d, 4); 
/* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ +} + +void +test_vluti4q_lanes16_x2(int16x8x2_t a, uint8x8_t b, uint8x16_t d) +{ + vluti4q_lane_s16_x2(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_lane_s16_x2(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti4q_laneq_s16_x2(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti4q_laneq_s16_x2(a, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ +} + +void +test_vluti4q_lanep16_x2(poly16x8x2_t a, uint8x8_t b, uint8x16_t d) +{ + vluti4q_lane_p16_x2(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_lane_p16_x2(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti4q_laneq_p16_x2(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti4q_laneq_p16_x2(a, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ +} + +void +test_vluti4q_lanef16_x2(float16x8x2_t a, uint8x8_t b, uint8x16_t d) +{ + vluti4q_lane_f16_x2(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_lane_f16_x2(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti4q_laneq_f16_x2(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti4q_laneq_f16_x2(a, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ +} + +void +test_vluti4q_lanebf16_x2(bfloat16x8x2_t a, uint8x8_t b, uint8x16_t d) +{ 
+ vluti4q_lane_bf16_x2(a, b, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + vluti4q_lane_bf16_x2(a, b, 2); /* { dg-error {passing 2 to argument 3 [^\n]*, which expects a value in the range \[0, 1\]} } */ + + vluti4q_laneq_bf16_x2(a, d, -1); /* { dg-error {passing -1 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ + vluti4q_laneq_bf16_x2(a, d, 4); /* { dg-error {passing 4 to argument 3 [^\n]*, which expects a value in the range \[0, 3\]} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/lut-no-flag.c b/gcc/testsuite/gcc.target/aarch64/simd/lut-no-flag.c new file mode 100644 index 000000000000..d180d8f2150e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/lut-no-flag.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-march=armv9-a" } */ + +#include "arm_neon.h" + +void +test (uint8x8_t a, uint8x8_t b) +{ + vluti2_lane_u8 (a, b, 0); /* { dg-error {ACLE function 'vluti2_lane_u8' requires ISA extension 'lut'} } */ +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/lut.c b/gcc/testsuite/gcc.target/aarch64/simd/lut.c new file mode 100644 index 000000000000..fc89b215a93b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/lut.c @@ -0,0 +1,849 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -march=armv9-a+lut" } */ +/* { dg-final { check-function-bodies "**" ""} } */ + +#include "arm_neon.h" + +/* +** test_vluti2_laneu8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... 
+** ret +*/ +void +test_vluti2_laneu8(uint8x8_t a, uint8x8_t b, uint8x16_t results[]) +{ + results[0] = vluti2_lane_u8(a, b, 0); + results[1] = vluti2_lane_u8(a, b, 1); +} + +/* +** test_vluti2_lanequ8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti2_lanequ8(uint8x8_t a, uint8x16_t b, uint8x16_t results[]) +{ + results[0] = vluti2_laneq_u8(a, b, 0); + results[1] = vluti2_laneq_u8(a, b, 1); + results[2] = vluti2_laneq_u8(a, b, 2); + results[3] = vluti2_laneq_u8(a, b, 3); +} + +/* +** test_vluti2q_laneu8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti2q_laneu8(uint8x16_t a, uint8x8_t b, uint8x16_t results[]) +{ + results[0] = vluti2q_lane_u8(a, b, 0); + results[1] = vluti2q_lane_u8(a, b, 1); +} + +/* +** test_vluti2q_lanequ8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti2q_lanequ8(uint8x16_t a, uint8x16_t b, uint8x16_t results[]) +{ + results[0] = vluti2q_laneq_u8(a, b, 0); + results[1] = vluti2q_laneq_u8(a, b, 1); + results[2] = vluti2q_laneq_u8(a, b, 2); + results[3] = vluti2q_laneq_u8(a, b, 3); +} + +/* +** test_vluti2_lanes8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... 
+** ret +*/ +void +test_vluti2_lanes8(int8x8_t a, uint8x8_t b, int8x16_t results[]) +{ + results[0] = vluti2_lane_s8(a, b, 0); + results[1] = vluti2_lane_s8(a, b, 1); +} + +/* +** test_vluti2_laneqs8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti2_laneqs8(int8x8_t a, uint8x16_t b, int8x16_t results[]) +{ + results[0] = vluti2_laneq_s8(a, b, 0); + results[1] = vluti2_laneq_s8(a, b, 1); + results[2] = vluti2_laneq_s8(a, b, 2); + results[3] = vluti2_laneq_s8(a, b, 3); +} + +/* +** test_vluti2q_lanes8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti2q_lanes8(int8x16_t a, uint8x8_t b, int8x16_t results[]) +{ + results[0] = vluti2q_lane_s8(a, b, 0); + results[1] = vluti2q_lane_s8(a, b, 1); +} + +/* +** test_vluti2q_laneqs8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti2q_laneqs8(int8x16_t a, uint8x16_t b, int8x16_t results[]) +{ + results[0] = vluti2q_laneq_s8(a, b, 0); + results[1] = vluti2q_laneq_s8(a, b, 1); + results[2] = vluti2q_laneq_s8(a, b, 2); + results[3] = vluti2q_laneq_s8(a, b, 3); +} + +/* +** test_vluti2_lanep8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... 
+** ret +*/ +void +test_vluti2_lanep8(poly8x8_t a, uint8x8_t b, poly8x16_t results[]) +{ + results[0] = vluti2_lane_p8(a, b, 0); + results[1] = vluti2_lane_p8(a, b, 1); +} + +/* +** test_vluti2_laneqp8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti2_laneqp8(poly8x8_t a, uint8x16_t b, poly8x16_t results[]) +{ + results[0] = vluti2_laneq_p8(a, b, 0); + results[1] = vluti2_laneq_p8(a, b, 1); + results[2] = vluti2_laneq_p8(a, b, 2); + results[3] = vluti2_laneq_p8(a, b, 3); +} + +/* +** test_vluti2q_lanep8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti2q_lanep8(poly8x16_t a, uint8x8_t b, poly8x16_t results[]) +{ + results[0] = vluti2q_lane_p8(a, b, 0); + results[1] = vluti2q_lane_p8(a, b, 1); +} + +/* +** test_vluti2q_laneqp8: +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[2\] +** luti2 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti2q_laneqp8(poly8x16_t a, uint8x16_t b, poly8x16_t results[]) +{ + results[0] = vluti2q_laneq_p8(a, b, 0); + results[1] = vluti2q_laneq_p8(a, b, 1); + results[2] = vluti2q_laneq_p8(a, b, 2); + results[3] = vluti2q_laneq_p8(a, b, 3); +} + +/* +** test_vluti2_laneu16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2_laneu16(uint16x4_t a, uint8x8_t b, uint16x8_t results[]) +{ + results[0] = vluti2_lane_u16(a, b, 0); + results[1] = vluti2_lane_u16(a, b, 1); + results[2] = vluti2_lane_u16(a, b, 2); + results[3] = vluti2_lane_u16(a, b, 3); +} + +/* +** test_vluti2_lanequ16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2_lanequ16(uint16x4_t a, uint8x16_t b, uint16x8_t results[]) +{ + results[0] = vluti2_laneq_u16(a, b, 0); + results[1] = vluti2_laneq_u16(a, b, 1); + results[2] = vluti2_laneq_u16(a, b, 2); + results[3] = vluti2_laneq_u16(a, b, 3); + results[4] = vluti2_laneq_u16(a, b, 4); + results[5] = vluti2_laneq_u16(a, b, 5); + results[6] = vluti2_laneq_u16(a, b, 6); + results[7] = vluti2_laneq_u16(a, b, 7); +} + +/* +** test_vluti2q_laneu16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2q_laneu16(uint16x8_t a, uint8x8_t b, uint16x8_t results[]) +{ + results[0] = vluti2q_lane_u16(a, b, 0); + results[1] = vluti2q_lane_u16(a, b, 1); + results[2] = vluti2q_lane_u16(a, b, 2); + results[3] = vluti2q_lane_u16(a, b, 3); +} + +/* +** test_vluti2q_lanequ16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2q_lanequ16(uint16x8_t a, uint8x16_t b, uint16x8_t results[]) +{ + results[0] = vluti2q_laneq_u16(a, b, 0); + results[1] = vluti2q_laneq_u16(a, b, 1); + results[2] = vluti2q_laneq_u16(a, b, 2); + results[3] = vluti2q_laneq_u16(a, b, 3); + results[4] = vluti2q_laneq_u16(a, b, 4); + results[5] = vluti2q_laneq_u16(a, b, 5); + results[6] = vluti2q_laneq_u16(a, b, 6); + results[7] = vluti2q_laneq_u16(a, b, 7); +} + +/* +** test_vluti2_lanes16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2_lanes16(int16x4_t a, uint8x8_t b, int16x8_t results[]) +{ + results[0] = vluti2_lane_s16(a, b, 0); + results[1] = vluti2_lane_s16(a, b, 1); + results[2] = vluti2_lane_s16(a, b, 2); + results[3] = vluti2_lane_s16(a, b, 3); +} + +/* +** test_vluti2_laneqs16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2_laneqs16(int16x4_t a, uint8x16_t b, int16x8_t results[]) +{ + results[0] = vluti2_laneq_s16(a, b, 0); + results[1] = vluti2_laneq_s16(a, b, 1); + results[2] = vluti2_laneq_s16(a, b, 2); + results[3] = vluti2_laneq_s16(a, b, 3); + results[4] = vluti2_laneq_s16(a, b, 4); + results[5] = vluti2_laneq_s16(a, b, 5); + results[6] = vluti2_laneq_s16(a, b, 6); + results[7] = vluti2_laneq_s16(a, b, 7); +} + +/* +** test_vluti2q_lanes16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2q_lanes16(int16x8_t a, uint8x8_t b, int16x8_t results[]) +{ + results[0] = vluti2q_lane_s16(a, b, 0); + results[1] = vluti2q_lane_s16(a, b, 1); + results[2] = vluti2q_lane_s16(a, b, 2); + results[3] = vluti2q_lane_s16(a, b, 3); +} + +/* +** test_vluti2q_laneqs16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2q_laneqs16(int16x8_t a, uint8x16_t b, int16x8_t results[]) +{ + results[0] = vluti2q_laneq_s16(a, b, 0); + results[1] = vluti2q_laneq_s16(a, b, 1); + results[2] = vluti2q_laneq_s16(a, b, 2); + results[3] = vluti2q_laneq_s16(a, b, 3); + results[4] = vluti2q_laneq_s16(a, b, 4); + results[5] = vluti2q_laneq_s16(a, b, 5); + results[6] = vluti2q_laneq_s16(a, b, 6); + results[7] = vluti2q_laneq_s16(a, b, 7); +} + +/* +** test_vluti2_lanep16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2_lanep16(poly16x4_t a, uint8x8_t b, poly16x8_t results[]) +{ + results[0] = vluti2_lane_p16(a, b, 0); + results[1] = vluti2_lane_p16(a, b, 1); + results[2] = vluti2_lane_p16(a, b, 2); + results[3] = vluti2_lane_p16(a, b, 3); +} + +/* +** test_vluti2_laneqp16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2_laneqp16(poly16x4_t a, uint8x16_t b, poly16x8_t results[]) +{ + results[0] = vluti2_laneq_p16(a, b, 0); + results[1] = vluti2_laneq_p16(a, b, 1); + results[2] = vluti2_laneq_p16(a, b, 2); + results[3] = vluti2_laneq_p16(a, b, 3); + results[4] = vluti2_laneq_p16(a, b, 4); + results[5] = vluti2_laneq_p16(a, b, 5); + results[6] = vluti2_laneq_p16(a, b, 6); + results[7] = vluti2_laneq_p16(a, b, 7); +} + +/* +** test_vluti2q_lanep16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2q_lanep16(poly16x8_t a, uint8x8_t b, poly16x8_t results[]) +{ + results[0] = vluti2q_lane_p16(a, b, 0); + results[1] = vluti2q_lane_p16(a, b, 1); + results[2] = vluti2q_lane_p16(a, b, 2); + results[3] = vluti2q_lane_p16(a, b, 3); +} + +/* +** test_vluti2q_laneqp16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2q_laneqp16(poly16x8_t a, uint8x16_t b, poly16x8_t results[]) +{ + results[0] = vluti2q_laneq_p16(a, b, 0); + results[1] = vluti2q_laneq_p16(a, b, 1); + results[2] = vluti2q_laneq_p16(a, b, 2); + results[3] = vluti2q_laneq_p16(a, b, 3); + results[4] = vluti2q_laneq_p16(a, b, 4); + results[5] = vluti2q_laneq_p16(a, b, 5); + results[6] = vluti2q_laneq_p16(a, b, 6); + results[7] = vluti2q_laneq_p16(a, b, 7); +} + +/* +** test_vluti2_lanef16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2_lanef16(float16x4_t a, uint8x8_t b, float16x8_t results[]) +{ + results[0] = vluti2_lane_f16(a, b, 0); + results[1] = vluti2_lane_f16(a, b, 1); + results[2] = vluti2_lane_f16(a, b, 2); + results[3] = vluti2_lane_f16(a, b, 3); +} + +/* +** test_vluti2_laneqf16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2_laneqf16(float16x4_t a, uint8x16_t b, float16x8_t results[]) +{ + results[0] = vluti2_laneq_f16(a, b, 0); + results[1] = vluti2_laneq_f16(a, b, 1); + results[2] = vluti2_laneq_f16(a, b, 2); + results[3] = vluti2_laneq_f16(a, b, 3); + results[4] = vluti2_laneq_f16(a, b, 4); + results[5] = vluti2_laneq_f16(a, b, 5); + results[6] = vluti2_laneq_f16(a, b, 6); + results[7] = vluti2_laneq_f16(a, b, 7); +} + +/* +** test_vluti2q_lanef16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2q_lanef16(float16x8_t a, uint8x8_t b, float16x8_t results[]) +{ + results[0] = vluti2q_lane_f16(a, b, 0); + results[1] = vluti2q_lane_f16(a, b, 1); + results[2] = vluti2q_lane_f16(a, b, 2); + results[3] = vluti2q_lane_f16(a, b, 3); +} + +/* +** test_vluti2q_laneqf16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2q_laneqf16(float16x8_t a, uint8x16_t b, float16x8_t results[]) +{ + results[0] = vluti2q_laneq_f16(a, b, 0); + results[1] = vluti2q_laneq_f16(a, b, 1); + results[2] = vluti2q_laneq_f16(a, b, 2); + results[3] = vluti2q_laneq_f16(a, b, 3); + results[4] = vluti2q_laneq_f16(a, b, 4); + results[5] = vluti2q_laneq_f16(a, b, 5); + results[6] = vluti2q_laneq_f16(a, b, 6); + results[7] = vluti2q_laneq_f16(a, b, 7); +} + +/* +** test_vluti2_lanebf16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2_lanebf16(bfloat16x4_t a, uint8x8_t b, bfloat16x8_t results[]) +{ + results[0] = vluti2_lane_bf16(a, b, 0); + results[1] = vluti2_lane_bf16(a, b, 1); + results[2] = vluti2_lane_bf16(a, b, 2); + results[3] = vluti2_lane_bf16(a, b, 3); +} + +/* +** test_vluti2_laneqbf16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2_laneqbf16(bfloat16x4_t a, uint8x16_t b, bfloat16x8_t results[]) +{ + results[0] = vluti2_laneq_bf16(a, b, 0); + results[1] = vluti2_laneq_bf16(a, b, 1); + results[2] = vluti2_laneq_bf16(a, b, 2); + results[3] = vluti2_laneq_bf16(a, b, 3); + results[4] = vluti2_laneq_bf16(a, b, 4); + results[5] = vluti2_laneq_bf16(a, b, 5); + results[6] = vluti2_laneq_bf16(a, b, 6); + results[7] = vluti2_laneq_bf16(a, b, 7); +} + +/* +** test_vluti2q_lanebf16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** ... 
+** ret +*/ +void +test_vluti2q_lanebf16(bfloat16x8_t a, uint8x8_t b, bfloat16x8_t results[]) +{ + results[0] = vluti2q_lane_bf16(a, b, 0); + results[1] = vluti2q_lane_bf16(a, b, 1); + results[2] = vluti2q_lane_bf16(a, b, 2); + results[3] = vluti2q_lane_bf16(a, b, 3); +} + +/* +** test_vluti2q_laneqbf16: +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[0\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[1\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[2\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[3\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[4\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[5\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[6\] +** luti2 v[0-9]+\.8h, {v[0-9]+\.8h}, v[0-9]+\[7\] +** ... +** ret +*/ +void +test_vluti2q_laneqbf16(bfloat16x8_t a, uint8x16_t b, bfloat16x8_t results[]) +{ + results[0] = vluti2q_laneq_bf16(a, b, 0); + results[1] = vluti2q_laneq_bf16(a, b, 1); + results[2] = vluti2q_laneq_bf16(a, b, 2); + results[3] = vluti2q_laneq_bf16(a, b, 3); + results[4] = vluti2q_laneq_bf16(a, b, 4); + results[5] = vluti2q_laneq_bf16(a, b, 5); + results[6] = vluti2q_laneq_bf16(a, b, 6); + results[7] = vluti2q_laneq_bf16(a, b, 7); +} + +/* +** test_vluti4q_laneu8: +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** ... +** ret +*/ +void +test_vluti4q_laneu8(uint8x16_t a, uint8x8_t b, uint8x16_t results[]) +{ + results[0] = vluti4q_lane_u8(a, b, 0); +} + +/* +** test_vluti4q_lanequ8: +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti4q_lanequ8(uint8x16_t a, uint8x16_t b, uint8x16_t results[]) +{ + results[0] = vluti4q_laneq_u8(a, b, 0); + results[1] = vluti4q_laneq_u8(a, b, 1); +} + +/* +** test_vluti4q_lanep8: +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** ... 
+** ret +*/ +void +test_vluti4q_lanep8(poly8x16_t a, uint8x8_t b, poly8x16_t results[]) +{ + results[0] = vluti4q_lane_p8(a, b, 0); +} + +/* +** test_vluti4q_laneqp8: +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[0\] +** luti4 v[0-9]+\.16b, {v[0-9]+\.16b}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti4q_laneqp8(poly8x16_t a, uint8x16_t b, poly8x16_t results[]) +{ + results[0] = vluti4q_laneq_p8(a, b, 0); + results[1] = vluti4q_laneq_p8(a, b, 1); +} + +/* +** test_vluti4q_laneu16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti4q_laneu16_x2(uint16x8x2_t a, uint8x8_t b, uint16x8_t results[]) +{ + results[0] = vluti4q_lane_u16_x2(a, b, 0); + results[1] = vluti4q_lane_u16_x2(a, b, 1); +} + +/* +** test_vluti4q_lanequ16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[2\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti4q_lanequ16_x2(uint16x8x2_t a, uint8x16_t b, uint16x8_t results[]) +{ + results[0] = vluti4q_laneq_u16_x2(a, b, 0); + results[1] = vluti4q_laneq_u16_x2(a, b, 1); + results[2] = vluti4q_laneq_u16_x2(a, b, 2); + results[3] = vluti4q_laneq_u16_x2(a, b, 3); +} + +/* +** test_vluti4q_lanes16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** ... 
+** ret +*/ +void +test_vluti4q_lanes16_x2(int16x8x2_t a, uint8x8_t b, int16x8_t results[]) +{ + results[0] = vluti4q_lane_s16_x2(a, b, 0); + results[1] = vluti4q_lane_s16_x2(a, b, 1); +} + +/* +** test_vluti4q_laneqs16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[2\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti4q_laneqs16_x2(int16x8x2_t a, uint8x16_t b, int16x8_t results[]) +{ + results[0] = vluti4q_laneq_s16_x2(a, b, 0); + results[1] = vluti4q_laneq_s16_x2(a, b, 1); + results[2] = vluti4q_laneq_s16_x2(a, b, 2); + results[3] = vluti4q_laneq_s16_x2(a, b, 3); +} + +/* +** test_vluti4q_lanep16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti4q_lanep16_x2(poly16x8x2_t a, uint8x8_t b, poly16x8_t results[]) +{ + results[0] = vluti4q_lane_p16_x2(a, b, 0); + results[1] = vluti4q_lane_p16_x2(a, b, 1); +} + +/* +** test_vluti4q_laneqp16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[2\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti4q_laneqp16_x2(poly16x8x2_t a, uint8x16_t b, poly16x8_t results[]) +{ + results[0] = vluti4q_laneq_p16_x2(a, b, 0); + results[1] = vluti4q_laneq_p16_x2(a, b, 1); + results[2] = vluti4q_laneq_p16_x2(a, b, 2); + results[3] = vluti4q_laneq_p16_x2(a, b, 3); +} + +/* +** test_vluti4q_lanef16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** ... 
+** ret +*/ +void +test_vluti4q_lanef16_x2(float16x8x2_t a, uint8x8_t b, float16x8_t results[]) +{ + results[0] = vluti4q_lane_f16_x2(a, b, 0); + results[1] = vluti4q_lane_f16_x2(a, b, 1); +} + +/* +** test_vluti4q_laneqf16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[2\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti4q_laneqf16_x2(float16x8x2_t a, uint8x16_t b, float16x8_t results[]) +{ + results[0] = vluti4q_laneq_f16_x2(a, b, 0); + results[1] = vluti4q_laneq_f16_x2(a, b, 1); + results[2] = vluti4q_laneq_f16_x2(a, b, 2); + results[3] = vluti4q_laneq_f16_x2(a, b, 3); +} + +/* +** test_vluti4q_lanebf16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** ... +** ret +*/ +void +test_vluti4q_lanebf16_x2(bfloat16x8x2_t a, uint8x8_t b, bfloat16x8_t results[]) +{ + results[0] = vluti4q_lane_bf16_x2(a, b, 0); + results[1] = vluti4q_lane_bf16_x2(a, b, 1); +} + +/* +** test_vluti4q_laneqbf16_x2: +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[0\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[1\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[2\] +** luti4 v[0-9]+\.8h, {v[0-9]+\.8h, v[0-9]+\.8h}, v[0-9]+\[3\] +** ... +** ret +*/ +void +test_vluti4q_laneqbf16_x2(bfloat16x8x2_t a, uint8x16_t b, bfloat16x8_t results[]) +{ + results[0] = vluti4q_laneq_bf16_x2(a, b, 0); + results[1] = vluti4q_laneq_bf16_x2(a, b, 1); + results[2] = vluti4q_laneq_bf16_x2(a, b, 2); + results[3] = vluti4q_laneq_bf16_x2(a, b, 3); +}