This patch adds support for the following intrinsics:
- svdot[_f32_mf8]_fpm
- svdot_lane[_f32_mf8]_fpm
- svdot[_f16_mf8]_fpm
- svdot_lane[_f16_mf8]_fpm
The first two are available under a combination of the FP8DOT4 and SVE2
features.
Alternatively under the SSVE_FP8DOT4 feature under streaming mode.
The final two are available under a combination of the FP8DOT2 and SVE2
features.
Alternatively under the SSVE_FP8DOT2 feature under streaming mode.
gcc/
* config/aarch64/aarch64-option-extensions.def
(fp8dot4, ssve-fp8dot4): Add new extensions.
(fp8dot2, ssve-fp8dot2): Likewise.
* config/aarch64/aarch64-sve-builtins-base.cc (svdot_impl): Support fp8.
(svdotprod_lane_impl): Likewise.
(svdot_lane): Provide an unspec for fp8 types.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(ternary_mfloat8_def): Add new class.
(ternary_mfloat8): Add new shape.
(ternary_mfloat8_lane_group_selection_def): Add new class.
(ternary_mfloat8_lane_group_selection): Add new shape.
* config/aarch64/aarch64-sve-builtins-shapes.h
(ternary_mfloat8, ternary_mfloat8_lane_group_selection): Declare.
* config/aarch64/aarch64-sve-builtins-sve2.def
(svdot, svdot_lane): Add new DEF_SVE_FUNCTION_GS_FPM, twice to deal
with the combination of features providing support for 32 and 16 bit
floating point.
* config/aarch64/aarch64-sve2.md (@aarch64_sve_dot<mode>): Add new.
(@aarch64_sve_dot_lane<mode>): Likewise.
* config/aarch64/aarch64.h:
(TARGET_FP8DOT4, TARGET_SSVE_FP8DOT4): Add new defines.
(TARGET_FP8DOT2, TARGET_SSVE_FP8DOT2): Likewise.
* config/aarch64/iterators.md
(UNSPEC_DOT_FP8, UNSPEC_DOT_LANE_FP8): Add new unspecs.
gcc/testsuite
* gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c: Add new.
gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c:
Likewise.
* gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c: Likewise.
* gcc.target/aarch64/sve2/acle/asm/dot_mf8.c: Likewise.
* lib/target-supports.exp: Add dg-require-effective-target support for
aarch64_asm_fp8dot2_ok aarch64_asm_fp8dot4_ok
aarch64_asm_ssve-fp8dot2_ok and
aarch64_asm_ssve-fp8dot4_ok.
---
.../aarch64/aarch64-option-extensions.def | 8 +
.../aarch64/aarch64-sve-builtins-base.cc | 58 +++---
.../aarch64/aarch64-sve-builtins-shapes.cc | 48 +++++
.../aarch64/aarch64-sve-builtins-shapes.h | 8 +-
.../aarch64/aarch64-sve-builtins-sve2.def | 12 ++
gcc/config/aarch64/aarch64-sve2.md | 41 +++++
gcc/config/aarch64/aarch64.h | 18 ++
gcc/config/aarch64/iterators.md | 2 +
.../sve/acle/general-c/ternary_mfloat8_1.c | 33 ++++
.../ternary_mfloat8_lane_group_selection_1.c | 49 +++++
.../aarch64/sve2/acle/asm/dot_lane_mf8.c | 169 ++++++++++++++++++
.../aarch64/sve2/acle/asm/dot_mf8.c | 98 ++++++++++
gcc/testsuite/lib/target-supports.exp | 3 +-
13 files changed, 523 insertions(+), 24 deletions(-)
create mode 100644
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c
create mode 100644
gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index e397e78a377..62793e6f6ac 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -242,6 +242,14 @@ AARCH64_OPT_EXTENSION("ssve-fp8fma", SSVE_FP8FMA, (SME2,FP8), (), (), "ssve-fp8f
AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
+AARCH64_OPT_EXTENSION("fp8dot4", FP8DOT4, (FP8FMA), (), (), "fp8dot4")
+
+AARCH64_OPT_EXTENSION("ssve-fp8dot4", SSVE_FP8DOT4, (SSVE_FP8FMA), (), (), "ssve-fp8dot4")
+
+AARCH64_OPT_EXTENSION("fp8dot2", FP8DOT2, (FP8DOT4), (), (), "fp8dot2")
+
+AARCH64_OPT_EXTENSION("ssve-fp8dot2", SSVE_FP8DOT2, (SSVE_FP8DOT4), (), (), "ssve-fp8dot2")
+
#undef AARCH64_OPT_FMV_EXTENSION
#undef AARCH64_OPT_EXTENSION
#undef AARCH64_FMV_FEATURE
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 022163f0726..65df48a3e65 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -835,21 +835,28 @@ public:
rtx
expand (function_expander &e) const override
{
- /* In the optab, the multiplication operands come before the accumulator
- operand. The optab is keyed off the multiplication mode. */
- e.rotate_inputs_left (0, 3);
insn_code icode;
- if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES)
- icode = e.convert_optab_handler_for_sign (sdot_prod_optab,
- udot_prod_optab,
- 0, e.result_mode (),
- GET_MODE (e.args[0]));
+ if (e.fpm_mode == aarch64_sve::FPM_set)
+ {
+ icode = code_for_aarch64_sve_dot (e.result_mode ());
+ }
else
- icode = (e.type_suffix (0).float_p
- ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf
- : e.type_suffix (0).unsigned_p
- ? CODE_FOR_udot_prodvnx4sivnx8hi
- : CODE_FOR_sdot_prodvnx4sivnx8hi);
+ {
+ /* In the optab, the multiplication operands come before the accumulator
+ operand. The optab is keyed off the multiplication mode. */
+ e.rotate_inputs_left (0, 3);
+ if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES)
+ icode = e.convert_optab_handler_for_sign (sdot_prod_optab,
+ udot_prod_optab,
+ 0, e.result_mode (),
+ GET_MODE (e.args[0]));
+ else
+ icode = (e.type_suffix (0).float_p
+ ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf
+ : e.type_suffix (0).unsigned_p
+ ? CODE_FOR_udot_prodvnx4sivnx8hi
+ : CODE_FOR_sdot_prodvnx4sivnx8hi);
+ }
return e.use_unpred_insn (icode);
}
};
@@ -862,17 +869,24 @@ public:
rtx
expand (function_expander &e) const override
{
+ insn_code icode;
machine_mode mode0 = GET_MODE (e.args[0]);
machine_mode mode1 = GET_MODE (e.args[1]);
- /* Use the same ordering as the dot_prod_optab, with the
- accumulator last. */
- e.rotate_inputs_left (0, 4);
- int unspec = unspec_for (e);
- insn_code icode;
- if (unspec == UNSPEC_FDOT)
- icode = CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf;
+ if (e.fpm_mode == aarch64_sve::FPM_set)
+ {
+ icode = code_for_aarch64_sve_dot_lane (mode0);
+ }
else
- icode = code_for_aarch64_dot_prod_lane (unspec, mode0, mode1);
+ {
+ /* Use the same ordering as the dot_prod_optab, with the
+ accumulator last. */
+ e.rotate_inputs_left (0, 4);
+ int unspec = unspec_for (e);
+ if (unspec == UNSPEC_FDOT)
+ icode = CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf;
+ else
+ icode = code_for_aarch64_dot_prod_lane (unspec, mode0, mode1);
+ }
return e.use_exact_insn (icode);
}
};
@@ -3246,7 +3260,7 @@ FUNCTION (svdiv, svdiv_impl,)
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
- UNSPEC_FDOT))
+ UNSPEC_FDOT, UNSPEC_DOT_LANE_FP8))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 09f343e7118..9f79f6e28c7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -3994,6 +3994,34 @@ struct ternary_bfloat_def
};
SHAPE (ternary_bfloat)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t). */
+struct ternary_mfloat8_def
+ : public ternary_resize2_base<8, TYPE_mfloat, TYPE_mfloat>
+{
+ void
+ build (function_builder &b, const function_group_info &group) const override
+ {
+ gcc_assert (group.fpm_mode == FPM_set);
+ b.add_overloaded_functions (group, MODE_none);
+ build_all (b, "v0,v0,vM,vM", group, MODE_none);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ type_suffix_index type;
+ if (!r.check_num_arguments (4)
+ || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES
+ || !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t)
+ || !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t)
+ || !r.require_scalar_type (3, "int64_t"))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type, TYPE_SUFFIX_mf8, GROUP_none);
+ }
+};
+SHAPE (ternary_mfloat8)
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
where the final argument is an integer constant expression in the range
@@ -4046,6 +4074,26 @@ struct ternary_mfloat8_lane_def
};
SHAPE (ternary_mfloat8_lane)
+/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t, uint64_t)
+
+ where the final argument is an integer constant expression in the range
+ [0, 7] or [0, 3]. */
+struct ternary_mfloat8_lane_group_selection_def
+ : public ternary_mfloat8_lane_def
+{
+ bool
+ check (function_checker &c) const override
+ {
+ machine_mode mode = c.vector_mode (0);
+ if (mode == E_VNx8HFmode)
+ return c.require_immediate_lane_index (3, 2, 2);
+ else if (mode == E_VNx4SFmode)
+ return c.require_immediate_lane_index (3, 2, 4);
+ gcc_unreachable ();
+ }
+};
+SHAPE (ternary_mfloat8_lane_group_selection)
+
/* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloatt16_t, svbfloat16_t)
sv<t0>_t svfoo[_n_t0](sv<t0>_t, svbfloat16_t, bfloat16_t). */
struct ternary_bfloat_opt_n_def
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
index 06824b6192d..2674109d9c3 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h
@@ -71,7 +71,11 @@ namespace aarch64_sve
scalar displacement".
- "_pred" indicates that the function takes an svbool_t argument
- that does not act as a governing predicate.. */
+ that does not act as a governing predicate..
+
+ - "_group_selection" indicates that the function takes an imm integer
+ argument that selects a specific group of elements that fit a 128 bit
+ vector. */
namespace shapes
{
extern const function_shape *const adr_index;
@@ -212,7 +216,9 @@ namespace aarch64_sve
extern const function_shape *const ternary_lane_rotate;
extern const function_shape *const ternary_long_lane;
extern const function_shape *const ternary_long_opt_n;
+ extern const function_shape *const ternary_mfloat8;
extern const function_shape *const ternary_mfloat8_lane;
+ extern const function_shape *const ternary_mfloat8_lane_group_selection;
extern const function_shape *const ternary_mfloat8_opt_n;
extern const function_shape *const ternary_opt_n;
extern const function_shape *const ternary_qq_or_011_lane;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index c84c153e913..7d90e3b5e20 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -363,3 +363,15 @@ DEF_SVE_FUNCTION_GS_FPM (svmlallbb_lane, ternary_mfloat8_lane, s_float_mf8, none
DEF_SVE_FUNCTION_GS_FPM (svmlallbt_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set)
DEF_SVE_FUNCTION_GS_FPM (svmlalltb_lane, ternary_mfloat8_lane, s_float_mf8, none, none, set)
#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS \
+ streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4, AARCH64_FL_SSVE_FP8DOT4)
+DEF_SVE_FUNCTION_GS_FPM (svdot, ternary_mfloat8, s_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, s_float_mf8, none, none, set)
+#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS \
+ streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT2, AARCH64_FL_SSVE_FP8DOT2)
+DEF_SVE_FUNCTION_GS_FPM (svdot, ternary_mfloat8, h_float_mf8, none, none, set)
+DEF_SVE_FUNCTION_GS_FPM (svdot_lane, ternary_mfloat8_lane_group_selection, h_float_mf8, none, none, set)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 369b954043c..c3b88e39862 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -67,6 +67,7 @@
;; ---- [INT] Shift-and-insert operations
;; ---- [INT] Sum of absolute differences
;; ---- [FP] Mfloat8 Multiply-and-accumulate operations
+;; ---- [FP] Mfloat8 dot products
;;
;; == Extending arithmetic
;; ---- [INT] Multi-register widening conversions
@@ -2008,6 +2009,46 @@ (define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op><mode>"
}
)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Mfloat8 dot products
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FDOT (4-way, vectors)
+;; - FDOT (4-way, indexed)
+;; - FDOT (2-way, vectors)
+;; - FDOT (2-way, indexed)
+;; -------------------------------------------------------------------------
+(define_insn "@aarch64_sve_dot<mode>"
+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
+ (unspec:SVE_FULL_HSF
+ [(match_operand:SVE_FULL_HSF 1 "register_operand")
+ (match_operand:VNx16QI 2 "register_operand")
+ (match_operand:VNx16QI 3 "register_operand")
+ (reg:DI FPM_REGNUM)]
+ UNSPEC_DOT_FP8))]
+ "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , 0 , w , w ; * ] fdot\t%0.<Vetype>, %2.b, %3.b
+ [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b
+ }
+)
+
+(define_insn "@aarch64_sve_dot_lane<mode>"
+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
+ (unspec:SVE_FULL_HSF
+ [(match_operand:SVE_FULL_HSF 1 "register_operand")
+ (match_operand:VNx16QI 2 "register_operand")
+ (match_operand:VNx16QI 3 "register_operand")
+ (match_operand:SI 4 "const_int_operand")
+ (reg:DI FPM_REGNUM)]
+ UNSPEC_DOT_LANE_FP8))]
+ "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , 0 , w , y ; * ] fdot\t%0.<Vetype>, %2.b, %3.b[%4]
+ [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b[%4]
+ }
+)
+
;; =========================================================================
;; == Extending arithmetic
;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 4611d077e10..739b0933e99 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -507,6 +507,24 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
(TARGET_SVE2 && TARGET_FP8FMA) || TARGET_STREAMING) \
&& (AARCH64_HAVE_ISA(SSVE_FP8FMA) || TARGET_NON_STREAMING))
+/* fp8 four way dot product enabled through +fp8dot4. */
+#define TARGET_FP8DOT4 AARCH64_HAVE_ISA (FP8DOT4)
+
+/* Streaming versions of fp8 four way dot product instructions are enabled
+through +ssve-fp8dot4. */
+#define TARGET_SSVE_FP8DOT4 ((\
+ (TARGET_SVE2 && TARGET_FP8DOT4) || TARGET_STREAMING) \
+ && (AARCH64_HAVE_ISA(SSVE_FP8DOT4) || TARGET_NON_STREAMING))
+
+/* fp8 two way dot product enabled through +fp8dot2. */
+#define TARGET_FP8DOT2 AARCH64_HAVE_ISA (FP8DOT2)
+
+/* Streaming versions of fp8 two way dot product instructions are enabled
+through +ssve-fp8dot2. */
+#define TARGET_SSVE_FP8DOT2 ((\
+ (TARGET_SVE2 && TARGET_FP8DOT2) || TARGET_STREAMING) \
+ && (AARCH64_HAVE_ISA(SSVE_FP8DOT2) || TARGET_NON_STREAMING))
+
/* Standard register usage. */
/* 31 64-bit general purpose registers R0-R30:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 2f6e302375d..57843ac79dd 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -946,6 +946,8 @@ (define_c_enum "unspec"
UNSPEC_COND_FCVTX ; Used in aarch64-sve2.md.
UNSPEC_COND_FCVTXNT ; Used in aarch64-sve2.md.
UNSPEC_COND_FLOGB ; Used in aarch64-sve2.md.
+ UNSPEC_DOT_FP8 ; Used in aarch64-sve2.md.
+ UNSPEC_DOT_LANE_FP8 ; Used in aarch64-sve2.md.
UNSPEC_EORBT ; Used in aarch64-sve2.md.
UNSPEC_EORTB ; Used in aarch64-sve2.md.
UNSPEC_F1CVT ; Used in aarch64-sve2.md.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c
new file mode 100644
index 00000000000..819a09e61e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+sve2+fp8dot2")
+
+void
+test (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm,
+ svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f)
+{
+ svdot_fpm (f16, f8, f8, fpm);
+ svdot_fpm (f32, f8, f8, fpm);
+
+ svdot_fpm (f16); /* { dg-error {too few arguments to function 'svdot_fpm'} } */
+ svdot_fpm (f16, f8); /* { dg-error {too few arguments to function 'svdot_fpm'} } */
+ svdot_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svdot_fpm'} } */
+ svdot_fpm (f8, f8, fpm); /* { dg-error {too few arguments to function 'svdot_fpm'} } */
+ svdot_fpm (f16, f8, fpm); /* { dg-error {too few arguments to function 'svdot_fpm'} } */
+ svdot_fpm (f16, f8, f8, fpm, 0); /* { dg-error {too many arguments to function 'svdot_fpm'} } */
+
+ svdot_fpm (0, f8, f8, fpm); /* { dg-error {passing 'int' to argument 1 of 'svdot_fpm', which expects an SVE type rather than a scalar} } */
+ svdot_fpm (f16, f8, f, fpm); /* { dg-error {passing 'mfloat8_t' {aka '__mfp8'} to argument 3 of 'svdot_fpm', which expects 'svmfloat8_t'} } */
+ svdot_fpm (pg, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */
+ svdot_fpm (u8, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */
+ svdot_fpm (u16, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */
+ svdot_fpm (f64, f8, f8, fpm); /* { dg-error {'svdot_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */
+ svdot_fpm (f16, 0, f8, fpm); /* { dg-error {passing 'int' to argument 2 of 'svdot_fpm', which expects 'svmfloat8_t'} } */
+ svdot_fpm (f16, f16, f8, fpm); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svdot_fpm', which expects 'svmfloat8_t'} } */
+ svdot_fpm (f16, f8, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svdot_fpm', which expects 'svmfloat8_t'} } */
+ svdot_fpm (f16, f8, f16, fpm); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svdot_fpm', which expects 'svmfloat8_t'} } */
+ svdot_fpm (f16, f8, f8, f8); /* { dg-error {passing 'svmfloat8_t' to argument 4 of 'svdot_fpm', which expects 'int64_t'} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c
new file mode 100644
index 00000000000..dec00e3abf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_mfloat8_lane_group_selection_1.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+
+#include <arm_sve.h>
+
+#pragma GCC target ("arch=armv8.2-a+ssve-fp8fma+ssve-fp8dot2")
+
+void
+f1 (svfloat16_t f16, svmfloat8_t f8, fpm_t fpm,
+ svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32,
+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, mfloat8_t f, int i)
+ __arm_streaming
+{
+ svdot_lane_fpm (f32, f8, f8, 0, fpm);
+ svdot_lane_fpm (f32, f8, f8, 3, fpm);
+ svdot_lane_fpm (f16, f8, f8, 0, fpm);
+ svdot_lane_fpm (f16, f8, f8, 7, fpm);
+
+ svdot_lane_fpm (f32, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_fpm (f32, f8, f8, 4, fpm); /* { dg-error {passing 4 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 3\]} } */
+ svdot_lane_fpm (f16, f8, f8, -1, fpm); /* { dg-error {passing -1 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 7\]} } */
+ svdot_lane_fpm (f16, f8, f8, 8, fpm); /* { dg-error {passing 8 to argument 4 of 'svdot_lane_fpm', which expects a value in the range \[0, 7\]} } */
+
+ svdot_lane_fpm (f16); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f8); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f8, f8); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f8, f8, 0); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f8, f8, fpm); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f8, 15, fpm); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f8, f8, 15, fpm); /* { dg-error {too few arguments to function 'svdot_lane_fpm'} } */
+
+ svdot_lane_fpm (f16, f8, f8, 15, 0, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f8, f8, 15, fpm, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f8, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */
+ svdot_lane_fpm (f16, f16, f8, f8, 15, fpm); /* { dg-error {too many arguments to function 'svdot_lane_fpm'} } */
+
+ svdot_lane_fpm (f32, bf16, bf16, 0, fpm); /* { dg-error {passing 'svbfloat16_t' to argument 2 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */
+ svdot_lane_fpm (0, f8, f8, 0, fpm); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane_fpm', which expects an SVE type rather than a scalar} } */
+ svdot_lane_fpm (pg, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svbool_t' and 'svmfloat8_t' arguments} } */
+ svdot_lane_fpm (u8, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svuint8_t' and 'svmfloat8_t' arguments} } */
+ svdot_lane_fpm (u16, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svuint16_t' and 'svmfloat8_t' arguments} } */
+ svdot_lane_fpm (f64, f8, f8, 0, fpm); /* { dg-error {'svdot_lane_fpm' has no form that takes 'svfloat64_t' and 'svmfloat8_t' arguments} } */
+ svdot_lane_fpm (f16, 0, f8, 0, fpm); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */
+ svdot_lane_fpm (f16, f32, f8, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */
+ svdot_lane_fpm (f16, f8, 0, 0, fpm); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */
+ svdot_lane_fpm (f16, f8, f32, 0, fpm); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svdot_lane_fpm', which expects 'svmfloat8_t'} } */
+
+ svdot_lane_fpm (f16, f8, f8, s32, fpm); /* { dg-error {argument 4 of 'svdot_lane_fpm' must be an integer constant expression} } */
+ svdot_lane_fpm (f16, f8, f8, i, fpm); /* { dg-error {argument 4 of 'svdot_lane_fpm' must be an integer constant expression} } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
new file mode 100644
index 00000000000..e1b75d68fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_lane_mf8.c
@@ -0,0 +1,169 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.5-a+sve2+fp8dot2" } */
+/* { dg-require-effective-target aarch64_asm_fp8dot2_ok } */
+/* { dg-require-effective-target aarch64_asm_ssve-fp8dot2_ok } */
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** dot_lane_0_f16_tied1:
+** msr fpmr, x0
+** fdot z0\.h, z4\.b, z5\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f16_tied1, svfloat16_t, svmfloat8_t,
+ z0 = svdot_lane_f16_mf8_fpm (z0, z4, z5, 0, fpm0),
+ z0 = svdot_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** dot_lane_0_f16_tied2:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.h, \1\.b, z1\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f16_tied2, svfloat16_t, svmfloat8_t,
+ z0_res = svdot_lane_f16_mf8_fpm (z4, z0, z1, 0, fpm0),
+ z0_res = svdot_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** dot_lane_0_f16_tied3:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.h, z1\.b, \1\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f16_tied3, svfloat16_t, svmfloat8_t,
+ z0_res = svdot_lane_f16_mf8_fpm (z4, z1, z0, 0, fpm0),
+ z0_res = svdot_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** dot_lane_0_f16_untied:
+** msr fpmr, x0
+** movprfx z0, z1
+** fdot z0\.h, z4\.b, z5\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f16_untied, svfloat16_t, svmfloat8_t,
+ z0 = svdot_lane_f16_mf8_fpm (z1, z4, z5, 0, fpm0),
+ z0 = svdot_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** dot_lane_1_f16:
+** msr fpmr, x0
+** fdot z0\.h, z4\.b, z5\.b\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_f16, svfloat16_t, svmfloat8_t,
+ z0 = svdot_lane_f16_mf8_fpm (z0, z4, z5, 1, fpm0),
+ z0 = svdot_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** dot_lane_z8_f16:
+** ...
+** msr fpmr, x0
+** mov (z[0-7])\.d, z8\.d
+** fdot z0\.h, z1\.b, \1\.b\[1\]
+** ldr d8, \[sp\], 32
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_f16, svfloat16_t, svmfloat8_t, z8,
+ z0 = svdot_lane_f16_mf8_fpm (z0, z1, z8, 1, fpm0),
+ z0 = svdot_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** dot_lane_z16_f16:
+** ...
+** msr fpmr, x0
+** mov (z[0-7])\.d, z16\.d
+** fdot z0\.h, z1\.b, \1\.b\[1\]
+** ...
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z16_f16, svfloat16_t, svmfloat8_t, z16,
+ z0 = svdot_lane_f16_mf8_fpm (z0, z1, z16, 1, fpm0),
+ z0 = svdot_lane_fpm (z0, z1, z16, 1, fpm0))
+
+/*
+** dot_lane_0_f32_tied1:
+** msr fpmr, x0
+** fdot z0\.s, z4\.b, z5\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_tied1, svfloat32_t, svmfloat8_t,
+ z0 = svdot_lane_f32_mf8_fpm (z0, z4, z5, 0, fpm0),
+ z0 = svdot_lane_fpm (z0, z4, z5, 0, fpm0))
+
+/*
+** dot_lane_0_f32_tied2:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.b, z1\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied2, svfloat32_t, svmfloat8_t,
+ z0_res = svdot_lane_f32_mf8_fpm (z4, z0, z1, 0, fpm0),
+ z0_res = svdot_lane_fpm (z4, z0, z1, 0, fpm0))
+
+/*
+** dot_lane_0_f32_tied3:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.b, \1\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z_REV (dot_lane_0_f32_tied3, svfloat32_t, svmfloat8_t,
+ z0_res = svdot_lane_f32_mf8_fpm (z4, z1, z0, 0, fpm0),
+ z0_res = svdot_lane_fpm (z4, z1, z0, 0, fpm0))
+
+/*
+** dot_lane_0_f32_untied:
+** msr fpmr, x0
+** movprfx z0, z1
+** fdot z0\.s, z4\.b, z5\.b\[0\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_0_f32_untied, svfloat32_t, svmfloat8_t,
+ z0 = svdot_lane_f32_mf8_fpm (z1, z4, z5, 0, fpm0),
+ z0 = svdot_lane_fpm (z1, z4, z5, 0, fpm0))
+
+/*
+** dot_lane_1_f32:
+** msr fpmr, x0
+** fdot z0\.s, z4\.b, z5\.b\[1\]
+** ret
+*/
+TEST_DUAL_Z (dot_lane_1_f32, svfloat32_t, svmfloat8_t,
+ z0 = svdot_lane_f32_mf8_fpm (z0, z4, z5, 1, fpm0),
+ z0 = svdot_lane_fpm (z0, z4, z5, 1, fpm0))
+
+/*
+** dot_lane_z8_f32:
+** ...
+** msr fpmr, x0
+** mov (z[0-7])\.d, z8\.d
+** fdot z0\.s, z1\.b, \1\.b\[1\]
+** ldr d8, \[sp\], 32
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z8_f32, svfloat32_t, svmfloat8_t, z8,
+ z0 = svdot_lane_f32_mf8_fpm (z0, z1, z8, 1, fpm0),
+ z0 = svdot_lane_fpm (z0, z1, z8, 1, fpm0))
+
+/*
+** dot_lane_z32_f32:
+** ...
+** msr fpmr, x0
+** mov (z[0-7])\.d, z16\.d
+** fdot z0\.s, z1\.b, \1\.b\[1\]
+** ...
+** ret
+*/
+TEST_DUAL_LANE_REG (dot_lane_z32_f32, svfloat32_t, svmfloat8_t, z16,
+ z0 = svdot_lane_f32_mf8_fpm (z0, z1, z16, 1, fpm0),
+ z0 = svdot_lane_fpm (z0, z1, z16, 1, fpm0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c
new file mode 100644
index 00000000000..d942809011e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/dot_mf8.c
@@ -0,0 +1,98 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+/* { dg-additional-options "-march=armv8.5-a+sve2+fp8dot2" } */
+/* { dg-require-effective-target aarch64_asm_fp8dot2_ok } */
+/* { dg-require-effective-target aarch64_asm_ssve-fp8dot2_ok } */
+/* { dg-skip-if "" { *-*-* } { "-DSTREAMING_COMPATIBLE" } { "" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** dot_f16_mf8_tied1:
+** msr fpmr, x0
+** fdot z0\.h, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (dot_f16_mf8_tied1, svfloat16_t, svmfloat8_t,
+ z0 = svdot_f16_mf8_fpm (z0, z4, z5, fpm0),
+ z0 = svdot_fpm (z0, z4, z5, fpm0))
+
+/*
+** dot_f16_mf8_tied2:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.h, \1\.b, z1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f16_mf8_tied2, svfloat16_t, svmfloat8_t,
+ z0_res = svdot_f16_mf8_fpm (z4, z0, z1, fpm0),
+ z0_res = svdot_fpm (z4, z0, z1, fpm0))
+
+/*
+** dot_f16_mf8_tied3:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.h, z1\.b, \1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f16_mf8_tied3, svfloat16_t, svmfloat8_t,
+ z0_res = svdot_f16_mf8_fpm (z4, z1, z0, fpm0),
+ z0_res = svdot_fpm (z4, z1, z0, fpm0))
+
+/*
+** dot_f16_mf8_untied:
+** msr fpmr, x0
+** movprfx z0, z1
+** fdot z0\.h, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (dot_f16_mf8_untied, svfloat16_t, svmfloat8_t,
+ z0 = svdot_f16_mf8_fpm (z1, z4, z5, fpm0),
+ z0 = svdot_fpm (z1, z4, z5, fpm0))
+
+/*
+** dot_f32_mf8_tied1:
+** msr fpmr, x0
+** fdot z0\.s, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (dot_f32_mf8_tied1, svfloat32_t, svmfloat8_t,
+ z0 = svdot_f32_mf8_fpm (z0, z4, z5, fpm0),
+ z0 = svdot_fpm (z0, z4, z5, fpm0))
+
+/*
+** dot_f32_mf8_tied2:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, \1\.b, z1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_mf8_tied2, svfloat32_t, svmfloat8_t,
+ z0_res = svdot_f32_mf8_fpm (z4, z0, z1, fpm0),
+ z0_res = svdot_fpm (z4, z0, z1, fpm0))
+
+/*
+** dot_f32_mf8_tied3:
+** msr fpmr, x0
+** mov (z[0-9]+)\.d, z0\.d
+** movprfx z0, z4
+** fdot z0\.s, z1\.b, \1\.b
+** ret
+*/
+TEST_DUAL_Z_REV (dot_f32_mf8_tied3, svfloat32_t, svmfloat8_t,
+ z0_res = svdot_f32_mf8_fpm (z4, z1, z0, fpm0),
+ z0_res = svdot_fpm (z4, z1, z0, fpm0))
+
+/*
+** dot_f32_mf8_untied:
+** msr fpmr, x0
+** movprfx z0, z1
+** fdot z0\.s, z4\.b, z5\.b
+** ret
+*/
+TEST_DUAL_Z (dot_f32_mf8_untied, svfloat32_t, svmfloat8_t,
+ z0 = svdot_f32_mf8_fpm (z1, z4, z5, fpm0),
+ z0 = svdot_fpm (z1, z4, z5, fpm0))
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 758b8a5944e..fed6df20b1c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12122,7 +12122,8 @@ proc check_effective_target_aarch64_tiny { } {
foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
"i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
"sme" "sme-i16i64" "sme2" "fp8" "fp8fma"
- "ssve-fp8fma" } {
+ "ssve-fp8fma" "fp8dot2" "ssve-fp8dot2" "fp8dot4"
+ "ssve-fp8dot4"} {
eval [string map [list FUNC $aarch64_ext] {
proc check_effective_target_aarch64_asm_FUNC_ok { } {
if { [istarget aarch64*-*-*] } {