+
[...]
+/*
+** caller_1:
+** ...
+** mov (z[0-9]+\.b), w3
+** ...
+** st1b \1, p[0-7], \[x2\]
+** ...
+** ret
+*/
+void __attribute__((noipa)) /* Pass an SVE vector as a variadic arg.  */
+caller_1 (mfloat8_t *ptr, mfloat8_t in)
+{
+ callee_1 (ptr, 1, svdup_mf8 (in)); /* Expected asm: st1b to [x2].  */
+}
+
+/*
+** callee_7:
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\]
+** ...
+** st1b \1, p[0-7], \[x0\]
+** ...
+** ret
+*/
+void __attribute__((noipa)) /* Read an SVE vector as the 7th vararg.  */
+callee_7 (mfloat8_t *ptr, ...)
+{
+ va_list va;
+ svmfloat8_t vec;
+
+ va_start (va, ptr);
+ va_arg (va, int); /* Discard six int varargs (1 of 6).  */
+ va_arg (va, int); /* 2 of 6.  */
+ va_arg (va, int); /* 3 of 6.  */
+ va_arg (va, int); /* 4 of 6.  */
+ va_arg (va, int); /* 5 of 6.  */
+ va_arg (va, int); /* 6 of 6.  */
+ vec = va_arg (va, svmfloat8_t); /* Expected asm: ld1b from [x7].  */
+ va_end (va);
+ svst1 (svptrue_b8 (), ptr, vec); /* Expected asm: st1b to [x0].  */
+}
+
+/*
+** caller_7:
+** ...
+** mov (z[0-9]+\.b), w8
+** ...
+** st1b \1, p[0-7], \[x7\]
+** ...
+** ret
+*/
+void __attribute__((noipa)) /* Six int varargs, then an SVE vector.  */
+caller_7 (mfloat8_t *ptr, mfloat8_t in)
+{
+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_mf8 (in)); /* st1b to [x7].  */
+}
+
+/* FIXME: We should be able to get rid of the va_list object. */
+/*
+** callee_8:
+** sub sp, sp, #([0-9]+)
+** ...
+** ldr (x[0-9]+), \[sp, \1\]
+** ...
+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\]
+** ...
+** st1b \3, \4, \[x0\]
+** ...
+** ret
+*/
+void __attribute__((noipa)) /* Read an SVE vector as the 8th vararg.  */
+callee_8 (mfloat8_t *ptr, ...)
+{
+ va_list va;
+ svmfloat8_t vec;
+
+ va_start (va, ptr);
+ va_arg (va, int); /* Discard seven int varargs (1 of 7).  */
+ va_arg (va, int); /* 2 of 7.  */
+ va_arg (va, int); /* 3 of 7.  */
+ va_arg (va, int); /* 4 of 7.  */
+ va_arg (va, int); /* 5 of 7.  */
+ va_arg (va, int); /* 6 of 7.  */
+ va_arg (va, int); /* 7 of 7.  */
+ vec = va_arg (va, svmfloat8_t); /* Expected asm: ldr from sp, then ld1b.  */
+ va_end (va);
+ svst1 (svptrue_b8 (), ptr, vec); /* Expected asm: st1b to [x0].  */
+}
+
+/*
+** caller_8:
+** ...
+** mov (z[0-9]+\.b), w1
+** ...
+** st1b \1, p[0-7], \[(x[0-9]+)\]
+** ...
+** str \2, \[sp\]
+** ...
+** ret
+*/
+void __attribute__((noipa)) /* Seven int varargs, then an SVE vector.  */
+caller_8 (mfloat8_t *ptr, mfloat8_t in) /* Expected asm: str to [sp].  */
+{
+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_mf8 (in));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbl2_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbl2_mf8.c
new file mode 100644
index 00000000000..19cc739e7ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbl2_mf8.c
@@ -0,0 +1,31 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** tbl2_mf8_tied1:
+** tbl z0\.b, {z0\.b(?:, | - )z1\.b}, z4\.b
+** ret
+*/
+TEST_TBL2 (tbl2_mf8_tied1, svmfloat8x2_t, svmfloat8_t, svuint8_t,
+ z0_res = svtbl2_mf8 (z0, z4), /* _mf8-suffixed name; result and table use z0.  */
+ z0_res = svtbl2 (z0, z4)) /* Same call via the unsuffixed name.  */
+
+/*
+** tbl2_mf8_tied2:
+** tbl z0\.b, {z1\.b(?:, | - )z2\.b}, z0\.b
+** ret
+*/
+TEST_TBL2_REV (tbl2_mf8_tied2, svmfloat8x2_t, svmfloat8_t, svuint8_t,
+ z0_res = svtbl2_mf8 (z1, z0), /* _mf8-suffixed name; result and 2nd arg use z0.  */
+ z0_res = svtbl2 (z1, z0)) /* Same call via the unsuffixed name.  */
+
+/*
+** tbl2_mf8_untied:
+** tbl z0\.b, {z2\.b(?:, | - )z3\.b}, z4\.b
+** ret
+*/
+TEST_TBL2 (tbl2_mf8_untied, svmfloat8x2_t, svmfloat8_t, svuint8_t,
+ z0_res = svtbl2_mf8 (z2, z4), /* _mf8-suffixed name; neither argument is z0.  */
+ z0_res = svtbl2 (z2, z4)) /* Same call via the unsuffixed name.  */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbx_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbx_mf8.c
new file mode 100644
index 00000000000..ba0fef3934b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/tbx_mf8.c
@@ -0,0 +1,37 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** tbx_mf8_tied1:
+** tbx z0\.b, z1\.b, z4\.b
+** ret
+*/
+TEST_DUAL_Z (tbx_mf8_tied1, svmfloat8_t, svuint8_t,
+ z0 = svtbx_mf8 (z0, z1, z4), /* _mf8-suffixed name; result and 1st arg are z0.  */
+ z0 = svtbx (z0, z1, z4)) /* Same call via the unsuffixed name.  */
+
+/* Bad RA choice: no preferred output sequence. */
+TEST_DUAL_Z (tbx_mf8_tied2, svmfloat8_t, svuint8_t,
+ z0 = svtbx_mf8 (z1, z0, z4), /* _mf8-suffixed name; result and 2nd arg are z0.  */
+ z0 = svtbx (z1, z0, z4)) /* Same call via the unsuffixed name.  */
+
+/* Bad RA choice: no preferred output sequence. */
+TEST_DUAL_Z_REV (tbx_mf8_tied3, svmfloat8_t, svuint8_t,
+ z0_res = svtbx_mf8 (z4, z5, z0), /* _mf8-suffixed name; 3rd arg is z0.  */
+ z0_res = svtbx (z4, z5, z0)) /* Same call via the unsuffixed name.  */
+
+/*
+** tbx_mf8_untied:
+** (
+** mov z0\.d, z1\.d
+** tbx z0\.b, z2\.b, z4\.b
+** |
+** tbx z1\.b, z2\.b, z4\.b
+** mov z0\.d, z1\.d
+** )
+** ret
+*/
+TEST_DUAL_Z (tbx_mf8_untied, svmfloat8_t, svuint8_t,
+ z0 = svtbx_mf8 (z1, z2, z4), /* _mf8-suffixed name; no argument is z0.  */
+ z0 = svtbx (z1, z2, z4)) /* Same call via the unsuffixed name.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilerw_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilerw_mf8.c
new file mode 100644
index 00000000000..12cf0d2c365
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilerw_mf8.c
@@ -0,0 +1,50 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** whilerw_rr_mf8:
+** whilerw p0\.b, x0, x1
+** ret
+*/
+TEST_COMPARE_S (whilerw_rr_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 (x0, x1), /* _mf8-suffixed name; both pointers are x0/x1.  */
+ p0 = svwhilerw (x0, x1)) /* Same call via the unsuffixed name.  */
+
+/*
+** whilerw_0r_mf8:
+** whilerw p0\.b, xzr, x1
+** ret
+*/
+TEST_COMPARE_S (whilerw_0r_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 ((const mfloat8_t *) 0, x1), /* Null 1st pointer: expect xzr.  */
+ p0 = svwhilerw ((const mfloat8_t *) 0, x1)) /* Unsuffixed name.  */
+
+/*
+** whilerw_cr_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilerw p0\.b, \1, x1
+** ret
+*/
+TEST_COMPARE_S (whilerw_cr_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 ((const mfloat8_t *) 1073741824, x1), /* 1 << 30; expect a mov.  */
+ p0 = svwhilerw ((const mfloat8_t *) 1073741824, x1)) /* Unsuffixed name.  */
+
+/*
+** whilerw_r0_mf8:
+** whilerw p0\.b, x0, xzr
+** ret
+*/
+TEST_COMPARE_S (whilerw_r0_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 (x0, (const mfloat8_t *) 0), /* Null 2nd pointer: expect xzr.  */
+ p0 = svwhilerw (x0, (const mfloat8_t *) 0)) /* Unsuffixed name.  */
+
+/*
+** whilerw_rc_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilerw p0\.b, x0, \1
+** ret
+*/
+TEST_COMPARE_S (whilerw_rc_mf8, const mfloat8_t *,
+ p0 = svwhilerw_mf8 (x0, (const mfloat8_t *) 1073741824), /* 1 << 30; expect a mov.  */
+ p0 = svwhilerw (x0, (const mfloat8_t *) 1073741824)) /* Unsuffixed name.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilewr_mf8.c b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilewr_mf8.c
new file mode 100644
index 00000000000..c4023a2fbff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/acle/asm/whilewr_mf8.c
@@ -0,0 +1,50 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */
+
+#include "test_sve_acle.h"
+
+/*
+** whilewr_rr_mf8:
+** whilewr p0\.b, x0, x1
+** ret
+*/
+TEST_COMPARE_S (whilewr_rr_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 (x0, x1), /* _mf8-suffixed name; both pointers are x0/x1.  */
+ p0 = svwhilewr (x0, x1)) /* Same call via the unsuffixed name.  */
+
+/*
+** whilewr_0r_mf8:
+** whilewr p0\.b, xzr, x1
+** ret
+*/
+TEST_COMPARE_S (whilewr_0r_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 ((const mfloat8_t *) 0, x1), /* Null 1st pointer: expect xzr.  */
+ p0 = svwhilewr ((const mfloat8_t *) 0, x1)) /* Unsuffixed name.  */
+
+/*
+** whilewr_cr_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilewr p0\.b, \1, x1
+** ret
+*/
+TEST_COMPARE_S (whilewr_cr_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 ((const mfloat8_t *) 1073741824, x1), /* 1 << 30; expect a mov.  */
+ p0 = svwhilewr ((const mfloat8_t *) 1073741824, x1)) /* Unsuffixed name.  */
+
+/*
+** whilewr_r0_mf8:
+** whilewr p0\.b, x0, xzr
+** ret
+*/
+TEST_COMPARE_S (whilewr_r0_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 (x0, (const mfloat8_t *) 0), /* Null 2nd pointer: expect xzr.  */
+ p0 = svwhilewr (x0, (const mfloat8_t *) 0)) /* Unsuffixed name.  */
+
+/*
+** whilewr_rc_mf8:
+** mov (x[0-9]+), #?1073741824
+** whilewr p0\.b, x0, \1
+** ret
+*/
+TEST_COMPARE_S (whilewr_rc_mf8, const mfloat8_t *,
+ p0 = svwhilewr_mf8 (x0, (const mfloat8_t *) 1073741824), /* 1 << 30; expect a mov.  */
+ p0 = svwhilewr (x0, (const mfloat8_t *) 1073741824)) /* Unsuffixed name.  */