https://gcc.gnu.org/g:d71bb2c0c8b4bbf7fce5581932673fb67b58e8bb
commit r15-6076-gd71bb2c0c8b4bbf7fce5581932673fb67b58e8bb
Author: Saurabh Jha <saurabh....@arm.com>
Date:   Tue Dec 10 13:21:21 2024 +0000

    aarch64: Add support for fp8fma instructions

    The AArch64 FEAT_FP8FMA extension introduces instructions for
    multiply-add of vectors.

    This patch introduces the following instructions:

    1. {vmlalbq|vmlaltq}_f16_mf8_fpm.
    2. {vmlalbq|vmlaltq}_lane{q}_f16_mf8_fpm.
    3. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_f32_mf8_fpm.
    4. {vmlallbbq|vmlallbtq|vmlalltbq|vmlallttq}_lane{q}_f32_mf8_fpm.
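As a usage sketch (not part of the patch): each new intrinsic takes the
accumulator, the two mf8 source vectors, and an fpm_t value that the compiler
moves into the FPMR register, as the tests below verify.  The function name
here is invented for illustration.

    #include <arm_neon.h>

    /* Multiply the even-indexed ("bottom") FP8 elements of b and c,
       widening to float16, and accumulate into a; fpm supplies the
       FP8 format controls via FPMR.  */
    float16x8_t
    add_bottom_products (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c,
                         fpm_t fpm)
    {
      return vmlalbq_f16_mf8_fpm (a, b, c, fpm);
    }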
    gcc/ChangeLog:

            * config/aarch64/aarch64-builtins.cc
            (aarch64_pragma_builtins_checker::require_immediate_lane_index):
            New overload.
            (aarch64_pragma_builtins_checker::check): Add support for FP8FMA
            intrinsics.
            (aarch64_expand_pragma_builtin): Likewise.
            * config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
            Conditionally define __ARM_FEATURE_FP8FMA.
            * config/aarch64/aarch64-simd-pragma-builtins.def: Add the FP8FMA
            intrinsics.
            * config/aarch64/aarch64-simd.md
            (@aarch64_<FMLAL_FP8_HF:insn><mode>): New pattern.
            (@aarch64_<FMLAL_FP8_HF:insn>_lane<V8HF_ONLY:mode><VB:mode>):
            Likewise.
            (@aarch64_<FMLALL_FP8_SF:insn><mode>): Likewise.
            (@aarch64_<FMLALL_FP8_SF:insn>_lane<V4SF_ONLY:mode><VB:mode>):
            Likewise.
            * config/aarch64/iterators.md (V8HF_ONLY): New mode iterator.
            (SVE2_FP8_TERNARY_VNX8HF): Rename to...
            (FMLAL_FP8_HF): ...this.
            (SVE2_FP8_TERNARY_LANE_VNX8HF): Delete in favor of FMLAL_FP8_HF.
            (SVE2_FP8_TERNARY_VNX4SF): Rename to...
            (FMLALL_FP8_SF): ...this.
            (SVE2_FP8_TERNARY_LANE_VNX4SF): Delete in favor of FMLALL_FP8_SF.
            (sve2_fp8_fma_op_vnx8hf, sve2_fp8_fma_op_vnx4sf): Fold into...
            (insn): ...here.
            * config/aarch64/aarch64-sve2.md: Update uses accordingly.

    gcc/testsuite/ChangeLog:

            * gcc.target/aarch64/pragma_cpp_predefs_4.c: Test
            __ARM_FEATURE_FP8FMA.
            * gcc.target/aarch64/simd/vmla_fpm.c: New test.
            * gcc.target/aarch64/simd/vmla_lane_indices_1.c: Likewise.

    Co-authored-by: Richard Sandiford <richard.sandif...@arm.com>

Diff:
---
 gcc/config/aarch64/aarch64-builtins.cc             |  43 +++
 gcc/config/aarch64/aarch64-c.cc                    |   2 +
 .../aarch64/aarch64-simd-pragma-builtins.def       |  16 +
 gcc/config/aarch64/aarch64-simd.md                 |  63 ++++
 gcc/config/aarch64/aarch64-sve2.md                 |  32 +-
 gcc/config/aarch64/iterators.md                    |  31 +-
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c      |  20 ++
 gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c   | 365 +++++++++++++++++++++
 .../gcc.target/aarch64/simd/vmla_lane_indices_1.c  |  55 ++++
 9 files changed, 589 insertions(+), 38 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 63e17eeb20e5..ca1dc5a3e6a7 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2596,6 +2596,7 @@ struct aarch64_pragma_builtins_checker
   bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
   bool require_immediate_lane_index (unsigned int, unsigned int,
				     unsigned int);
+  bool require_immediate_lane_index (unsigned int, unsigned int);
 
   bool check ();
 
@@ -2659,6 +2660,16 @@ require_immediate_lane_index (unsigned int lane_argno, unsigned vec_argno,
   return require_immediate_range (lane_argno, 0, nunits - 1);
 }
 
+/* Require argument LANE_ARGNO to be an immediate lane index that selects
+   one element of argument VEC_ARGNO.  Return true if the argument
+   is valid.  */
+bool
+aarch64_pragma_builtins_checker::
+require_immediate_lane_index (unsigned int lane_argno, unsigned int vec_argno)
+{
+  return require_immediate_lane_index (lane_argno, vec_argno, vec_argno);
+}
+
 /* Check the arguments to the intrinsic call and return true if they
    are valid.  */
 bool
@@ -2669,6 +2680,19 @@ aarch64_pragma_builtins_checker::check ()
     case UNSPEC_FDOT_LANE_FP8:
       return require_immediate_lane_index (nargs - 2, nargs - 3, 0);
 
+    case UNSPEC_FMLALB_FP8:
+    case UNSPEC_FMLALT_FP8:
+    case UNSPEC_FMLALLBB_FP8:
+    case UNSPEC_FMLALLBT_FP8:
+    case UNSPEC_FMLALLTB_FP8:
+    case UNSPEC_FMLALLTT_FP8:
+      if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
+	return require_immediate_lane_index (nargs - 2, nargs - 3);
+      else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
+	return true;
+      else
+	gcc_unreachable ();
+
     case UNSPEC_LUTI2:
     case UNSPEC_LUTI4:
       {
@@ -3718,6 +3742,25 @@ aarch64_expand_pragma_builtin (tree exp, rtx target,
				     ops[0].mode, ops[3].mode);
       break;
 
+    case UNSPEC_FMLALB_FP8:
+    case UNSPEC_FMLALT_FP8:
+    case UNSPEC_FMLALLBB_FP8:
+    case UNSPEC_FMLALLBT_FP8:
+    case UNSPEC_FMLALLTB_FP8:
+    case UNSPEC_FMLALLTT_FP8:
+      if (builtin_data.signature == aarch64_builtin_signatures::ternary_lane)
+	{
+	  ops[4].value = aarch64_endian_lane_rtx (ops[3].mode,
+						  INTVAL (ops[4].value));
+	  icode = code_for_aarch64_lane (builtin_data.unspec,
+					 ops[0].mode, ops[3].mode);
+	}
+      else if (builtin_data.signature == aarch64_builtin_signatures::ternary)
+	icode = code_for_aarch64 (builtin_data.unspec, ops[0].mode);
+      else
+	gcc_unreachable ();
+      break;
+
     case UNSPEC_LUTI2:
     case UNSPEC_LUTI4:
       create_integer_operand (ops.safe_push ({}),
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 7591f1622d2d..4d308de1e2bd 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -274,6 +274,8 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
 
   aarch64_def_or_undef (TARGET_FP8DOT4, "__ARM_FEATURE_FP8DOT4", pfile);
 
+  aarch64_def_or_undef (TARGET_FP8FMA, "__ARM_FEATURE_FP8FMA", pfile);
+
   aarch64_def_or_undef (TARGET_LS64, "__ARM_FEATURE_LS64", pfile);
 
   aarch64_def_or_undef (TARGET_RCPC, "__ARM_FEATURE_RCPC", pfile);
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
index 19277860b8ce..5dafa7bb6b91 100644
--- a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -104,6 +104,12 @@
   ENTRY_TERNARY_LANE (vdotq_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q,	\
		      UNSPEC_FDOT_LANE_FP8, FP8)
 
+#undef ENTRY_FMA_FPM
+#define ENTRY_FMA_FPM(N, T, U)						\
+  ENTRY_TERNARY (N##q_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U, FP8)	\
+  ENTRY_TERNARY_LANE (N##q_lane_##T##_mf8_fpm, T##q, T##q, f8q, f8, U, FP8) \
+  ENTRY_TERNARY_LANE (N##q_laneq_##T##_mf8_fpm, T##q, T##q, f8q, f8q, U, FP8)
+
 // faminmax
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
 ENTRY_BINARY_VHSDF (vamax, UNSPEC_FAMAX, FP)
@@ -154,3 +160,13 @@ ENTRY_VDOT_FPM (f16)
 #define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8DOT4)
 ENTRY_VDOT_FPM (f32)
 #undef REQUIRED_EXTENSIONS
+
+// fp8 multiply-add
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FP8FMA)
+ENTRY_FMA_FPM (vmlalb, f16, UNSPEC_FMLALB_FP8)
+ENTRY_FMA_FPM (vmlalt, f16, UNSPEC_FMLALT_FP8)
+ENTRY_FMA_FPM (vmlallbb, f32, UNSPEC_FMLALLBB_FP8)
+ENTRY_FMA_FPM (vmlallbt, f32, UNSPEC_FMLALLBT_FP8)
+ENTRY_FMA_FPM (vmlalltb, f32, UNSPEC_FMLALLTB_FP8)
+ENTRY_FMA_FPM (vmlalltt, f32, UNSPEC_FMLALLTT_FP8)
+#undef REQUIRED_EXTENSIONS
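For reference, each ENTRY_FMA_FPM invocation above expands, per the #define,
into one full-vector entry and two lane entries; the first one, for example,
yields:

    ENTRY_TERNARY (vmlalbq_f16_mf8_fpm, f16q, f16q, f8q, f8q,
                   UNSPEC_FMLALB_FP8, FP8)
    ENTRY_TERNARY_LANE (vmlalbq_lane_f16_mf8_fpm, f16q, f16q, f8q, f8,
                        UNSPEC_FMLALB_FP8, FP8)
    ENTRY_TERNARY_LANE (vmlalbq_laneq_f16_mf8_fpm, f16q, f16q, f8q, f8q,
                        UNSPEC_FMLALB_FP8, FP8)

So the _lane form indexes a 64-bit (f8) vector and the _laneq form a 128-bit
(f8q) one, matching the [0, 7] and [0, 15] ranges enforced by the checker.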
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 69035c797fb0..fa72e6a6a604 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -10124,3 +10124,66 @@
   ""
   "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
 )
+
+;; fpm fma instructions.
+(define_insn "@aarch64_<insn><mode>"
+  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+	(unspec:V8HF_ONLY
+	  [(match_operand:V8HF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (match_operand:V16QI 3 "register_operand" "w")
+	   (reg:DI FPM_REGNUM)]
+	  FMLAL_FP8_HF))]
+  "TARGET_FP8FMA"
+  "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
+)
+
+(define_insn "@aarch64_<insn>_lane<V8HF_ONLY:mode><VB:mode>"
+  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
+	(unspec:V8HF_ONLY
+	  [(match_operand:V8HF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (vec_duplicate:V16QI
+	     (vec_select:QI
+	       (match_operand:VB 3 "register_operand" "w")
+	       (parallel [(match_operand:SI 4 "immediate_operand")])))
+	   (reg:DI FPM_REGNUM)]
+	  FMLAL_FP8_HF))]
+  "TARGET_FP8FMA"
+  {
+    operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
+					   INTVAL (operands[4]));
+    return "<insn>\t%0.<V8HF_ONLY:Vtype>, %2.16b, %3.b[%4]";
+  }
+)
+
+(define_insn "@aarch64_<insn><mode>"
+  [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
+	(unspec:V4SF_ONLY
+	  [(match_operand:V4SF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (match_operand:V16QI 3 "register_operand" "w")
+	   (reg:DI FPM_REGNUM)]
+	  FMLALL_FP8_SF))]
+  "TARGET_FP8FMA"
+  "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
+)
+
+(define_insn "@aarch64_<insn>_lane<V4SF_ONLY:mode><VB:mode>"
+  [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
+	(unspec:V4SF_ONLY
+	  [(match_operand:V4SF_ONLY 1 "register_operand" "0")
+	   (match_operand:V16QI 2 "register_operand" "w")
+	   (vec_duplicate:V16QI
+	     (vec_select:QI
+	       (match_operand:VB 3 "register_operand" "w")
+	       (parallel [(match_operand:SI 4 "immediate_operand")])))
+	   (reg:DI FPM_REGNUM)]
+	  FMLALL_FP8_SF))]
+  "TARGET_FP8FMA"
+  {
+    operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
+					   INTVAL (operands[4]));
+    return "<insn>\t%0.<V4SF_ONLY:Vtype>, %2.16b, %3.b[%4]";
+  }
+)
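A note on the lane patterns above: GCC numbers vector lanes in memory order,
which differs from the architectural register order on big-endian targets,
so the lane operand is remapped before printing.  A rough sketch of what
aarch64_endian_lane_rtx computes, assuming the backend's usual ENDIAN_LANE_N
convention:

    /* Identity on little-endian; reversed on big-endian.  */
    unsigned int arch_lane = BYTES_BIG_ENDIAN ? nunits - 1 - lane : lane;

The expander in aarch64-builtins.cc applies the same remapping to the user's
lane number before generating the insn, so the two conversions compose back
to the architectural lane.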
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index d26c0c55c555..7e8a505f707c 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -2013,37 +2013,37 @@
 ;; - FMLALLTT (indexed)	(FP8FMA)
 ;; -------------------------------------------------------------------------
 
-(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx8hf><mode>"
+(define_insn "@aarch64_sve_add_<insn><mode>"
   [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
	(unspec:VNx8HF_ONLY
	  [(match_operand:VNx8HF 1 "register_operand")
	   (match_operand:VNx16QI 2 "register_operand")
	   (match_operand:VNx16QI 3 "register_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_VNX8HF))]
+	  FMLAL_FP8_HF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , w ; *              ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
-     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b
+     [ w        , 0 , w , w ; *              ] <insn>\t%0.h, %2.b, %3.b
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b
   }
 )
 
-(define_insn "@aarch64_sve_add_<sve2_fp8_fma_op_vnx4sf><mode>"
+(define_insn "@aarch64_sve_add_<insn><mode>"
   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
	(unspec:VNx4SF_ONLY
	  [(match_operand:VNx4SF 1 "register_operand")
	   (match_operand:VNx16QI 2 "register_operand")
	   (match_operand:VNx16QI 3 "register_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_VNX4SF))]
+	  FMLALL_FP8_SF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , w ; *              ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
-     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b
+     [ w        , 0 , w , w ; *              ] <insn>\t%0.s, %2.b, %3.b
+     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b
   }
 )
 
-(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx8hf><mode>"
+(define_insn "@aarch64_sve_add_lane_<insn><mode>"
   [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
	(unspec:VNx8HF_ONLY
	  [(match_operand:VNx8HF 1 "register_operand")
@@ -2051,15 +2051,15 @@
	   (match_operand:VNx16QI 3 "register_operand")
	   (match_operand:SI 4 "const_int_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_LANE_VNX8HF))]
+	  FMLAL_FP8_HF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , y ; *              ] <sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
-     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx8hf>\t%0.h, %2.b, %3.b[%4]
+     [ w        , 0 , w , y ; *              ] <insn>\t%0.h, %2.b, %3.b[%4]
+     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b[%4]
   }
 )
 
-(define_insn "@aarch64_sve_add_lane_<sve2_fp8_fma_op_vnx4sf><mode>"
+(define_insn "@aarch64_sve_add_lane_<insn><mode>"
   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
	(unspec:VNx4SF_ONLY
	  [(match_operand:VNx4SF 1 "register_operand")
@@ -2067,11 +2067,11 @@
	   (match_operand:VNx16QI 3 "register_operand")
	   (match_operand:SI 4 "const_int_operand")
	   (reg:DI FPM_REGNUM)]
-	  SVE2_FP8_TERNARY_LANE_VNX4SF))]
+	  FMLALL_FP8_SF))]
   "TARGET_SSVE_FP8FMA"
   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
-     [ w        , 0 , w , y ; *              ] <sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
-     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<sve2_fp8_fma_op_vnx4sf>\t%0.s, %2.b, %3.b[%4]
+     [ w        , 0 , w , y ; *              ] <insn>\t%0.s, %2.b, %3.b[%4]
+     [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b[%4]
   }
 )
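The constraint tables above are unchanged apart from the renames: when the
destination register can be tied to the accumulator, the instruction is
emitted directly; otherwise a movprfx copies the accumulator first.
Schematically, for the FMLAL_FP8_HF case:

    fmlalb  z0.h, z2.b, z3.b            // accumulator already in z0
    movprfx z0, z1
    fmlalb  z0.h, z2.b, z3.b            // accumulator was in z1

Only the iterator and attribute names change here; the generated code is the
same as before.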
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b28ba63cc9c0..47caddfe0c0f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -41,6 +41,7 @@
 ;; Iterators for single modes, for "@" patterns.
 (define_mode_iterator SI_ONLY [SI])
 (define_mode_iterator DI_ONLY [DI])
+(define_mode_iterator V8HF_ONLY [V8HF])
 (define_mode_iterator V4SF_ONLY [V4SF])
 
 ;; Iterator for all integer modes (up to 64-bit)
@@ -3817,21 +3818,11 @@
      UNSPEC_F1CVTLT
      UNSPEC_F2CVTLT])
 
-(define_int_iterator SVE2_FP8_TERNARY_VNX8HF
+(define_int_iterator FMLAL_FP8_HF
   [UNSPEC_FMLALB_FP8
    UNSPEC_FMLALT_FP8])
 
-(define_int_iterator SVE2_FP8_TERNARY_VNX4SF
-  [UNSPEC_FMLALLBB_FP8
-   UNSPEC_FMLALLBT_FP8
-   UNSPEC_FMLALLTB_FP8
-   UNSPEC_FMLALLTT_FP8])
-
-(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX8HF
-  [UNSPEC_FMLALB_FP8
-   UNSPEC_FMLALT_FP8])
-
-(define_int_iterator SVE2_FP8_TERNARY_LANE_VNX4SF
+(define_int_iterator FMLALL_FP8_SF
   [UNSPEC_FMLALLBB_FP8
    UNSPEC_FMLALLBT_FP8
    UNSPEC_FMLALLTB_FP8
@@ -3859,6 +3850,12 @@
    (UNSPEC_FCVTN_FP8 "fcvtn")
    (UNSPEC_FDOT_FP8 "fdot")
    (UNSPEC_FDOT_LANE_FP8 "fdot")
+   (UNSPEC_FMLALB_FP8 "fmlalb")
+   (UNSPEC_FMLALT_FP8 "fmlalt")
+   (UNSPEC_FMLALLBB_FP8 "fmlallbb")
+   (UNSPEC_FMLALLBT_FP8 "fmlallbt")
+   (UNSPEC_FMLALLTB_FP8 "fmlalltb")
+   (UNSPEC_FMLALLTT_FP8 "fmlalltt")
    (UNSPEC_FSCALE "fscale")])
 
 ;; The optab associated with an operation.  Note that for ANDF, IORF
@@ -4858,13 +4855,3 @@
    (UNSPEC_F2CVT "f2cvt")
    (UNSPEC_F1CVTLT "f1cvtlt")
    (UNSPEC_F2CVTLT "f2cvtlt")])
-
-(define_int_attr sve2_fp8_fma_op_vnx8hf
-  [(UNSPEC_FMLALB_FP8 "fmlalb")
-   (UNSPEC_FMLALT_FP8 "fmlalt")])
-
-(define_int_attr sve2_fp8_fma_op_vnx4sf
-  [(UNSPEC_FMLALLBB_FP8 "fmlallbb")
-   (UNSPEC_FMLALLBT_FP8 "fmlallbt")
-   (UNSPEC_FMLALLTB_FP8 "fmlalltb")
-   (UNSPEC_FMLALLTT_FP8 "fmlalltt")])
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index fb3dc139f1f7..0dcfbec05bad 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -274,10 +274,27 @@
 #error Foo
 #endif
 
+#pragma GCC target "arch=armv9-a+fp8fma"
+#ifndef __ARM_FEATURE_FP8
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT4
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_FP8DOT2
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
+
 #pragma GCC target "arch=armv9-a+fp8dot4"
 #ifndef __ARM_FEATURE_FP8
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
 #ifndef __ARM_FEATURE_FP8DOT4
 #error Foo
 #endif
@@ -289,6 +306,9 @@
 #ifndef __ARM_FEATURE_FP8
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_FP8FMA
+#error Foo
+#endif
 #ifndef __ARM_FEATURE_FP8DOT4
 #error Foo
 #endif
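A minimal feature-detection sketch for user code, mirroring what the test
above checks: __ARM_FEATURE_FP8FMA is defined whenever FP8FMA is enabled,
including implicitly via +fp8dot4 or +fp8dot2, while +fp8fma alone defines
neither dot-product macro.

    #include <arm_neon.h>

    #ifdef __ARM_FEATURE_FP8FMA
    /* vmlalbq_f16_mf8_fpm and the other FP8FMA intrinsics are usable.  */
    #endif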
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c b/gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c
new file mode 100644
index 000000000000..51b47055ca2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmla_fpm.c
@@ -0,0 +1,365 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -march=armv9-a+fp8fma" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "arm_neon.h"
+
+/*
+** test_vmlalbq_f16_fpm:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.16b
+**	ret
+*/
+float16x8_t
+test_vmlalbq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalbq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlaltq_f16_fpm:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.16b
+**	ret
+*/
+float16x8_t
+test_vmlaltq_f16_fpm (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlaltq_f16_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallbbq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbbq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallbtq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbtq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlalltbq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalltbq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlallttq_f32_fpm:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.16b
+**	ret
+*/
+float32x4_t
+test_vmlallttq_f32_fpm (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallttq_f32_mf8_fpm (a, b, c, d);
+}
+
+/*
+** test_vmlalbq_lane_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalbq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalbq_lane_f16_fpm_7:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[7\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_lane_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalbq_lane_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlalbq_laneq_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalbq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalbq_laneq_f16_fpm_15:
+**	msr	fpmr, x0
+**	fmlalb	v0.8h, v1.16b, v2.b\[15\]
+**	ret
+*/
+float16x8_t
+test_vmlalbq_laneq_f16_fpm_15 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalbq_laneq_f16_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlaltq_lane_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_lane_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlaltq_lane_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlaltq_lane_f16_fpm_7:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[7\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_lane_f16_fpm_7 (float16x8_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlaltq_lane_f16_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlaltq_laneq_f16_fpm_0:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[0\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_laneq_f16_fpm_0 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlaltq_laneq_f16_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlaltq_laneq_f16_fpm_15:
+**	msr	fpmr, x0
+**	fmlalt	v0.8h, v1.16b, v2.b\[15\]
+**	ret
+*/
+float16x8_t
+test_vmlaltq_laneq_f16_fpm_15 (float16x8_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlaltq_laneq_f16_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallbbq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbbq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbbq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbbq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallbbq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbbq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbbq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlallbb	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlallbbq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbbq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallbtq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbtq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbtq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallbtq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallbtq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbtq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallbtq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlallbt	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlallbtq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallbtq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlalltbq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalltbq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalltbq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlalltbq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlalltbq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalltbq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlalltbq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlalltb	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlalltbq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlalltbq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
+
+/*
+** test_vmlallttq_lane_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_lane_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallttq_lane_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallttq_lane_f32_fpm_7:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[7\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_lane_f32_fpm_7 (float32x4_t a, mfloat8x16_t b, mfloat8x8_t c, fpm_t d)
+{
+  return vmlallttq_lane_f32_mf8_fpm (a, b, c, 7, d);
+}
+
+/*
+** test_vmlallttq_laneq_f32_fpm_0:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[0\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_laneq_f32_fpm_0 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallttq_laneq_f32_mf8_fpm (a, b, c, 0, d);
+}
+
+/*
+** test_vmlallttq_laneq_f32_fpm_15:
+**	msr	fpmr, x0
+**	fmlalltt	v0.4s, v1.16b, v2.b\[15\]
+**	ret
+*/
+float32x4_t
+test_vmlallttq_laneq_f32_fpm_15 (float32x4_t a, mfloat8x16_t b, mfloat8x16_t c, fpm_t d)
+{
+  return vmlallttq_laneq_f32_mf8_fpm (a, b, c, 15, d);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmla_lane_indices_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmla_lane_indices_1.c
new file mode 100644
index 000000000000..d1a69f4ba541
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmla_lane_indices_1.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+
+#include "arm_neon.h"
+
+#pragma GCC target "+fp8fma"
+
+void
+test(float16x4_t f16, float16x8_t f16q, float32x2_t f32,
+     float32x4_t f32q, mfloat8x8_t mf8, mfloat8x16_t mf8q, int x,
+     fpm_t fpm)
+{
+  vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlalbq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+  vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlalbq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+  vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlaltq_lane_f16_mf8_fpm' must be an integer constant expression} } */
+  vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlaltq_laneq_f16_mf8_fpm' must be an integer constant expression} } */
+
+  vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallbbq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallbtq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlalltbq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, x, fpm); /* { dg-error {argument 4 of 'vmlallttq_lane_f32_mf8_fpm' must be an integer constant expression} } */
+  vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, x, fpm); /* { dg-error {argument 4 of 'vmlallttq_laneq_f32_mf8_fpm' must be an integer constant expression} } */
+
+  vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalbq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlalbq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalbq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlalbq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlalbq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlaltq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlaltq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlaltq_lane_f16_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlaltq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlaltq_laneq_f16_mf8_fpm (f16q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlaltq_laneq_f16_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallbbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlallbbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallbbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbtq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbtq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallbtq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlallbtq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallbtq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalltbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalltbq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlalltbq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlalltbq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlalltbq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+
+  vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallttq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallttq_lane_f32_mf8_fpm (f32q, mf8q, mf8, 8, fpm); /* { dg-error { passing 8 to argument 4 of 'vmlallttq_lane_f32_mf8_fpm', which expects a value in the range \[0, 7\]} } */
+  vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, -1, fpm); /* { dg-error { passing -1 to argument 4 of 'vmlallttq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+  vmlallttq_laneq_f32_mf8_fpm (f32q, mf8q, mf8q, 16, fpm); /* { dg-error { passing 16 to argument 4 of 'vmlallttq_laneq_f32_mf8_fpm', which expects a value in the range \[0, 15\]} } */
+}
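For contrast with the dg-error cases above, a constant in-range index is
accepted and compiles to the indexed instruction forms checked in
vmla_fpm.c, e.g.:

    vmlalbq_lane_f16_mf8_fpm (f16q, mf8q, mf8, 7, fpm);  /* OK: 7 is within [0, 7].  */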