https://github.com/virginia-cangelosi updated https://github.com/llvm/llvm-project/pull/130127
>From 7950a93de5f8e77aac3468dff0d4ca8fbcb21c79 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi <virginia.cangel...@arm.com> Date: Thu, 6 Mar 2025 13:38:19 +0000 Subject: [PATCH 1/4] [Clang][llvm] Implement fp8 FMOP4A intrinsics --- clang/include/clang/Basic/arm_sme.td | 18 ++ .../sme2-intrinsics/acle_sme2_mop4_fp8.c | 160 ++++++++++++++++++ .../acle_sme2p2_fp8_imm.cpp | 31 ++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 33 ++++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 8 +- llvm/lib/Target/AArch64/SMEInstrFormats.td | 55 ++++-- .../AArch64/sme2-intrinsics-mop4-fp8.ll | 98 +++++++++++ 7 files changed, 388 insertions(+), 15 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c create mode 100644 clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1bfcf4c31d552..62b95e19bf78a 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -321,6 +321,24 @@ let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in { defm SVBMOP4S_H : MOP4<"s", "_za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; } +//////////////////////////////////////////////////////////////////////////////// +// SME2 - FP8 FMOP4A, FMOP4S + +multiclass MOP4_FP8<string za, string t, list<ImmCheck> checks> { + def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-f8f32" in { + defm SVMOP4A_FP8_ZA32 : MOP4_FP8<"_za32", "m", [ImmCheck<0, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-f8f16" in { + defm SVMOP4A_FP8_ZA16 : MOP4_FP8<"_za16", "m", [ImmCheck<0, ImmCheck0_1>]>; +} + //////////////////////////////////////////////////////////////////////////////// // SME2 - SMOP4A, SMOP4S, UMOP4A, UMOP4S diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c new file mode 100644 index 0000000000000..24fa11538dd32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + + +#include <arm_sme.h> + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za16_mf8_mf8_fpmu13__SVMfloat8_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za16_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za16_mf8_mf8_fpm13svmfloat8x2_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za32_mf8_mf8_fpmu13__SVMfloat8_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za32_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za32_mf8_mf8_fpm13svmfloat8x2_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x2_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} diff --git a/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp new file mode 100644 index 0000000000000..e031cfe9b3cb4 --- /dev/null +++ b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -target-feature +sme2p2 -target-feature +sme-mop4 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include <arm_sme.h> + +void tests_mop4_imm_1x1(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_1x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_1x2(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_1x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_2x1(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_2x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_2x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_2x2(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_2x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_2x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 2c6129cedebbf..97da76c713aa5 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3107,6 +3107,39 @@ let TargetPrefix = "aarch64" in { } } + class SME_FP8_OuterProduct_Intrinsic_Single_Single + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_Intrinsic_Single_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_Intrinsic_Multi_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + def int_aarch64_sme_fp8_fmop4a_za16_1x1 : SME_FP8_OuterProduct_Intrinsic_Single_Single; + def int_aarch64_sme_fp8_fmop4a_za32_1x1 : SME_FP8_OuterProduct_Intrinsic_Single_Single; + def int_aarch64_sme_fp8_fmop4a_za16_1x2 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_za32_1x2 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_za16_2x1 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_za32_2x1 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_za16_2x2 : SME_FP8_OuterProduct_Intrinsic_Multi_Multi; + def int_aarch64_sme_fp8_fmop4a_za32_2x2 : SME_FP8_OuterProduct_Intrinsic_Multi_Multi; + class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index f992f73171e0e..af93cdb14a620 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1090,8 +1090,8 @@ let Predicates = [HasSME_TMOP, HasSMEF8F16], Uses = [FPMR, FPCR] in { def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">; } -let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in { - defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">; +let Predicates = [HasSME_MOP4, HasSMEF8F16] in { + defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za16">; } let Predicates = [HasSME_TMOP, HasSMEF16F16] in { @@ -1108,10 +1108,8 @@ let Predicates = [HasSME2, HasSVEBFSCALE] in { defm BFMUL : sme2_bfmul_multi<"bfmul">; } -let Uses = [FPMR, FPCR] in { let Predicates = [HasSME_MOP4, HasSMEF8F32] in { - defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">; -} + defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za32">; } let Predicates = [HasSME_MOP4, HasSMEB16B16] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index b611dddb0b045..8181be87427ac 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5809,20 +5809,37 @@ class sme2_fp8_fp32_quarter_tile_outer_product<bit M, bit N, string mnemonic, Re let Inst{1-0} = ZAda; let Constraints = "$ZAda = $_ZAda"; + let Uses = [FPMR, FPCR]; } -multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic> { +multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic, string op> { // Single vectors - def _MZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>; + def _MZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>, SMEPseudo2Instr<NAME # _MZZ_BtoS, 1>; + + def NAME # _MZZ_BtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZPR8Mul2_Hi, SMEMatrixTileS>, SMEPseudo2Instr<NAME # _MZZ_BtoS, 0>; + + def : SME2_ZA_Tile_TwoVec_Pat<NAME # _MZZ_BtoS, !cast<SDPatternOperator>(op # "_1x1"), timm32_0_3, nxv16i8>; // Multiple and single vectors - def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>; + def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>, SMEPseudo2Instr<NAME # _M2ZZ_BtoS, 1>; + + def NAME # _M2ZZ_BtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, SMEMatrixTileS>, SMEPseudo2Instr<NAME # _M2ZZ_BtoS, 0>; + + def : SME2_ZA_Tile_Vec_Multi_Single_Pat<NAME # _M2ZZ_BtoS, !cast<SDPatternOperator>(op # "_2x1"), timm32_0_3, nxv16i8>; // Single and multiple vectors - def _MZ2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>; + def _MZ2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>, SMEPseudo2Instr<NAME # _MZ2Z_BtoS, 1>; + + def NAME # _MZ2Z_BtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, SMEMatrixTileS>, SMEPseudo2Instr<NAME # _MZ2Z_BtoS, 0>; + + def : SME2_ZA_Tile_Vec_Single_Multi_Pat<NAME # _MZ2Z_BtoS, !cast<SDPatternOperator>(op # "_1x2"), timm32_0_3, nxv16i8>; // Multiple vectors - def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>; + def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>, SMEPseudo2Instr<NAME # _M2Z2Z_BtoS, 1>; + + def NAME # _M2Z2Z_BtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi, SMEMatrixTileS>, SMEPseudo2Instr<NAME # _M2Z2Z_BtoS, 0>; + + def : SME2_ZA_Tile_Vec_Multi_Multi_Pat<NAME # _M2Z2Z_BtoS, !cast<SDPatternOperator>(op # "_2x2"), timm32_0_3, nxv16i8>; } class sme2_bf16_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty> @@ -6056,20 +6073,38 @@ class sme2_fp8_fp16_quarter_tile_outer_product<bit M, bit N, string mnemonic, Re let Inst{0} = ZAda; let Constraints = "$ZAda = $_ZAda"; + let Uses = [FPMR, FPCR]; } -multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> { +multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic, string op> { + // Single vectors - def _MZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>; + def _MZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>, SMEPseudo2Instr<NAME # _MZZ_BtoH, 1>; + + def NAME # _MZZ_BtoH_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZPR8Mul2_Hi, SMEMatrixTileH>, SMEPseudo2Instr<NAME # _MZZ_BtoH, 0>; + + def : SME2_ZA_Tile_TwoVec_Pat<NAME # _MZZ_BtoH, !cast<SDPatternOperator>(op # "_1x1"), timm32_0_1, nxv16i8>; // Multiple and single vectors - def _M2ZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>; + def _M2ZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>, SMEPseudo2Instr<NAME # _M2ZZ_BtoH, 1>; + + def NAME # _M2ZZ_BtoH_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZZ_b_mul_r_Lo, ZPR8Mul2_Hi, SMEMatrixTileH>, SMEPseudo2Instr<NAME # _M2ZZ_BtoH, 0>; + + def : SME2_ZA_Tile_Vec_Multi_Single_Pat<NAME # _M2ZZ_BtoH, !cast<SDPatternOperator>(op # "_2x1"), timm32_0_1, nxv16i8>; // Single and multiple vectors - def _MZ2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>; + def _MZ2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>, SMEPseudo2Instr<NAME # _MZ2Z_BtoH, 1>; + + def NAME # _MZ2Z_BtoH_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, SMEMatrixTileH>, SMEPseudo2Instr<NAME # _MZ2Z_BtoH, 0>; + + def : SME2_ZA_Tile_Vec_Single_Multi_Pat<NAME # _MZ2Z_BtoH, !cast<SDPatternOperator>(op # "_1x2"), timm32_0_1, nxv16i8>; // Multiple vectors - def _M2Z2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>; + def _M2Z2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>, SMEPseudo2Instr<NAME # _M2Z2Z_BtoH, 1>; + + def NAME # _M2Z2Z_BtoH_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi, SMEMatrixTileH>, SMEPseudo2Instr<NAME # _M2Z2Z_BtoH, 0>; + + def : SME2_ZA_Tile_Vec_Multi_Multi_Pat<NAME # _M2Z2Z_BtoH, !cast<SDPatternOperator>(op # "_2x2"), timm32_0_1, nxv16i8>; } // FP8 SME FDOT instructions diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll new file mode 100644 index 0000000000000..5a0cf8e57904b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -force-streaming -verify-machineinstrs < %s | FileCheck %s + +target triple = "aarch64-linux" + +define void @mop4a_za16_fp8_1x1(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) #0 { +; CHECK-LABEL: mop4a_za16_fp8_1x1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.h, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 0, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) + ret void +} + +define void @mop4a_za16_fp8_1x2(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { +; CHECK-LABEL: mop4a_za16_fp8_1x2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.h, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 0, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) + ret void +} + +define void @mop4a_za16_fp8_2x1(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm) #0 { +; CHECK-LABEL: mop4a_za16_fp8_2x1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z2.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmop4a za0.h, { z0.b, z1.b }, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm) + ret void +} + +define void @mop4a_za16_fp8_2x2(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { +; CHECK-LABEL: mop4a_za16_fp8_2x2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov z24.d, z2.d +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmop4a za0.h, { z0.b, z1.b }, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) + ret void +} + +define void @mop4a_za32_fp8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) #0 { +; CHECK-LABEL: mop4a_za32_fp8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.s, z0.b, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 0, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) + ret void +} + +define void @mop4a_za32_fp8_1x2(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { +; CHECK-LABEL: mop4a_za32_fp8_1x2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmop4a za0.s, z0.b, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 0, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) + ret void +} + +define void @mop4a_za32_fp8_2x1(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm) #0 { +; CHECK-LABEL: mop4a_za32_fp8_2x1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z24.d, z2.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmop4a za0.s, { z0.b, z1.b }, z24.b +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm) + ret void +} + +define void @mop4a_za32_fp8_2x2(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) #0 { +; CHECK-LABEL: mop4a_za32_fp8_2x2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z25.d, z3.d +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov z24.d, z2.d +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) + ret void +} + +attributes #0 = {nounwind "target-features" = "+sme-f8f16,+sme-f8f32,+sme2p1,+sme-mop4" } >From 752d9fc74be7514d290b18ea925a0bdf4387b22c Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi <virginia.cangel...@arm.com> Date: Mon, 10 Mar 2025 09:44:16 +0000 Subject: [PATCH 2/4] Address review comments --- clang/include/clang/Basic/arm_sme.td | 14 +++++++------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 15 +++++++-------- llvm/lib/Target/AArch64/SMEInstrFormats.td | 8 ++++---- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 62b95e19bf78a..bb501122d9526 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -324,19 +324,19 @@ let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in { //////////////////////////////////////////////////////////////////////////////// // SME2 - FP8 FMOP4A, FMOP4S -multiclass MOP4_FP8<string za, string t, list<ImmCheck> checks> { - def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; - def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; - def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; - def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", t, MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; +multiclass MOP4_FP8<string za, list<ImmCheck> checks> { + def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; } let SMETargetGuard = "sme2,sme-mop4,sme-f8f32" in { - defm SVMOP4A_FP8_ZA32 : MOP4_FP8<"_za32", "m", [ImmCheck<0, ImmCheck0_3>]>; + defm SVMOP4A_FP8_ZA32 : MOP4_FP8<"_za32", [ImmCheck<0, ImmCheck0_3>]>; } let SMETargetGuard = "sme2,sme-mop4,sme-f8f16" in { - defm SVMOP4A_FP8_ZA16 : MOP4_FP8<"_za16", "m", [ImmCheck<0, ImmCheck0_1>]>; + defm SVMOP4A_FP8_ZA16 : MOP4_FP8<"_za16", [ImmCheck<0, ImmCheck0_1>]>; } //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 97da76c713aa5..075366df178d9 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3131,14 +3131,13 @@ let TargetPrefix = "aarch64" in { llvm_nxv16i8_ty], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; - def int_aarch64_sme_fp8_fmop4a_za16_1x1 : SME_FP8_OuterProduct_Intrinsic_Single_Single; - def int_aarch64_sme_fp8_fmop4a_za32_1x1 : SME_FP8_OuterProduct_Intrinsic_Single_Single; - def int_aarch64_sme_fp8_fmop4a_za16_1x2 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_za32_1x2 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_za16_2x1 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_za32_2x1 : SME_FP8_OuterProduct_Intrinsic_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_za16_2x2 : SME_FP8_OuterProduct_Intrinsic_Multi_Multi; - def int_aarch64_sme_fp8_fmop4a_za32_2x2 : SME_FP8_OuterProduct_Intrinsic_Multi_Multi; + // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product + foreach za = ["za16", "za32"] in { + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_Intrinsic_Single_Single; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_Intrinsic_Multi_Multi; + } class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 8181be87427ac..95bccbfdfae5d 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5818,7 +5818,7 @@ multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic, string op> { def NAME # _MZZ_BtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZPR8Mul2_Hi, SMEMatrixTileS>, SMEPseudo2Instr<NAME # _MZZ_BtoS, 0>; - def : SME2_ZA_Tile_TwoVec_Pat<NAME # _MZZ_BtoS, !cast<SDPatternOperator>(op # "_1x1"), timm32_0_3, nxv16i8>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat<NAME # _MZZ_BtoS, !cast<SDPatternOperator>(op # "_1x1"), timm32_0_3, nxv16i8>; // Multiple and single vectors def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>, SMEPseudo2Instr<NAME # _M2ZZ_BtoS, 1>; @@ -5832,7 +5832,7 @@ multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic, string op> { def NAME # _MZ2Z_BtoS_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, SMEMatrixTileS>, SMEPseudo2Instr<NAME # _MZ2Z_BtoS, 0>; - def : SME2_ZA_Tile_Vec_Single_Multi_Pat<NAME # _MZ2Z_BtoS, !cast<SDPatternOperator>(op # "_1x2"), timm32_0_3, nxv16i8>; + def : SME2_ZA_Tile_Vec_Multi_Pat<NAME # _MZ2Z_BtoS, !cast<SDPatternOperator>(op # "_1x2"), timm32_0_3, nxv16i8>; // Multiple vectors def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>, SMEPseudo2Instr<NAME # _M2Z2Z_BtoS, 1>; @@ -6083,7 +6083,7 @@ multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic, string op> { def NAME # _MZZ_BtoH_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZPR8Mul2_Hi, SMEMatrixTileH>, SMEPseudo2Instr<NAME # _MZZ_BtoH, 0>; - def : SME2_ZA_Tile_TwoVec_Pat<NAME # _MZZ_BtoH, !cast<SDPatternOperator>(op # "_1x1"), timm32_0_1, nxv16i8>; + def : SME2_ZA_Tile_Vec_Single_Single_Pat<NAME # _MZZ_BtoH, !cast<SDPatternOperator>(op # "_1x1"), timm32_0_1, nxv16i8>; // Multiple and single vectors def _M2ZZ_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b0, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>, SMEPseudo2Instr<NAME # _M2ZZ_BtoH, 1>; @@ -6097,7 +6097,7 @@ multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic, string op> { def NAME # _MZ2Z_BtoH_PSEUDO : sme2_quarter_tile_outer_product_pseudo<ZPR8Mul2_Lo, ZZ_b_mul_r_Hi, SMEMatrixTileH>, SMEPseudo2Instr<NAME # _MZ2Z_BtoH, 0>; - def : SME2_ZA_Tile_Vec_Single_Multi_Pat<NAME # _MZ2Z_BtoH, !cast<SDPatternOperator>(op # "_1x2"), timm32_0_1, nxv16i8>; + def : SME2_ZA_Tile_Vec_Multi_Pat<NAME # _MZ2Z_BtoH, !cast<SDPatternOperator>(op # "_1x2"), timm32_0_1, nxv16i8>; // Multiple vectors def _M2Z2Z_BtoH : sme2_fp8_fp16_quarter_tile_outer_product<0b1, 0b1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>, SMEPseudo2Instr<NAME # _M2Z2Z_BtoH, 1>; >From f453dc2100725a77039e99a6ea4558ce95d91a39 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi <virginia.cangel...@arm.com> Date: Mon, 14 Apr 2025 10:23:40 +0000 Subject: [PATCH 3/4] Fix tests --- .../sme2-intrinsics/acle_sme2_mop4_fp8.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c index 24fa11538dd32..f89277bb049f8 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c @@ -2,17 +2,17 @@ // REQUIRES: aarch64-registered-target // RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s #include <arm_sme.h> #ifdef SME_OVERLOADED_FORMS -#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 #else -#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#define SME_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 #endif // CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za16_mf8_mf8_fpm( @@ -30,7 +30,7 @@ // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_1x1,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } // CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za16_mf8_mf8_fpm( @@ -48,7 +48,7 @@ void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_1x2,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } // CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za16_mf8_mf8_fpm( @@ -66,7 +66,7 @@ void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t f // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_2x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_2x1,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } // CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za16_mf8_mf8_fpm( @@ -84,7 +84,7 @@ void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t f // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_2x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_2x2,_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } // CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za32_mf8_mf8_fpm( @@ -102,7 +102,7 @@ void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_1x1,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } // CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za32_mf8_mf8_fpm( @@ -120,7 +120,7 @@ void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_1x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_1x2,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } // CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za32_mf8_mf8_fpm( @@ -138,7 +138,7 @@ void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t f // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_2x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_2x1,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } // CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za32_mf8_mf8_fpm( @@ -156,5 +156,5 @@ void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t f // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x2_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SME_ACLE_FUNC(svmop4a_2x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); + SME_ACLE_FUNC(svmop4a,_2x2,_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); } >From bace01c263b864cd2a7eeff8e0dab64488d3f850 Mon Sep 17 00:00:00 2001 From: Virginia Cangelosi <virginia.cangel...@arm.com> Date: Wed, 16 Apr 2025 13:31:48 +0000 Subject: [PATCH 4/4] Reduce number of records --- clang/include/clang/Basic/arm_sme.td | 8 ++-- .../sme2-intrinsics/acle_sme2_mop4_fp8.c | 32 ++++++++-------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 38 ++++--------------- 3 files changed, 27 insertions(+), 51 deletions(-) diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index bb501122d9526..d6bb5b34a91ac 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -325,10 +325,10 @@ let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in { // SME2 - FP8 FMOP4A, FMOP4S multiclass MOP4_FP8<string za, list<ImmCheck> checks> { - def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; - def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; - def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; - def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming], checks>; + def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming], checks>; + def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming], checks>; + def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming], checks>; } let SMETargetGuard = "sme2,sme-mop4,sme-f8f32" in { diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c index f89277bb049f8..f0d6f9d341dfc 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c @@ -19,14 +19,14 @@ // CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za16_mf8_mf8_fpmu13__SVMfloat8_tS_m( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -37,14 +37,14 @@ void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm // CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za16_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -55,14 +55,14 @@ void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t f // CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -73,14 +73,14 @@ void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t f // CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za16_mf8_mf8_fpm13svmfloat8x2_tS_m( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -91,14 +91,14 @@ void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t // CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za32_mf8_mf8_fpmu13__SVMfloat8_tS_m( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -109,14 +109,14 @@ void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm // CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za32_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -127,14 +127,14 @@ void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t f // CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { @@ -145,14 +145,14 @@ void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t f // CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CHECK-NEXT: ret void // // CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za32_mf8_mf8_fpm13svmfloat8x2_tS_m( // CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { // CPP-CHECK-NEXT: [[ENTRY:.*:]] // CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) -// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2.nxv16i8(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) // CPP-CHECK-NEXT: ret void // void test_svmop4a_2x2_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 075366df178d9..d2e20c0a7e66f 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3068,14 +3068,14 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, - LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; class SME_OuterProduct_QuarterTile_Single_Multi : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, - LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; class SME_OuterProduct_QuarterTile_Multi_Multi : DefaultAttrsIntrinsic<[], @@ -3083,7 +3083,7 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, - LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S foreach mode = ["s", "a"] in { @@ -3107,36 +3107,12 @@ let TargetPrefix = "aarch64" in { } } - class SME_FP8_OuterProduct_Intrinsic_Single_Single - : DefaultAttrsIntrinsic<[], - [llvm_i32_ty, - llvm_nxv16i8_ty, - llvm_nxv16i8_ty], - [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; - - class SME_FP8_OuterProduct_Intrinsic_Single_Multi - : DefaultAttrsIntrinsic<[], - [llvm_i32_ty, - llvm_nxv16i8_ty, - llvm_nxv16i8_ty, - llvm_nxv16i8_ty], - [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; - - class SME_FP8_OuterProduct_Intrinsic_Multi_Multi - : DefaultAttrsIntrinsic<[], - [llvm_i32_ty, - llvm_nxv16i8_ty, - llvm_nxv16i8_ty, - llvm_nxv16i8_ty, - llvm_nxv16i8_ty], - [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; - // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product foreach za = ["za16", "za32"] in { - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_Intrinsic_Single_Single; - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_Intrinsic_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_Intrinsic_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_Intrinsic_Multi_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi; } class SME_AddVectorToTile_Intrinsic _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits