llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Virginia Cangelosi (virginia-cangelosi) <details> <summary>Changes</summary> Implement all mf8 FMOP4A instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files. This PR depends on https://github.com/llvm/llvm-project/pull/127797 --- Patch is 29.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130127.diff 7 Files Affected: - (modified) clang/include/clang/Basic/arm_sme.td (+18) - (added) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c (+160) - (added) clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp (+31) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+33) - (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+3-5) - (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+45-10) - (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll (+98) ``````````diff diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1bfcf4c31d552..bb501122d9526 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -321,6 +321,24 @@ let SMETargetGuard = "sme2,sme-mop4,sme-b16b16" in { defm SVBMOP4S_H : MOP4<"s", "_za16", "b", "aarch64_sme_mop4s", [ImmCheck<0, ImmCheck0_1>]>; } +//////////////////////////////////////////////////////////////////////////////// +// SME2 - FP8 FMOP4A, FMOP4S + +multiclass MOP4_FP8<string za, list<ImmCheck> checks> { + def _1x1 : Inst<"svmop4a" # "[_1x1]" # za # "[_{d}_{d}]", "vidd>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _1x2 : Inst<"svmop4a" # "[_1x2]" # za # "[_{d}_{d}]", "vid2>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_1x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _2x1 : Inst<"svmop4a" # "[_2x1]" # za # "[_{d}_{d}]", "vi2d>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x1", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; + def _2x2 : Inst<"svmop4a" # "[_2x2]" # za # "[_{d}_{d}]", "vi22>", "m", MergeNone, "aarch64_sme_fp8_fmop4a" # za # "_2x2", [IsInOutZA, IsStreaming, IsOverloadNone], checks>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-f8f32" in { + defm SVMOP4A_FP8_ZA32 : MOP4_FP8<"_za32", [ImmCheck<0, ImmCheck0_3>]>; +} + +let SMETargetGuard = "sme2,sme-mop4,sme-f8f16" in { + defm SVMOP4A_FP8_ZA16 : MOP4_FP8<"_za16", [ImmCheck<0, ImmCheck0_1>]>; +} + //////////////////////////////////////////////////////////////////////////////// // SME2 - SMOP4A, SMOP4S, UMOP4A, UMOP4S diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c new file mode 100644 index 0000000000000..24fa11538dd32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_fp8.c @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -target-feature +sme-mop4 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + + +#include <arm_sme.h> + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za16_mf8_mf8_fpmu13__SVMfloat8_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za16_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za16_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za16_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x1_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x1_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za16_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za16_mf8_mf8_fpm13svmfloat8x2_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x2_za16_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x2_za16,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x1_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x1_za32_mf8_mf8_fpmu13__SVMfloat8_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x1(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_1x2_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_1x2_za32_mf8_mf8_fpmu13__SVMfloat8_t13svmfloat8x2_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.1x2(i32 1, <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x2_za32_mf8_mf8_fpm(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x1_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x1_za32_mf8_mf8_fpm13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x1_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x1_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svmop4a_2x2_za32_mf8_mf8_fpm( +// CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z33test_svmop4a_2x2_za32_mf8_mf8_fpm13svmfloat8x2_tS_m( +// CPP-CHECK-SAME: <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 1, <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_2x2_za32_mf8_mf8_fpm(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_2x2_za32,_mf8_mf8,_fpm)(1, zn, zm, fpmr); +} diff --git a/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp new file mode 100644 index 0000000000000..e031cfe9b3cb4 --- /dev/null +++ b/clang/test/Sema/aarch64-sme2p2-instrinsics/acle_sme2p2_fp8_imm.cpp @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -target-feature +sme2p2 -target-feature +sme-mop4 -target-feature +sme-f8f32 -target-feature +sme-f8f16 -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include <arm_sme.h> + +void tests_mop4_imm_1x1(svmfloat8_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_1x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_1x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_1x2(svmfloat8_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_1x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_1x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_2x1(svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_2x1_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_2x1_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} + +void tests_mop4_imm_2x2(svmfloat8x2_t zn, svmfloat8x2_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { + svmop4a_2x2_za16_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + svmop4a_2x2_za32_mf8_mf8_fpm(-1, zn, zm, fpmr); // expected-error {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + return; +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 77ea0bcaa4b5f..eae23af98711a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3107,6 +3107,38 @@ let TargetPrefix = "aarch64" in { } } + class SME_FP8_OuterProduct_Intrinsic_Single_Single + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_Intrinsic_Single_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_Intrinsic_Multi_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty, + llvm_nxv16i8_ty], + [ImmArg<ArgIndex<0>>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + + // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product + foreach za = ["za16", "za32"] in { + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_Intrinsic_Single_Single; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_Intrinsic_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_Intrinsic_Multi_Multi; + } + class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, @@ -4096,6 +4128,7 @@ let TargetPrefix = "aarch64" in { llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [IntrInaccessibleMemOnly, IntrHasSideEffects]>; + // // CVT from FP8 to half-precision/BFloat16 multi-vector // diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index f992f73171e0e..af93cdb14a620 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1090,8 +1090,8 @@ let Predicates = [HasSME_TMOP, HasSMEF8F16], Uses = [FPMR, FPCR] in { def FTMOPA_M2ZZZI_BtoH : sme_tmopa_16b<0b01001, ZZ_b_mul_r, ZPR8, "ftmopa">; } -let Predicates = [HasSME_MOP4, HasSMEF8F16], Uses = [FPMR, FPCR] in { - defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a">; +let Predicates = [HasSME_MOP4, HasSMEF8F16] in { + defm FMOP4A : sme2_fmop4a_fp8_fp16_2way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za16">; } let Predicates = [HasSME_TMOP, HasSMEF16F16] in { @@ -1108,10 +1108,8 @@ let Predicates = [HasSME2, HasSVEBFSCALE] in { defm BFMUL : sme2_bfmul_multi<"bfmul">; } -let Uses = [FPMR, FPCR] in { let Predicates = [HasSME_MOP4, HasSMEF8F32] in { - defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">; -} + defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a", "int_aarch64_sme_fp8_fmop4a_za32">; } let Predicates = [HasSME_MOP4, HasSMEB16B16] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index c008cda21cf05..d701a12d31061 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5774,20 +5774,37 @@ class sme2_fp8_fp32_quarter_tile_outer_product<bit M, bit N, string mnemonic, Re let In... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/130127 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits