Author: Momchil Velikov Date: 2024-12-11T13:37:15Z New Revision: b1d8c60dd479d9c5d58bcfe33db57b68f834938d
URL: https://github.com/llvm/llvm-project/commit/b1d8c60dd479d9c5d58bcfe33db57b68f834938d DIFF: https://github.com/llvm/llvm-project/commit/b1d8c60dd479d9c5d58bcfe33db57b68f834938d.diff LOG: [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (#118124) This patch adds the following instrinsics: * Half-precision and BFloat16 convert, narrow, and interleave to 8-bit floating-point. // Variant is also available for: _bf16_x2 svmfloat8_t svcvtn_mf8[_f16_x2]_fpm(svfloat16x2_t zn, fpm_t fpm); * Single-precision convert, narrow, and interleave to 8-bit floating-point (top and bottom). svmfloat8_t svcvtnt_mf8[_f32_x2]_fpm(svmfloat8_t zd, svfloat32x2_t zn, fpm_t fpm); svmfloat8_t svcvtnb_mf8[_f32_x2]_fpm(svfloat32x2_t zn, fpm_t fpm); Added: clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll Modified: clang/include/clang/Basic/arm_sve.td clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c llvm/include/llvm/IR/IntrinsicsAArch64.td llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td llvm/lib/Target/AArch64/SVEInstrFormats.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 30dc5c55d411ab..b9f40faf0b18e6 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2468,4 +2468,11 @@ let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in { // 8-bit floating-point convert to BFloat16/Float16 (top) def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; + + // BFloat16/Float16 convert, narrow and interleave to 8-bit floating-point + def SVFCVTN : SInst<"svcvtn_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, "aarch64_sve_fp8_cvtn", [VerifyRuntimeMode, SetsFPMR]>; + + // Single-precision convert, narrow and interleave to 8-bit floating-point (top and bottom) + def SVFCVTNB : SInst<"svcvtnb_mf8[_f32_x2]_fpm", "~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnb", [VerifyRuntimeMode, SetsFPMR]>; + def SVFCVTNT : SInst<"svcvtnt_mf8[_f32_x2]_fpm", "~~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnt", [VerifyRuntimeMode, SetsFPMR]>; } diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c new file mode 100644 index 00000000000000..ed5b0ce02af4bd --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c @@ -0,0 +1,101 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include <arm_sme.h> +#else +#include <arm_sve.h> +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtn_f8_bf16( +// CHECK-SAME: <vscale x 8 x bfloat> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> [[ZN_ZM_COERCE0]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svcvtn_f8_bf1614svbfloat16x2_tm( +// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> [[ZN_ZM_COERCE0]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtn_f8_f16( +// CHECK-SAME: <vscale x 8 x half> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> [[ZN_ZM_COERCE0]], <vscale x 8 x half> [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z18test_svcvtn_f8_f1613svfloat16x2_tm( +// CHECK-CXX-SAME: <vscale x 8 x half> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> [[ZN_ZM_COERCE0]], <vscale x 8 x half> [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svcvtn_f8_f16(svfloat16x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtn_mf8,_f16_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtnb_f8_f32( +// CHECK-SAME: <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svcvtnb_f8_f3213svfloat32x2_tm( +// CHECK-CXX-SAME: <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svcvtnb_f8_f32(svfloat32x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtnb_mf8,_f32_x2,_fpm)(zn_zm, fpm); +} + +// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtnt_f8_f32( +// CHECK-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnt.nxv4f32(<vscale x 16 x i8> [[ZD]], <vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svcvtnt_f8_f32u13__SVMfloat8_t13svfloat32x2_tm( +// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnt.nxv4f32(<vscale x 16 x i8> [[ZD]], <vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]]) +// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_svcvtnt_f8_f32(svmfloat8_t zd, svfloat32x2_t zn_zm, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvtnt_mf8,_f32_x2,_fpm)(zd, zn_zm, fpm); +} diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c index 2e94c2314f1824..e47efccf480433 100644 --- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c +++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c @@ -1,6 +1,6 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -emit-llvm -o - %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -verify -emit-llvm -o - %s #include <arm_sve.h> @@ -21,4 +21,13 @@ void test_features(svmfloat8_t zn, fpm_t fpm) { // expected-error@-1 {{'svcvtlt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} svcvtlt2_f16_mf8_fpm(zn, fpm); // expected-error@-1 {{'svcvtlt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + + svcvtn_mf8_bf16_x2_fpm(svcreate2(svundef_bf16(), svundef_bf16()), fpm); + // expected-error@-1 {{'svcvtn_mf8_bf16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtn_mf8_f16_x2_fpm(svcreate2(svundef_f16(), svundef_f16()), fpm); + // expected-error@-1 {{'svcvtn_mf8_f16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtnb_mf8_f32_x2_fpm(svcreate2(svundef_f32(), svundef_f32()), fpm); + // expected-error@-1 {{'svcvtnb_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} + svcvtnt_mf8_f32_x2_fpm(zn, svcreate2(svundef_f32(), svundef_f32()), fpm); + // expected-error@-1 {{'svcvtnt_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}} } diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b2f0aa2f7e4d90..0a1bd4c923b9b8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3872,6 +3872,20 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt; def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt; + // SVE Narrowing Conversions + class SVE2_FP8_Narrow_Cvt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; + def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; + + def int_aarch64_sve_fp8_cvtnt + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + class SME2_FP8_CVT_X2_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_nxv16i8_ty], diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index da585dd3a21c88..1a5be28dce4a0c 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4379,10 +4379,11 @@ defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aar defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>; // FP8 downconvert -defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>; -defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>; -defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>; -defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>; +defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r, nxv8f16, int_aarch64_sve_fp8_cvtn>; +defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnb>; +defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r, nxv8bf16, int_aarch64_sve_fp8_cvtn>; + +defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single_top<0b11, "fcvtnt", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnt>; } // End HasSVE2orSME2, HasFP8 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 9ae66518dfb4ed..92aad3d2aec48b 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -10794,10 +10794,45 @@ class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, let Inst{5} = 0b0; let Inst{4-0} = Zd; let Uses = [FPMR, FPCR]; + + let mayLoad = 1; + let mayStore = 0; } -multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> { +multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src, + ValueType ty, SDPatternOperator op> { def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>; + + def : Pat<(nxv16i8 (op ty:$Zn1, ty:$Zn2)), + (!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; +} + +class sve2_fp8_down_cvt_single_top<bits<2> opc, string mnemonic, RegisterOperand src_ty> + : I<(outs ZPR8:$Zd), (ins ZPR8:$_Zd, src_ty:$Zn), mnemonic, "\t$Zd, $Zn","", []>, Sched<[]> { + bits<5> Zd; + bits<4> Zn; + + let Inst{31-12} = 0b01100101000010100011; + let Inst{11-10} = opc; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-0} = Zd; + + let Constraints = "$Zd = $_Zd"; + let DestructiveInstType = DestructiveOther; + let ElementSize = ZPR8.ElementSize; + + let Uses = [FPMR, FPCR]; + let mayLoad = 1; + let mayStore = 0; +} + +multiclass sve2_fp8_down_cvt_single_top<bits<2> opc, string mnemonic, RegisterOperand src_ty, + ValueType ty, SDPatternOperator op> { + def NAME : sve2_fp8_down_cvt_single_top<opc, mnemonic, src_ty>; + + def : Pat<(nxv16i8 (op nxv16i8:$Zd, ty:$Zn1, ty:$Zn2)), + (!cast<Instruction>(NAME) $Zd, (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; } // FP8 Widening Multiply-Add Long - Indexed Group diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll new file mode 100644 index 00000000000000..2ffba10e211007 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s +; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s + +target triple = "aarch64-linux" + +define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) { +; CHECK-LABEL: cvtn_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfcvtn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) + ret <vscale x 16 x i8> %r +} + +define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) { +; CHECK-LABEL: cvtn_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtn z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) + ret <vscale x 16 x i8> %r +} + +define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) { +; CHECK-LABEL: cvtnb_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvtnb z0.b, { z0.s, z1.s } +; CHECK-NEXT: ret + %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) + ret <vscale x 16 x i8> %r +} + +define <vscale x 16 x i8> @cvtnt_f32(<vscale x 16 x i8> %d, <vscale x 4 x float> %s1, <vscale x 4 x float> %s2) { +; CHECK-LABEL: cvtnt_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: fcvtnt z0.b, { z2.s, z3.s } +; CHECK-NEXT: ret + %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnt.nxv4f32(<vscale x 16 x i8> %d, <vscale x 4 x float> %s1, <vscale x 4 x float> %s2) + ret <vscale x 16 x i8> %r +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits