https://github.com/MartinWehking created https://github.com/llvm/llvm-project/pull/186807
Add Clang/LLVM intrinsics for svcvt, scvtflt, ucvtf, ucvtflt and fcvtzsn, fcvtzun. The Clang intrinsics are guarded by the sve2.3 and sme2.3 feature flags. ACLE Patch: https://github.com/ARM-software/acle/pull/428 >From 759eecd18b5ba07c1d1c7fc151cf2c824baffb1b Mon Sep 17 00:00:00 2001 From: Martin Wehking <[email protected]> Date: Mon, 16 Mar 2026 11:01:10 +0000 Subject: [PATCH] [AArch64] Add 9.7 data processing intrinsics Add Clang/LLVM intrinsics for svcvt, scvtflt, ucvtf, ucvtflt and fcvtzsn, fcvtzun. The Clang intrinsics are guarded by the sve2.3 and sme2.3 feature flags. ACLE Patch: https://github.com/ARM-software/acle/pull/428 --- clang/include/clang/Basic/arm_sve.td | 27 ++ .../acle_sve2_fp_int_cvtn_x2.c | 105 ++++++++ .../sve2p3-intrinsics/acle_sve2_int_fp_cvt.c | 189 +++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 33 +++ .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 12 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 15 +- .../AArch64/sve2p3-intrinsics-fp-converts.ll | 255 ++++++++++++++++++ .../sve2p3-intrinsics-fp-converts_x2.ll | 157 +++++++++++ 8 files changed, 785 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c create mode 100644 clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll create mode 100644 llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index be3cd8a76503b..852cc60c6e0b3 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -997,6 +997,33 @@ def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "a def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>; } + +let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = 
"sve2p3|sme2p3" in { +def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_S16_F32 : SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; + +def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; + +def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, 
VerifyRuntimeMode]>; +def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; + +def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>; +} + //////////////////////////////////////////////////////////////////////////////// // Permutations and selection diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c new file mode 100644 index 0000000000000..a4a7c58e1ced9 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_fp_int_cvtn_x2.c @@ -0,0 +1,105 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o 
/dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +// CHECK-LABEL: @test_svcvt_s8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcvt_s8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svint8_t test_svcvt_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return svcvt_s8_f16_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_s16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svcvt_s16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svint16_t test_svcvt_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return svcvt_s16_f32_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_s32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +// CPP-CHECK-LABEL: 
@_Z21test_svcvt_s32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +svint32_t test_svcvt_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return svcvt_s32_f64_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_u8_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z20test_svcvt_u8_f16_x213svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svuint8_t test_svcvt_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR { + return svcvt_u8_f16_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_u16_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svcvt_u16_f32_x213svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]] +// +svuint16_t test_svcvt_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR { + return svcvt_u16_f32_x2(zn); +} + +// CHECK-LABEL: @test_svcvt_u32_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f64_x213svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]] +// +svuint32_t test_svcvt_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR { + return svcvt_u32_f64_x2(zn); +} diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c new file mode 100644 index 0000000000000..6b7252e045e33 --- /dev/null +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2_int_fp_cvt.c @@ -0,0 +1,189 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\ +// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// +// REQUIRES: aarch64-registered-target + +#include <arm_sve.h> + +#if 
defined __ARM_FEATURE_SME +#define MODE_ATTR __arm_streaming +#else +#define MODE_ATTR +#endif + +// CHECK-LABEL: @test_svcvtb_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR { + return svcvtb_f16_s8(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR { + return svcvtb_f32_s16(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR { + return svcvtb_f64_s32(zn); +} + +// CHECK-LABEL: @test_svcvtb_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR { + return svcvtb_f16_u8(zn); +} + +// CHECK-LABEL: @test_svcvtb_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR { + return svcvtb_f32_u16(zn); +} + +// CHECK-LABEL: @test_svcvtb_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR { + return svcvtb_f64_u32(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// 
CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR { + return svcvtt_f16_s8(zn); +} + +// CHECK-LABEL: @test_svcvt_f32_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR { + return svcvtt_f32_s16(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR { + return svcvtt_f64_s32(zn); +} + +// CHECK-LABEL: @test_svcvt_f16_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]] +// +svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR { + return svcvtt_f16_u8(zn); +} + +// CHECK-LABEL: @test_svcvt_f32_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]] +// +svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR { + return svcvtt_f32_u16(zn); +} + +// CHECK-LABEL: @test_svcvt_f64_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]] +// +svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR { + return svcvtt_f64_u32(zn); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 75929cbc222ad..d9f7314740953 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -1051,6 +1051,7 @@ def llvm_nxv4i1_ty : LLVMType<nxv4i1>; def llvm_nxv8i1_ty : LLVMType<nxv8i1>; def llvm_nxv16i1_ty : LLVMType<nxv16i1>; def llvm_nxv16i8_ty : LLVMType<nxv16i8>; +def llvm_nxv8i16_ty : LLVMType<nxv8i16>; def llvm_nxv4i32_ty : LLVMType<nxv4i32>; def llvm_nxv2i64_ty : LLVMType<nxv2i64>; def llvm_nxv8f16_ty : 
LLVMType<nxv8f16>; @@ -2610,6 +2611,29 @@ def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic; def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic; +// +// SVE2 - Signed/unsigned integer convert to floating point +// + +class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN> + : DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>; + +def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_scvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + +def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>; +def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>; +def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>; + // // SVE2 - Floating-point integer binary logarithm // @@ -3526,6 +3550,10 @@ let TargetPrefix = "aarch64" in { [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], [IntrNoMem]>; + class SVE2_CVT_VG2_Single_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>], + [IntrNoMem]>; // // Multi-vector fused multiply-add/subtract // @@ -4053,6 +4081,11 @@ def int_aarch64_sve_famin_u : 
AdvSIMD_Pred2VectorArg_Intrinsic; def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; +// SVE2.3/SME2.3 - Multi-vector narrowing convert from floating point to integer + +def int_aarch64_sve_fcvtzsn: SVE2_CVT_VG2_Single_Intrinsic; +def int_aarch64_sve_fcvtzun: SVE2_CVT_VG2_Single_Intrinsic; + // // FP8 Intrinsics // diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 926593022b537..72a6f3bd49abe 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4776,14 +4776,14 @@ let Predicates = [HasSVE2p3_or_SME2p3] in { def UDOT_ZZZI_BtoH : sve_intx_dot_by_indexed_elem_x<0b1, "udot">; // SVE2 fp convert, narrow and interleave to integer, rounding toward zero - defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0>; - defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1>; + defm FCVTZSN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzsn", 0b0, int_aarch64_sve_fcvtzsn>; + defm FCVTZUN_Z2Z : sve2_fp_to_int_downcvt<"fcvtzun", 0b1, int_aarch64_sve_fcvtzun>; // SVE2 signed/unsigned integer convert to floating-point - defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00>; - defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10>; - defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01>; - defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11>; + defm SCVTF_ZZ : sve2_int_to_fp_upcvt<"scvtf", 0b00, "int_aarch64_sve_scvtfb">; + defm SCVTFLT_ZZ : sve2_int_to_fp_upcvt<"scvtflt", 0b10, "int_aarch64_sve_scvtflt">; + defm UCVTF_ZZ : sve2_int_to_fp_upcvt<"ucvtf", 0b01, "int_aarch64_sve_ucvtfb">; + defm UCVTFLT_ZZ : sve2_int_to_fp_upcvt<"ucvtflt", 0b11, "int_aarch64_sve_ucvtflt">; // SVE2 saturating shift right narrow by immediate and interleave defm SQRSHRN_Z2ZI_HtoB : sve_multi_vec_round_shift_narrow<"sqrshrn", 0b101>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td 
index 8a3f52090ab4c..0958b3b665e32 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -11423,10 +11423,17 @@ class sve2_fp_to_int_downcvt<string asm, ZPRRegOp ZdRC, RegisterOperand ZSrcOp, let Inst{4-0} = Zd; } -multiclass sve2_fp_to_int_downcvt<string asm, bit U> { +multiclass sve2_fp_to_int_downcvt<string asm, bit U, SDPatternOperator op> { def _HtoB : sve2_fp_to_int_downcvt<asm, ZPR8, ZZ_h_mul_r, 0b01, U>; def _StoH : sve2_fp_to_int_downcvt<asm, ZPR16, ZZ_s_mul_r, 0b10, U>; def _DtoS : sve2_fp_to_int_downcvt<asm, ZPR32, ZZ_d_mul_r, 0b11, U>; + + def : Pat<(nxv16i8 (op nxv8f16:$Zn1, nxv8f16:$Zn2)), + (!cast<Instruction>(NAME # _HtoB) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; + def : Pat<(nxv8i16 (op nxv4f32:$Zn1, nxv4f32:$Zn2)), + (!cast<Instruction>(NAME # _StoH) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; + def : Pat<(nxv4i32 (op nxv2f64:$Zn1, nxv2f64:$Zn2)), + (!cast<Instruction>(NAME # _DtoS) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>; } //===----------------------------------------------------------------------===// @@ -11446,8 +11453,12 @@ class sve2_int_to_fp_upcvt<string asm, ZPRRegOp ZdRC, ZPRRegOp ZnRC, let Inst{4-0} = Zd; } -multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U> { +multiclass sve2_int_to_fp_upcvt<string asm, bits<2> U, string op> { def _BtoH : sve2_int_to_fp_upcvt<asm, ZPR16, ZPR8, 0b01, U>; def _HtoS : sve2_int_to_fp_upcvt<asm, ZPR32, ZPR16, 0b10, U>; def _StoD : sve2_int_to_fp_upcvt<asm, ZPR64, ZPR32, 0b11, U>; + + def : SVE_1_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # "_f16i8"), nxv16i8, !cast<Instruction>(NAME # _BtoH)>; + def : SVE_1_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # "_f32i16"), nxv8i16, !cast<Instruction>(NAME # _HtoS)>; + def : SVE_1_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # "_f64i32"), nxv4i32, !cast<Instruction>(NAME # _StoD)>; } diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll 
b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll new file mode 100644 index 0000000000000..46778fc14b81f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts.ll @@ -0,0 +1,255 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR +; +; SVCVTB (SCVTFB / UCVTFB) +; + +define <vscale x 8 x half> @scvtfb_f16_i8(<vscale x 16 x i8> %zn) { +; SVE2P3-LABEL: scvtfb_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtf z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtfb_f16_i8: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtf z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtfb_f16_i8: +; STR: // %bb.0: +; STR-NEXT: scvtf z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @scvtfb_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: scvtfb_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtf z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtfb_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtf z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtfb_f32_i16: +; STR: // %bb.0: +; STR-NEXT: scvtf z0.s, z0.h +; STR-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @scvtfb_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: scvtfb_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtf z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtfb_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtf z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtfb_f64_i32: +; STR: // %bb.0: +; 
STR-NEXT: scvtf z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @ucvtfb_f16_i8(<vscale x 16 x i8> %zn) { +; SVE2P3-LABEL: ucvtfb_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtf z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtfb_f16_i8: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtf z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtfb_f16_i8: +; STR: // %bb.0: +; STR-NEXT: ucvtf z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @ucvtfb_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: ucvtfb_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtf z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtfb_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtf z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtfb_f32_i16: +; STR: // %bb.0: +; STR-NEXT: ucvtf z0.s, z0.h +; STR-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @ucvtfb_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: ucvtfb_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtf z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtfb_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtf z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtfb_f64_i32: +; STR: // %bb.0: +; STR-NEXT: ucvtf z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +; +; SVCVTT (SCVTFLT / UCVTFLT) +; + +define <vscale x 8 x half> @scvtflt_f16_i8(<vscale x 16 x i8> %zn) { +; SVE2P3-LABEL: scvtflt_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtflt z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtflt_f16_i8: +; SME2P3: // %bb.0: +; 
SME2P3-NEXT: scvtflt z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtflt_f16_i8: +; STR: // %bb.0: +; STR-NEXT: scvtflt z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @scvtflt_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: scvtflt_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtflt z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtflt_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtflt z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtflt_f32_i16: +; STR: // %bb.0: +; STR-NEXT: scvtflt z0.s, z0.h +; STR-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @scvtflt_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: scvtflt_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: scvtflt z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: scvtflt_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: scvtflt z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: scvtflt_f64_i32: +; STR: // %bb.0: +; STR-NEXT: scvtflt z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @ucvtflt_f16_i8(<vscale x 16 x i8> %zn) { +; SVE2P3-LABEL: ucvtflt_f16_i8: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtflt z0.h, z0.b +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtflt_f16_i8: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtflt z0.h, z0.b +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtflt_f16_i8: +; STR: // %bb.0: +; STR-NEXT: ucvtflt z0.h, z0.b +; STR-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> %zn) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @ucvtflt_f32_i16(<vscale x 8 x i16> %zn) { +; SVE2P3-LABEL: ucvtflt_f32_i16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtflt 
z0.s, z0.h +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtflt_f32_i16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtflt z0.s, z0.h +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtflt_f32_i16: +; STR: // %bb.0: +; STR-NEXT: ucvtflt z0.s, z0.h +; STR-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> %zn) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @ucvtflt_f64_i32(<vscale x 4 x i32> %zn) { +; SVE2P3-LABEL: ucvtflt_f64_i32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: ucvtflt z0.d, z0.s +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: ucvtflt_f64_i32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: ucvtflt z0.d, z0.s +; SME2P3-NEXT: ret +; +; STR-LABEL: ucvtflt_f64_i32: +; STR: // %bb.0: +; STR-NEXT: ucvtflt z0.d, z0.s +; STR-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> %zn) + ret <vscale x 2 x double> %res +} + +declare <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8>) +declare <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16>) +declare <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32>) diff --git a/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll 
b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll new file mode 100644 index 0000000000000..4c99a4c241318 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p3-intrinsics-fp-converts_x2.ll @@ -0,0 +1,157 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 < %s | FileCheck %s --check-prefix=SVE2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p3 < %s | FileCheck %s --check-prefix=SME2P3 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming < %s | FileCheck %s --check-prefix=STR +; +; FCVTZSN +; + +define <vscale x 16 x i8> @fcvtzsn_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) { +; SVE2P3-LABEL: fcvtzsn_i8_f16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzsn_i8_f16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzsn_i8_f16: +; STR: // %bb.0: +; STR-NEXT: fcvtzsn z0.b, { z0.h, z1.h } +; STR-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @fcvtzsn_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) { +; SVE2P3-LABEL: fcvtzsn_i16_f32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzsn_i16_f32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; 
SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzsn_i16_f32: +; STR: // %bb.0: +; STR-NEXT: fcvtzsn z0.h, { z0.s, z1.s } +; STR-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + ret <vscale x 8 x i16> %res +} + +define <vscale x 4 x i32> @fcvtzsn_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) { +; SVE2P3-LABEL: fcvtzsn_i32_f64: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzsn_i32_f64: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzsn_i32_f64: +; STR: // %bb.0: +; STR-NEXT: fcvtzsn z0.s, { z0.d, z1.d } +; STR-NEXT: ret + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + ret <vscale x 4 x i32> %res +} + +; +; FCVTZUN +; + +define <vscale x 16 x i8> @fcvtzun_i8_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) { +; SVE2P3-LABEL: fcvtzun_i8_f16: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzun_i8_f16: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzun z0.b, { z0.h, z1.h } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzun_i8_f16: +; STR: // %bb.0: +; STR-NEXT: fcvtzun z0.b, { 
z0.h, z1.h } +; STR-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) + ret <vscale x 16 x i8> %res +} + +define <vscale x 8 x i16> @fcvtzun_i16_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) { +; SVE2P3-LABEL: fcvtzun_i16_f32: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzun_i16_f32: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzun_i16_f32: +; STR: // %bb.0: +; STR-NEXT: fcvtzun z0.h, { z0.s, z1.s } +; STR-NEXT: ret + %res = call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) + ret <vscale x 8 x i16> %res +} + +define <vscale x 4 x i32> @fcvtzun_i32_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) { +; SVE2P3-LABEL: fcvtzun_i32_f64: +; SVE2P3: // %bb.0: +; SVE2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SVE2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; SVE2P3-NEXT: ret +; +; SME2P3-LABEL: fcvtzun_i32_f64: +; SME2P3: // %bb.0: +; SME2P3-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; SME2P3-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; SME2P3-NEXT: ret +; +; STR-LABEL: fcvtzun_i32_f64: +; STR: // %bb.0: +; STR-NEXT: fcvtzun z0.s, { z0.d, z1.d } +; STR-NEXT: ret + %res = call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) + ret <vscale x 4 x i32> %res +} + +declare <vscale x 16 x i8> 
@llvm.aarch64.sve.fcvtzsn.i8f16(<vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.i16f32(<vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.i32f64(<vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.i8f16(<vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.i16f32(<vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.i32f64(<vscale x 2 x double>, <vscale x 2 x double>) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
