llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Author: Hassnaa Hamdi (hassnaaHamdi) <details> <summary>Changes</summary> According to specifications in [ARM-software/acle/pull/309](https://github.com/ARM-software/acle/pull/309) Add following intrinsics: ``` // svmax single,multi svbfloat16x2_t svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) svbfloat16x4_t svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) svbfloat16x2_t svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) svbfloat16x4_t svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) ``` ``` // svmin single,multi svbfloat16x2_t svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) svbfloat16x4_t svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) svbfloat16x2_t svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) svbfloat16x4_t svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) ``` ``` // svmaxnm single,multi svbfloat16x2_t svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) svbfloat16x4_t svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) svbfloat16x2_t svmaxnm_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) svbfloat16x4_t svmaxnm_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) ``` ``` // svminnm single,multi svbfloat16x2_t svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) svbfloat16x4_t svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) svbfloat16x2_t svminnm_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) svbfloat16x4_t svminnm_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm) ``` - Variations other than bfloat16 had been already supported. --- Patch is 124.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90105.diff 9 Files Affected: - (modified) clang/include/clang/Basic/arm_sve.td (+28) - (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c (+145-5) - (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c (+145-5) - (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c (+145-5) - (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c (+145-5) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+2-2) - (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+113-1) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll (+125-1) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll (+126-1) ``````````diff diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 6cc249837d3f3d..884ca6a3363421 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2111,6 +2111,20 @@ let TargetGuard = "sme2" in { defm MIN_MULTI_X4 : MinMaxIntr<"min", "", "x4", "444">; } +multiclass BFMinMaxIntr<string name> { + def SVBF # NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x2", [IsStreaming], []>; + def SVBF # NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x4", [IsStreaming], []>; + + def SVBF # NAME # _MULTI_X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_bf" # name # "_x2", [IsStreaming], []>; + def SVBF # NAME # _MULTI_X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_bf" # name # "_x4", [IsStreaming], []>; +} + +let TargetGuard = "sme2,b16b16" in { +// == BFMIN / BFMAX == + defm SVMIN : BFMinMaxIntr<"min">; + defm SVMAX : BFMinMaxIntr<"max">; +} + multiclass SInstMinMaxByVector<string name> { def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>; def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", "hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>; @@ -2125,6 +2139,20 @@ let TargetGuard = "sme2" in { defm SVMAXNM : SInstMinMaxByVector<"max">; } +multiclass SInstBFMinMaxByVector<string name> { + def SVBF # NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", "22d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x2", [IsStreaming], []>; + def SVBF # NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", "44d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x4", [IsStreaming], []>; + + def SVBF # NAME # _MULTI_X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", MergeNone, "aarch64_sve_bf" # name # "_x2", [IsStreaming], []>; + def SVBF # NAME # _MULTI_X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", MergeNone, "aarch64_sve_bf" # name # "_x4", [IsStreaming], []>; +} + +let TargetGuard = "sme2,b16b16" in { +// == BFMINNM / BFMAXNM == + defm SVMINNM : SInstBFMinMaxByVector<"minnm">; + defm SVMAXNM : SInstBFMinMaxByVector<"maxnm">; +} + let TargetGuard = "sme2" in { // FRINTA / FRINTM / FRINTN / FRINTP def SVRINTA_X2 : SInst<"svrinta[_{d}_x2]", "22", "f", MergeNone, "aarch64_sve_frinta_x2", [IsStreaming], []>; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c index a4e2616784efa7..2f09ad8074e39e 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s // REQUIRES: aarch64-registered-target #include <arm_sme.h> @@ -224,6 +224,32 @@ svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, svuint64_t zm) __arm_str return SVE_ACLE_FUNC(svmax,_single_u64_x2)(zdn, zm); } +// CHECK-LABEL: @test_svmax_single_bf16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1 +// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8) +// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP6]] +// +// CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 0 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP3]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP2]], 1 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP6]] +// +svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svmax,_single_bf16_x2)(zdn, zm); +} + // CHECK-LABEL: @test_svmax_single_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0) @@ -608,6 +634,44 @@ svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, svuint64_t zm) __arm_str return SVE_ACLE_FUNC(svmax,_single_u64_x4)(zdn, zm); } +// CHECK-LABEL: @test_svmax_single_bf16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2 +// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 3 +// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], <vscale x 8 x bfloat> [[TMP11]], i64 24) +// CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP12]] +// +// CPP-CHECK-LABEL: @_Z25test_svmax_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8) +// CPP-CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2 +// CPP-CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 16) +// CPP-CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 3 +// CPP-CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], <vscale x 8 x bfloat> [[TMP11]], i64 24) +// CPP-CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP12]] +// +svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svmax,_single_bf16_x4)(zdn, zm); +} + // CHECK-LABEL: @test_svmax_single_f16_x4( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN:%.*]], i64 0) @@ -964,6 +1028,36 @@ svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, svuint64x2_t zm) __arm_streamin return SVE_ACLE_FUNC(svmax,_u64_x2)(zdn, zm); } +// CHECK-LABEL: @test_svmax_bf16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8) +// CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x214svbfloat16x2_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.x2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 8 x bfloat> [[TMP5]], i64 0) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 8) +// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP8]] +// +svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) __arm_streaming { + return SVE_ACLE_FUNC(svmax,_bf16_x2)(zdn, zm); +} + // CHECK-LABEL: @test_svmax_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0) @@ -1424,6 +1518,52 @@ svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, svuint64x4_t zm) __arm_streamin return SVE_ACLE_FUNC(svmax,_u64_x4)(zdn, zm); } +// CHECK-LABEL: @test_svmax_bf16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call <vscale x 8 x bfloat> @llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]]) +// CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]], 0 +// CHECK-NEXT: [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 8 x bfloat> [[TMP9]], i64 0) +// CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]], 1 +// CHECK-NEXT: [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], <vscale x 8 x bfloat> [[TMP11]], i64 8) +// CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]], 2 +// CHECK-NEXT: [[TMP14:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP12]], <vscale x 8 x bfloat> [[TMP13]], i64 16) +// CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/90105 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits