maxnm (PR #90105)

via cfe-commits Thu, 25 Apr 2024 12:16:45 -0700

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-clang

Author: Hassnaa Hamdi (hassnaaHamdi)

<details>
<summary>Changes</summary>

According to specifications in 
[ARM-software/acle/pull/309](https://github.com/ARM-software/acle/pull/309)
Add following intrinsics:

```
// svmax single,multi
svbfloat16x2_t svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svmax_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```

```
// svmin single,multi
svbfloat16x2_t svmin_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svmin_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svmin_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svmin_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```

```
// svmaxnm single,multi
svbfloat16x2_t svmaxnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svmaxnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svmaxnm_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svmaxnm_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```

```
// svminnm single,multi
svbfloat16x2_t svminnm_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm)
svbfloat16x4_t svminnm_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm)
svbfloat16x2_t svminnm_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm)
svbfloat16x4_t svminnm_bf16_x4(svbfloat16x4_t zdn, svbfloat16x4_t zm)
```
- Variations other than bfloat16 had been already supported.

---

Patch is 124.44 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/90105.diff


9 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+28) 
- (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c 
(+145-5) 
- (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_maxnm.c 
(+145-5) 
- (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_min.c 
(+145-5) 
- (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_minnm.c 
(+145-5) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+2-2) 
- (modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+113-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll (+125-1) 
- (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll (+126-1) 


``````````diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 6cc249837d3f3d..884ca6a3363421 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2111,6 +2111,20 @@ let TargetGuard = "sme2" in {
   defm MIN_MULTI_X4  : MinMaxIntr<"min", "",        "x4", "444">;
 }
 
+multiclass BFMinMaxIntr<string name> {
+  def SVBF # NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", 
"22d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x2", [IsStreaming], 
[]>;
+  def SVBF # NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", 
"44d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x4", [IsStreaming], 
[]>;
+
+  def SVBF # NAME # _MULTI_X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", 
MergeNone, "aarch64_sve_bf" # name # "_x2", [IsStreaming], []>;
+  def SVBF # NAME # _MULTI_X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", 
MergeNone, "aarch64_sve_bf" # name # "_x4", [IsStreaming], []>;
+}
+
+let TargetGuard = "sme2,b16b16" in {
+// == BFMIN / BFMAX ==
+  defm SVMIN : BFMinMaxIntr<"min">;
+  defm SVMAX : BFMinMaxIntr<"max">;
+}
+
 multiclass SInstMinMaxByVector<string name> {
   def NAME # _SINGLE_X2 : SInst<"sv" # name # "nm[_single_{d}_x2]", "22d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x2", [IsStreaming], []>;
   def NAME # _SINGLE_X4 : SInst<"sv" # name # "nm[_single_{d}_x4]", "44d", 
"hfd", MergeNone, "aarch64_sve_f" # name # "nm_single_x4", [IsStreaming], []>;
@@ -2125,6 +2139,20 @@ let TargetGuard = "sme2" in {
   defm SVMAXNM : SInstMinMaxByVector<"max">;
 }
 
+multiclass SInstBFMinMaxByVector<string name> {
+  def SVBF # NAME # _SINGLE_X2 : SInst<"sv" # name # "[_single_{d}_x2]", 
"22d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x2", [IsStreaming], 
[]>;
+  def SVBF # NAME # _SINGLE_X4 : SInst<"sv" # name # "[_single_{d}_x4]", 
"44d", "b", MergeNone, "aarch64_sve_bf" # name # "_single_x4", [IsStreaming], 
[]>;
+
+  def SVBF # NAME # _MULTI_X2 : SInst<"sv" # name # "[_{d}_x2]", "222", "b", 
MergeNone, "aarch64_sve_bf" # name # "_x2", [IsStreaming], []>;
+  def SVBF # NAME # _MULTI_X4 : SInst<"sv" # name # "[_{d}_x4]", "444", "b", 
MergeNone, "aarch64_sve_bf" # name # "_x4", [IsStreaming], []>;
+}
+
+let TargetGuard = "sme2,b16b16" in {
+// == BFMINNM / BFMAXNM ==
+  defm SVMINNM : SInstBFMinMaxByVector<"minnm">;
+  defm SVMAXNM : SInstBFMinMaxByVector<"maxnm">;
+}
+
 let TargetGuard = "sme2" in {
   // FRINTA / FRINTM / FRINTN / FRINTP
   def SVRINTA_X2 : SInst<"svrinta[_{d}_x2]", "22", "f", MergeNone, 
"aarch64_sve_frinta_x2", [IsStreaming], []>;
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
index a4e2616784efa7..2f09ad8074e39e 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_max.c
@@ -1,9 +1,9 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror 
-Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple 
aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu 
-target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror 
-Wall -o /dev/null %s
 // REQUIRES: aarch64-registered-target
 #include <arm_sme.h>
 
@@ -224,6 +224,32 @@ svuint64x2_t test_svmax_single_u64_x2(svuint64x2_t zdn, 
svuint64_t zm) __arm_str
   return SVE_ACLE_FUNC(svmax,_single_u64_x2)(zdn, zm);
 }
 
+// CHECK-LABEL: @test_svmax_single_bf16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 
0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 
8 x bfloat> } @llvm.aarch64.sve.bfmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat> } [[TMP2]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 
8 x bfloat> [[TMP3]], i64 0)
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat> } [[TMP2]], 1
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale 
x 8 x bfloat> [[TMP5]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP6]]
+//
+// CPP-CHECK-LABEL: 
@_Z25test_svmax_single_bf16_x214svbfloat16x2_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 
0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.single.x2.nxv8bf16(<vscale x 8 
x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> 
[[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } [[TMP2]], 0
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 
8 x bfloat> [[TMP3]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } [[TMP2]], 1
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale 
x 8 x bfloat> [[TMP5]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP6]]
+//
+svbfloat16x2_t test_svmax_single_bf16_x2(svbfloat16x2_t zdn, svbfloat16_t zm) 
__arm_streaming {
+  return SVE_ACLE_FUNC(svmax,_single_bf16_x2)(zdn, zm);
+}
+
 // CHECK-LABEL: @test_svmax_single_f16_x2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
@@ -608,6 +634,44 @@ svuint64x4_t test_svmax_single_u64_x4(svuint64x4_t zdn, 
svuint64_t zm) __arm_str
   return SVE_ACLE_FUNC(svmax,_single_u64_x4)(zdn, zm);
 }
 
+// CHECK-LABEL: @test_svmax_single_bf16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN:%.*]], i64 
0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 24)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 
8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sve.bfmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], 
<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x 
bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 
8 x bfloat> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale 
x 8 x bfloat> [[TMP7]], i64 8)
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 2
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale 
x 8 x bfloat> [[TMP9]], i64 16)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 
3
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], 
<vscale x 8 x bfloat> [[TMP11]], i64 24)
+// CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP12]]
+//
+// CPP-CHECK-LABEL: 
@_Z25test_svmax_single_bf16_x414svbfloat16x4_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN:%.*]], i64 
0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 24)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sve.bfmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], 
<vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x 
bfloat> [[TMP3]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 
8 x bfloat> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 
1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale 
x 8 x bfloat> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 
2
+// CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale 
x 8 x bfloat> [[TMP9]], i64 16)
+// CPP-CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP4]], 
3
+// CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], 
<vscale x 8 x bfloat> [[TMP11]], i64 24)
+// CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP12]]
+//
+svbfloat16x4_t test_svmax_single_bf16_x4(svbfloat16x4_t zdn, svbfloat16_t zm) 
__arm_streaming {
+  return SVE_ACLE_FUNC(svmax,_single_bf16_x4)(zdn, zm);
+}
+
 // CHECK-LABEL: @test_svmax_single_f16_x4(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.vector.extract.nxv8f16.nxv32f16(<vscale x 32 x half> [[ZDN:%.*]], i64 0)
@@ -964,6 +1028,36 @@ svuint64x2_t test_svmax_u64_x2(svuint64x2_t zdn, 
svuint64x2_t zm) __arm_streamin
   return SVE_ACLE_FUNC(svmax,_u64_x2)(zdn, zm);
 }
 
+// CHECK-LABEL: @test_svmax_bf16_x2(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 
0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 
0)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 
8 x bfloat> } @llvm.aarch64.sve.bfmax.x2.nxv8bf16(<vscale x 8 x bfloat> 
[[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], 
<vscale x 8 x bfloat> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat> } [[TMP4]], 0
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 
8 x bfloat> [[TMP5]], i64 0)
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat> } [[TMP4]], 1
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale 
x 8 x bfloat> [[TMP7]], i64 8)
+// CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP8]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svmax_bf16_x214svbfloat16x2_tS_(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN:%.*]], i64 
0)
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZDN]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM:%.*]], i64 
0)
+// CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> [[ZM]], i64 8)
+// CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } @llvm.aarch64.sve.bfmax.x2.nxv8bf16(<vscale x 8 x 
bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> 
[[TMP2]], <vscale x 8 x bfloat> [[TMP3]])
+// CPP-CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } [[TMP4]], 0
+// CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> poison, <vscale x 
8 x bfloat> [[TMP5]], i64 0)
+// CPP-CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat> } [[TMP4]], 1
+// CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x bfloat> 
@llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP6]], <vscale 
x 8 x bfloat> [[TMP7]], i64 8)
+// CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP8]]
+//
+svbfloat16x2_t test_svmax_bf16_x2(svbfloat16x2_t zdn, svbfloat16x2_t zm) 
__arm_streaming {
+  return SVE_ACLE_FUNC(svmax,_bf16_x2)(zdn, zm);
+}
+
 // CHECK-LABEL: @test_svmax_f16_x2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> 
@llvm.vector.extract.nxv8f16.nxv16f16(<vscale x 16 x half> [[ZDN:%.*]], i64 0)
@@ -1424,6 +1518,52 @@ svuint64x4_t test_svmax_u64_x4(svuint64x4_t zdn, 
svuint64x4_t zm) __arm_streamin
   return SVE_ACLE_FUNC(svmax,_u64_x4)(zdn, zm);
 }
 
+// CHECK-LABEL: @test_svmax_bf16_x4(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN:%.*]], i64 
0)
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 8)
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 16)
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZDN]], i64 24)
+// CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM:%.*]], i64 
0)
+// CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 8)
+// CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 16)
+// CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.vector.extract.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> [[ZM]], i64 24)
+// CHECK-NEXT:    [[TMP8:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 
8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } 
@llvm.aarch64.sve.bfmax.x4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 
x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> 
[[TMP3]], <vscale x 8 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], 
<vscale x 8 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]])
+// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 8 x bfloat>, <vscale 
x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> poison, <vscale x 
8 x bfloat> [[TMP9]], i64 0)
+// CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]], 
1
+// CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP10]], 
<vscale x 8 x bfloat> [[TMP11]], i64 8)
+// CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP8]], 
2
+// CHECK-NEXT:    [[TMP14:%.*]] = tail call <vscale x 32 x bfloat> 
@llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP12]], 
<vscale x 8 x bfloat> [[TMP13]], i64 16)
+// CHECK-NEXT:    [[TMP15:%.*]] = extractvalue { <vscale x 8 x bfloat>, 
<vscale x 8 x bfloat>, <vscale x...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/90105
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AArch64] Add intrinsics for bflaot16 min/max/minnm/maxnm (PR #90105)

Reply via email to