[clang] [llvm] [AArch64] Add intrinsics support for SVE2p2 instructions (PR #163575)

via cfe-commits Wed, 15 Oct 2025 08:13:46 -0700

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: None (Lukacma)

<details>
<summary>Changes</summary>

This patch adds intrinsics for the SVE2p2 instructions defined in 
[this](https://github.com/ARM-software/acle/pull/412) ACLE proposal.

---

Patch is 49.85 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/163575.diff


10 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+11-1) 
- (modified) clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c (+6) 
- (added) clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c 
(+142) 
- (added) clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c 
(+243) 
- (added) clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_firstp.c 
(+101) 
- (added) clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_lastp.c 
(+101) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+3) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+3-3) 
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+10-8) 
- (modified) llvm/test/CodeGen/AArch64/sve2p2-intrinsics.ll (+173) 


``````````diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index d2b7b78b9970f..716c2cd68ffcc 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -984,6 +984,11 @@ let SMETargetGuard = "sme2p2" in {
 def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd",  "ilUiUlfd", MergeNone, 
"aarch64_sve_compact", [VerifyRuntimeMode]>;
 }
 
+let SVETargetGuard = "sve2p2|sme2p2",  SMETargetGuard = "sme2p2" in {
+def SVCOMPACT_BH : SInst<"svcompact[_{d}]", "dPd",  "cUcsUsmbh", MergeNone, 
"aarch64_sve_compact", [VerifyRuntimeMode]>;
+def SVEXPAND  : SInst<"svexpand[_{d}]",  "dPd",  "cUcsUsiUilUlmbhfd", 
MergeNone, "aarch64_sve_expand",  [VerifyRuntimeMode]>;
+}
+
 // Note: svdup_lane is implemented using the intrinsic for TBL to represent a
 // splat of any possible lane. It is upto LLVM to pick a more efficient
 // instruction such as DUP (indexed) if the lane index fits the range of the
@@ -1111,6 +1116,11 @@ def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, 
"aarch64_sve_cntd", [IsAppendS
 def SVCNTP : SInst<"svcntp_{d}",  "nPP", "PcPsPiPl",         MergeNone, 
"aarch64_sve_cntp", [VerifyRuntimeMode]>;
 def SVLEN  : SInst<"svlen[_{d}]", "nd",  "csilUcUsUiUlhfdb", MergeNone, "", 
[VerifyRuntimeMode]>;
 
+let SVETargetGuard = "sve2p2|sme2p2",  SMETargetGuard = "sve2p2|sme2p2" in {
+  def SVFIRSTP  : SInst<"svfirstp_{d}", "lPP", "PcPsPiPl", MergeNone, 
"aarch64_sve_firstp", [VerifyRuntimeMode], []>;
+  def SVLASTP  : SInst<"svlastp_{d}", "lPP", "PcPsPiPl", MergeNone, 
"aarch64_sve_lastp", [VerifyRuntimeMode], []>;
+}
+
 
////////////////////////////////////////////////////////////////////////////////
 // Saturating scalar arithmetic
 
@@ -2388,4 +2398,4 @@ let SVETargetGuard = "sve2,fp8fma", SMETargetGuard = 
"ssve-fp8fma" in {
   def SVFMLALLBT_LANE : SInst<"svmlallbt_lane[_f32_mf8]", "dd~~i>", "f", 
MergeNone, "aarch64_sve_fp8_fmlallbt_lane", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
   def SVFMLALLTB_LANE : SInst<"svmlalltb_lane[_f32_mf8]", "dd~~i>", "f", 
MergeNone, "aarch64_sve_fp8_fmlalltb_lane", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
   def SVFMLALLTT_LANE : SInst<"svmlalltt_lane[_f32_mf8]", "dd~~i>", "f", 
MergeNone, "aarch64_sve_fp8_fmlalltt_lane", [VerifyRuntimeMode], [ImmCheck<3, 
ImmCheck0_7>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c 
b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
index 4c18969e78f0c..75ee18cb134d7 100644
--- a/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
+++ b/clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_compact.c
@@ -14,6 +14,12 @@
 #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
 #endif
 
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
 // CHECK-LABEL: @test_svcompact_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
new file mode 100644
index 0000000000000..8bee2ed1121a6
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_compact.c
@@ -0,0 +1,142 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2  -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: @test_svcompact_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcompact_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svcompact_s8(svbool_t pg, svint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_s8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svcompact_s16(svbool_t pg, svint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_s16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svcompact_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svcompact_u8(svbool_t pg, svuint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_u8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.compact.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
i16> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svcompact_u16(svbool_t pg, svuint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_u16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_mf8u10__SVBool_tu13__SVMfloat8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.compact.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svcompact_mf8(svbool_t pg, svmfloat8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_mf8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svcompact_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.compact.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svcompact_f16(svbool_t pg, svfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_f16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svcompact_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z19test_svcompact_bf16u10__SVBool_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.compact.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svcompact_bf16(svbool_t pg, svbfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svcompact,_bf16,,)(pg, op);
+}
diff --git a/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c 
b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
new file mode 100644
index 0000000000000..ece0ce795df39
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sve2p2-intriniscs/acle_sve2p2_expand.c
@@ -0,0 +1,243 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sme2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | 
opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-target-feature +sve2p2  -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x 
c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s 
-check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature 
+sve2p2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: @test_svexpand_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svexpand_s8u10__SVBool_tu10__SVInt8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svint8_t test_svexpand_s8(svbool_t pg, svint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_s8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_s16u10__SVBool_tu11__SVInt16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svint16_t test_svexpand_s16(svbool_t pg, svint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_s16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z16test_svexpand_u8u10__SVBool_tu11__SVUint8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svuint8_t test_svexpand_u8(svbool_t pg, svuint8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_u8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_u16u10__SVBool_tu12__SVUint16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> 
@llvm.aarch64.sve.expand.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> 
[[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+//
+svuint16_t test_svexpand_u16(svbool_t pg, svuint16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_u16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_mf8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_mf8u10__SVBool_tu13__SVMfloat8_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> 
@llvm.aarch64.sve.expand.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x 
i8> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
+//
+svmfloat8_t test_svexpand_mf8(svbool_t pg, svmfloat8_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_mf8,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_f16u10__SVBool_tu13__SVFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> 
@llvm.aarch64.sve.expand.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
half> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_svexpand_f16(svbool_t pg, svfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_f16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_bf16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.expand.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z18test_svexpand_bf16u10__SVBool_tu14__SVBfloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
+// CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> 
@llvm.aarch64.sve.expand.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x 
bfloat> [[OP:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
+//
+svbfloat16_t test_svexpand_bf16(svbool_t pg, svbfloat16_t op) STREAMING
+{
+  return SVE_ACLE_FUNC(svexpand,_bf16,,)(pg, op);
+}
+
+// CHECK-LABEL: @test_svexpand_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> 
@llvm.aarch64.sve.expand.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> 
[[OP:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+//
+// CPP-CHECK-LABEL: @_Z17test_svexpand_s32u10__SVBool_tu11__SVInt32_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/163575
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [AArch64] Add intrinsics support for SVE2p2 instructions (PR #163575)

Reply via email to