Author: Andrzej WarzyƄski
Date: 2026-01-11T14:14:27Z
New Revision: 363903eb3ed34f64164632266140ba3d609bdb81

URL: https://github.com/llvm/llvm-project/commit/363903eb3ed34f64164632266140ba3d609bdb81
DIFF: https://github.com/llvm/llvm-project/commit/363903eb3ed34f64164632266140ba3d609bdb81.diff

LOG: [CIR][AArch64] Add lowering for unpredicated svdup builtins (#174433)

This PR adds CIR lowering support for unpredicated `svdup` SVE builtins.
The corresponding ACLE intrinsics are documented at:
* https://developer.arm.com/architectures/instruction-sets/intrinsics
  (search for "svdup").

Since LLVM provides a direct intrinsic for `svdup` with a 1:1
builtin-to-intrinsic mapping, CIR lowers these builtins by emitting a call
to the corresponding LLVM intrinsic.
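
In code, the new lowering path boils down to the following condensed excerpt
of the logic added to CIRGenBuiltinAArch64.cpp (see the full diff below);
`ops` holds the already-emitted scalar operands:

```c++
// Look the builtin up in the TableGen-generated map and, if it names an LLVM
// intrinsic, emit a direct call to it instead of an NYI diagnostic.
const AArch64BuiltinInfo *info = findARMVectorIntrinsicInMap(
    aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted);

if (info->llvmIntrinsic != 0) {
  // "llvm.aarch64.sve.dup.x" -> "aarch64.sve.dup.x": drop the "llvm." prefix.
  std::string llvmIntrName(
      Intrinsic::getBaseName((llvm::Intrinsic::ID)info->llvmIntrinsic));
  llvmIntrName.erase(0, /*strlen("llvm.")=*/5);
  return emitIntrinsicCallOp(builder, loc, llvmIntrName,
                             convertType(expr->getType()),
                             mlir::ValueRange{ops});
}
```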

DESIGN NOTES
------------
With this change, CIR can generally lower any ACLE intrinsic that has a
corresponding LLVM intrinsic by reusing the LLVM intrinsic metadata (avoiding
duplicated intrinsic-name definitions), unless codegen-relevant `SVETypeFlags`
are involved. As a consequence, CIR may no longer emit NYI diagnostics for
intrinsics that (a) have a known LLVM intrinsic mapping and (b) do not use
codegen-relevant `SVETypeFlags`; such intrinsics are now lowered directly.
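
The "codegen-relevant" gate is the flag check below (condensed from the diff):
builtins whose `SVETypeFlags` imply extra codegen work keep the NYI diagnostic
for now.

```c++
// Flag categories that need more than a plain intrinsic call (loads/stores,
// gathers/scatters, prefetches, tuple handling, undef) are still reported as
// NYI rather than lowered through the generic intrinsic-call path.
SVETypeFlags typeFlags(builtinIntrInfo->typeModifier);
if (typeFlags.isLoad() || typeFlags.isStore() || typeFlags.isGatherLoad() ||
    typeFlags.isScatterStore() || typeFlags.isPrefetch() ||
    typeFlags.isGatherPrefetch() || typeFlags.isStructLoad() ||
    typeFlags.isStructStore() || typeFlags.isTupleSet() ||
    typeFlags.isTupleGet() || typeFlags.isTupleCreate() || typeFlags.isUndef())
  cgm.errorNYI(expr->getSourceRange(),
               std::string("unimplemented AArch64 builtin call: ") +
                   getContext().BuiltinInfo.getName(builtinID));
```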

IMPLEMENTATION NOTES
--------------------
* Intrinsic discovery logic mirrors the approach in
  CodeGen/TargetBuiltins/ARM.cpp, but is simplified since CIR only
  requires the intrinsic name (see the sketch after this list).
* Test inputs are copied from the existing svdup tests:
  clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c.
* The LLVM IR produced _with_ and _without_ `-fclangir` is identical,
  modulo basic block labels, temporaries that SROA would remove, and
  function attributes.
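
The intrinsic discovery mentioned in the first bullet is a binary search over
the sorted, TableGen-generated builtin-to-intrinsic table (condensed from the
diff below; the assert-only map-sortedness parameter is omitted for brevity):

```c++
// The map is generated from clang/Basic/arm_sve_builtin_cg.inc; each entry
// pairs a builtin ID with an LLVM intrinsic ID and its SVE type-modifier
// flags.
static const AArch64BuiltinInfo aarch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

// Lookup is a plain llvm::lower_bound over the sorted table; a null result
// means the builtin has no LLVM intrinsic mapping.
static const AArch64BuiltinInfo *
findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap,
                            unsigned builtinID) {
  const AArch64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID);
  if (info != intrinsicMap.end() && info->builtinID == builtinID)
    return info;
  return nullptr;
}
```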

EXAMPLE LOWERING
----------------
Input:
```C
svint8_t test_svdup_n_s8(int8_t op)
{
  return svdup_n_s8(op);
}
```

OUTPUT 1 (default):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 noundef %op) #0 {
entry:
  %op.addr = alloca i8, align 1
  store i8 %op, ptr %op.addr, align 1
  %0 = load i8, ptr %op.addr, align 1
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %0)
  ret <vscale x 16 x i8> %1
}
```

OUTPUT 2 (via `-fclangir`):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 %0) #0 {
  %2 = alloca i8, i64 1, align 1
  %3 = alloca <vscale x 16 x i8>, i64 1, align 16
  store i8 %0, ptr %2, align 1
  %4 = load i8, ptr %2, align 1
  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %4)
  store <vscale x 16 x i8> %5, ptr %3, align 16
  %6 = load <vscale x 16 x i8>, ptr %3, align 16
  ret <vscale x 16 x i8> %6
}
```
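
For completeness, before lowering to LLVM IR the `-fclangir` pipeline goes
through CIR; the CIR for `test_svdup_n_s8` has roughly the following shape
(reconstructed from the CIR FileCheck lines in the new test, with memory-op
types elided and SSA names chosen for readability):

```mlir
// Approximate CIR; the key new piece is the cir.call_llvm_intrinsic op that
// carries the LLVM intrinsic's base name.
%alloca = cir.alloca ...                  // stack slot for 'op'
cir.store %op, %alloca ...
%load = cir.load align(1) %alloca ...
%res = cir.call_llvm_intrinsic "aarch64.sve.dup.x" %load : (!s8i) -> !cir.vector<[16] x !s8i>
```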

Added: 
    clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
    clang/lib/CIR/CodeGen/CIRGenFunction.h

Removed: 
    


################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index e28b3c6cdc2ff..7998fb6b5eaac 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -13,6 +13,7 @@
 
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/MissingFeatures.h"
 
 // TODO(cir): once all builtins are covered, decide whether we still
@@ -25,7 +26,6 @@
 #include "mlir/IR/Value.h"
 #include "clang/AST/GlobalDecl.h"
 #include "clang/Basic/Builtins.h"
-#include "clang/Basic/TargetBuiltins.h"
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -52,6 +52,80 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc,
                                builder.getUInt64(scalingFactor, loc));
 }
 
+static bool aarch64SVEIntrinsicsProvenSorted = false;
+
+namespace {
+struct AArch64BuiltinInfo {
+  unsigned builtinID;
+  unsigned llvmIntrinsic;
+  uint64_t typeModifier;
+
+  bool operator<(unsigned rhsBuiltinID) const {
+    return builtinID < rhsBuiltinID;
+  }
+  bool operator<(const AArch64BuiltinInfo &te) const {
+    return builtinID < te.builtinID;
+  }
+};
+} // end anonymous namespace
+
+#define SVEMAP1(NameBase, llvmIntrinsic, TypeModifier)                        \
+  {SVE::BI__builtin_sve_##NameBase, Intrinsic::llvmIntrinsic, TypeModifier}
+
+#define SVEMAP2(NameBase, TypeModifier)                                       \
+  {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
+static const AArch64BuiltinInfo aarch64SVEIntrinsicMap[] = {
+#define GET_SVE_LLVM_INTRINSIC_MAP
+#include "clang/Basic/arm_sve_builtin_cg.inc"
+#undef GET_SVE_LLVM_INTRINSIC_MAP
+};
+
+static const AArch64BuiltinInfo *
+findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap,
+                            unsigned builtinID, bool &mapProvenSorted) {
+
+#ifndef NDEBUG
+  if (!mapProvenSorted) {
+    assert(llvm::is_sorted(intrinsicMap));
+    mapProvenSorted = true;
+  }
+#endif
+
+  const AArch64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID);
+
+  if (info != intrinsicMap.end() && info->builtinID == builtinID)
+    return info;
+
+  return nullptr;
+}
+
+bool CIRGenFunction::getAArch64SVEProcessedOperands(
+    unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> &ops,
+    SVETypeFlags typeFlags) {
+  // Find out if any arguments are required to be integer constant expressions.
+  unsigned iceArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &iceArguments);
+  assert(error == ASTContext::GE_None && "Should not codegen an error");
+
+  for (unsigned i = 0, e = expr->getNumArgs(); i != e; i++) {
+    bool isIce = iceArguments & (1 << i);
+    mlir::Value arg = emitScalarExpr(expr->getArg(i));
+
+    if (isIce) {
+      cgm.errorNYI(expr->getSourceRange(),
+                   std::string("unimplemented AArch64 builtin call: ") +
+                       getContext().BuiltinInfo.getName(builtinID));
+    }
+
+    // FIXME: Handle types like svint16x2_t, which are currently incorrectly
+    // converted to i32. These should be treated as structs and unpacked.
+
+    ops.push_back(arg);
+  }
+  return true;
+}
+
 std::optional<mlir::Value>
 CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                                           const CallExpr *expr) {
@@ -65,8 +139,40 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
 
   assert(!cir::MissingFeatures::aarch64SVEIntrinsics());
 
+  auto *builtinIntrInfo = findARMVectorIntrinsicInMap(
+      aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted);
+
+  // The operands of the builtin call
+  llvm::SmallVector<mlir::Value> ops;
+
+  SVETypeFlags typeFlags(builtinIntrInfo->typeModifier);
+  if (!CIRGenFunction::getAArch64SVEProcessedOperands(builtinID, expr, ops,
+                                                      typeFlags))
+    return mlir::Value{};
+
+  if (typeFlags.isLoad() || typeFlags.isStore() || typeFlags.isGatherLoad() ||
+      typeFlags.isScatterStore() || typeFlags.isPrefetch() ||
+      typeFlags.isGatherPrefetch() || typeFlags.isStructLoad() ||
+      typeFlags.isStructStore() || typeFlags.isTupleSet() ||
+      typeFlags.isTupleGet() || typeFlags.isTupleCreate() ||
+      typeFlags.isUndef())
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+
   mlir::Location loc = getLoc(expr->getExprLoc());
 
+  if (builtinIntrInfo->llvmIntrinsic != 0) {
+    std::string llvmIntrName(Intrinsic::getBaseName(
+        (llvm::Intrinsic::ID)builtinIntrInfo->llvmIntrinsic));
+
+    llvmIntrName.erase(0, /*std::strlen(".llvm")=*/5);
+
+    return emitIntrinsicCallOp(builder, loc, llvmIntrName,
+                               convertType(expr->getType()),
+                               mlir::ValueRange{ops});
+  }
+
   switch (builtinID) {
   default:
     return std::nullopt;
@@ -103,10 +209,12 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
   case SVE::BI__builtin_sve_svpmullb_u64:
   case SVE::BI__builtin_sve_svpmullb_n_u16:
   case SVE::BI__builtin_sve_svpmullb_n_u64:
+
   case SVE::BI__builtin_sve_svdup_n_b8:
   case SVE::BI__builtin_sve_svdup_n_b16:
   case SVE::BI__builtin_sve_svdup_n_b32:
   case SVE::BI__builtin_sve_svdup_n_b64:
+
   case SVE::BI__builtin_sve_svdupq_n_b8:
   case SVE::BI__builtin_sve_svdupq_n_b16:
   case SVE::BI__builtin_sve_svdupq_n_b32:
@@ -129,22 +237,27 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                  std::string("unimplemented AArch64 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
+
   case SVE::BI__builtin_sve_svlen_u8:
   case SVE::BI__builtin_sve_svlen_s8:
    return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 16);
+
   case SVE::BI__builtin_sve_svlen_u16:
   case SVE::BI__builtin_sve_svlen_s16:
   case SVE::BI__builtin_sve_svlen_f16:
   case SVE::BI__builtin_sve_svlen_bf16:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8);
+
   case SVE::BI__builtin_sve_svlen_u32:
   case SVE::BI__builtin_sve_svlen_s32:
   case SVE::BI__builtin_sve_svlen_f32:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4);
+
   case SVE::BI__builtin_sve_svlen_u64:
   case SVE::BI__builtin_sve_svlen_s64:
   case SVE::BI__builtin_sve_svlen_f64:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2);
+
   case SVE::BI__builtin_sve_svtbl2_u8:
   case SVE::BI__builtin_sve_svtbl2_s8:
   case SVE::BI__builtin_sve_svtbl2_u16:

diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 3101fc6cd228c..5fe1d9a4f2b76 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -31,6 +31,7 @@
 #include "clang/AST/Stmt.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/MissingFeatures.h"
 #include "clang/CIR/TypeEvaluationKind.h"
@@ -1265,6 +1266,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// CIR emit functions
   /// ----------------------
 public:
+  bool getAArch64SVEProcessedOperands(unsigned builtinID, const CallExpr *expr,
+                                      SmallVectorImpl<mlir::Value> &ops,
+                                      clang::SVETypeFlags typeFlags);
   std::optional<mlir::Value>
   emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                          ReturnValueSlot returnValue,

diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
new file mode 100644
index 0000000000000..3e0a892d6b368
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -0,0 +1,211 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=ALL,LLVM_OGCG_CIR
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// ALL-LABEL: @test_svdup_n_s8
+svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s8i {{.*}} -> !cir.vector<[16] x !s8i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s8i) -> !cir.vector<[16] x !s8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s16
+svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s16i {{.*}} -> !cir.vector<[8] x !s16i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s16i) -> !cir.vector<[8] x !s16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s32
+svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s32i {{.*}} -> !cir.vector<[4] x !s32i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s32i) -> !cir.vector<[4] x !s32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s64
+svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s64i {{.*}} -> !cir.vector<[2] x !s64i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!s64i) -> !cir.vector<[2] x !s64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u8
+svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u8i {{.*}} -> !cir.vector<[16] x !u8i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u8i) -> !cir.vector<[16] x !u8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} align 1
+// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u16
+svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u16i {{.*}} -> !cir.vector<[8] x !u16i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u16i) -> !cir.vector<[8] x !u16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u32
+svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u32i {{.*}} -> !cir.vector<[4] x !u32i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u32i) -> !cir.vector<[4] x !u32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u64
+svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u64i {{.*}} -> !cir.vector<[2] x !u64i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!u64i) -> !cir.vector<[2] x !u64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f16
+svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.f16 {{.*}} -> !cir.vector<[8] x !cir.f16>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.f16) -> !cir.vector<[8] x !cir.f16>
+
+// LLVM_OGCG_CIR-SAME: half {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} align 2
+// LLVM_OGCG_CIR:    store half [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load half, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f32
+svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.float {{.*}} -> !cir.vector<[4] x !cir.float>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.float) -> !cir.vector<[4] x !cir.float>
+
+// LLVM_OGCG_CIR-SAME: float {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} align 4
+// LLVM_OGCG_CIR:    store float [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load float, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f64
+svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.double {{.*}} -> !cir.vector<[2] x !cir.double>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : (!cir.double) -> !cir.vector<[2] x !cir.double>
+
+// LLVM_OGCG_CIR-SAME: double {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} align 8
+// LLVM_OGCG_CIR:    store double [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load double, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
+}


        