[clang] [CIR][AArch64] Upstream pairwise-minimum NEON builtins (PR #191759)

Vicky Nguyen via cfe-commits Tue, 14 Apr 2026 23:15:41 -0700

https://github.com/iamvickynguyen updated 
https://github.com/llvm/llvm-project/pull/191759


>From 272287c9923fd1d84d1faae815b22e75fda27a36 Mon Sep 17 00:00:00 2001
From: Vicky Nguyen <[email protected]>
Date: Fri, 10 Apr 2026 21:53:37 -0700
Subject: [PATCH] [CIR][AArch64] Upstream pairwise-minimum NEON builtins

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  18 +
 clang/test/CodeGen/AArch64/neon-intrinsics.c  | 288 ----------------
 clang/test/CodeGen/AArch64/neon/intrinsics.c  | 320 ++++++++++++++++++
 3 files changed, 338 insertions(+), 288 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index 75dd19d880444..ad3a4cbc97e1f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -237,6 +237,10 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
   case NEON::BI__builtin_neon_vabds_f32:
   case NEON::BI__builtin_neon_vshld_s64:
   case NEON::BI__builtin_neon_vshld_u64:
+  case NEON::BI__builtin_neon_vpmins_f32:
+  case NEON::BI__builtin_neon_vpminqd_f64:
+  case NEON::BI__builtin_neon_vpminnms_f32:
+  case NEON::BI__builtin_neon_vpminnmqd_f64:
     return emitNeonCall(cgf.cgm, cgf.getBuilder(),
                         {cgf.convertType(expr->getArg(0)->getType())}, ops,
                         llvmIntrName, cgf.convertType(expr->getType()), loc);
@@ -2422,8 +2426,16 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
     return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
   case NEON::BI__builtin_neon_vpadal_v:
   case NEON::BI__builtin_neon_vpadalq_v:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vpmin_v:
   case NEON::BI__builtin_neon_vpminq_v:
+    intrName = usgn ? "aarch64.neon.uminp" : "aarch64.neon.sminp";
+    if (cir::isFPOrVectorOfFPType(ty))
+      intrName = "aarch64.neon.fminp";
+    return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
   case NEON::BI__builtin_neon_vpmax_v:
   case NEON::BI__builtin_neon_vpmaxq_v:
     cgm.errorNYI(expr->getSourceRange(),
@@ -2568,8 +2580,14 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vmul_laneq_v:
   case NEON::BI__builtin_neon_vpmaxnm_v:
   case NEON::BI__builtin_neon_vpmaxnmq_v:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return mlir::Value{};
   case NEON::BI__builtin_neon_vpminnm_v:
   case NEON::BI__builtin_neon_vpminnmq_v:
+    intrName = "aarch64.neon.fminnmp";
+    return emitNeonCall(cgm, builder, {ty, ty}, ops, intrName, ty, loc);
   case NEON::BI__builtin_neon_vsqrth_f16:
   case NEON::BI__builtin_neon_vsqrt_v:
   case NEON::BI__builtin_neon_vsqrtq_v:
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 82a10b626c223..21b230f513c46 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -4962,206 +4962,6 @@ float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) {
   return vpmaxq_f64(a, b);
 }
 
-// CHECK-LABEL: define dso_local <8 x i8> @test_vpmin_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sminp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
-// CHECK-NEXT:    ret <8 x i8> [[VPMIN_I]]
-//
-int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
-  return vpmin_s8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vpmin_s16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <4 x i16> [[VPMIN2_I]]
-//
-int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
-  return vpmin_s16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vpmin_s32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <2 x i32> [[VPMIN2_I]]
-//
-int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
-  return vpmin_s32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vpmin_u8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.uminp.v8i8(<8 x i8> [[A]], <8 x i8> [[B]])
-// CHECK-NEXT:    ret <8 x i8> [[VPMIN_I]]
-//
-uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
-  return vpmin_u8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i16> @test_vpmin_u16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <4 x i16> [[VPMIN2_I]]
-//
-uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
-  return vpmin_u16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x i32> @test_vpmin_u32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <2 x i32> [[VPMIN2_I]]
-//
-uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
-  return vpmin_u32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x float> @test_vpmin_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <2 x float> 
@llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> 
[[VPMIN1_I]])
-// CHECK-NEXT:    ret <2 x float> [[VPMIN2_I]]
-//
-float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
-  return vpmin_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vpminq_s8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.sminp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
-// CHECK-NEXT:    ret <16 x i8> [[VPMIN_I]]
-//
-int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
-  return vpminq_s8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vpminq_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <8 x i16> [[VPMIN2_I]]
-//
-int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
-  return vpminq_s16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vpminq_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <4 x i32> [[VPMIN2_I]]
-//
-int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
-  return vpminq_s32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vpminq_u8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.uminp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
-// CHECK-NEXT:    ret <16 x i8> [[VPMIN_I]]
-//
-uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
-  return vpminq_u8(a, b);
-}
-
-// CHECK-LABEL: define dso_local <8 x i16> @test_vpminq_u16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <8 x i16> [[VPMIN2_I]]
-//
-uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
-  return vpminq_u16(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x i32> @test_vpminq_u32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]])
-// CHECK-NEXT:    ret <4 x i32> [[VPMIN2_I]]
-//
-uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
-  return vpminq_u32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_vpminq_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <4 x float> 
@llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> 
[[VPMIN1_I]])
-// CHECK-NEXT:    ret <4 x float> [[VPMIN2_I]]
-//
-float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
-  return vpminq_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x double> @test_vpminq_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
-// CHECK-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
-// CHECK-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
-// CHECK-NEXT:    [[VPMIN2_I:%.*]] = call <2 x double> 
@llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> 
[[VPMIN1_I]])
-// CHECK-NEXT:    ret <2 x double> [[VPMIN2_I]]
-//
-float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
-  return vpminq_f64(a, b);
-}
-
 // CHECK-LABEL: define dso_local <2 x float> @test_vpmaxnm_f32(
 // CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -5210,54 +5010,6 @@ float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) {
   return vpmaxnmq_f64(a, b);
 }
 
-// CHECK-LABEL: define dso_local <2 x float> @test_vpminnm_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-// CHECK-NEXT:    [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT:    [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
-// CHECK-NEXT:    [[VPMINNM2_I:%.*]] = call <2 x float> 
@llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> 
[[VPMINNM1_I]])
-// CHECK-NEXT:    ret <2 x float> [[VPMINNM2_I]]
-//
-float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
-  return vpminnm_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <4 x float> @test_vpminnmq_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
-// CHECK-NEXT:    [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT:    [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x 
float>
-// CHECK-NEXT:    [[VPMINNM2_I:%.*]] = call <4 x float> 
@llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> 
[[VPMINNM1_I]])
-// CHECK-NEXT:    ret <4 x float> [[VPMINNM2_I]]
-//
-float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
-  return vpminnmq_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local <2 x double> @test_vpminnmq_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
-// CHECK-NEXT:    [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x 
double>
-// CHECK-NEXT:    [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x 
double>
-// CHECK-NEXT:    [[VPMINNM2_I:%.*]] = call <2 x double> 
@llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> 
[[VPMINNM1_I]])
-// CHECK-NEXT:    ret <2 x double> [[VPMINNM2_I]]
-//
-float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
-  return vpminnmq_f64(a, b);
-}
-
 // CHECK-LABEL: define dso_local <8 x i8> @test_vpadd_s8(
 // CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -10723,46 +10475,6 @@ float64_t test_vpmaxqd_f64(float64x2_t a) {
   return vpmaxqd_f64(a);
 }
 
-// CHECK-LABEL: define dso_local float @test_vpminnms_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMINNMS_F32_I:%.*]] = call float 
@llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[A]])
-// CHECK-NEXT:    ret float [[VPMINNMS_F32_I]]
-//
-float32_t test_vpminnms_f32(float32x2_t a) {
-  return vpminnms_f32(a);
-}
-
-// CHECK-LABEL: define dso_local double @test_vpminnmqd_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMINNMQD_F64_I:%.*]] = call double 
@llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[A]])
-// CHECK-NEXT:    ret double [[VPMINNMQD_F64_I]]
-//
-float64_t test_vpminnmqd_f64(float64x2_t a) {
-  return vpminnmqd_f64(a);
-}
-
-// CHECK-LABEL: define dso_local float @test_vpmins_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMINS_F32_I:%.*]] = call float 
@llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[A]])
-// CHECK-NEXT:    ret float [[VPMINS_F32_I]]
-//
-float32_t test_vpmins_f32(float32x2_t a) {
-  return vpmins_f32(a);
-}
-
-// CHECK-LABEL: define dso_local double @test_vpminqd_f64(
-// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VPMINQD_F64_I:%.*]] = call double 
@llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[A]])
-// CHECK-NEXT:    ret double [[VPMINQD_F64_I]]
-//
-float64_t test_vpminqd_f64(float64x2_t a) {
-  return vpminqd_f64(a);
-}
-
 // CHECK-LABEL: define dso_local i16 @test_vqdmulhh_s16(
 // CHECK-SAME: i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c
index 241ddce6fe978..9abdb1308f1b3 100644
--- a/clang/test/CodeGen/AArch64/neon/intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c
@@ -1778,6 +1778,326 @@ float64x2_t test_vminnmq_f64(float64x2_t v1, float64x2_t v2) {
   return vminnmq_f64(v1, v2);
 }
 
+//===----------------------------------------------------------------------===//
+// 2.1.8.6 Pairwise minimum
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#pairwise-minimum
+//===----------------------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vpmin_s8(
+// CIR-LABEL: @vpmin_s8(
+int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sminp" %{{.*}}, %{{.*}} : 
(!cir.vector<8 x !s8i>, !cir.vector<8 x !s8i>) -> !cir.vector<8 x !s8i>
+
+// LLVM-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]])
+// LLVM:    [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x 
i8> [[A]], <8 x i8> [[B]])
+// LLVM-NEXT:    ret <8 x i8> [[VPMIN_I]]
+  return vpmin_s8(a, b);
+}
+
+// LLVM-LABEL: @test_vpmin_s16(
+// CIR-LABEL: @vpmin_s16(
+int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sminp" %{{.*}}, %{{.*}} : 
(!cir.vector<4 x !s16i>, !cir.vector<4 x !s16i>) -> !cir.vector<4 x !s16i>
+
+// LLVM-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <4 x i16> [[VPMIN2_I]]
+  return vpmin_s16(a, b);
+}
+
+// LLVM-LABEL: @test_vpmin_s32(
+// CIR-LABEL: @vpmin_s32(
+int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sminp" %{{.*}}, %{{.*}} : 
(!cir.vector<2 x !s32i>, !cir.vector<2 x !s32i>) -> !cir.vector<2 x !s32i>
+
+// LLVM-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.sminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <2 x i32> [[VPMIN2_I]]
+  return vpmin_s32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmin_u8(
+// CIR-LABEL: @vpmin_u8(
+uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uminp" %{{.*}}, %{{.*}} : 
(!cir.vector<8 x !u8i>, !cir.vector<8 x !u8i>) -> !cir.vector<8 x !u8i>
+
+// LLVM-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]])
+// LLVM:    [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x 
i8> [[A]], <8 x i8> [[B]])
+// LLVM-NEXT:    ret <8 x i8> [[VPMIN_I]]
+  return vpmin_u8(a, b);
+}
+
+// LLVM-LABEL: @test_vpmin_u16(
+// CIR-LABEL: @vpmin_u16(
+uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uminp" %{{.*}}, %{{.*}} : 
(!cir.vector<4 x !u16i>, !cir.vector<4 x !u16i>) -> !cir.vector<4 x !u16i>
+
+// LLVM-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uminp.v4i16(<4 x i16> [[VPMIN_I]], <4 x i16> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <4 x i16> [[VPMIN2_I]]
+  return vpmin_u16(a, b);
+}
+
+// LLVM-LABEL: @test_vpmin_u32(
+// CIR-LABEL: @vpmin_u32(
+uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uminp" %{{.*}}, %{{.*}} : 
(!cir.vector<2 x !u32i>, !cir.vector<2 x !u32i>) -> !cir.vector<2 x !u32i>
+
+// LLVM-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <2 x i32> 
@llvm.aarch64.neon.uminp.v2i32(<2 x i32> [[VPMIN_I]], <2 x i32> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <2 x i32> [[VPMIN2_I]]
+  return vpmin_u32(a, b);
+}
+
+// LLVM-LABEL: @test_vpmin_f32(
+// CIR-LABEL: @vpmin_f32(
+float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminp" %{{.*}}, %{{.*}} : 
(!cir.vector<2 x !cir.float>, !cir.vector<2 x !cir.float>) -> !cir.vector<2 x 
!cir.float>
+
+// LLVM-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <2 x float> 
@llvm.aarch64.neon.fminp.v2f32(<2 x float> [[VPMIN_I]], <2 x float> 
[[VPMIN1_I]])
+// LLVM-NEXT:    ret <2 x float> [[VPMIN2_I]]
+  return vpmin_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_s8(
+// CIR-LABEL: @vpminq_s8(
+int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sminp" %{{.*}}, %{{.*}} : 
(!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>) -> !cir.vector<16 x !s8i>
+
+// LLVM-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]])
+// LLVM:    [[VPMIN_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.sminp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM-NEXT:    ret <16 x i8> [[VPMIN_I]]
+  return vpminq_s8(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_s16(
+// CIR-LABEL: @vpminq_s16(
+int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sminp" %{{.*}}, %{{.*}} : 
(!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>) -> !cir.vector<8 x !s16i>
+
+// LLVM-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.sminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <8 x i16> [[VPMIN2_I]]
+  return vpminq_s16(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_s32(
+// CIR-LABEL: @vpminq_s32(
+int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.sminp" %{{.*}}, %{{.*}} : 
(!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !cir.vector<4 x !s32i>
+
+// LLVM-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.sminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <4 x i32> [[VPMIN2_I]]
+  return vpminq_s32(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_u8(
+// CIR-LABEL: @vpminq_u8(
+uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uminp" %{{.*}}, %{{.*}} : 
(!cir.vector<16 x !u8i>, !cir.vector<16 x !u8i>) -> !cir.vector<16 x !u8i>
+
+// LLVM-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]])
+// LLVM:    [[VPMIN_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.uminp.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+// LLVM-NEXT:    ret <16 x i8> [[VPMIN_I]]
+  return vpminq_u8(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_u16(
+// CIR-LABEL: @vpminq_u16(
+uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uminp" %{{.*}}, %{{.*}} : 
(!cir.vector<8 x !u16i>, !cir.vector<8 x !u16i>) -> !cir.vector<8 x !u16i>
+
+// LLVM-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <8 x i16> 
@llvm.aarch64.neon.uminp.v8i16(<8 x i16> [[VPMIN_I]], <8 x i16> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <8 x i16> [[VPMIN2_I]]
+  return vpminq_u16(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_u32(
+// CIR-LABEL: @vpminq_u32(
+uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.uminp" %{{.*}}, %{{.*}} : 
(!cir.vector<4 x !u32i>, !cir.vector<4 x !u32i>) -> !cir.vector<4 x !u32i>
+
+// LLVM-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.uminp.v4i32(<4 x i32> [[VPMIN_I]], <4 x i32> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <4 x i32> [[VPMIN2_I]]
+  return vpminq_u32(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_f32(
+// CIR-LABEL: @vpminq_f32(
+float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminp" %{{.*}}, %{{.*}} : 
(!cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x 
!cir.float>
+
+// LLVM-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <4 x float> 
@llvm.aarch64.neon.fminp.v4f32(<4 x float> [[VPMIN_I]], <4 x float> 
[[VPMIN1_I]])
+// LLVM-NEXT:    ret <4 x float> [[VPMIN2_I]]
+  return vpminq_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vpminq_f64(
+// CIR-LABEL: @vpminq_f64(
+float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminp" %{{.*}}, %{{.*}} : 
(!cir.vector<2 x !cir.double>, !cir.vector<2 x !cir.double>) -> !cir.vector<2 x 
!cir.double>
+
+// LLVM-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// LLVM-NEXT:    [[VPMIN_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// LLVM-NEXT:    [[VPMIN1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// LLVM-NEXT:    [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> [[VPMIN_I]], <2 x double> [[VPMIN1_I]])
+// LLVM-NEXT:    ret <2 x double> [[VPMIN2_I]]
+  return vpminq_f64(a, b);
+}
+
+//===----------------------------------------------------------------------===//
+// 2.1.8.7 Pairwise minimum (IEEE754)
+// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#pairwise-minimum-ieee754
+//===----------------------------------------------------------------------===//
+
+// LLVM-LABEL: @test_vpminnm_f32(
+// CIR-LABEL: @vpminnm_f32(
+float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !cir.float>, !cir.vector<2 x !cir.float>) -> !cir.vector<2 x !cir.float>
+
+// LLVM-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
+// LLVM-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
+// LLVM-NEXT:    [[VPMINNM_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
+// LLVM-NEXT:    [[VPMINNM1_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
+// LLVM-NEXT:    [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> [[VPMINNM_I]], <2 x float> [[VPMINNM1_I]])
+// LLVM-NEXT:    ret <2 x float> [[VPMINNM2_I]]
+  return vpminnm_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vpminnmq_f32(
+// CIR-LABEL: @vpminnmq_f32(
+float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmp" %{{.*}}, %{{.*}} : (!cir.vector<4 x !cir.float>, !cir.vector<4 x !cir.float>) -> !cir.vector<4 x !cir.float>
+
+// LLVM-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
+// LLVM-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+// LLVM-NEXT:    [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
+// LLVM-NEXT:    [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
+// LLVM-NEXT:    [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> [[VPMINNM_I]], <4 x float> [[VPMINNM1_I]])
+// LLVM-NEXT:    ret <4 x float> [[VPMINNM2_I]]
+  return vpminnmq_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vpminnmq_f64(
+// CIR-LABEL: @vpminnmq_f64(
+float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmp" %{{.*}}, %{{.*}} : (!cir.vector<2 x !cir.double>, !cir.vector<2 x !cir.double>) -> !cir.vector<2 x !cir.double>
+
+// LLVM-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]])
+// LLVM:    [[TMP0:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
+// LLVM-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
+// LLVM-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP0]] to <16 x i8>
+// LLVM-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// LLVM-NEXT:    [[VPMINNM_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
+// LLVM-NEXT:    [[VPMINNM1_I:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x double>
+// LLVM-NEXT:    [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> [[VPMINNM_I]], <2 x double> [[VPMINNM1_I]])
+// LLVM-NEXT:    ret <2 x double> [[VPMINNM2_I]]
+  return vpminnmq_f64(a, b);
+}
+
+// LLVM-LABEL: @test_vpmins_f32(
+// CIR-LABEL: @vpmins_f32(
+float32_t test_vpmins_f32(float32x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminv" %{{.*}} : (!cir.vector<2 x !cir.float>) -> !cir.float
+
+// LLVM-SAME: <2 x float> noundef [[A:%.*]])
+// LLVM:    [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[A]])
+// LLVM-NEXT:    ret float [[VPMINS_F32_I]]
+  return vpmins_f32(a);
+}
+
+// LLVM-LABEL: @test_vpminqd_f64(
+// CIR-LABEL: @vpminqd_f64(
+float64_t test_vpminqd_f64(float64x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminv" %{{.*}} : (!cir.vector<2 x !cir.double>) -> !cir.double
+
+// LLVM-SAME: <2 x double> noundef [[A:%.*]])
+// LLVM:    [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[A]])
+// LLVM-NEXT:    ret double [[VPMINQD_F64_I]]
+  return vpminqd_f64(a);
+}
+
+// LLVM-LABEL: @test_vpminnms_f32(
+// CIR-LABEL: @vpminnms_f32(
+float32_t test_vpminnms_f32(float32x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmv" %{{.*}} : (!cir.vector<2 x !cir.float>) -> !cir.float
+
+// LLVM-SAME: <2 x float> noundef [[A:%.*]])
+// LLVM:    [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[A]])
+// LLVM-NEXT:    ret float [[VPMINNMS_F32_I]]
+  return vpminnms_f32(a);
+}
+
+// LLVM-LABEL: @test_vpminnmqd_f64(
+// CIR-LABEL: @vpminnmqd_f64(
+float64_t test_vpminnmqd_f64(float64x2_t a) {
+// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmv" %{{.*}} : (!cir.vector<2 x !cir.double>) -> !cir.double
+
+// LLVM-SAME: <2 x double> noundef [[A:%.*]])
+// LLVM:    [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[A]])
+// LLVM-NEXT:    ret double [[VPMINNMQD_F64_I]]
+  return vpminnmqd_f64(a);
+}
+
 //===------------------------------------------------------===//
 // 2.1.3.1.1. Vector Shift Left
 // https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#vector-shift-left

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][AArch64] Upstream pairwise-minimum NEON builtins (PR #191759)

Reply via email to