https://github.com/mrkajetanp updated https://github.com/llvm/llvm-project/pull/156029

>From 594e732db16a4f21aa11c5899fcf2a662d33c817 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchal...@arm.com>
Date: Fri, 29 Aug 2025 14:09:03 +0000
Subject: [PATCH] [clang][AArch64] Use .i16.f16 intrinsic formats for
 vcvth*_[s|u]16_f16

Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16.
Avoids issues with incorrect saturation that arise when using .i32.f16
formats for the same conversions.

Signed-off-by: Kajetan Puchalski <kajetan.puchal...@arm.com>
---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  5 +-
 .../v8.2a-fp16-intrinsics-constrained.c       | 10 ++--
 .../CodeGen/AArch64/v8.2a-fp16-intrinsics.c   | 50 ++++++++-----------
 3 files changed, 26 insertions(+), 39 deletions(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60413e7b18e85..f4baf8c7f0dde 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5847,7 +5847,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vcvtph_s16_f16:
   case NEON::BI__builtin_neon_vcvth_s16_f16: {
     unsigned Int;
-    llvm::Type* InTy = Int32Ty;
+    llvm::Type *InTy = Int16Ty;
     llvm::Type* FTy  = HalfTy;
     llvm::Type *Tys[2] = {InTy, FTy};
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5874,8 +5874,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     case NEON::BI__builtin_neon_vcvth_s16_f16:
       Int = Intrinsic::aarch64_neon_fcvtzs; break;
     }
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
-    return Builder.CreateTrunc(Ops[0], Int16Ty);
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
   }
   case NEON::BI__builtin_neon_vcaleh_f16:
   case NEON::BI__builtin_neon_vcalth_f16:
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
index 9109626cea9ca..4c19d75df96e2 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
@@ -105,9 +105,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
 }
 
 // COMMON-LABEL: test_vcvth_s16_f16
-// COMMONIR:       [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// COMMONIR:       [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR:       ret i16 [[TRUNC]]
+// COMMONIR:       [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// COMMONIR:       ret i16 [[VCVT]]
 int16_t test_vcvth_s16_f16 (float16_t a) {
   return vcvth_s16_f16(a);
 }
@@ -127,9 +126,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
 }
 
 // COMMON-LABEL: test_vcvth_u16_f16
-// COMMONIR:       [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// COMMONIR:       [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR:       ret i16 [[TRUNC]]
+// COMMONIR:       [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// COMMONIR:       ret i16 [[VCVT]]
 uint16_t test_vcvth_u16_f16 (float16_t a) {
   return vcvth_u16_f16(a);
 }
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index 90ee74e459ebd..c3bbd5a43398a 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -97,9 +97,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
 }
 
 // CHECK-LABEL: test_vcvth_s16_f16
-// CHECK:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// CHECK:  [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK:  ret i16 [[TRUNC]]
+// CHECK:  [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// CHECK:  ret i16 [[VCVT]]
 int16_t test_vcvth_s16_f16 (float16_t a) {
   return vcvth_s16_f16(a);
 }
@@ -119,9 +118,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvth_u16_f16
-// CHECK:  [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// CHECK:  [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK:  ret i16 [[TRUNC]]
+// CHECK:  [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// CHECK:  ret i16 [[VCVT]]
 uint16_t test_vcvth_u16_f16 (float16_t a) {
   return vcvth_u16_f16(a);
 }
@@ -141,9 +139,8 @@ uint64_t test_vcvth_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtah_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtah_s16_f16 (float16_t a) {
   return vcvtah_s16_f16(a);
 }
@@ -163,9 +160,8 @@ int64_t test_vcvtah_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtah_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtah_u16_f16 (float16_t a) {
   return vcvtah_u16_f16(a);
 }
@@ -185,9 +181,8 @@ uint64_t test_vcvtah_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtmh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtmh_s16_f16 (float16_t a) {
   return vcvtmh_s16_f16(a);
 }
@@ -207,9 +202,8 @@ int64_t test_vcvtmh_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtmh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtmh_u16_f16 (float16_t a) {
   return vcvtmh_u16_f16(a);
 }
@@ -229,9 +223,8 @@ uint64_t test_vcvtmh_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtnh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtnh_s16_f16 (float16_t a) {
   return vcvtnh_s16_f16(a);
 }
@@ -251,9 +244,8 @@ int64_t test_vcvtnh_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtnh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtnh_u16_f16 (float16_t a) {
   return vcvtnh_u16_f16(a);
 }
@@ -273,9 +265,8 @@ uint64_t test_vcvtnh_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtph_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtph_s16_f16 (float16_t a) {
   return vcvtph_s16_f16(a);
 }
@@ -295,9 +286,8 @@ int64_t test_vcvtph_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtph_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtph_u16_f16 (float16_t a) {
   return vcvtph_u16_f16(a);
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to