https://github.com/mrkajetanp updated https://github.com/llvm/llvm-project/pull/156029
>From 594e732db16a4f21aa11c5899fcf2a662d33c817 Mon Sep 17 00:00:00 2001
From: Kajetan Puchalski <kajetan.puchal...@arm.com>
Date: Fri, 29 Aug 2025 14:09:03 +0000
Subject: [PATCH] [clang][AArch64] Use .i16.f16 intrinsic formats for
 vcvth*_[s|u]16_f16

Use .i16.f16 intrinsic formats for intrinsics like vcvth_s16_f16.
Avoids issues with incorrect saturation that arise when using .i32.f16
formats for the same conversions.

Signed-off-by: Kajetan Puchalski <kajetan.puchal...@arm.com>
---
 clang/lib/CodeGen/TargetBuiltins/ARM.cpp      |  5 +-
 .../v8.2a-fp16-intrinsics-constrained.c       | 10 ++--
 .../CodeGen/AArch64/v8.2a-fp16-intrinsics.c   | 50 ++++++++-----------
 3 files changed, 26 insertions(+), 39 deletions(-)

diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 60413e7b18e85..f4baf8c7f0dde 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -5847,7 +5847,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
   case NEON::BI__builtin_neon_vcvtph_s16_f16:
   case NEON::BI__builtin_neon_vcvth_s16_f16: {
     unsigned Int;
-    llvm::Type* InTy = Int32Ty;
+    llvm::Type *InTy = Int16Ty;
     llvm::Type* FTy = HalfTy;
     llvm::Type *Tys[2] = {InTy, FTy};
     Ops.push_back(EmitScalarExpr(E->getArg(0)));
@@ -5874,8 +5874,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
     case NEON::BI__builtin_neon_vcvth_s16_f16:
       Int = Intrinsic::aarch64_neon_fcvtzs; break;
     }
-    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
-    return Builder.CreateTrunc(Ops[0], Int16Ty);
+    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
   }
   case NEON::BI__builtin_neon_vcaleh_f16:
   case NEON::BI__builtin_neon_vcalth_f16:
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
index 9109626cea9ca..4c19d75df96e2 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics-constrained.c
@@ -105,9 +105,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
 }
 
 // COMMON-LABEL: test_vcvth_s16_f16
-// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR: ret i16 [[TRUNC]]
+// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// COMMONIR: ret i16 [[VCVT]]
 int16_t test_vcvth_s16_f16 (float16_t a) {
   return vcvth_s16_f16(a);
 }
@@ -127,9 +126,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
 }
 
 // COMMON-LABEL: test_vcvth_u16_f16
-// COMMONIR: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// COMMONIR: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// COMMONIR: ret i16 [[TRUNC]]
+// COMMONIR: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// COMMONIR: ret i16 [[VCVT]]
 uint16_t test_vcvth_u16_f16 (float16_t a) {
   return vcvth_u16_f16(a);
 }
diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
index 90ee74e459ebd..c3bbd5a43398a 100644
--- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c
@@ -97,9 +97,8 @@ float16_t test_vcvth_f16_u64 (uint64_t a) {
 }
 
 // CHECK-LABEL: test_vcvth_s16_f16
-// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a)
-// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK: ret i16 [[TRUNC]]
+// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzs.i16.f16(half %a)
+// CHECK: ret i16 [[VCVT]]
 int16_t test_vcvth_s16_f16 (float16_t a) {
   return vcvth_s16_f16(a);
 }
@@ -119,9 +118,8 @@ int64_t test_vcvth_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvth_u16_f16
-// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a)
-// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16
-// CHECK: ret i16 [[TRUNC]]
+// CHECK: [[VCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtzu.i16.f16(half %a)
+// CHECK: ret i16 [[VCVT]]
 uint16_t test_vcvth_u16_f16 (float16_t a) {
   return vcvth_u16_f16(a);
 }
@@ -141,9 +139,8 @@ uint64_t test_vcvth_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtah_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtas.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtas.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtah_s16_f16 (float16_t a) {
   return vcvtah_s16_f16(a);
 }
@@ -163,9 +160,8 @@ int64_t test_vcvtah_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtah_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtau.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtah_u16_f16 (float16_t a) {
   return vcvtah_u16_f16(a);
 }
@@ -185,9 +181,8 @@ uint64_t test_vcvtah_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtmh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtms.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtmh_s16_f16 (float16_t a) {
   return vcvtmh_s16_f16(a);
 }
@@ -207,9 +202,8 @@ int64_t test_vcvtmh_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtmh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtmu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtmh_u16_f16 (float16_t a) {
   return vcvtmh_u16_f16(a);
 }
@@ -229,9 +223,8 @@ uint64_t test_vcvtmh_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtnh_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtns.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtnh_s16_f16 (float16_t a) {
   return vcvtnh_s16_f16(a);
 }
@@ -251,9 +244,8 @@ int64_t test_vcvtnh_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtnh_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtnu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtnh_u16_f16 (float16_t a) {
   return vcvtnh_u16_f16(a);
 }
@@ -273,9 +265,8 @@ uint64_t test_vcvtnh_u64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtph_s16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtps.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 int16_t test_vcvtph_s16_f16 (float16_t a) {
   return vcvtph_s16_f16(a);
 }
@@ -295,9 +286,8 @@ int64_t test_vcvtph_s64_f16 (float16_t a) {
 }
 
 // CHECK-LABEL: test_vcvtph_u16_f16
-// CHECK: [[FCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a)
-// CHECK: [[RET:%.*]] = trunc i32 [[FCVT]] to i16
-// CHECK: ret i16 [[RET]]
+// CHECK: [[FCVT:%.*]] = call i16 @llvm.aarch64.neon.fcvtpu.i16.f16(half %a)
+// CHECK: ret i16 [[FCVT]]
 uint16_t test_vcvtph_u16_f16 (float16_t a) {
   return vcvtph_u16_f16(a);
 }
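For context, the saturation issue described in the commit message shows up with inputs outside the int16_t range. The snippet below is a minimal standalone sketch, not part of the patch; it assumes an AArch64 toolchain with the scalar FP16 intrinsics available (e.g. built with -march=armv8.2-a+fp16), and the input value 40000.0 is just an illustrative out-of-range example.

// Sketch of the saturation issue (illustrative, not part of this patch).
// Assumes an AArch64 target with scalar FP16 intrinsics, e.g.:
//   clang --target=aarch64-linux-gnu -march=armv8.2-a+fp16 -O2 repro.c
#include <arm_fp16.h>
#include <stdio.h>

int main(void) {
  // 40000.0 is exactly representable in FP16 but larger than INT16_MAX.
  float16_t big = (float16_t)40000.0f;

  // With the .i16.f16 form the conversion saturates to the 16-bit range,
  // so the expected result is 32767. The old .i32.f16 lowering converted
  // to i32 (40000) and then truncated to i16, wrapping around to -25536.
  int16_t s = vcvth_s16_f16(big);

  printf("vcvth_s16_f16(40000.0) = %d\n", (int)s);
  return 0;
}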