https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/180597
From 108e0357f93fdf648490b85399a22bbe020d3d81 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Fri, 23 Jan 2026 11:48:12 +0000 Subject: [PATCH 1/3] [CIR][NEON] Add lowering for `vnegd_s64` and `vnegh_f16` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CIR lowering support for the non-overloaded NEON intrinsics `vnegd_s64` and `vnegh_f16`. The associated tests are shared with the existing default codegen tests: * `neon-intrinsics.c` → `neon/intrinsics.c` * `v8.2a-fp16-intrinsics.c` → `neon/fullfp16.c` A new test file, * `clang/test/CodeGen/AArch64/neon/fullfp16.c` is introduced and is intended to eventually replace: * `clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c` Since both intrinsics are non-overloaded, the CIR and default codegen handling is moved to the appropriate switch statements. The previous placement was incorrect. This change also includes minor refactoring in `CIRGenBuilder.h` to better group related hooks. --- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 34 +++++++++++++------ .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 14 ++++++-- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 8 ++--- clang/test/CodeGen/AArch64/neon-intrinsics.c | 10 ------ clang/test/CodeGen/AArch64/neon/fullfp16.c | 28 +++++++++++++++ clang/test/CodeGen/AArch64/neon/intrinsics.c | 11 ++++++ .../CodeGen/AArch64/v8.2a-fp16-intrinsics.c | 7 ---- 7 files changed, 79 insertions(+), 33 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/neon/fullfp16.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 99a8152e7d365..03f7f89aecf33 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -362,6 +362,20 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return getConstantInt(loc, getUInt64Ty(), c); } + /// Create constant nullptr for pointer-to-data-member type ty. + cir::ConstantOp getNullDataMemberPtr(cir::DataMemberType ty, + mlir::Location loc) { + return cir::ConstantOp::create(*this, loc, getNullDataMemberAttr(ty)); + } + + cir::ConstantOp getNullMethodPtr(cir::MethodType ty, mlir::Location loc) { + return cir::ConstantOp::create(*this, loc, getNullMethodAttr(ty)); + } + + // + // UnaryOp creation helpers + // ------------------------- + // mlir::Value createNeg(mlir::Value value) { if (auto intTy = mlir::dyn_cast<cir::IntType>(value.getType())) { @@ -375,19 +389,18 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { llvm_unreachable("negation for the given type is NYI"); } - cir::IsFPClassOp createIsFPClass(mlir::Location loc, mlir::Value src, - cir::FPClassTest flags) { - return cir::IsFPClassOp::create(*this, loc, src, flags); - } + mlir::Value createFNeg(mlir::Value value) { + assert(!cir::MissingFeatures::metaDataNode()); + assert(!cir::MissingFeatures::fpConstraints()); + assert(!cir::MissingFeatures::fastMathFlags()); - /// Create constant nullptr for pointer-to-data-member type ty. - cir::ConstantOp getNullDataMemberPtr(cir::DataMemberType ty, - mlir::Location loc) { - return cir::ConstantOp::create(*this, loc, getNullDataMemberAttr(ty)); + return cir::UnaryOp::create(*this, value.getLoc(), value.getType(), + cir::UnaryOpKind::Minus, value); } - cir::ConstantOp getNullMethodPtr(cir::MethodType ty, mlir::Location loc) { - return cir::ConstantOp::create(*this, loc, getNullMethodAttr(ty)); + cir::IsFPClassOp createIsFPClass(mlir::Location loc, mlir::Value src, + cir::FPClassTest flags) { + return cir::IsFPClassOp::create(*this, loc, src, flags); } // TODO: split this to createFPExt/createFPTrunc when we have dedicated cast @@ -414,6 +427,7 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return cir::BinOp::create(*this, loc, cir::BinOpKind::Add, lhs, rhs); } + mlir::Value createFMul(mlir::Location loc, mlir::Value lhs, mlir::Value rhs) { assert(!cir::MissingFeatures::metaDataNode()); assert(!cir::MissingFeatures::fpConstraints()); diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index ae0aa8b7f733f..71cf896aede10 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -1446,6 +1446,18 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vcged_s64: case NEON::BI__builtin_neon_vcled_u64: case NEON::BI__builtin_neon_vcled_s64: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented AArch64 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; + case NEON::BI__builtin_neon_vnegd_s64: { + ops.push_back(emitScalarExpr(expr->getArg(0))); + return builder.createNeg(ops[0]); + } + case NEON::BI__builtin_neon_vnegh_f16: { + ops.push_back(emitScalarExpr(expr->getArg(0))); + return builder.createFNeg(ops[0]); + } case NEON::BI__builtin_neon_vtstd_s64: case NEON::BI__builtin_neon_vtstd_u64: case NEON::BI__builtin_neon_vset_lane_i8: @@ -1706,8 +1718,6 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vmulxh_laneq_f16: case NEON::BI__builtin_neon_vmul_lane_v: case NEON::BI__builtin_neon_vmul_laneq_v: - case NEON::BI__builtin_neon_vnegd_s64: - case NEON::BI__builtin_neon_vnegh_f16: case NEON::BI__builtin_neon_vpmaxnm_v: case NEON::BI__builtin_neon_vpmaxnmq_v: case NEON::BI__builtin_neon_vpminnm_v: diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index c45e819a03855..a97609169b375 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -6299,6 +6299,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); } + case NEON::BI__builtin_neon_vnegd_s64: + return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); + case NEON::BI__builtin_neon_vnegh_f16: + return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0))); case NEON::BI__builtin_neon_vtstd_s64: case NEON::BI__builtin_neon_vtstd_u64: { Ops.push_back(EmitScalarExpr(E->getArg(1))); @@ -7170,10 +7174,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); return Builder.CreateBitCast(Result, Ty); } - case NEON::BI__builtin_neon_vnegd_s64: - return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); - case NEON::BI__builtin_neon_vnegh_f16: - return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); case NEON::BI__builtin_neon_vpmaxnm_v: case NEON::BI__builtin_neon_vpmaxnmq_v: { Int = Intrinsic::aarch64_neon_fmaxnmp; diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index 3fc299b926009..42799d27bba89 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -17463,16 +17463,6 @@ int64_t test_vqabsd_s64(int64_t a) { return (int64_t)vqabsd_s64(a); } -// CHECK-LABEL: define dso_local i64 @test_vnegd_s64( -// CHECK-SAME: i64 noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VNEGD_I:%.*]] = sub i64 0, [[A]] -// CHECK-NEXT: ret i64 [[VNEGD_I]] -// -int64_t test_vnegd_s64(int64_t a) { - return (int64_t)vnegd_s64(a); -} - // CHECK-LABEL: define dso_local i8 @test_vqnegb_s8( // CHECK-SAME: i8 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c new file mode 100644 index 0000000000000..525ab3fbfe473 --- /dev/null +++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c @@ -0,0 +1,28 @@ +// REQUIRES: aarch64-registered-target + +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,simplifycfg | FileCheck %s --check-prefixes=LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} + +//============================================================================= +// NOTES +// +// Minor differences between RUNs (e.g. presence of `noundef` attached to +// argumens, `align` attribute attached to pointers), are matched using +// catch-alls like {{.*}}. +// +// Different labels for CIR stem from an additional function call that is +// present at the AST and CIR levels, but is inlined at the LLVM IR level. +//============================================================================= + +#include <arm_fp16.h> + +// ALL-LABEL: @test_vnegh_f16 +float16_t test_vnegh_f16(float16_t a) { +// CIR: cir.unary(minus, {{.*}}) : !cir.f16 + +// LLVM: half{{.*}} [[A:%.*]]) +// LLVM: [[NEG:%.*]] = fneg half [[A:%.*]] +// LLVM: ret half [[NEG]] + return vnegh_f16(a); +} diff --git a/clang/test/CodeGen/AArch64/neon/intrinsics.c b/clang/test/CodeGen/AArch64/neon/intrinsics.c index 6d0b25e6a66b7..039a08c23852e 100644 --- a/clang/test/CodeGen/AArch64/neon/intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon/intrinsics.c @@ -31,3 +31,14 @@ uint64_t test_vceqzd_s64(int64_t a) { // LLVM-NEXT: ret i64 [[VCEQZ_I]] return (uint64_t)vceqzd_s64(a); } + +// LLVM-LABEL: @test_vnegd_s64 +// CIR-LABEL: @vnegd_s64 +int64_t test_vnegd_s64(int64_t a) { +// CIR: cir.unary(minus, {{.*}}) : !s64 + +// LLVM-SAME: i64{{.*}} [[A:%.*]]) +// LLVM: [[VNEGD_I:%.*]] = sub i64 0, [[A]] +// LLVM-NEXT: ret i64 [[VNEGD_I]] + return (int64_t)vnegd_s64(a); +} diff --git a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c index c3bbd5a43398a..a4d544d26ae27 100644 --- a/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c +++ b/clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c @@ -306,13 +306,6 @@ uint64_t test_vcvtph_u64_f16 (float16_t a) { return vcvtph_u64_f16(a); } -// CHECK-LABEL: test_vnegh_f16 -// CHECK: [[NEG:%.*]] = fneg half %a -// CHECK: ret half [[NEG]] -float16_t test_vnegh_f16(float16_t a) { - return vnegh_f16(a); -} - // CHECK-LABEL: test_vrecpeh_f16 // CHECK: [[VREC:%.*]] = call half @llvm.aarch64.neon.frecpe.f16(half %a) // CHECK: ret half [[VREC]] From ff9a9cb07a8a9e4eff1584a2fadaf15ae5fd8123 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Tue, 10 Feb 2026 12:24:08 +0000 Subject: [PATCH 2/3] Address PR comments --- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 23 ++++++++++++---------- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 2 +- clang/test/CodeGen/AArch64/neon/fullfp16.c | 8 ++++---- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 03f7f89aecf33..c262f5dc3d812 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -362,16 +362,6 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return getConstantInt(loc, getUInt64Ty(), c); } - /// Create constant nullptr for pointer-to-data-member type ty. - cir::ConstantOp getNullDataMemberPtr(cir::DataMemberType ty, - mlir::Location loc) { - return cir::ConstantOp::create(*this, loc, getNullDataMemberAttr(ty)); - } - - cir::ConstantOp getNullMethodPtr(cir::MethodType ty, mlir::Location loc) { - return cir::ConstantOp::create(*this, loc, getNullMethodAttr(ty)); - } - // // UnaryOp creation helpers // ------------------------- @@ -390,6 +380,9 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } mlir::Value createFNeg(mlir::Value value) { + assert(mlir::isa<cir::FPTypeInterface>(value.getType()) && + "Non-fp input type!"); + assert(!cir::MissingFeatures::metaDataNode()); assert(!cir::MissingFeatures::fpConstraints()); assert(!cir::MissingFeatures::fastMathFlags()); @@ -403,6 +396,16 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { return cir::IsFPClassOp::create(*this, loc, src, flags); } + /// Create constant nullptr for pointer-to-data-member type ty. + cir::ConstantOp getNullDataMemberPtr(cir::DataMemberType ty, + mlir::Location loc) { + return cir::ConstantOp::create(*this, loc, getNullDataMemberAttr(ty)); + } + + cir::ConstantOp getNullMethodPtr(cir::MethodType ty, mlir::Location loc) { + return cir::ConstantOp::create(*this, loc, getNullMethodAttr(ty)); + } + // TODO: split this to createFPExt/createFPTrunc when we have dedicated cast // operations. mlir::Value createFloatingCast(mlir::Value v, mlir::Type destType) { diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index a97609169b375..65aec8bee52cf 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -6302,7 +6302,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case NEON::BI__builtin_neon_vnegd_s64: return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); case NEON::BI__builtin_neon_vnegh_f16: - return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0))); + return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); case NEON::BI__builtin_neon_vtstd_s64: case NEON::BI__builtin_neon_vtstd_u64: { Ops.push_back(EmitScalarExpr(E->getArg(1))); diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c index 525ab3fbfe473..f3396f008f5b1 100644 --- a/clang/test/CodeGen/AArch64/neon/fullfp16.c +++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c @@ -1,8 +1,8 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=LLVM -// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,simplifycfg | FileCheck %s --check-prefixes=LLVM %} -// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=CIR %} +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s --check-prefixes=ALL,LLVM +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -fclangir -emit-llvm -o - %s | opt -S -passes=mem2reg,simplifycfg | FileCheck %s --check-prefixes=ALL,LLVM %} +// RUN: %if cir-enabled %{%clang_cc1 -triple arm64-none-linux-gnu -target-feature +fullfp16 -disable-O0-optnone -fclangir -emit-cir -o - %s | FileCheck %s --check-prefixes=ALL,CIR %} //============================================================================= // NOTES @@ -21,7 +21,7 @@ float16_t test_vnegh_f16(float16_t a) { // CIR: cir.unary(minus, {{.*}}) : !cir.f16 -// LLVM: half{{.*}} [[A:%.*]]) +// LLVM-SAME: half{{.*}} [[A:%.*]]) // LLVM: [[NEG:%.*]] = fneg half [[A:%.*]] // LLVM: ret half [[NEG]] return vnegh_f16(a); From e621eb79b336e015e45357cfe0ba7c7b8f1963ba Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski <[email protected]> Date: Wed, 11 Feb 2026 08:34:54 +0000 Subject: [PATCH 3/3] Update comment --- clang/test/CodeGen/AArch64/neon/fullfp16.c | 24 +++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/clang/test/CodeGen/AArch64/neon/fullfp16.c b/clang/test/CodeGen/AArch64/neon/fullfp16.c index f3396f008f5b1..b4aca52372bf7 100644 --- a/clang/test/CodeGen/AArch64/neon/fullfp16.c +++ b/clang/test/CodeGen/AArch64/neon/fullfp16.c @@ -7,12 +7,26 @@ //============================================================================= // NOTES // -// Minor differences between RUNs (e.g. presence of `noundef` attached to -// argumens, `align` attribute attached to pointers), are matched using -// catch-alls like {{.*}}. +// Tests for unconstrained intrinsics that require the fullfp16 extension. // -// Different labels for CIR stem from an additional function call that is -// present at the AST and CIR levels, but is inlined at the LLVM IR level. +// These intrinsics expand to code containing multiple compound and declaration +// statements rather than just plain function calls, which leads to: +// * "scopes" at the CIR level, and then +// * redundant branches at the LLVM IR level. +// The default lowering path never generates those redundant LLVM IR branches, +// hence for CIR we use `opt -passes=simplifycfg` to reduce the control flow +// and to make LLVM IR match for all paths. +// +// Minor differences between RUN lines (e.g., the presence of `noundef` on +// arguments or the `align` attribute on pointers) are matched using +// catch-alls such as `{{.*}}`. +// +// TODO: Remove `-simplifycfg` once CIR lowering includes the relevant +// optimizations to reduce the CFG. +// +// TODO: Merge this file with +// * clang/test/CodeGen/AArch64/v8.2a-fp16-intrinsics.c +// (the source of these tests). //============================================================================= #include <arm_fp16.h> _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
