https://github.com/woruyu created https://github.com/llvm/llvm-project/pull/167125
### Summary

This PR resolves https://github.com/llvm/llvm-project/issues/163895 by adding the fcmp SSE subset of the X86 vector builtins to CIR.

>From c6c359e544eaa9137e8397ab95111b54dfc5dbd5 Mon Sep 17 00:00:00 2001
From: liuzhenya <[email protected]>
Date: Sat, 8 Nov 2025 01:26:12 -1000
Subject: [PATCH] [CIR] X86 vector fcmp-sse vector builtins

---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h    |  18 ++++
 clang/include/clang/CIR/MissingFeatures.h   |   1 +
 clang/lib/CIR/CodeGen/CIRGenBuilder.h       |  22 ++++
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp  |  96 +++++++++++++++--
 clang/lib/CIR/CodeGen/CIRGenFunction.h      |   3 +
 clang/test/CIR/CodeGen/builtin-fcmp-sse.c   | 102 ++++++++++++++++++
 6 files changed, 231 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/builtin-fcmp-sse.c

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 3288f5b12c77e..d05c5181fce70 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -131,6 +131,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::IntType::get(getContext(), n, false);
   }
 
+  static unsigned getCIRIntOrFloatBitWidth(mlir::Type eltTy) {
+    if (auto intType = mlir::dyn_cast<cir::IntTypeInterface>(eltTy))
+      return intType.getWidth();
+    if (auto floatType = mlir::dyn_cast<cir::FPTypeInterface>(eltTy))
+      return floatType.getWidth();
+
+    llvm_unreachable("expected a CIR integer or floating-point type");
+  }
   cir::IntType getSIntNTy(int n) {
     return cir::IntType::get(getContext(), n, true);
   }
@@ -575,6 +583,16 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return cir::CmpOp::create(*this, loc, getBoolTy(), kind, lhs, rhs);
   }
 
+  cir::VecCmpOp createVecCompare(mlir::Location loc, cir::CmpOpKind kind,
+                                 mlir::Value lhs, mlir::Value rhs) {
+    VectorType vecCast = mlir::cast<VectorType>(lhs.getType());
+    auto integralTy =
+        getSIntNTy(getCIRIntOrFloatBitWidth(vecCast.getElementType()));
+    VectorType integralVecTy =
+        VectorType::get(context, integralTy, vecCast.getSize());
+    return cir::VecCmpOp::create(*this, loc, integralVecTy, kind, lhs, rhs);
+  }
+
   mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
     return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
   }
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index af1ffffcf54c0..70b9a1f9163d7 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -253,6 +253,7 @@ struct MissingFeatures {
   static bool emitBranchThroughCleanup() { return false; }
   static bool emitCheckedInBoundsGEP() { return false; }
   static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
+  static bool emitConstrainedFPCall() { return false; }
   static bool emitLifetimeMarkers() { return false; }
   static bool emitLValueAlignmentAssumption() { return false; }
   static bool emitNullCheckForDeleteCalls() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index e5066fac19185..4686cfa99b963 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -27,6 +27,7 @@ namespace clang::CIRGen {
 
 class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   const CIRGenTypeCache &typeCache;
+  bool IsFPConstrained = false;
   llvm::StringMap<unsigned> recordNames;
   llvm::StringMap<unsigned> globalsVersioning;
 
@@ -34,6 +35,27 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc)
       : CIRBaseBuilderTy(mlirContext), typeCache(tc) {}
 
+  //
+  // Floating point specific helpers
+  // -------------------------------
+  //
+
+  /// Enable/disable the use of constrained floating-point math. When enabled,
+  /// the CreateF<op>() calls instead create constrained floating-point
+  /// intrinsic calls. Fast-math flags are unaffected by this setting.
+  void setIsFPConstrained(bool IsCon) {
+    if (IsCon)
+      llvm_unreachable("Constrained FP NYI");
+    IsFPConstrained = IsCon;
+  }
+
+  /// Query for the use of constrained floating-point math.
+  bool getIsFPConstrained() {
+    if (IsFPConstrained)
+      llvm_unreachable("Constrained FP NYI");
+    return IsFPConstrained;
+  }
+
   /// Get a cir::ConstArrayAttr for a string literal.
   /// Note: This is different from what is returned by
   /// mlir::Builder::getStringAttr() which is an mlir::StringAttr.
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 0198a9d4eb192..9448f06e4e4aa 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -21,18 +21,18 @@
 using namespace clang;
 using namespace clang::CIRGen;
 
-mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
-                                               const CallExpr *e) {
-  if (builtinID == Builtin::BI__builtin_cpu_is) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_is");
+mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
+                                               const CallExpr *E) {
+  if (BuiltinID == Builtin::BI__builtin_cpu_is) {
+    cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_is");
     return {};
   }
-  if (builtinID == Builtin::BI__builtin_cpu_supports) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_supports");
+  if (BuiltinID == Builtin::BI__builtin_cpu_supports) {
+    cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_supports");
     return {};
   }
-  if (builtinID == Builtin::BI__builtin_cpu_init) {
-    cgm.errorNYI(e->getSourceRange(), "__builtin_cpu_init");
+  if (BuiltinID == Builtin::BI__builtin_cpu_init) {
+    cgm.errorNYI(E->getSourceRange(), "__builtin_cpu_init");
     return {};
   }
 
@@ -43,7 +43,56 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   // Find out if any arguments are required to be integer constant expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
 
-  switch (builtinID) {
+  llvm::SmallVector<mlir::Value, 4> Ops;
+
+  // Find out if any arguments are required to be integer constant expressions.
+  unsigned ICEArguments = 0;
+  ASTContext::GetBuiltinTypeError Error;
+  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
+  assert(Error == ASTContext::GE_None && "Should not codegen an error");
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    Ops.push_back(emitScalarOrConstFoldImmArg(ICEArguments, i, E));
+  }
+
+  // OG supports unordered floating-point comparisons in addition to ordered
+  // ones, while CIR doesn't.
+  //
+  // This means that we can't encode a comparison code such as UGT (unordered
+  // greater than), at least not at the CIR level.
+  //
+  // The boolean shouldInvert compensates for this.
+  // For example: to get to the comparison code UGT, we pass in
+  // getVectorFCmpIR(OLE, shouldInvert = true) since OLE is the inverse of UGT.
+  //
+  // There are several other ways this could be supported:
+  // - Register extra CmpOpKinds for the unordered comparison types and build
+  //   the translation code to go from CIR to the LLVM dialect. (Notice we get
+  //   this naturally with shouldInvert, benefiting from existing
+  //   infrastructure, albeit at the cost of an extra `not` at the CIR level.)
+  // - Add the extra comparison codes to a new VecCmpOpKind instead of
+  //   cluttering CmpOpKind.
+  // - Add a boolean to VecCmpOp to indicate whether the comparison is
+  //   unordered or ordered.
+  // - Emit the intrinsic call directly instead of calling this helper; see
+  //   how the LLVM lowering handles this.
+  auto getVectorFCmpIR = [this, &Ops, &E](cir::CmpOpKind pred,
+                                          bool shouldInvert, bool isSignaling) {
+    assert(!cir::MissingFeatures::cgFPOptionsRAII());
+    auto loc = getLoc(E->getExprLoc());
+    mlir::Value cmp;
+    if (builder.getIsFPConstrained())
+      // TODO: handle isSignaling once emitConstrainedFPCall is implemented.
+      assert(cir::MissingFeatures::emitConstrainedFPCall());
+    else
+      cmp = builder.createVecCompare(loc, pred, Ops[0], Ops[1]);
+
+    mlir::Value bitCast = builder.createBitcast(
+        shouldInvert ? builder.createNot(cmp) : cmp, Ops[0].getType());
+    return bitCast;
+  };
+
+  switch (BuiltinID) {
   default:
     return {};
   case X86::BI_mm_prefetch:
@@ -710,10 +759,18 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpunordpd:
   case X86::BI__builtin_ia32_cmpneqps:
   case X86::BI__builtin_ia32_cmpneqpd:
+    cgm.errorNYI(E->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(BuiltinID));
+    return {};
   case X86::BI__builtin_ia32_cmpnltps:
   case X86::BI__builtin_ia32_cmpnltpd:
+    return getVectorFCmpIR(cir::CmpOpKind::lt, /*shouldInvert=*/true,
+                           /*isSignaling=*/true);
   case X86::BI__builtin_ia32_cmpnleps:
   case X86::BI__builtin_ia32_cmpnlepd:
+    return getVectorFCmpIR(cir::CmpOpKind::le, /*shouldInvert=*/true,
+                           /*isSignaling=*/true);
   case X86::BI__builtin_ia32_cmpordps:
   case X86::BI__builtin_ia32_cmpordpd:
   case X86::BI__builtin_ia32_cmpph128_mask:
@@ -798,9 +855,26 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
   case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
   case X86::BI__builtin_ia32_prefetchi:
-    cgm.errorNYI(e->getSourceRange(),
+    cgm.errorNYI(E->getSourceRange(),
                  std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
+                     getContext().BuiltinInfo.getName(BuiltinID));
     return {};
   }
 }
+
+mlir::Value CIRGenFunction::emitScalarOrConstFoldImmArg(unsigned ICEArguments,
+                                                        unsigned Idx,
+                                                        const CallExpr *E) {
+  mlir::Value Arg = {};
+  if ((ICEArguments & (1 << Idx)) == 0) {
+    Arg = emitScalarExpr(E->getArg(Idx));
+  } else {
+    // If this is required to be a constant, constant fold it so that we
+    // know that the generated intrinsic gets a ConstantInt.
+    std::optional<llvm::APSInt> Result =
+        E->getArg(Idx)->getIntegerConstantExpr(getContext());
+    assert(Result && "Expected argument to be a constant");
+    Arg = builder.getConstInt(getLoc(E->getSourceRange()), *Result);
+  }
+  return Arg;
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index f879e580989f7..5a88b37ceb352 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1699,6 +1699,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   void emitScalarInit(const clang::Expr *init, mlir::Location loc,
                       LValue lvalue, bool capturedByInit = false);
 
+  mlir::Value emitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx,
+                                          const CallExpr *E);
+
   void emitStaticVarDecl(const VarDecl &d, cir::GlobalLinkageKind linkage);
 
   void emitStoreOfComplex(mlir::Location loc, mlir::Value v, LValue dest,
diff --git a/clang/test/CIR/CodeGen/builtin-fcmp-sse.c b/clang/test/CIR/CodeGen/builtin-fcmp-sse.c
new file mode 100644
index 0000000000000..c90ff08c19542
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin-fcmp-sse.c
@@ -0,0 +1,102 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -emit-llvm %s -o - | FileCheck %s -check-prefix=OGCG
+
+typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
+
+__m128 test_cmpnleps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnleps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnleps
+  // LLVM: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnleps
+  // OGCG: [[CMP:%.*]] = fcmp ugt <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnleps(A, B);
+}
+
+
+__m128d test_cmpnlepd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnlepd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(le, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_cmpnlepd
+  // LLVM: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnlepd
+  // OGCG: [[CMP:%.*]] = fcmp ugt <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnlepd(A, B);
+}
+
+
+__m128 test_cmpnltps(__m128 A, __m128 B) {
+  // CIR-LABEL: @test_cmpnltps
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.float x 4>, !cir.vector<!s32i x 4>
+  // CIR: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s32i x 4>, !cir.vector<!s32i x 4>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s32i x 4> -> !cir.vector<!cir.float x 4>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.float x 4>, !cir.ptr<!cir.vector<!cir.float x 4>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.float x 4>
+
+  // LLVM-LABEL: test_cmpnltps
+  // LLVM: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // LLVM-NEXT: ret <4 x float> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltps
+  // OGCG: [[CMP:%.*]] = fcmp uge <4 x float> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // OGCG-NEXT: ret <4 x float> [[CAST]]
+  return __builtin_ia32_cmpnltps(A, B);
+}
+
+
+__m128d test_cmpnltpd(__m128d A, __m128d B) {
+  // CIR-LABEL: @test_cmpnltpd
+  // CIR: [[CMP:%.*]] = cir.vec.cmp(lt, [[A:%.*]], [[B:%.*]]) : !cir.vector<!cir.double x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[NOTCMP:%.*]] = cir.unary(not, [[CMP]]) : !cir.vector<!s64i x 2>, !cir.vector<!s64i x 2>
+  // CIR-NEXT: [[CAST:%.*]] = cir.cast bitcast [[NOTCMP]] : !cir.vector<!s64i x 2> -> !cir.vector<!cir.double x 2>
+  // CIR-NEXT: cir.store [[CAST]], [[ALLOCA:%.*]] : !cir.vector<!cir.double x 2>, !cir.ptr<!cir.vector<!cir.double x 2>>
+  // CIR-NEXT: [[LD:%.*]] = cir.load [[ALLOCA]] :
+  // CIR-NEXT: cir.return [[LD]] : !cir.vector<!cir.double x 2>
+
+  // LLVM-LABEL: test_cmpnltpd
+  // LLVM: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // LLVM-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // LLVM-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // LLVM-NEXT: ret <2 x double> [[CAST]]
+
+  // OGCG-LABEL: test_cmpnltpd
+  // OGCG: [[CMP:%.*]] = fcmp uge <2 x double> {{.*}}, {{.*}}
+  // OGCG-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // OGCG-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // OGCG-NEXT: ret <2 x double> [[CAST]]
+  return __builtin_ia32_cmpnltpd(A, B);
+}
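
The shouldInvert trick in the CIRGenBuiltinX86.cpp comment above rests on the identity UGT == NOT(OLE): negating an ordered less-or-equal flips the NaN (unordered) case from false to true, which is exactly the unordered greater-than semantics. A minimal scalar C++ check of that identity (illustrative only, not part of the patch; `ugt` and `ole` are local helper names):

```cpp
#include <cassert>
#include <cmath>
#include <limits>

// ole: ordered less-or-equal -- false whenever either operand is NaN.
static bool ole(float a, float b) { return a <= b; }

// ugt: unordered greater-than -- true if a > b OR the operands are unordered.
static bool ugt(float a, float b) {
  return a > b || std::isnan(a) || std::isnan(b);
}

int main() {
  const float nan = std::numeric_limits<float>::quiet_NaN();
  const float vals[] = {-1.0f, 0.0f, 2.5f, nan};
  for (float a : vals)
    for (float b : vals)
      assert(ugt(a, b) == !ole(a, b)); // UGT == NOT(OLE) on every pair
}
```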
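The FileCheck patterns in the test file pin down the mask semantics the lowering must preserve: the `sext`/`bitcast` pair turns each comparison lane into an all-ones or all-zero bit pattern. A runnable sketch of what that means for `__builtin_ia32_cmpnleps` (assumes an x86 target; the expected lane values follow from the per-lane UGT semantics, NaN lanes included):

```cpp
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));

int main() {
  __m128 a = {1.0f, 5.0f, NAN, 0.0f};
  __m128 b = {2.0f, 4.0f, 1.0f, 0.0f};
  __m128 r = __builtin_ia32_cmpnleps(a, b); // per-lane "not less-or-equal" (UGT)

  uint32_t lanes[4];
  std::memcpy(lanes, &r, sizeof(lanes)); // inspect the float lanes as bit masks
  assert(lanes[0] == 0);           // 1 <= 2: NLE is false
  assert(lanes[1] == 0xFFFFFFFFu); // 5 >  4: NLE is true
  assert(lanes[2] == 0xFFFFFFFFu); // NaN lane is unordered, so NLE is true
  assert(lanes[3] == 0);           // 0 <= 0: NLE is false
}
```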
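Finally, emitScalarOrConstFoldImmArg keys off the ICEArguments bitmask filled in by ASTContext::GetBuiltinType: bit i is set when argument i must be an integer constant expression, so it is constant-folded rather than emitted as an ordinary scalar. A small standalone illustration of that convention (hypothetical mask value, not taken from the patch):

```cpp
#include <cassert>

// Bit i of iceArguments set => argument i must be an integer constant
// expression and is constant-folded instead of emitted via emitScalarExpr.
static bool mustConstFold(unsigned iceArguments, unsigned idx) {
  return (iceArguments & (1u << idx)) != 0;
}

int main() {
  const unsigned iceArguments = 0b100; // say only argument 2 is constrained
  assert(!mustConstFold(iceArguments, 0)); // emitted as a scalar
  assert(!mustConstFold(iceArguments, 1));
  assert(mustConstFold(iceArguments, 2));  // folded to a ConstantInt
}
```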
