[lld] [compiler-rt] [llvm] [openmp] [clang-tools-extra] [clang] [mlir] [ADT] Make SmallVectorImpl destructor protected (PR #71437)
https://github.com/andykaylor created https://github.com/llvm/llvm-project/pull/71437 Because the SmallVectorImpl destructor is not virtual, the destructor of derived classes will not be called if pointers to the SmallVectorImpl class are deleted directly. Making the SmallVectorImpl destructor protected will prevent this. >From 856a471b40f19f5fcf6aab7591009555b6a3841f Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Fri, 20 Oct 2023 12:33:51 -0700 Subject: [PATCH 1/2] Update SimplifyIndVar.cpp In SimplifyIndvar::replaceFloatIVWithIntegerIV() the return value of getFPMantissaWidth() was being cast as an unsigned integer and then compared with the number of bits needed to represent an integer that was cast to and from a floating-point type. This is a problem because getFPMantissaWidth() returns -1 if the type does not have a stable mantissa. Currently the only type that returns -1 is ppc_fp128, so you'd need a pretty big induction variable to cause a problem. However, this problem will be more likely to be exposed when we implement support for decimal floating-point types. Strictly speaking, what we want to know here is the size of the biggest integer that can be represented exactly. We could get that information even with an unstable mantissa width, but getFPMantissaWidth() won't do it. --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 1caf708bcc35254..45cbdd235898b28 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -664,7 +664,7 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { MaskBits = SE->getSignedRange(IV).getMinSignedBits(); else MaskBits = SE->getUnsignedRange(IV).getActiveBits(); - unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); + int DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { // Match for fptosi/fptoui of sitofp and with same type. >From 2524d41a34ccaa3932e5b4ab2876ea02744f6304 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Fri, 20 Oct 2023 16:40:26 -0700 Subject: [PATCH 2/2] Fix build warning This fixes a build warning caused by mixed signed/unsigned compare --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 45cbdd235898b28..a23ac41acaa58aa 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -659,11 +659,11 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { Instruction *IVOperand = cast(UseInst->getOperand(0)); // Get the symbolic expression for this instruction. const SCEV *IV = SE->getSCEV(IVOperand); - unsigned MaskBits; + int MaskBits; if (UseInst->getOpcode() == CastInst::SIToFP) -MaskBits = SE->getSignedRange(IV).getMinSignedBits(); +MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits(); else -MaskBits = SE->getUnsignedRange(IV).getActiveBits(); +MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits(); int DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[lld] [compiler-rt] [llvm] [openmp] [clang-tools-extra] [clang] [mlir] [ADT] Make SmallVectorImpl destructor protected (PR #71437)
https://github.com/andykaylor closed https://github.com/llvm/llvm-project/pull/71437 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[flang] [libcxx] [libc] [compiler-rt] [llvm] [clang-tools-extra] [clang] [mlir] Make SmallVectorImpl destructor protected (PR #71439)
https://github.com/andykaylor created https://github.com/llvm/llvm-project/pull/71439 Because the SmallVectorImpl destructor is not virtual, the destructor of derived classes will not be called if pointers to the SmallVectorImpl class are deleted directly. Making the SmallVectorImpl destructor protected will prevent this. >From 0a599a8259d4162d2478b5b5ae3be098bfc70f30 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Mon, 6 Nov 2023 11:43:42 -0800 Subject: [PATCH 1/2] Update SerializeToHsaco to avoid unique_ptr The use of unique_ptr can lead to leaked resources. This change replaces a use of that template in SerializeToHsaco with a similar unique_ptr>. This may not be the best replacement. It is being done to enable builds with a protected destructor for SmallVectorImpl. --- mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp index 5c288e977ad0f23..97b3b66eda00133 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp @@ -95,7 +95,7 @@ class SerializeToHsacoPass std::unique_ptr> serializeISA(const std::string &isa) override; - std::unique_ptr> assembleIsa(const std::string &isa); + std::unique_ptr> assembleIsa(const std::string &isa); std::unique_ptr> createHsaco(const SmallVectorImpl &isaBinary); @@ -318,7 +318,7 @@ SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { return ret; } -std::unique_ptr> +std::unique_ptr> SerializeToHsacoPass::assembleIsa(const std::string &isa) { auto loc = getOperation().getLoc(); @@ -381,7 +381,7 @@ SerializeToHsacoPass::assembleIsa(const std::string &isa) { parser->setTargetParser(*tap); parser->Run(false); - return std::make_unique>(std::move(result)); + return std::make_unique>(std::move(result)); } std::unique_ptr> >From af52cde83236b000f948805374dd29765bad765d Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Mon, 6 Nov 2023 11:46:17 -0800 Subject: [PATCH 2/2] Make SmallVectorImpl destructor protected Because the SmallVectorImpl destructor is not virtual, the destructor of derived classes will not be called if pointers to the SmallVectorImpl class are deleted directly. Making the SmallVectorImpl destructor protected will prevent this. --- llvm/include/llvm/ADT/SmallVector.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 53a107b1574c6a3..2e6d2dc6ce90a2c 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -601,9 +601,6 @@ class SmallVectorImpl : public SmallVectorTemplateBase { RHS.resetToSmall(); } -public: - SmallVectorImpl(const SmallVectorImpl &) = delete; - ~SmallVectorImpl() { // Subclass has already destructed this vector's elements. // If this wasn't grown from the inline copy, deallocate the old space. @@ -611,6 +608,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase { free(this->begin()); } +public: + SmallVectorImpl(const SmallVectorImpl &) = delete; + void clear() { this->destroy_range(this->begin(), this->end()); this->Size = 0; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[flang] [clang] [libcxx] [mlir] [compiler-rt] [libc] [llvm] [clang-tools-extra] Make SmallVectorImpl destructor protected (PR #71439)
@@ -95,7 +95,7 @@ class SerializeToHsacoPass std::unique_ptr> serializeISA(const std::string &isa) override; - std::unique_ptr> assembleIsa(const std::string &isa); + std::unique_ptr> assembleIsa(const std::string &isa); andykaylor wrote: I'm not familiar enough with this code to know what would be best, and honestly I didn't spend any time trying to figure that out. My goal was just to get it to compile with my SmallVectorImpl change. I hope that the maintainers of this code will make a change that will make my change unnecessary. https://github.com/llvm/llvm-project/pull/71439 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libc] [llvm] [compiler-rt] [flang] [clang-tools-extra] [libcxx] [mlir] [clang] Make SmallVectorImpl destructor protected (PR #71439)
andykaylor wrote: > We probably (pretty sure) don't want to add a virtual dtor to SmallVector - > that'd add a vtable pointer, increasing the size in ways that are probably > unacceptable given the pervasive use of the data structure. > > We should make the Impl dtor protected so it can't be polymorphically > destroyed. That's what I thought as well. It's not particularly clear since GitHub is highlighting the deleted constructor as the thing that changed, but that is what this PR does -- the "public" line is dropped below the destructor, making it protected. https://github.com/llvm/llvm-project/pull/71439 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libc] [clang-tools-extra] [libcxx] [clang] [llvm] [flang] [compiler-rt] [mlir] Make SmallVectorImpl destructor protected (PR #71439)
andykaylor wrote: I don't think this change is time-critical. I can wait a few days for the SerializeToHsaco fix, but if it's going to be more than a few days, I'd prefer to land this change with an interim fix in SerializeToHsaco. @krzysz00 what's your preference? https://github.com/llvm/llvm-project/pull/71439 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Reapply "InstCombine: Introduce SimplifyDemandedUseFPClass"" (PR #74056)
andykaylor wrote: It seems like there are two possible interpretations of -ffinite-math-only: 1. There will be no NaN or inf values seen anywhere in the program (other than loads and stores?). 2. No floating-point operation (other than compares?) will have NaN or inf inputs or results. The text of the option in both the clang and gcc documentation says "Allow floating-point optimizations that assume arguments and results are not NaNs or +-inf." That sounds a lot like (1) above, though it could be more clearly stated. "Arguments" sounds like it would refer to arguments in a function call, which in C++ would include operators. Whether or not the operands of an arithmetic operator are "arguments" might be a bit of a semantic argument, but I think it's clear that the option definitely intends to include them. The term "results" is a bit more ambiguous. Does it intend to include return values of any called function? Or does it just mean the results of floating-point operations and math library calls? For quite some time, clang has been marking all functions which return a floating-point value as nnan and ninf when -ffinite-math-only is used. If you took the second approach, function calls which return a floating-point value wouldn't be marked with the fast-math flags, only floating-point operations. This would be useful for cases like the povray, which uses INF as a sort of sentinel value. Near as I can tell, its calculations don't produce infinities (at least not for inputs I've seen), but it does some comparisons with infinity to see if it needs to process some data. If we had a mode that honored NaN and inf for load, store, and compares, I think it could be useful. But in a very real sense, this ship has sailed for the -ffinite-math-only option. We have established behavior in the front-end that marks functions which return floating-point values with constructs indicating that they won't return NaN or inf. There are certainly existing optimizations that act on that information (perhaps without even knowing it if they use value tracking). This PR just extends that sort of optimization behavior based on semantics that are already in place. https://github.com/llvm/llvm-project/pull/74056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Reapply "InstCombine: Introduce SimplifyDemandedUseFPClass"" (PR #74056)
andykaylor wrote: For those who haven't already seen it, there was a related discussion here: https://discourse.llvm.org/t/should-isnan-be-optimized-out-in-fast-math-mode/5 I think that discussion could be fairly summarized by saying that no consensus was reached, and many people wished they had never entered the discussion. Reviewing this discussion led me to do some experiments and I find that in C++ mode, using -ffinite-math-only cannot be mixed well with "#pragma float_control(precise, on)" or various methods for disabling optimizations altogether, because templates from the header files are instantiated outside the pragmas. That may be defensible behavior, but I suspect it would come as a surprise to people who think they can control fast-math behavior locally. https://github.com/llvm/llvm-project/pull/74056 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
https://github.com/andykaylor edited https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
@@ -6771,6 +6771,9 @@ def warn_pointer_sub_null_ptr : Warning< def warn_floatingpoint_eq : Warning< "comparing floating point with == or != is unsafe">, InGroup>, DefaultIgnore; +def warn_fast_floatingpoint_eq : Warning< + "explicit comparison with %0 in fast floating point mode">, andykaylor wrote: Maybe this message should either be more specific or not mention the floating point mode at all. Since this will trigger with just "-fno-honor-nans" or "-fno-honor-infinities" the message is somewhat off target. I guess either of these is a fast floating point mode, but the message makes me think of full fast-math mode, which isn't necessarily the case. ```suggestion "explicit comparison with %0 when the program is assumed to not use or produce %0">, ``` https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
https://github.com/andykaylor commented: Expanding the scope a bit, it would also be useful to have warnings for constant NaN or Inf values passed as arguments or used in binary operations. https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
@@ -2245,6 +2246,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_islessequal: case Builtin::BI__builtin_islessgreater: case Builtin::BI__builtin_isunordered: +if (BuiltinID == Builtin::BI__builtin_isunordered) { andykaylor wrote: Should this check be in SemaBuiltinUnorderedCompare()? https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
@@ -13846,6 +13880,37 @@ Sema::CheckReturnValExpr(Expr *RetValExp, QualType lhsType, CheckPPCMMAType(RetValExp->getType(), ReturnLoc); } +/// Diagnose comparison to NAN or INFINITY in fast math modes. +/// The comparison to NaN or INFINITY is always false in +/// fast modes: float evaluation will not result in inf or nan. +void Sema::CheckInfNaNFloatComparison(SourceLocation Loc, Expr *LHS, Expr *RHS, andykaylor wrote: This line is 81 characters long. https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
@@ -2267,6 +2273,16 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, case Builtin::BI__builtin_signbit: case Builtin::BI__builtin_signbitf: case Builtin::BI__builtin_signbitl: +FPO = TheCall->getFPFeaturesInEffect(getLangOpts()); andykaylor wrote: Should these checks be in SemaBuiltinFPClassification()? https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
@@ -13044,9 +13044,12 @@ static QualType checkArithmeticOrEnumeralCompare(Sema &S, ExprResult &LHS, if (Type->isAnyComplexType() && BinaryOperator::isRelationalOp(Opc)) return S.InvalidOperands(Loc, LHS, RHS); - // Check for comparisons of floating point operands using != and ==. - if (Type->hasFloatingRepresentation()) + if (Type->hasFloatingRepresentation()) { +// Check for comparisons to NAN or INFINITY in fast math mode. +S.CheckInfNaNFloatComparison(Loc, LHS.get(), RHS.get(), Opc); andykaylor wrote: Should this be done in CheckFloatComparison()? https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
@@ -13846,6 +13880,37 @@ Sema::CheckReturnValExpr(Expr *RetValExp, QualType lhsType, CheckPPCMMAType(RetValExp->getType(), ReturnLoc); } +/// Diagnose comparison to NAN or INFINITY in fast math modes. +/// The comparison to NaN or INFINITY is always false in +/// fast modes: float evaluation will not result in inf or nan. +void Sema::CheckInfNaNFloatComparison(SourceLocation Loc, Expr *LHS, Expr *RHS, + BinaryOperatorKind Opcode) { + Expr *LeftExprSansParen = LHS->IgnoreParenImpCasts(); + Expr *RightExprSansParen = RHS->IgnoreParenImpCasts(); + + FPOptions FPO = LHS->getFPFeaturesInEffect(getLangOpts()); + bool NoHonorNaNs = FPO.getNoHonorNaNs(); + bool NoHonorInfs = FPO.getNoHonorInfs(); + llvm::APFloat Value(0.0); + bool IsConstant; + IsConstant = !LHS->isValueDependent() && andykaylor wrote: You could use a lambda here to avoid code duplication for LHS and RHS checking. https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [clang-tools-extra] DAG: Implement promotion for strict_fp_round (PR #74332)
@@ -2621,6 +2642,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round); } +// Explicit operation to reduce precision. Reduce the value to half precision +// and promote it back to the legal type. +SDValue DAGTypeLegalizer::PromoteFloatRes_STRICT_FP_ROUND(SDNode *N) { + SDLoc DL(N); + + SDValue Chain = N->getOperand(0); + SDValue Op = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT OpVT = Op->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + // Round promoted float to desired precision + SDValue Round = DAG.getNode(GetPromotionOpcodeStrict(OpVT, VT), DL, + DAG.getVTList(IVT, MVT::Other), Chain, Op); + // Promote it back to the legal output type + SDValue Res = + DAG.getNode(GetPromotionOpcodeStrict(VT, NVT), DL, andykaylor wrote: GetPromotionOpcode() is used in a lot more places. Are those not needed for strict or are they coming in later patches? https://github.com/llvm/llvm-project/pull/74332 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [clang] [llvm] DAG: Implement promotion for strict_fp_round (PR #74332)
https://github.com/andykaylor edited https://github.com/llvm/llvm-project/pull/74332 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [clang] [llvm] DAG: Implement promotion for strict_fp_round (PR #74332)
https://github.com/andykaylor approved this pull request. This looks good to me. I've added Phoebe Wang as a reviewer because she knows the SelectionDAG code better than I do and might want to take a look. I'm assuming you have reviewed the AMDGPU-specific parts of this with someone who knows that architecture. https://github.com/llvm/llvm-project/pull/74332 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when comparing to INF or NAN in fast math mode. (PR #76873)
@@ -13846,6 +13880,37 @@ Sema::CheckReturnValExpr(Expr *RetValExp, QualType lhsType, CheckPPCMMAType(RetValExp->getType(), ReturnLoc); } +/// Diagnose comparison to NAN or INFINITY in fast math modes. +/// The comparison to NaN or INFINITY is always false in +/// fast modes: float evaluation will not result in inf or nan. +void Sema::CheckInfNaNFloatComparison(SourceLocation Loc, Expr *LHS, Expr *RHS, andykaylor wrote: Oh, sorry. I must have copied the "+ " when I checked it. It just looked longer than the surrounding text. https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -2807,9 +2791,17 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, FPExceptionBehavior = ""; // If fast-math is set then set the fp-contract mode to fast. FPContract = "fast"; -// ffast-math enables limited range rules for complex multiplication and +// ffast-math enables basic range rules for complex multiplication and // division. -Range = LangOptions::ComplexRangeKind::CX_Limited; +// Warn if user expects to perform full implementation of complex +// multiplication or division in the presence of nnan or ninf flags. +if (Range == LangOptions::ComplexRangeKind::CX_Full) andykaylor wrote: Shouldn't we also warn if this is overriding promoted or improved? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -51,11 +51,12 @@ class ComplexExprEmitter CGBuilderTy &Builder; bool IgnoreReal; bool IgnoreImag; -public: - ComplexExprEmitter(CodeGenFunction &cgf, bool ir=false, bool ii=false) -: CGF(cgf), Builder(CGF.Builder), IgnoreReal(ir), IgnoreImag(ii) { - } + LangOptions::ComplexRangeKind FPHasBeenPromoted; andykaylor wrote: This is a somewhat misleading variable name. The name implies that it will be a Boolean value. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -794,8 +834,10 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { ResR = Builder.CreateFSub(AC, BD, "mul_r"); ResI = Builder.CreateFAdd(AD, BC, "mul_i"); - if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran) + if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted || + CGF.getLangOpts().NoHonorInfs || CGF.getLangOpts().NoHonorNaNs) andykaylor wrote: I'm not sure NoHonorInfs or NoHonorNaNs should be checked here. Given that we have explicit control for complex arithmetic behavior, maybe that should take precedence. That seems to be the way gcc handles it: https://godbolt.org/z/1oGo7jznz https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -2807,9 +2791,17 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, FPExceptionBehavior = ""; // If fast-math is set then set the fp-contract mode to fast. FPContract = "fast"; -// ffast-math enables limited range rules for complex multiplication and +// ffast-math enables basic range rules for complex multiplication and andykaylor wrote: We may have a naming problem here. The term "limited range rules" has a direct definition in the C standard, whereas "basic range rules" is essentially our construct to get the same behavior. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -986,13 +1028,17 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { llvm::Value *OrigLHSi = LHSi; if (!LHSi) LHSi = llvm::Constant::getNullValue(RHSi->getType()); -if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran) +QualType ComplexElementTy = Op.Ty->castAs()->getElementType(); +if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || +(Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted && + FPHasBeenPromoted == LangOptions::CX_Improved)) andykaylor wrote: It seems that this value is only ever set to CX_Improved or CX_None. Why not use a Boolean? As it is, I'm left with questions about what would happen if the value were CX_Basic or CX_None (expecting that CX_Promoted would be used if we did promote). https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -2824,26 +2816,89 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, switch (optID) { default: break; -case options::OPT_fcx_limited_range: { - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Limited); - Range = LangOptions::ComplexRangeKind::CX_Limited; +case options::OPT_fcx_limited_range: + if (GccRangeComplexOption.empty()) { +if (Range != LangOptions::ComplexRangeKind::CX_Basic) + EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fcx-limited-range"); + } else { +if (GccRangeComplexOption != "-fno-cx-limited-range") + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-limited-range"); + } + GccRangeComplexOption = "-fcx-limited-range"; + Range = LangOptions::ComplexRangeKind::CX_Basic; break; -} case options::OPT_fno_cx_limited_range: - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, - "-fno-cx-limited-range"); + if (GccRangeComplexOption.empty()) { +EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fno-cx-limited-range"); + } else { +if (GccRangeComplexOption.compare("-fcx-limited-range") != 0 && +GccRangeComplexOption.compare("-fno-cx-fortran-rules") != 0) + EmitComplexRangeDiag(D, GccRangeComplexOption, + "-fno-cx-limited-range"); + } + GccRangeComplexOption = "-fno-cx-limited-range"; Range = LangOptions::ComplexRangeKind::CX_Full; break; -case options::OPT_fcx_fortran_rules: { - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Fortran); - Range = LangOptions::ComplexRangeKind::CX_Fortran; +case options::OPT_fcx_fortran_rules: + if (GccRangeComplexOption.empty()) +EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fcx-fortran-rules"); + else +EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-fortran-rules"); + GccRangeComplexOption = "-fcx-fortran-rules"; + Range = LangOptions::ComplexRangeKind::CX_Improved; break; -} case options::OPT_fno_cx_fortran_rules: - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, - "-fno-cx-fortran-rules"); + if (GccRangeComplexOption.empty()) { +EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fno-cx-fortran-rules"); + } else { +if (GccRangeComplexOption != "-fno-cx-limited-range") + EmitComplexRangeDiag(D, GccRangeComplexOption, + "-fno-cx-fortran-rules"); + } + GccRangeComplexOption = "-fno-cx-fortran-rules"; Range = LangOptions::ComplexRangeKind::CX_Full; break; +case options::OPT_fcomplex_arithmetic_EQ: { + LangOptions::ComplexRangeKind RangeVal; + StringRef Val = A->getValue(); + if (Val.equals("full")) +RangeVal = LangOptions::ComplexRangeKind::CX_Full; + else if (Val.equals("improved")) +RangeVal = LangOptions::ComplexRangeKind::CX_Improved; + else if (Val.equals("promoted")) +RangeVal = LangOptions::ComplexRangeKind::CX_Promoted; + else if (Val.equals("basic")) +RangeVal = LangOptions::ComplexRangeKind::CX_Basic; + else +D.Diag(diag::err_drv_unsupported_option_argument) +<< A->getSpelling() << LangOptions::ComplexRangeKind::CX_None; + if (GccRangeComplexOption.empty() && !SeenUnsafeMathModeOption) { andykaylor wrote: I'm not clear what this is doing. Is this warning for things like "-fcomplex-arithmetic=full -fcomplex-arithmetic=basic"? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -986,13 +1028,17 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { llvm::Value *OrigLHSi = LHSi; if (!LHSi) LHSi = llvm::Constant::getNullValue(RHSi->getType()); -if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran) +QualType ComplexElementTy = Op.Ty->castAs()->getElementType(); +if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || +(Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted && + FPHasBeenPromoted == LangOptions::CX_Improved)) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); -else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited) +else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) return EmitAlgebraicDiv(LHSr, LHSi, RHSr, RHSi); else if (!CGF.getLangOpts().FastMath || andykaylor wrote: I think we should remove the fast-math check here. The driver handling of fast-math sets the complex arithmetic option. This check has always been problematic because disabling just one component of fast-math (such as enabling signed zeros) causes this to be false. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -794,8 +834,10 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { ResR = Builder.CreateFSub(AC, BD, "mul_r"); ResI = Builder.CreateFAdd(AD, BC, "mul_i"); - if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Limited || - Op.FPFeatures.getComplexRange() == LangOptions::CX_Fortran) + if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || + Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted || + CGF.getLangOpts().NoHonorInfs || CGF.getLangOpts().NoHonorNaNs) andykaylor wrote: Oh, sorry, for some reason I thought this was invoking the special handling for division. I think the for honor NaNs and infinities still isn't necessary because we'll emit the comparison with the fast-math flags set and the backend will optimize it away, which is what happens today: https://godbolt.org/z/e8EnKodj6 https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -2824,26 +2816,89 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, switch (optID) { default: break; -case options::OPT_fcx_limited_range: { - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Limited); - Range = LangOptions::ComplexRangeKind::CX_Limited; +case options::OPT_fcx_limited_range: + if (GccRangeComplexOption.empty()) { +if (Range != LangOptions::ComplexRangeKind::CX_Basic) + EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fcx-limited-range"); + } else { +if (GccRangeComplexOption != "-fno-cx-limited-range") + EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-limited-range"); + } + GccRangeComplexOption = "-fcx-limited-range"; + Range = LangOptions::ComplexRangeKind::CX_Basic; break; -} case options::OPT_fno_cx_limited_range: - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, - "-fno-cx-limited-range"); + if (GccRangeComplexOption.empty()) { +EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fno-cx-limited-range"); + } else { +if (GccRangeComplexOption.compare("-fcx-limited-range") != 0 && +GccRangeComplexOption.compare("-fno-cx-fortran-rules") != 0) + EmitComplexRangeDiag(D, GccRangeComplexOption, + "-fno-cx-limited-range"); + } + GccRangeComplexOption = "-fno-cx-limited-range"; Range = LangOptions::ComplexRangeKind::CX_Full; break; -case options::OPT_fcx_fortran_rules: { - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Fortran); - Range = LangOptions::ComplexRangeKind::CX_Fortran; +case options::OPT_fcx_fortran_rules: + if (GccRangeComplexOption.empty()) +EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fcx-fortran-rules"); + else +EmitComplexRangeDiag(D, GccRangeComplexOption, "-fcx-fortran-rules"); + GccRangeComplexOption = "-fcx-fortran-rules"; + Range = LangOptions::ComplexRangeKind::CX_Improved; break; -} case options::OPT_fno_cx_fortran_rules: - EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Full, - "-fno-cx-fortran-rules"); + if (GccRangeComplexOption.empty()) { +EmitComplexRangeDiag(D, RenderComplexRangeOption(Range), + "-fno-cx-fortran-rules"); + } else { +if (GccRangeComplexOption != "-fno-cx-limited-range") + EmitComplexRangeDiag(D, GccRangeComplexOption, + "-fno-cx-fortran-rules"); + } + GccRangeComplexOption = "-fno-cx-fortran-rules"; Range = LangOptions::ComplexRangeKind::CX_Full; break; +case options::OPT_fcomplex_arithmetic_EQ: { + LangOptions::ComplexRangeKind RangeVal; + StringRef Val = A->getValue(); + if (Val.equals("full")) +RangeVal = LangOptions::ComplexRangeKind::CX_Full; + else if (Val.equals("improved")) +RangeVal = LangOptions::ComplexRangeKind::CX_Improved; + else if (Val.equals("promoted")) +RangeVal = LangOptions::ComplexRangeKind::CX_Promoted; + else if (Val.equals("basic")) +RangeVal = LangOptions::ComplexRangeKind::CX_Basic; + else +D.Diag(diag::err_drv_unsupported_option_argument) +<< A->getSpelling() << LangOptions::ComplexRangeKind::CX_None; + if (GccRangeComplexOption.empty() && !SeenUnsafeMathModeOption) { andykaylor wrote: I thought @AaronBallman had suggested that we shouldn't warn in that case since the same base option is being used and the user is likely to have intended it. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
https://github.com/andykaylor commented: Except for lacking a couple of tests, I think this looks good. @jcranmer-intel do you agree? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
https://github.com/andykaylor edited https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -287,9 +288,47 @@ class ComplexExprEmitter ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - QualType getPromotionType(QualType Ty) { + QualType GetHigherPrecisionFPType(QualType ElementType) { +const auto *CurrentBT = dyn_cast(ElementType); +switch (CurrentBT->getKind()) { +case BuiltinType::Kind::Float16: + return CGF.getContext().FloatTy; +case BuiltinType::Kind::Float: +case BuiltinType::Kind::BFloat16: + return CGF.getContext().DoubleTy; +case BuiltinType::Kind::Double: + return CGF.getContext().LongDoubleTy; +default: + return ElementType; +} + } + + QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType, + bool IsDivOpCode) { +QualType HigherElementType = GetHigherPrecisionFPType(ElementType); +const llvm::fltSemantics &ElementTypeSemantics = +CGF.getContext().getFloatTypeSemantics(ElementType); +const llvm::fltSemantics &HigherElementTypeSemantics = +CGF.getContext().getFloatTypeSemantics(HigherElementType); +if (llvm::APFloat::semanticsMaxExponent(ElementTypeSemantics) * 2 + 1 <= +llvm::APFloat::semanticsMaxExponent(HigherElementTypeSemantics)) { + return CGF.getContext().getComplexType(HigherElementType); +} else { + FPHasBeenPromoted = LangOptions::ComplexRangeKind::CX_Improved; andykaylor wrote: Can you add a test for this case? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
andykaylor wrote: Can you add test cases for targets that will have problems with promotion. Something like this? ``` // RUN: %clang_cc1 -triple x86_64-windows-pc \ // RUN: -complex-range=promoted -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=X86WINPRMTD // RUN: %clang_cc1 -triple=avr-unknown-unknown -mdouble=32 \ // RUN: -complex-range=promoted -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=AVRFP32 // RUN: %clang_cc1 -triple=avr-unknown-unknown -mdouble=64 \ // RUN: -complex-range=promoted -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=AVRFP64 ``` https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
https://github.com/andykaylor approved this pull request. This looks good to me. Thanks for the updates! https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > > I may have mentioned a few times that I don't like function attributes > > controlling fast-math behaviors. > > It doesn't control it, it's informative. You just get undefined behavior if > you end up calling mismatched mode functions. What I meant to say was that I don't like function attributes controlling compiler behavior related to fast-math options. I'd like to have the abilitity to switch things on and off at the instruction level. That's not really relevant to this PR though. > It does control it in the AMDGPU entry point function case, since we are more > or less directly programming the initial register state on kernel launch in > the compiler Do you only set the register for kernel entries? Is the attribute ignored for other functions? This is actually similar to how the Intel C/C++ compiler behaves when compiling hosted code. We insert code into the main() function to set MXCSR if that function was compiled with FTZ/DAZ enabled. I think this makes more sense than linking with crtfastmath.o, and it's a lot easier to explain the behavior to users. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > > > So, alternatively...we could just go with the simplest solution, and use > > > "ieee" as the default even under -ffast-math. > > +1. **There hasn't been a performance reason to use FTZ/DAZ since ~2011.** > Maybe there's still a power benefit? But in that case you could still > explicitly request the flush separate from -ffast-math I don't think this is correct. I know there have been improvements, but there are still cases where setting ftz can have a significant performance impact. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > Just to be clear: I'm not proposing entirely eliminating the "link against > crtfastmath.o" behavior, when linking a binary with `-ffast-math` (though, > separately from _this_ review, that may be worth considering!). I only meant > we should stop attempting to infer anything about `-fdenormal-fp-math` due to > using `-ffast-math`. (as per the other paragraph in my last comment). Are youe suggesting that we should continue linking against crtfastmath.o when it is available and fast-math is enabled but we should set the "denormal-fp-math" attributes to "ieee" or "dynamic"? I want to make sure I'm understanding you correctly. Personally, I don't like linking with crtfastmath.o to get this behavior. I would prefer to insert code into whatever function(s), if any, we identify as a relevant entry point. That would require having the attribute set, at least for the entry function(s). Linking with crtfastmath.o depends on finding it, which I think makes it dependent on the GNU toolchain, and the static initializer is less visible to users debugging their program. For x86-based CPU targets, we really shouldn't be making assumptions about the FTZ/DAZ state for any function where we aren't setting it, but if it's useful for other targets, we shouldn't take that away. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > I'm suggesting that we modify Clang so that `-ffast-math` _doesn't affect_ > `denormal-fp-math`, by (as I mentioned before) removing the overload > [Linux::getDefaultDenormalModeForType](https://github.com/llvm/llvm-project/blob/d4c5acac99e83ffa12d2d720c9e502a181cbd7ea/clang/lib/Driver/ToolChains/Linux.cpp#L838). > This makes Clang for Linux X86 and X86-64 work the same as every other > OS/CPU combo. I don't see why the current denormal-fp-math setting behavior is a blocking issue for this change. Yes, compiling a shared library with ffast-math would lead to the "denormal-fp-math" attribute being set to a potentially misleading value, but that's already true for any file that is compiled with -ffast-math if the file containing the entry function isn't compiled with -ffast-math. I'd see fixing that as a separate issue. I'd like to see this change land, but with the "-mdaz-ftz" option removed (because I don't think it's useful). That would fix the critical problem of fast-math infecting shared libraries with the ftz setting, and we could straighten out the other problems, which are relatively minor, afterwards. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Refactor fast-math handling for clang driver (PR #81173)
https://github.com/andykaylor created https://github.com/llvm/llvm-project/pull/81173 This refactors the fast-math handling in the clang driver, moving the settings into a lambda that is shared by the -ffp-model=fast and -ffast-math code. Previously the -ffp-model=fast handler changed the local option variable and fell through to the -ffast-math handler. This refactoring is intended to prepare the way for decoupling the -ffp-model=fast settings from the -ffast-math settings and possibly introduce a less aggressive fp-model. >From 21299e729ad71cdb1c2a2737508ffc2ee6b21d0f Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Thu, 8 Feb 2024 10:44:22 -0800 Subject: [PATCH] [NFC] Refactor fast-math handling for clang driver This refactors the fast-math handling in the clang driver, moving the settings into a lambda that is shared by the -ffp-model=fast and -ffast-math code. Previously the -ffp-model=fast handler changed the local option variable and fell through to the -ffast-math handler. This refactoring is intended to prepare the way for decoupling the -ffp-model=fast settings from the -ffast-math settings and possibly introduce a less aggressive fp-model. --- clang/lib/Driver/ToolChains/Clang.cpp | 40 +++ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 942ebbc4106078..4459d86e77d5d9 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2778,6 +2778,26 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, LangOptions::ComplexRangeKind Range = LangOptions::ComplexRangeKind::CX_None; std::string ComplexRangeStr = ""; + // Lambda to set fast-math options. This is also used by -ffp-model=fast + auto applyFastMath = [&]() { +HonorINFs = false; +HonorNaNs = false; +MathErrno = false; +AssociativeMath = true; +ReciprocalMath = true; +ApproxFunc = true; +SignedZeros = false; +TrappingMath = false; +RoundingFPMath = false; +FPExceptionBehavior = ""; +// If fast-math is set then set the fp-contract mode to fast. +FPContract = "fast"; +// ffast-math enables limited range rules for complex multiplication and +// division. +Range = LangOptions::ComplexRangeKind::CX_Limited; +SeenUnsafeMathModeOption = true; + }; + if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); CmdArgs.push_back(A->getValue()); @@ -2842,9 +2862,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); if (Val.equals("fast")) { -optID = options::OPT_ffast_math; FPModel = Val; -FPContract = "fast"; +applyFastMath(); } else if (Val.equals("precise")) { optID = options::OPT_ffp_contract; FPModel = Val; @@ -3061,22 +3080,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, continue; [[fallthrough]]; case options::OPT_ffast_math: { - HonorINFs = false; - HonorNaNs = false; - MathErrno = false; - AssociativeMath = true; - ReciprocalMath = true; - ApproxFunc = true; - SignedZeros = false; - TrappingMath = false; - RoundingFPMath = false; - FPExceptionBehavior = ""; - // If fast-math is set then set the fp-contract mode to fast. - FPContract = "fast"; - SeenUnsafeMathModeOption = true; - // ffast-math enables fortran rules for complex multiplication and - // division. - Range = LangOptions::ComplexRangeKind::CX_Limited; + applyFastMath(); break; } case options::OPT_fno_fast_math: ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Refactor fast-math handling for clang driver (PR #81173)
@@ -3061,22 +3080,7 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, continue; [[fallthrough]]; andykaylor wrote: This is falling through from OPT_Ofast to OPT_ffast_math. I think we still want that to happen. It's not obvious from the diff, but the "fp-model" handler and the "ffast-math" handler are in different switch statements. https://github.com/llvm/llvm-project/pull/81173 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Refactor fast-math handling for clang driver (PR #81173)
@@ -2842,9 +2862,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); if (Val.equals("fast")) { -optID = options::OPT_ffast_math; FPModel = Val; -FPContract = "fast"; +applyFastMath(); andykaylor wrote: I'm pretty sure it is NFC. We have tests that verify this ([clang/test/Driver/fp-model.c](https://github.com/llvm/llvm-project/blob/main/clang/test/Driver/fp-model.c) and [clang/test/Driver/fast-math.c](https://github.com/llvm/llvm-project/blob/main/clang/test/Driver/fast-math.c)). I'm only changing where the local variables are set. The FPContract value that was being set here was also being set in the OPT_ffast_math handler. Now both places call the lambda for everything they set. https://github.com/llvm/llvm-project/pull/81173 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Refactor fast-math handling for clang driver (PR #81173)
@@ -2842,9 +2862,8 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, << Args.MakeArgString("-ffp-model=" + FPModel) << Args.MakeArgString("-ffp-model=" + Val); if (Val.equals("fast")) { -optID = options::OPT_ffast_math; FPModel = Val; -FPContract = "fast"; +applyFastMath(); andykaylor wrote: I just re-read you comment, and I think I see the confusion now. The previous code was not easy to follow. We were changing the value of optID here, so when we finished with this switch statement execution would continue on to the switch statement below where the "whole pile of flags" was being set by the OPT_ffast_math handler. Now I'm not changing the value of optID here and instead calling the lambda to set the pile of flags. In a future revision I'd like to add a parameter to the lambda to indicate that I want slightly less aggressive fast math settings. I started out with a change that chained all the OPT_ffast_math, OPT_fno_fast_math, OPT_funsafe_math_optimizations, and OPT_fno_unsafe_math_optimizations into a pair of nested lambdas with a parameter for positive and negative versions, but that got way too convoluted to handle all the variations needed to make it NFC. I think that pointed to some things we're doing wrong, but I'll address those separately to keep the history clean. This seemed like a manageable place to start. https://github.com/llvm/llvm-project/pull/81173 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Refactor fast-math handling for clang driver (PR #81173)
https://github.com/andykaylor edited https://github.com/llvm/llvm-project/pull/81173 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > > I'd like to see this change land, but with the "-mdaz-ftz" option removed > > (because I don't think it's useful). That would fix the critical problem of > > fast-math infecting shared libraries with the ftz setting, and we could > > straighten out the other problems, which are relatively minor, afterwards. > > There is, without this change, no way to control whether or not > `crtfastmath.o` is linked independent of all of the other fast-math options. > The `-mdaz-ftz` option would at least add a flag to explicitly control the > parameter (for the people who care), and we can then have discussions about > different ways to effect setting DAZ/FTZ bits or what options imply > `-mdaz-ftz` in future PRs. That alone makes it a worthy addition IMHO; the > compatibility with gcc is another nice feature. You can always link crtfastmath.o directly, of course. Ultimately, I don't think the compiler should ever be adding the crtfastmath.o file. I would prefer to insert code directly into the entry function as @arsenm indicated the AMDGPU backend does for kernels. That would then be controlled by the -fdenormal-fp-math option or something more explicitly linked to the entry function. I don't want to add -mdaz-ftz because once we do we're kind of stuck with it. If you don't add it here, we'd continue the current behavior of linking with crtfastmath.o normally but we'd stop infecting shared libraries with it. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > > I don't see why the current denormal-fp-math setting behavior is a blocking > > issue for this change > > Because this PR as-is causes us to start parsing the "-shared" flag for > compilation actions in order to determine which denormal-fp-math setting to > use. Users should not pass that to compile actions, and if they do, it should > result in a `-Wunused-command-line-argument` diagnostic, NOT a behavior > change. We're already parsing -nostartfiles and -nostdlib to determine the default setting. I don't see how looking at -shared makes it any worse. I would agree that this whole model is broken. If I use -ffast-math to create a bunch of object files (including the one containing main()) and then I link without that option, crtfastmath.o doesn't get linked, even though we used "denorm-fp-math"="preserveSign" on every function. I think we're in agreement that this needs to be fixed for x86-based targets. We just need to agree on how to do it. I think https://github.com/llvm/llvm-project/issues/57589 is a pretty egregious problem, and I'd like to see it fixed without delay, but given that it requires setting -ffast-math on your link line maybe it can wait for a proper fix. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: I just created https://github.com/llvm/llvm-project/issues/81204 to describe the linking versus compiling problem. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] InstCombine: Enable SimplifyDemandedUseFPClass and remove flag (PR #81108)
https://github.com/andykaylor approved this pull request. lgtm https://github.com/llvm/llvm-project/pull/81108 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > I don't think it's unreasonable to switch the logic of `-mdaz-ftz` from > linking a file with a global initializer that sets the flags to making it > emit the entry-point call to such a function instead, it still largely > follows the same logic to me. And having a command line flag makes it easier > for users to access rather than manually linking in a file located > who-knows-where in the toolchain (although I suspect anyone who cares hard > enough would rather just write the calls to set FTZ/DAZ than track it down > from the toolchain). I suppose there's also some portability value in supporting this gcc option. I guess I'd be OK with it. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
@@ -1569,6 +1569,40 @@ // RUN:--gcc-toolchain="" \ // RUN:--sysroot=%S/Inputs/basic_linux_tree 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NOCRTFASTMATH %s +// Don't link crtfastmath.o with -shared +// RUN: %clang --target=x86_64-unknown-linux -no-pie -### %s -ffast-math -shared \ +// RUN:--gcc-toolchain="" \ andykaylor wrote: Do we have trouble with this test on systems where crtfastmath.o is not provided by the gcc toolchain? I don't think we provide our own version of this, do we? https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
@@ -117,6 +117,11 @@ C23 Feature Support Non-comprehensive list of changes in this release - +* Code compiled with ``-shared`` and ``-ffast-math`` will no longer enable + flush-to-zero floating-point mode by default. This decision can be overridden andykaylor wrote: You may want to revise this to clarify that this only refers to the use of -shared and -ffast-math on the command-line used to link the program. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
@@ -117,6 +117,11 @@ C23 Feature Support Non-comprehensive list of changes in this release - +* Code compiled with ``-shared`` and ``-ffast-math`` will no longer enable andykaylor wrote: ```suggestion * Code compiled with ``-shared`` and either ``-ffast-math`` or ``-funsafe-math-optimizations`` will no longer enable ``` https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Refactor fast-math handling for clang driver (PR #81173)
https://github.com/andykaylor closed https://github.com/llvm/llvm-project/pull/81173 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,25 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + + * ``limited`` Implementation of complex division and multiplication using + algebraic formulas at source precision. Overflow and non-finites values + are not handled. + * ``smith`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + Overflow is handled. andykaylor wrote: There are some cases at the extreme end of the value range where overflow can still occur. I think this should say that it offers improved handling for overflow in intermediate calculations, but mention that overflow is still possible. We should also mention that this does not handle non-finite values in all cases. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -283,9 +283,23 @@ class ComplexExprEmitter ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - QualType getPromotionType(QualType Ty) { + QualType getPromotionType(QualType Ty, bool IsDivOpCode = false) { if (auto *CT = Ty->getAs()) { QualType ElementType = CT->getElementType(); + if (CGF.getLangOpts().getComplexRange() == + LangOptions::ComplexRangeKind::CX_Extend && + IsDivOpCode) { +if (ElementType->isFloatingType()) { + if (const auto *BT = dyn_cast(ElementType)) +switch (BT->getKind()) { +case BuiltinType::Kind::Float: + return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); +default: andykaylor wrote: This doesn't look general enough. I'm not sure how to implement this. We need some handling for fp16 and fp128. I guess fp16 would promote to float, but fp128 will require using runtime library calls. For Windows, double and long double are the same by default. Does the front end have a way to specifically recognize x86_fp80 and ppc_fp128? Will something in Sema prevent us from getting here with bf16? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,25 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + + * ``limited`` Implementation of complex division and multiplication using + algebraic formulas at source precision. Overflow and non-finites values + are not handled. + * ``smith`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + Overflow is handled. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled. + * ``extend`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. andykaylor wrote: It is probably worth adding an example to illustrate both the overflow and non-finite handling cases so users can properly understand the risk of ignoring those cases. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,25 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + + * ``limited`` Implementation of complex division and multiplication using + algebraic formulas at source precision. Overflow and non-finites values andykaylor wrote: I don't think it's accurate to say that "non-finite values are not handled." I would say instead that infinite values are not handled correctly in all cases. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,25 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + + * ``limited`` Implementation of complex division and multiplication using + algebraic formulas at source precision. Overflow and non-finites values + are not handled. + * ``smith`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + Overflow is handled. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled. + * ``extend`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. andykaylor wrote: Again, explicitly mention that non-finite values are not handled in all cases. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,25 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + + * ``limited`` Implementation of complex division and multiplication using + algebraic formulas at source precision. Overflow and non-finites values + are not handled. + * ``smith`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + Overflow is handled. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled. + * ``extend`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. andykaylor wrote: You should specify that "full" is the default. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,25 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``limited``, ``smith``, ``full`` and ``extend``. + + * ``limited`` Implementation of complex division and multiplication using + algebraic formulas at source precision. Overflow and non-finites values + are not handled. + * ``smith`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + Overflow is handled. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled. andykaylor wrote: ```suggestion range of the inputs). Overflow and non-finite values are handled by the library implementation. ``` I want to emphasize the location of the handling because I found a case where the current LLVM library function overflows with complex division. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,33 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + This value offers improved handling for overflow in intermediate calculations, + but overflow may occur. NaN and infinite and values are not handled in some + cases. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled by the + library implementation. + * ``promoted`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. Non-finite values are handled in some + cases. If the target hardware does not have native support for a higher precision + data type, an implementation for the complex operation will be used to provide + improved guards against intermediate overflow, but overflow and underflow may + still occur in some cases. NaN and infinite and values are not handled. + This is the default value. andykaylor wrote: I see that gcc uses a runtime library call with both c89 and c99. That seems like a reasonable way to go. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -283,9 +283,46 @@ class ComplexExprEmitter ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - QualType getPromotionType(QualType Ty) { + QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType, + bool IsDivOpCode) { +const TargetInfo &TI = CGF.getContext().getTargetInfo(); +const LangOptions Opts = CGF.getLangOpts(); +if (const auto *BT = dyn_cast(ElementType)) { + switch (BT->getKind()) { + case BuiltinType::Kind::Float16: { +if (TI.hasFloat16Type() && !TI.hasLegalHalfType()) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); +break; + } + case BuiltinType::Kind::BFloat16: { +if (TI.hasBFloat16Type() && !TI.hasFullBFloat16Type()) + return CGF.getContext().getComplexType(CGF.getContext().FloatTy); +break; + } + case BuiltinType::Kind::Float: +return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); +break; + case BuiltinType::Kind::Double: { +if (TI.hasLongDoubleType()) + return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy); +return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); andykaylor wrote: Yes, we need to check the sizes. If we "promote" from a 64-bit double to a 64-bit long double, we'll probably end up with something that gets optimized directly to the cx-limited-range ("basic") implementation. This can also be an issue for float. For example, AVR targets use a 32-bit type for both float and double. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,50 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm + at source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + This value offers improved handling for overflow in intermediate + calculations, but overflow may occur. NaN and infinite values are not + handled in some cases. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled by the + library implementation. For the case of multiplication overflow will occur in + accordance with normal floating-point rules. This is the default value. + * ``promoted`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. Non-finite values are handled in some + cases. If the target does not have native support for a higher precision + data type, an implementation for the complex operation will be used to provide + improved guards against intermediate overflow, but overflow and underflow may + still occur in some cases. NaN and infinite values are not handled. andykaylor wrote: The intention here was that if the target doesn't support a higher precision type, we will do what we would have done with "improved". Some targets don't even support a 64-bit floating-point type, so the way we apply this needs to be generalized. Should we issue a warning if the user specifies "promoted" but we can't promote? Zahira and I talked about this offline, and my suggestion was that if LongDoubleSize is greater than DoubleSize, we can promote double to long double, but if it isn't we will use the Smith algorithm (i.e. "improved"). Windows on x86-64 is the really ugly case here, because the target hardware supports an 80-bit floating-point type, but by default the operating system configures the x87 layer to perform calculations as if it were a 64-bit type. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: I don't know anything about how non-x86 targets implement DAZ/FTZ, but for x86-based targets, I think trying to make any assumptions about the setting is bound to be wrong. In theory, it's part of the floating-point environment and shouldn't be modified during execution unless fenv-access is enabled. In practical terms, I can't see any reason that you would ever want a shared library to be able to control this. I don't like the idea of adding an option to maintain that behavior, and I think we should make it a high priority to stop doing it by default when a shared library is compiled with fast-math enabled. More generally, if you're going to be mixing fast-math with non-fast-math within a program, you don't want FTZ/DAZ to be enabled just because some parts of the program are using fast-math, so the current behavior of linking with crtfastmath.o is fairly reckless. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Disable FTZ/DAZ when compiling shared libraries by default. (PR #80475)
andykaylor wrote: > > (Sidenote: "dynamic" isn't even > > [documented](https://clang.llvm.org/docs/UsersManual.html#cmdoption-fdenormal-fp-math)). > > It's not a selectable enum of the Clang `-fdenormal-fp-math` flag, but it is > one for the LLVM function attribute `denormal-fp-math`. This is also a bit of a problem. After inlining we can have instructions with fast-math disabled appearing in the middle of functions that were compiled with fast-math enabled. What happens to the "denormal-fp-math" attribute in such cases? I wanted to show this in compiler explorer, but "#pragma float_control()" doesn't update the "denormal-fp-math attribute. https://godbolt.org/z/KEP3YEEe9 I may have mentioned a few times that I don't like function attributes controlling fast-math behaviors. https://github.com/llvm/llvm-project/pull/80475 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Add warning when INF or NAN are used in a binary operation or as function argument in fast math mode. (PR #76873)
@@ -6771,6 +6771,11 @@ def warn_pointer_sub_null_ptr : Warning< def warn_floatingpoint_eq : Warning< "comparing floating point with == or != is unsafe">, InGroup>, DefaultIgnore; +def warn_fast_floatingpoint_eq : Warning< + "use of %select{infinity|NaN}0 will always be 'false' because " andykaylor wrote: This doesn't make sense to me. What does it mean for a use to be false? Maybe we're trying to wrap too much into a single diagnostic. For comparisons, the result of the comparison will be constant, so it makes sense to have a warning that tells the user what that constant result will be. However, when a constant NaN or infinity is passed as an argument to a function call, the situation is a bit more complicated. In that case, the optimizer can treat the input as poison. What effect that will have depends on the content of the called function. It might do nothing. It might cause the entire function call to be erased. https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[lldb] [flang] [libcxx] [lld] [clang] [compiler-rt] [libcxxabi] [libunwind] [libc] [llvm] [clang-tools-extra] Fix a bug in Smith's algorithm used in complex div/mul. (PR #78330)
@@ -2780,24 +2781,24 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, case options::OPT_fcx_limited_range: { EmitComplexRangeDiag(D, Range, LangOptions::ComplexRangeKind::CX_Limited); Range = LangOptions::ComplexRangeKind::CX_Limited; - std::string ComplexRangeStr = RenderComplexRangeOption("limited"); - if (!ComplexRangeStr.empty()) -CmdArgs.push_back(Args.MakeArgString(ComplexRangeStr)); + ComplexRangeStr = RenderComplexRangeOption("limited"); andykaylor wrote: I think it would be better to move the rendering of this option to the bottom of this function (around line 3124) to avoid inserting multiple, conflicting options if more than one -f[no-]cx-* option is used. https://github.com/llvm/llvm-project/pull/78330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver,sanitizer] Remove RequiresPIE and msan's NeedPIE setting (PR #77689)
andykaylor wrote: @MaskRay I see that in 3bbc912d37f03d9ad3be330b81d91c2eaf6c37f2 you removed some tests that fail because of this change. Why do you think that is an appropriate solution? I have some other tests in a downstream product that are failing because we build with CLANG_DEFAULT_PIE_ON_LINUX=OFF. You said in your initial comment here that "most builds set CLANG_DEFAULT_PIE_ON_LINUX to 1, making RequiresPIE/NeedPIE redundant on Linux." But apparently it's not redundant for builds that don't use that setting. Do you have another solution in progress (or already committed that I haven't seen yet)? It seems that as long as this is a configurable option, we need to support both settings. Intel's SYCL project (https://github.com/intel/llvm) currently sets CLANG_DEFAULT_PIE_ON_LINUX to zero for compatibility with gcc in Fedora releases (at least, I think that's the reason). https://github.com/llvm/llvm-project/pull/77689 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[flang] [lld] [compiler-rt] [clang-tools-extra] [clang] [libc] [libcxx] [libunwind] [llvm] [lldb] [libcxxabi] Fix a bug in Smith's algorithm used in complex div. (PR #78330)
andykaylor wrote: > I mean that we could lazily emit a helper function like > `__clang_smiths_division_double` and call it instead of emitting like twenty > instructions inline. That's a reasonable suggestion. We may also want to add the ability to emit a helper function to handle the full range instead of calling the compiler-rt function when -ffreestanding has been used. Are you aware of the PR that @jcranmer-intel posted to add intrinsics for complex multiplication and division (https://github.com/llvm/llvm-project/pull/68742)? That would allow us to move this problem out of the front end, which seems good. https://github.com/llvm/llvm-project/pull/78330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver,sanitizer] Remove RequiresPIE and msan's NeedPIE setting (PR #77689)
andykaylor wrote: > > @MaskRay I see that in > > [3bbc912](https://github.com/llvm/llvm-project/commit/3bbc912d37f03d9ad3be330b81d91c2eaf6c37f2) > > you removed some tests that fail because of this change. Why do you think > > that is an appropriate solution? I have some other tests in a downstream > > product that are failing because we build with > > CLANG_DEFAULT_PIE_ON_LINUX=OFF. You said in your initial comment here that > > "most builds set CLANG_DEFAULT_PIE_ON_LINUX to 1, making > > RequiresPIE/NeedPIE redundant on Linux." But apparently it's not redundant > > for builds that don't use that setting. > > Do you have another solution in progress (or already committed that I > > haven't seen yet)? It seems that as long as this is a configurable option, > > we need to support both settings. Intel's SYCL project > > ([intel/llvm](https://github.com/intel/llvm)) currently sets > > CLANG_DEFAULT_PIE_ON_LINUX to zero for compatibility with gcc in Fedora > > releases (at least, I think that's the reason). > > My internal users also use `CLANG_DEFAULT_PIE_ON_LINUX OFF`, so I definitely > want to support both flavors. > > The RUN lines removed by > [3bbc912](https://github.com/llvm/llvm-project/commit/3bbc912d37f03d9ad3be330b81d91c2eaf6c37f2) > no longer made sense (the commit message could have been reworded). They > wanted to check that we defaulted to `-fPIE` even when no > `-fno-pic/-fpie/-fpic` was specified. The force-PIC effect might be a > previous limitation, or possibly just something cargo culted from the > previous msan limitation. > > If I use `scudo_flags = ["-fsanitize=scudo", "-fno-pic", "-no-pie"]` in > `test/scudo/lit.cfg.py`, `check-scudo_standalone` still passes. I see. The change to the tests makes sense with that explanation. The test we were seeing fail is compiler-rt/test/dfsan/custom.cpp, and I'm told it fails with the main LLVM project if `CLANG_DEFAULT_PIE_ON_LINUX=OFF` is used. I guess we don't have a buildbot that runs that particular test with that setting? The failure is a segmentation fault in dfsan/X86_64Config/Output/custom.cpp.script. I'm not familiar enough with the dfsan tests to say what this means, but it is triggered by the change in this PR. https://github.com/llvm/llvm-project/pull/77689 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang-tools-extra] [llvm] [CLANG] Add warning when INF or NAN are used in a binary operation or as function argument in fast math mode. (PR #76873)
@@ -2835,6 +2835,9 @@ class Preprocessor { if (Identifier.getIdentifierInfo()->isRestrictExpansion() && !SourceMgr.isInMainFile(Identifier.getLocation())) emitRestrictExpansionWarning(Identifier); + +if (Identifier.getIdentifierInfo()->getName() == "INFINITY") andykaylor wrote: > Since NAN is defined as (-(float)(INFINITY * 0.0F)) when used in the program > it will trigger the "use of infinity via a macro results in undefined > behavior due to the currently enabled floating-point options > [-Wnan-and-infinity-disabled]". Wouldn't that be enough? If I add NaN here we > would get 2 warnings. This is actually a bit unfortunate. If I use "-fhonor-nans -fno-honor-infinities" this definition of NAN will trigger a warning, even though I expect that we'll constant-fold it as expected without regard to the "no-honor-infinities" setting. https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libcxxabi] [clang] [lld] [lldb] [flang] [libunwind] [compiler-rt] [libc] [libcxx] [llvm] [clang-tools-extra] Fix a bug in Smith's algorithm used in complex div. (PR #78330)
https://github.com/andykaylor approved this pull request. lgtm https://github.com/llvm/llvm-project/pull/78330 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [clang-tools-extra] [CLANG] Add warning when INF or NAN are used in a binary operation or as function argument in fast math mode. (PR #76873)
https://github.com/andykaylor approved this pull request. I'm happy with this. https://github.com/llvm/llvm-project/pull/76873 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fix build warning caused by mixed signed/unsigned compare (PR #69797)
https://github.com/andykaylor created https://github.com/llvm/llvm-project/pull/69797 None >From 856a471b40f19f5fcf6aab7591009555b6a3841f Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Fri, 20 Oct 2023 12:33:51 -0700 Subject: [PATCH 1/2] Update SimplifyIndVar.cpp In SimplifyIndvar::replaceFloatIVWithIntegerIV() the return value of getFPMantissaWidth() was being cast as an unsigned integer and then compared with the number of bits needed to represent an integer that was cast to and from a floating-point type. This is a problem because getFPMantissaWidth() returns -1 if the type does not have a stable mantissa. Currently the only type that returns -1 is ppc_fp128, so you'd need a pretty big induction variable to cause a problem. However, this problem will be more likely to be exposed when we implement support for decimal floating-point types. Strictly speaking, what we want to know here is the size of the biggest integer that can be represented exactly. We could get that information even with an unstable mantissa width, but getFPMantissaWidth() won't do it. --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 1caf708bcc35254..45cbdd235898b28 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -664,7 +664,7 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { MaskBits = SE->getSignedRange(IV).getMinSignedBits(); else MaskBits = SE->getUnsignedRange(IV).getActiveBits(); - unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); + int DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { // Match for fptosi/fptoui of sitofp and with same type. >From 2524d41a34ccaa3932e5b4ab2876ea02744f6304 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Fri, 20 Oct 2023 16:40:26 -0700 Subject: [PATCH 2/2] Fix build warning This fixes a build warning caused by mixed signed/unsigned compare --- llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 45cbdd235898b28..a23ac41acaa58aa 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -659,11 +659,11 @@ bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) { Instruction *IVOperand = cast(UseInst->getOperand(0)); // Get the symbolic expression for this instruction. const SCEV *IV = SE->getSCEV(IVOperand); - unsigned MaskBits; + int MaskBits; if (UseInst->getOpcode() == CastInst::SIToFP) -MaskBits = SE->getSignedRange(IV).getMinSignedBits(); +MaskBits = (int)SE->getSignedRange(IV).getMinSignedBits(); else -MaskBits = SE->getUnsignedRange(IV).getActiveBits(); +MaskBits = (int)SE->getUnsignedRange(IV).getActiveBits(); int DestNumSigBits = UseInst->getType()->getFPMantissaWidth(); if (MaskBits <= DestNumSigBits) { for (User *U : UseInst->users()) { ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fix build warning caused by mixed signed/unsigned compare (PR #69797)
https://github.com/andykaylor closed https://github.com/llvm/llvm-project/pull/69797 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
https://github.com/andykaylor edited https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
https://github.com/andykaylor commented: Thanks for adding this! I have just a few minor comments. https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
@@ -4609,6 +4609,22 @@ The pragma can take two values: ``on`` and ``off``. float v = t + z; } +``#pragma clang fp reciprocal`` allows control over using reciprocal +approximations in floating point expressions. When enabled, this +pragma allows the expression ``x / y`` to be approximated as ``x * +(1.0 / y)``. This pragma can be used to disable reciprocal +approximation when it is otherwise enabled for the translation unit +with the ``-fallow-reciprocal`` flag. The pragma can take two values: andykaylor wrote: ```suggestion with the ``-freciprocal-math`` flag or other fast-math options. The pragma can take two values: ``` https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
@@ -1309,6 +1309,13 @@ void Sema::ActOnPragmaFPReassociate(SourceLocation Loc, bool IsEnabled) { CurFPFeatures = NewFPFeatures.applyOverrides(getLangOpts()); } +void Sema::ActOnPragmaFPReciprocal(SourceLocation Loc, bool IsEnabled) { andykaylor wrote: This should have the same checks for ffp-eval-method use as are done in ActOnPragmaFPReassociate(). Probably it's better to combine the two into something like ActOnPragmaFPValueChangingOption() since we'll likely be adding something similar for the other fast-math flags eventually. https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
andykaylor wrote: err_pragma_fp_invalid_option needs to be updated to add 'reciprocal' https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] clang: Add pragma clang fp reciprocal (PR #68267)
@@ -1309,6 +1309,13 @@ void Sema::ActOnPragmaFPReassociate(SourceLocation Loc, bool IsEnabled) { CurFPFeatures = NewFPFeatures.applyOverrides(getLangOpts()); } +void Sema::ActOnPragmaFPReciprocal(SourceLocation Loc, bool IsEnabled) { andykaylor wrote: There's some discussion of it here: https://reviews.llvm.org/D122155 The basic idea is that it doesn't make sense to allow value-changing optimizations while also specifying extra precision to be used for intermediate calculations. https://github.com/llvm/llvm-project/pull/68267 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fix math-errno issue (PR #66381)
@@ -2339,6 +2345,28 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // LLVM counterparts if the call is marked 'const' (known to never set errno). // In case FP exceptions are enabled, the experimental versions of the // intrinsics model those. + bool ConstAlways = + getContext().BuiltinInfo.isConst(BuiltinID); + + // There's a special case with the fma builtins where they are always const + // if the target environment is GNU or the target is OS is Windows and we're + // targeting the MSVCRT.dll environment. + switch (BuiltinID) { andykaylor wrote: @zahiraam I think the best way to do that would be to add a new letter vcode to the attributes here: https://github.com/llvm/llvm-project/blob/cbdccb30c23f71f20d05b19256232419e7c5e517/clang/include/clang/Basic/Builtins.def#L74 -- something like "// m -> const when we GNU or MSVCRT libraries are targeted" and then fma, for example, would become "BUILTIN(__builtin_fma, "", "Fmne")" I'd suggest doing that as a separate patch, because it isn't obvious exactly how it should be managed. I'm more concerned about the fact that Sema is marking calls as const in the AST based on the builtins information but we later discover that they aren't, in fact, const because of a pragma being used. @AaronBallman do you have any ideas about how that could be improved? https://github.com/llvm/llvm-project/pull/66381 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -396,7 +396,41 @@ class LangOptionsBase { IncompleteOnly = 3, }; - enum ComplexRangeKind { CX_Full, CX_Limited, CX_Fortran, CX_None }; + /// Controls the various implementations for complex multiplication and + // division. + enum ComplexRangeKind { +/// Implementation of complex division and multiplication using a call to +/// runtime library functions(generally the case, but the BE might +/// sometimes replace the library call if it knows enough about the +/// potential range of the inputs). Overflow and non -finite values are +/// handled by the library implementation. +CX_Full, + +/// Implementation of complex division using the Smith algorithm at +/// source precision. Smith's algorithm for complex division. +/// See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 +/// (1962). This value offers improved handling for overflow in intermediate +/// calculations, but overflow may occur. NaN and infinite and values are +/// not handled in some cases. +CX_Improved, + +/// Implementation of complex division using algebraic formulas at +/// higher precision. Overflow is handled. Non-finite values are handled in +/// some cases. If the target hardware does not have native support for a +/// higher precision data type, an implementation for the complex operation +/// will be used to provide improved guards against intermediate overflow, +/// but overflow and underflow may still occur in some cases. NaN and +/// infinite and values are not handled. This is the default value. +CX_Promoted, + +/// Implementation of complex division and multiplication using +/// algebraic formulas at source precision.No special handling to avoid andykaylor wrote: ```suggestion /// algebraic formulas at source precision. No special handling to avoid ``` https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,33 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm at andykaylor wrote: I'm not sure we should document this as being implemented using the Smith algorithm. That may be left as an implementation detail, particularly if we start generating intrinsics which are handled in the backend or by an offload target. I would prefer to just describe the characteristics this option is intended to provide -- improved handling for overflow, but no special handling for the "NaN + NaNi" cases. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -79,29 +91,152 @@ _Complex float pragma_on_div(_Complex float a, _Complex float b) { // LMTD-NEXT: fdiv float // LMTD-NEXT: fdiv float - // FRTRN: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fadd float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fadd float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fsub float - // FRTRN-NEXT: fdiv float - // FRTRN-NEXT: fdiv float + // SMITH: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fadd float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fmul float + // SMITH-NEXT: fsub float + // SMITH-NEXT: fdiv float + // SMITH-NEXT: fdiv float + + // EXTND: fpext float {{.*}} to double + // EXTND: fpext float {{.*}} to double + // EXTND: fmul double + // EXTND: fmul double + // EXTND: fadd double + // EXTND: fmul double + // EXTND: fmul double + // EXTND: fadd double + // EXTND: fmul double + // EXTND: fmul double + // EXTND: fsub double + // EXTND: fdiv double + // EXTND: fdiv double + // EXTND: fptrunc double + // EXTND: fptrunc double return a / b; } _Complex float pragma_off_div(_Complex float a, _Complex float b) { #pragma STDC CX_LIMITED_RANGE OFF // LABEL: define {{.*}} @pragma_off_div( + // FULL: call {{.*}} @__divsc3 // LMTD: call {{.*}} @__divsc3 - // FRTRN: call {{.*}} @__divsc3 + // SMITH: call {{.*}} @__divsc3 + + // EXTND: call {{.*}} @__divdc3 + + return a / b; +} + +_Complex float pragma_default_mul(_Complex float a, _Complex float b) { +#pragma STDC CX_LIMITED_RANGE DEFAULT + // LABEL: define {{.*}} @pragma_on_mul( + + // FULL: fmul float + // FULL-NEXT: fmul float + // FULL-NEXT: fmul float + // FULL-NEXT: fmul float + // FULL-NEXT: fsub float + // FULL-NEXT: fadd float andykaylor wrote: Shouldn't there be a check for NaN comparisons and a library call in this case? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,33 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + This value offers improved handling for overflow in intermediate calculations, + but overflow may occur. NaN and infinite and values are not handled in some + cases. + * ``full`` Implementation of complex division and multiplication using a andykaylor wrote: What are we doing with fast-math flags in these expansions? In the case of complex multiplication, if the 'nnan' and 'ninf' flags are set on the generated instructions, the "full" implementation will be optimized to the "basic" implementation. I think that's probably what we want since "full" is going to be the default. It may warrant a warning if we see an explicit "-fcomplex-arithmetic=full" on the command line with any of the options that sets either 'nnan' or 'ninf'. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,33 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + This value offers improved handling for overflow in intermediate calculations, + but overflow may occur. NaN and infinite and values are not handled in some + cases. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled by the + library implementation. + * ``promoted`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. Non-finite values are handled in some + cases. If the target hardware does not have native support for a higher precision + data type, an implementation for the complex operation will be used to provide + improved guards against intermediate overflow, but overflow and underflow may + still occur in some cases. NaN and infinite and values are not handled. + This is the default value. andykaylor wrote: I think for many users this would make sense as the default value, but "full" is required for conformance to the C standard. Can we use this as the default if we're targeting pre-C99 but use "full" with C99 and later? I'm not sure what C++ expects, but probably "full" there too. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1041,28 +1041,15 @@ defm offload_uniform_block : BoolFOption<"offload-uniform-block", NegFlag, BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>; -def fcx_limited_range : Joined<["-"], "fcx-limited-range">, andykaylor wrote: I didn't realize these had made it into the 18.0 release when I suggested that we could remove them. We would need at least one release where they are marked as deprecated, but since they are standard gcc options, maybe it makes sense to just keep them and have them alias to the new option as: -fcx-limited-range --> -fcomplex-arithmetic=basic -fcx-fortran-rules --> -fcomplex-arithmetic=improved -fno-cx-limited-range --> -fcomplex-arithmetic=full -fno-cx-fortran-rules --> -fcomplex-arithmetic=full https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -283,9 +283,48 @@ class ComplexExprEmitter ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - QualType getPromotionType(QualType Ty) { + QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType, + bool IsDivOpCode) { +const TargetInfo &TI = CGF.getContext().getTargetInfo(); +if (const auto *BT = dyn_cast(ElementType)) { + switch (BT->getKind()) { + case BuiltinType::Kind::Float16: + case BuiltinType::Kind::BFloat16: { +return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + } + case BuiltinType::Kind::Float: +return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); + case BuiltinType::Kind::Double: +if (TI.hasLongDoubleType()) { andykaylor wrote: What happens with targets where double and long double are the same size? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,33 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + This value offers improved handling for overflow in intermediate calculations, + but overflow may occur. NaN and infinite and values are not handled in some + cases. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled by the + library implementation. + * ``promoted`` Implementation of complex division using algebraic formulas at + higher precision. Overflow is handled. Non-finite values are handled in some + cases. If the target hardware does not have native support for a higher precision andykaylor wrote: ```suggestion cases. If the target does not have native support for a higher precision ``` I suggest removing "hardware" since the target may be SPIRV with unknown hardware. I'm not sure what we should do in the case of soft-float targets. Probably "full" makes most sense there, but I'd like to hear from someone who works with one of those targets. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,33 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + This value offers improved handling for overflow in intermediate calculations, + but overflow may occur. NaN and infinite and values are not handled in some + cases. + * ``full`` Implementation of complex division and multiplication using a + call to runtime library functions (generally the case, but the BE might + sometimes replace the library call if it knows enough about the potential + range of the inputs). Overflow and non-finite values are handled by the + library implementation. andykaylor wrote: In the case of multiplication, the library call is only needed to handle non-finite values. Overflow occurs in accordance with normal floating-point rules. That is, even if we promoted to a higher precision type, the same overflow would occur when we truncate back to the source type. This isn't true for division because of the nature of the intermediate calculations. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1847,19 +1847,33 @@ floating point semantic models: precise (the default), strict, and fast. * ``16`` - Forces ``_Float16`` operations to be emitted without using excess precision arithmetic. -.. option:: -fcx-limited-range: - - This option enables the naive mathematical formulas for complex division and - multiplication with no NaN checking of results. The default is - ``-fno-cx-limited-range``, but this option is enabled by the ``-ffast-math`` - option. - -.. option:: -fcx-fortran-rules: - - This option enables the naive mathematical formulas for complex - multiplication and enables application of Smith's algorithm for complex - division. See SMITH, R. L. Algorithm 116: Complex division. Commun. - ACM 5, 8 (1962). The default is ``-fno-cx-fortran-rules``. +.. option:: -fcomplex-arithmetic=: + + This option specifies the implementation for complex multiplication and division. + + Valid values are: ``basic``, ``improved``, ``full`` and ``promoted``. + + * ``basic`` Implementation of complex division and multiplication using + algebraic formulas at source precision. No special handling to avoid + overflow. NaN and infinite and values are not handled. + * ``improved`` Implementation of complex division using the Smith algorithm at + source precision. Smith's algorithm for complex division. + See SMITH, R. L. Algorithm 116: Complex division. Commun. ACM 5, 8 (1962). + This value offers improved handling for overflow in intermediate calculations, + but overflow may occur. NaN and infinite and values are not handled in some andykaylor wrote: "but overflow may occur" -- I'm getting a little bit over my head here, but I think the academic papers for the Smith algorithm say that it underflows but doesn't overflow. Or maybe that was a description of an improvement to Smith that I came across. If we're going to be technical here, we should be sure that our wording is accurate. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -283,9 +283,48 @@ class ComplexExprEmitter ComplexPairTy EmitComplexBinOpLibCall(StringRef LibCallName, const BinOpInfo &Op); - QualType getPromotionType(QualType Ty) { + QualType HigherPrecisionTypeForComplexArithmetic(QualType ElementType, + bool IsDivOpCode) { +const TargetInfo &TI = CGF.getContext().getTargetInfo(); +if (const auto *BT = dyn_cast(ElementType)) { + switch (BT->getKind()) { + case BuiltinType::Kind::Float16: + case BuiltinType::Kind::BFloat16: { +return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + } + case BuiltinType::Kind::Float: +return CGF.getContext().getComplexType(CGF.getContext().DoubleTy); + case BuiltinType::Kind::Double: +if (TI.hasLongDoubleType()) { + return CGF.getContext().getComplexType(CGF.getContext().LongDoubleTy); +} else { + return QualType(); +} + case BuiltinType::Kind::LongDouble: andykaylor wrote: This is more complicated than what you have here. The C "long double" type may be 64-bit, 80-bit, or 128-bit, depending on the target. You can get the size from LangOpts::LongDoubleSize if that's accessible here. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -67,41 +79,164 @@ _Complex float pragma_on_div(_Complex float a, _Complex float b) { // FULL-NEXT: fdiv float // FULL: fdiv float - // LMTD: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fadd float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fmul float - // LMTD-NEXT: fsub float - // LMTD-NEXT: fdiv float - // LMTD-NEXT: fdiv float - - // FRTRN: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fadd float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fadd float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fmul float - // FRTRN-NEXT: fsub float - // FRTRN-NEXT: fdiv float - // FRTRN-NEXT: fdiv float + // BASIC: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fadd float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fadd float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fmul float + // BASIC-NEXT: fsub float + // BASIC-NEXT: fdiv float + // BASIC-NEXT: fdiv float + + // IMPRVD: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fadd float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fmul float + // IMPRVD-NEXT: fsub float + // IMPRVD-NEXT: fdiv float + // IMPRVD-NEXT: fdiv float + + // PRMTD: fpext float {{.*}} to double andykaylor wrote: Shouldn't the pragma override the need to extend in this case? https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1041,28 +1041,15 @@ defm offload_uniform_block : BoolFOption<"offload-uniform-block", NegFlag, BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>; -def fcx_limited_range : Joined<["-"], "fcx-limited-range">, andykaylor wrote: Sorry. I meant "aliasing" in the non-technical sense of "having the same meaning." How that gets implemented is another matter. I think the driver could translate them to the same cc1 option. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [CLANG] Full support of complex multiplication and division. (PR #81514)
@@ -1041,28 +1041,15 @@ defm offload_uniform_block : BoolFOption<"offload-uniform-block", NegFlag, BothFlags<[], [ClangOption], " that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)">>; -def fcx_limited_range : Joined<["-"], "fcx-limited-range">, andykaylor wrote: What I meant to suggest is that you can leave the driver-level options as if they were independent, but when we process them in RenderFloatingPointOptions, -fcx-limited-range and -fcomplex-arithmetic=basic (for example), would add the same cc1 option. Since the warning is generated from the RenderFloatingPointOptions we should be able to make that report the expected output. https://github.com/llvm/llvm-project/pull/81514 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [NFC] Add assertion to ensure FiniteMathOnly is in sync with HonorINFs and HonorNANs. (PR #97342)
@@ -816,6 +816,11 @@ class FPOptions { setAllowFPReassociate(LO.AllowFPReassoc); setNoHonorNaNs(LO.NoHonorNaNs); setNoHonorInfs(LO.NoHonorInfs); +// Ensure that if FiniteMathOnly is enabled, NoHonorNaNs and NoHonorInfs are +// also enabled. This is because FiniteMathOnly mode assumes no NaNs or Infs +// are present in computations. +if (!LO.NoHonorInfs || !LO.NoHonorInfs) + assert(!LO.FiniteMathOnly && "FiniteMathOnly implies NoHonorInfs"); andykaylor wrote: I agree with Aaron that we should get rid of the internal state that combines these two modes. I expect we'll only want to define `__FINITE_MATH_ONLY__` when we aren't honoring either. You could create code that made incorrect assumptions either way, but the state described by the symbol is that we aren't honoring either. https://github.com/llvm/llvm-project/pull/97342 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Introduce ffp-model=aggressive (PR #100453)
andykaylor wrote: @kpneal I think the tolerance of excess precision is mostly a concession to the hardware limitations. I believe we only apply it when half-precision types are used but the hardware doesn't support native half-precision operations. In this sense, it is equivalent to the situation when math was commonly performed using x87 instructions for x86 targets (pre-SSE). To get the semantics described by the source types, you'd need to perform intermediate rounding all over the place, but in the interest of performance most people preferred to allow excess precision, and so the language standards also allowed this. The result you get with excess precision is more accurate relative to the theoretically correct result with unbounded precision, but it doesn't match the result implied by the source types. As with the older x87 case, I think most people would prefer the performance of keeping excess precision, but there may be some people who prefer to round everything to source precision. In general, though, I think you're right that this should be independent of whether strict mode is enabled. I don't think we should disallow combining strict exception semantics with forcing source precision. In any case, all of that is beyond the scope of the changes I'm proposing in this PR. https://github.com/llvm/llvm-project/pull/100453 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Introduce ffp-model=aggressive (PR #100453)
andykaylor wrote: @MaskRay Are you OK with this change? https://github.com/llvm/llvm-project/pull/100453 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Fix -fno-unsafe-math-optimizations behavior (PR #89473)
https://github.com/andykaylor created https://github.com/llvm/llvm-project/pull/89473 This changes the handling of -fno-unsafe-fp-math to stop having that option imply -ftrapping-math. In gcc, -fno-unsafe-math-optimizations sets -ftrapping-math, but that dependency is based on the fact the -ftrapping-math is enabled by default in gcc. Because clang does not enabled -ftrapping-math by default, there is no reason for -fno-unsafe-math-optimizations to set it. On the other hand, -funsafe-math-optimizations continues to imply -fno-trapping-math because this option necessarily disables strict exception semantics. This fixes https://github.com/llvm/llvm-project/issues/87523 >From 100fc9dfb2b071877d758ce71bddeec693d986da Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Fri, 19 Apr 2024 16:35:00 -0700 Subject: [PATCH] Fix -fno-unsafe-math-optimizations behavior This changes the handling of -fno-unsafe-fp-math to stop having that option imply -ftrapping-math. In gcc, -fno-unsafe-math-optimizations sets -ftrapping-math, but that dependency is based on the fact the -ftrapping-math is enabled by default in gcc. Because clang does not enabled -ftrapping-math by default, there is no reason for -fno-unsafe-math-optimizations to set it. On the other hand, -funsafe-math-optimizations continues to imply -fno-trapping-math because this option necessarily disables strict exception semantics. This fixes https://github.com/llvm/llvm-project/issues/87523 --- clang/docs/UsersManual.rst| 1 - clang/lib/Driver/ToolChains/Clang.cpp | 2 -- clang/test/Driver/fast-math.c | 24 +--- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index c464bc3a69adc5..2b4155d4b65a48 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1690,7 +1690,6 @@ floating point semantic models: precise (the default), strict, and fast. * ``-fno-associative-math`` * ``-fno-reciprocal-math`` * ``-fsigned-zeros`` - * ``-ftrapping-math`` * ``-ffp-contract=on`` * ``-fdenormal-fp-math=ieee`` diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 456ea74caadb00..0776d095327219 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3137,8 +3137,6 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, ReciprocalMath = false; SignedZeros = true; ApproxFunc = false; - TrappingMath = true; - FPExceptionBehavior = "strict"; // The target may have opted to flush by default, so force IEEE. DenormalFPMath = llvm::DenormalMode::getIEEE(); diff --git a/clang/test/Driver/fast-math.c b/clang/test/Driver/fast-math.c index 882e81fd14d34a..ef23f88dd817ea 100644 --- a/clang/test/Driver/fast-math.c +++ b/clang/test/Driver/fast-math.c @@ -271,11 +271,11 @@ // RUN: 2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s // RUN: %clang -### -funsafe-math-optimizations -fno-reciprocal-math -c %s \ -// RUN: 2>&1 | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s +// RUN: 2>&1 | FileCheck --check-prefix=CHECK-REASSOC-NO-UNSAFE-MATH %s // RUN: %clang -### -funsafe-math-optimizations -fsigned-zeros -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s // RUN: %clang -### -funsafe-math-optimizations -ftrapping-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s +// RUN: | FileCheck --check-prefix=CHECK-TRAPPING-NO-UNSAFE-MATH %s // RUN: %clang -### -funsafe-math-optimizations -fno-unsafe-math-optimizations \ // RUN: -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s @@ -283,18 +283,20 @@ // RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s // RUN: %clang -### -ffast-math -fno-reciprocal-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s +// RUN: | FileCheck --check-prefix=CHECK-REASSOC-NO-UNSAFE-MATH %s // RUN: %clang -### -ffast-math -fsigned-zeros -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s // RUN: %clang -### -ffast-math -ftrapping-math -c %s 2>&1 \ -// RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s +// RUN: | FileCheck --check-prefix=CHECK-TRAPPING-NO-UNSAFE-MATH %s // RUN: %clang -### -ffast-math -fno-unsafe-math-optimizations -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s // CHECK-NO-UNSAFE-MATH: "-cc1" // CHECK-NO-UNSAFE-MATH-NOT: "-funsafe-math-optimizations" -// CHECK-NO_UNSAFE-MATH-NOT: "-mreassociate" +// CHECK-NO-UNSAFE-MATH-NOT: "-mreassociate" // CHECK-NO-UNSAFE-MATH: "-o" +// CHECK-NO-UNSAFE-MATH-NOT: "-ffp-exception-behavior=strict" +// CHECK-TRAPPING-NO-UNSAFE-MATH: "-ffp-exception-behavior=strict" // Reassociate is allowed because it does not require reciprocal-math. @@ -304,8 +306,8 @@ // RUN: | FileCheck --check-prefix=CHECK-REA
[clang] Align -ffp-model=fast denormal handling with -ffast-math (PR #89477)
https://github.com/andykaylor created https://github.com/llvm/llvm-project/pull/89477 This is an attempt to better align the denormal handling of -ffp-model=fast with the behavior of -ffast-math. The clang user's manual claims that -ffp-model=fast "behaves identically to specifying both -ffast-math and -ffp-contract=fast." That isn't entirely correct. One difference is that -ffast-math causes crtfastmath.o to be linked if it is available, and passes -fdenormal-fp-math=PreserveSign as a -cc1 option when crtfastmath.o is available. I'm not sure the current behavior is reasonable and consistent for all tool chains, but I'm not trying to fix that here. This is just trying to make an incremental improvement. I am going to be proposing further changes to -ffp-model=fast, decoupling it from -ffast-math and introducing a new -ffp-model=aggressive that matches the current behavior, but I wanted to solidfy the current behavior before I do that. >From cd8df1939a456c05a1d94c471627fc5a7a332fc1 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Fri, 19 Apr 2024 17:53:52 -0700 Subject: [PATCH] Align -ffp-model=fast denormal handling with -ffast-math This is an attempt to better align the denormal handling of -ffp-model=fast with the behavior of -ffast-math. The clang user's manual claims that -ffp-model=fast "behaves identically to specifying both -ffast-math and -ffp-contract=fast." That isn't entirely correct. One difference is that -ffast-math causes crtfastmath.o to be linked if it is available, and passes -fdenormal-fp-math=PreserveSign as a -cc1 option when crtfastmath.o is available. I'm not sure the current behavior is reasonable and consistent for all tool chains, but I'm not trying to fix that here. This is just trying to make an incremental improvement. I am going to be proposing further changes to -ffp-model=fast, decoupling it from -ffast-math and introducing a new -ffp-model=aggressive that matches the current behavior, but I wanted to solidfy the current behavior before I do that. --- clang/docs/UsersManual.rst | 4 ++-- clang/lib/Driver/ToolChain.cpp | 8 +++- clang/lib/Driver/ToolChains/Clang.cpp| 4 clang/test/Driver/default-denormal-fp-math.c | 3 +++ clang/test/Driver/linux-ld.c | 3 +++ clang/test/Driver/solaris-ld.c | 3 +++ 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index c464bc3a69adc5..00bb1e779308ef 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1452,8 +1452,8 @@ floating point semantic models: precise (the default), strict, and fast. "fenv_access", "off", "on", "off" "rounding_mode", "tonearest", "dynamic", "tonearest" "contract", "on", "off", "fast" - "denormal_fp_math", "IEEE", "IEEE", "IEEE" - "denormal_fp32_math", "IEEE","IEEE", "IEEE" + "denormal_fp_math", "IEEE", "IEEE", "target-dependent" + "denormal_fp32_math", "IEEE","IEEE", "target-dependent" "support_math_errno", "on", "on", "off" "no_honor_nans", "off", "off", "on" "no_honor_infinities", "off", "off", "on" diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 237092ed07e5dc..a36d8eb1750a2d 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -1314,11 +1314,17 @@ bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args, Arg *A = Args.getLastArg(options::OPT_ffast_math, options::OPT_fno_fast_math, options::OPT_funsafe_math_optimizations, - options::OPT_fno_unsafe_math_optimizations); + options::OPT_fno_unsafe_math_optimizations, + options::OPT_ffp_model_EQ); if (!A || A->getOption().getID() == options::OPT_fno_fast_math || A->getOption().getID() == options::OPT_fno_unsafe_math_optimizations) return false; +if (A && A->getOption().getID() == options::OPT_ffp_model_EQ) { + StringRef Model = A->getValue(); + if (!Model.equals("fast")) +return false; +} } // If crtfastmath.o exists add it to the arguments. Path = GetFilePath("crtfastmath.o"); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 456ea74caadb00..0ee74855891b03 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2942,6 +2942,10 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, if (Val.equals("fast")) { FPModel = Val; applyFastMath(); +// The target-specific getDefaultDenormalModeForType handler should +// account for -ffp-model=fast and choose its behavior +DenormalFPMath = DefaultDenormalFPMath; +DenormalFP32Math = DefaultDenormalFP32Math; } else if (Val.equals("precise")) { optID = options::OPT_ffp_contract; FPModel = Val; diff --git a/cl