https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/110506
>From 328357f2300ebe55b8385c01f9c655f703933736 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <benjamin.maxw...@arm.com> Date: Mon, 30 Sep 2024 11:07:45 +0000 Subject: [PATCH 1/4] [IR] Allow fast math flags on calls with homogeneous FP struct types This extends FPMathOperator to allow calls that return literal structs of homogeneous floating-point or vector-of-floating-point types. The intended use case for this is to support FP intrinsics that return multiple values (such as `llvm.sincos`). --- llvm/docs/LangRef.rst | 19 ++++++------ llvm/include/llvm/IR/DerivedTypes.h | 4 +++ llvm/include/llvm/IR/Operator.h | 14 +++++++-- llvm/lib/IR/Type.cpp | 13 +++++---- llvm/test/Bitcode/compatibility.ll | 20 +++++++++++++ llvm/unittests/IR/InstructionsTest.cpp | 40 ++++++++++++++++++++++---- 6 files changed, 87 insertions(+), 23 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 3f39d58b322a4f..1eb2982385fda0 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -12472,9 +12472,8 @@ instruction's return value on the same edge). The optional ``fast-math-flags`` marker indicates that the phi has one or more :ref:`fast-math-flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math-flags -are only valid for phis that return a floating-point scalar or vector -type, or an array (nested to any depth) of floating-point scalar or vector -types. +are only valid for phis that return a floating-point scalar or vector type, +possibly within an array (nested to any depth), or a homogeneous struct literal. Semantics: """""""""" @@ -12523,8 +12522,8 @@ class <t_firstclass>` type. #. The optional ``fast-math flags`` marker indicates that the select has one or more :ref:`fast-math flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. 
Fast-math flags are only valid - for selects that return a floating-point scalar or vector type, or an array - (nested to any depth) of floating-point scalar or vector types. + for selects that return a floating-point scalar or vector type, possibly + within an array (nested to any depth), or a homogeneous struct literal. Semantics: """""""""" @@ -12762,8 +12761,8 @@ This instruction requires several arguments: #. The optional ``fast-math flags`` marker indicates that the call has one or more :ref:`fast-math flags <fastmath>`, which are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are only valid - for calls that return a floating-point scalar or vector type, or an array - (nested to any depth) of floating-point scalar or vector types. + for calls that return a floating-point scalar or vector type, possibly within + an array (nested to any depth), or a homogeneous struct literal. #. The optional "cconv" marker indicates which :ref:`calling convention <callingconv>` the call should use. If none is @@ -20528,7 +20527,8 @@ the explicit vector length. more :ref:`fast-math flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are only valid for selects that return a floating-point scalar or vector type, - or an array (nested to any depth) of floating-point scalar or vector types. + possibly within an array (nested to any depth), or a homogeneous struct + literal. Semantics: """""""""" @@ -20586,7 +20586,8 @@ is the pivot. more :ref:`fast-math flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are only valid for merges that return a floating-point scalar or vector type, - or an array (nested to any depth) of floating-point scalar or vector types. + possibly within an array (nested to any depth), or a homogeneous struct + literal. 
Semantics: """""""""" diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 975c142f1a4572..a24801d8bdf834 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -301,6 +301,10 @@ class StructType : public Type { /// {<vscale x 2 x i32>, <vscale x 4 x i64>}} bool containsHomogeneousScalableVectorTypes() const; + /// Return true if this struct is non-empty and all element types are the + /// same. + bool containsHomogeneousTypes() const; + /// Return true if this is a named struct that has a non-empty name. bool hasName() const { return SymbolTableEntry != nullptr; } diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index 88b9bfc0be4b15..22ffcc730e7b68 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -15,6 +15,7 @@ #define LLVM_IR_OPERATOR_H #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/FMF.h" #include "llvm/IR/GEPNoWrapFlags.h" @@ -351,8 +352,17 @@ class FPMathOperator : public Operator { case Instruction::Select: case Instruction::Call: { Type *Ty = V->getType(); - while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) - Ty = ArrTy->getElementType(); + TypeSwitch<Type *>(Ty) + .Case([&](StructType *StructTy) { + if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes()) + return; + Ty = StructTy->elements().front(); + }) + .Case([&](ArrayType *ArrTy) { + do { + Ty = ArrTy->getElementType(); + } while ((ArrTy = dyn_cast<ArrayType>(Ty))); + }); return Ty->isFPOrFPVectorTy(); } default: diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 3784ad28d7219d..f618263f79c313 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -430,13 +430,14 @@ bool StructType::containsScalableVectorType( } bool StructType::containsHomogeneousScalableVectorTypes() const { - Type *FirstTy = getNumElements() > 0 ? 
elements()[0] : nullptr; - if (!FirstTy || !isa<ScalableVectorType>(FirstTy)) + if (getNumElements() <= 0 || !isa<ScalableVectorType>(elements().front())) return false; - for (Type *Ty : elements()) - if (Ty != FirstTy) - return false; - return true; + return containsHomogeneousTypes(); +} + +bool StructType::containsHomogeneousTypes() const { + ArrayRef<Type *> ElementTys = elements(); + return !ElementTys.empty() && all_equal(ElementTys); } void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) { diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index ea29ff634a43bb..4fe9d9b11f8831 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1122,6 +1122,26 @@ define void @fastMathFlagsForArrayCalls([2 x float] %f, [2 x double] %d1, [2 x < ret void } +declare { float, float } @fmf_struct_f32() +declare { double, double } @fmf_struct_f64() +declare { <4 x double>, <4 x double> } @fmf_struct_v4f64() + +; CHECK-LABEL: fastMathFlagsForStructCalls( +define void @fastMathFlagsForStructCalls({ float, float } %f, { double, double } %d1, { <4 x double>, <4 x double> } %d2) { + %call.fast = call fast { float, float } @fmf_struct_f32() + ; CHECK: %call.fast = call fast { float, float } @fmf_struct_f32() + + ; Throw in some other attributes to make sure those stay in the right places. 
+ + %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64() + ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64() + + %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64() + ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64() + + ret void +} + ;; Type System %opaquety = type opaque define void @typesystem() { diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index 481fe96607e48e..9d8056a768af2f 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -1559,12 +1559,40 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) { CallInst::Create(AVFFnTy, AVFCallee, {}, "")); EXPECT_TRUE(isa<FPMathOperator>(AVFCall)); - Type *AAVFTy = ArrayType::get(AVFTy, 2); - FunctionType *AAVFFnTy = FunctionType::get(AAVFTy, {}); - Value *AAVFCallee = Constant::getNullValue(PtrTy); - std::unique_ptr<CallInst> AAVFCall( - CallInst::Create(AAVFFnTy, AAVFCallee, {}, "")); - EXPECT_TRUE(isa<FPMathOperator>(AAVFCall)); + Type *StructITy = StructType::get(ITy, ITy); + FunctionType *StructIFnTy = FunctionType::get(StructITy, {}); + Value *StructICallee = Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> StructICall( + CallInst::Create(StructIFnTy, StructICallee, {}, "")); + EXPECT_FALSE(isa<FPMathOperator>(StructICall)); + + Type *NamedStructFTy = StructType::create({FTy, FTy}, "AStruct"); + FunctionType *NamedStructFFnTy = FunctionType::get(NamedStructFTy, {}); + Value *NamedStructFCallee = Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> NamedStructFCall( + CallInst::Create(NamedStructFFnTy, NamedStructFCallee, {}, "")); + EXPECT_FALSE(isa<FPMathOperator>(NamedStructFCall)); + + Type *MixedStructTy = StructType::get(FTy, ITy); + FunctionType *MixedStructFnTy = FunctionType::get(MixedStructTy, {}); + Value *MixedStructCallee = 
Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> MixedStructCall( + CallInst::Create(MixedStructFnTy, MixedStructCallee, {}, "")); + EXPECT_FALSE(isa<FPMathOperator>(MixedStructCall)); + + Type *StructFTy = StructType::get(FTy, FTy); + FunctionType *StructFFnTy = FunctionType::get(StructFTy, {}); + Value *StructFCallee = Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> StructFCall( + CallInst::Create(StructFFnTy, StructFCallee, {}, "")); + EXPECT_TRUE(isa<FPMathOperator>(StructFCall)); + + Type *StructVFTy = StructType::get(VFTy, VFTy); + FunctionType *StructVFFnTy = FunctionType::get(StructVFTy, {}); + Value *StructVFCallee = Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> StructVFCall( + CallInst::Create(StructVFFnTy, StructVFCallee, {}, "")); + EXPECT_TRUE(isa<FPMathOperator>(StructVFCall)); } TEST(InstructionsTest, FNegInstruction) { >From 688154050d02b3d118031d2fdd532a7f5d2500a8 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <benjamin.maxw...@arm.com> Date: Mon, 30 Sep 2024 15:06:11 +0000 Subject: [PATCH 2/4] Remove TypeSwitch --- llvm/include/llvm/IR/Operator.h | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index 22ffcc730e7b68..2cacc632f3a8cb 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -15,7 +15,6 @@ #define LLVM_IR_OPERATOR_H #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/TypeSwitch.h" #include "llvm/IR/Constants.h" #include "llvm/IR/FMF.h" #include "llvm/IR/GEPNoWrapFlags.h" @@ -352,17 +351,15 @@ class FPMathOperator : public Operator { case Instruction::Select: case Instruction::Call: { Type *Ty = V->getType(); - TypeSwitch<Type *>(Ty) - .Case([&](StructType *StructTy) { - if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes()) - return; - Ty = StructTy->elements().front(); - }) - .Case([&](ArrayType *ArrTy) { - do { - Ty = ArrTy->getElementType(); - } while 
((ArrTy = dyn_cast<ArrayType>(Ty))); - }); + if (StructType *StructTy = dyn_cast<StructType>(Ty)) { + if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes()) + return false; + Ty = StructTy->elements().front(); + } else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) { + do { + Ty = ArrayTy->getElementType(); + } while ((ArrayTy = dyn_cast<ArrayType>(Ty))); + } return Ty->isFPOrFPVectorTy(); } default: >From 5cc741f73aced83fc8826438e0a018776586a5e2 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <benjamin.maxw...@arm.com> Date: Mon, 30 Sep 2024 15:31:09 +0000 Subject: [PATCH 3/4] Update clang tests --- clang/test/CodeGen/X86/cx-complex-range.c | 2 +- clang/test/CodeGen/cx-complex-range.c | 10 +++++----- clang/test/CodeGen/nofpclass.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGen/X86/cx-complex-range.c b/clang/test/CodeGen/X86/cx-complex-range.c index 14887637d516ef..da580d54c9f618 100644 --- a/clang/test/CodeGen/X86/cx-complex-range.c +++ b/clang/test/CodeGen/X86/cx-complex-range.c @@ -1220,7 +1220,7 @@ _Complex _Float16 mulf16(_Complex _Float16 a, _Complex _Float16 b) { // FULL_FAST-NEXT: [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2 // FULL_FAST-NEXT: [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80 // FULL_FAST-NEXT: [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80 -// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]] +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]] // FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, 
x86_fp80 } [[CALL]], 0 // FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 // FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to half diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index d4fb62a7dfec35..b780d4d1767c12 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -1444,7 +1444,7 @@ _Complex float mulf(_Complex float a, _Complex float b) { // FULL_FAST-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 // FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[B]], i32 0, i32 1 // FULL_FAST-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 -// FULL_FAST-NEXT: [[CALL:%.*]] = call { double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] // FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0 // FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1 // FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { double, double }, ptr [[RETVAL]], i32 0, i32 0 @@ -2003,7 +2003,7 @@ _Complex double divd(_Complex double a, _Complex double b) { // FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno double [[MUL_I]], [[MUL_I]] // FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] // FULL_FAST: complex_mul_libcall: -// FULL_FAST-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef 
nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { double, double } @__muldc3(double noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] // FULL_FAST-NEXT: [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0 // FULL_FAST-NEXT: [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 1 // FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]] @@ -2535,7 +2535,7 @@ _Complex double muld(_Complex double a, _Complex double b) { // FULL_FAST-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 // FULL_FAST-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 // FULL_FAST-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 -// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] // FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 // FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 // FULL_FAST-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 @@ -3028,7 +3028,7 @@ _Complex long double divld(_Complex long double a, _Complex long double b) { // 
FULL_FAST-NEXT: [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn uno x86_fp80 [[MUL_I]], [[MUL_I]] // FULL_FAST-NEXT: br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] // FULL_FAST: complex_mul_libcall: -// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]] // FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 0 // FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 // FULL_FAST-NEXT: br label [[COMPLEX_MUL_CONT]] @@ -3753,7 +3753,7 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) { // FULL_FAST-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 // FULL_FAST-NEXT: [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80 // FULL_FAST-NEXT: [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80 -// FULL_FAST-NEXT: [[CALL:%.*]] = call { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]] +// FULL_FAST-NEXT: [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]] // FULL_FAST-NEXT: [[TMP0:%.*]] = extractvalue { 
x86_fp80, x86_fp80 } [[CALL]], 0 // FULL_FAST-NEXT: [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } [[CALL]], 1 // FULL_FAST-NEXT: [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to float diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c index 16a3823a5c02de..ca86e67a5d27c7 100644 --- a/clang/test/CodeGen/nofpclass.c +++ b/clang/test/CodeGen/nofpclass.c @@ -548,7 +548,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // CFINITEONLY-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan ninf uno double [[MUL_I]], [[MUL_I]] // CFINITEONLY-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] // CFINITEONLY: complex_mul_libcall: -// CFINITEONLY-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] +// CFINITEONLY-NEXT: [[CALL:%.*]] = call nnan ninf { double, double } @__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] // CFINITEONLY-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 // CFINITEONLY-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 // CFINITEONLY-NEXT: br label [[COMPLEX_MUL_CONT]] @@ -605,7 +605,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // NONANS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp nnan uno double [[MUL_I]], [[MUL_I]] // NONANS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] // NONANS: complex_mul_libcall: -// NONANS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef 
nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] +// NONANS-NEXT: [[CALL:%.*]] = call nnan { double, double } @__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] // NONANS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 // NONANS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 // NONANS-NEXT: br label [[COMPLEX_MUL_CONT]] @@ -649,7 +649,7 @@ _Complex float defined_complex_func(_Complex float a, _Complex double b, _Comple // NOINFS-NEXT: [[ISNAN_CMP5:%.*]] = fcmp ninf uno double [[MUL_I]], [[MUL_I]] // NOINFS-NEXT: br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]] // NOINFS: complex_mul_libcall: -// NOINFS-NEXT: [[CALL:%.*]] = call { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] +// NOINFS-NEXT: [[CALL:%.*]] = call ninf { double, double } @__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]] // NOINFS-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0 // NOINFS-NEXT: [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1 // NOINFS-NEXT: br label [[COMPLEX_MUL_CONT]] >From 8d3353c6c290c60c25abaea4b309f7ec4b525abf Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell <benjamin.maxw...@arm.com> Date: Mon, 30 Sep 2024 16:15:01 +0000 Subject: [PATCH 4/4] Fixups --- llvm/docs/LangRef.rst | 39 +++++++++++++++----------- llvm/test/Bitcode/compatibility.ll | 14 ++++----- llvm/unittests/IR/InstructionsTest.cpp | 25 +++++++++++++++-- 3 files changed, 53 insertions(+), 25 deletions(-) diff 
--git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1eb2982385fda0..0462b5d7328737 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3685,9 +3685,9 @@ Fast-Math Flags LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`, :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`, -:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`), :ref:`phi <i_phi>`, -:ref:`select <i_select>` and :ref:`call <i_call>` -may use the following flags to enable otherwise unsafe +:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`), and :ref:`phi <i_phi>`, +:ref:`select <i_select>`, or :ref:`call <i_call>` instructions that return +floating-point types may use the following flags to enable otherwise unsafe floating-point transformations. ``fast`` @@ -3709,6 +3709,16 @@ floating-point transformations. argument or zero result as insignificant. This does not imply that -0.0 is poison and/or guaranteed to not exist in the operation. +.. _fastmath_return_types: + +Note: For :ref:`phi <i_phi>`, :ref:`select <i_select>`, and :ref:`call <i_call>` +instructions, the following return types are considered to be floating-point +types: + +- Floating-point scalar or vector types +- Array types (nested to any depth) of floating-point scalar or vector types +- Homogeneous literal struct types of floating-point scalar or vector types + Rewrite-based flags ^^^^^^^^^^^^^^^^^^^ @@ -4343,7 +4353,7 @@ recursive, can be opaqued, and are never uniqued. 
:Examples: +------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values | +| ``{ i32, i32, i32 }`` | A triple of three ``i32`` values (this is a "homogeneous" struct as all element types are the same) | +------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | ``{ float, ptr }`` | A pair, where the first element is a ``float`` and the second element is a :ref:`pointer <t_pointer>`. | +------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -12472,8 +12482,8 @@ instruction's return value on the same edge). The optional ``fast-math-flags`` marker indicates that the phi has one or more :ref:`fast-math-flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math-flags -are only valid for phis that return a floating-point scalar or vector type, -possibly within an array (nested to any depth), or a homogeneous struct literal. +are only valid for phis that return :ref:`supported floating-point types +<fastmath_return_types>`. Semantics: """""""""" @@ -12522,8 +12532,8 @@ class <t_firstclass>` type. #. The optional ``fast-math flags`` marker indicates that the select has one or more :ref:`fast-math flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are only valid - for selects that return a floating-point scalar or vector type, possibly - within an array (nested to any depth), or a homogeneous struct literal. 
+ for selects that return :ref:`supported floating-point types + <fastmath_return_types>`. Semantics: """""""""" @@ -12761,8 +12771,7 @@ This instruction requires several arguments: #. The optional ``fast-math flags`` marker indicates that the call has one or more :ref:`fast-math flags <fastmath>`, which are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are only valid - for calls that return a floating-point scalar or vector type, possibly within - an array (nested to any depth), or a homogeneous struct literal. + for calls that return :ref:`supported floating-point types <fastmath_return_types>`. #. The optional "cconv" marker indicates which :ref:`calling convention <callingconv>` the call should use. If none is @@ -20526,9 +20535,8 @@ the explicit vector length. #. The optional ``fast-math flags`` marker indicates that the select has one or more :ref:`fast-math flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are - only valid for selects that return a floating-point scalar or vector type, - possibly within an array (nested to any depth), or a homogeneous struct - literal. + only valid for selects that return :ref:`supported floating-point types + <fastmath_return_types>`. Semantics: """""""""" @@ -20585,9 +20593,8 @@ is the pivot. #. The optional ``fast-math flags`` marker indicates that the merge has one or more :ref:`fast-math flags <fastmath>`. These are optimization hints to enable otherwise unsafe floating-point optimizations. Fast-math flags are - only valid for merges that return a floating-point scalar or vector type, - possibly within an array (nested to any depth), or a homogeneous struct - literal. + only valid for merges that return :ref:`supported floating-point types + <fastmath_return_types>`. 
Semantics: """""""""" diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 4fe9d9b11f8831..b6050944e637a3 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1123,21 +1123,21 @@ define void @fastMathFlagsForArrayCalls([2 x float] %f, [2 x double] %d1, [2 x < } declare { float, float } @fmf_struct_f32() -declare { double, double } @fmf_struct_f64() -declare { <4 x double>, <4 x double> } @fmf_struct_v4f64() +declare { double, double, double } @fmf_struct_f64() +declare { <4 x double> } @fmf_struct_v4f64() ; CHECK-LABEL: fastMathFlagsForStructCalls( -define void @fastMathFlagsForStructCalls({ float, float } %f, { double, double } %d1, { <4 x double>, <4 x double> } %d2) { +define void @fastMathFlagsForStructCalls() { %call.fast = call fast { float, float } @fmf_struct_f32() ; CHECK: %call.fast = call fast { float, float } @fmf_struct_f32() ; Throw in some other attributes to make sure those stay in the right places. 
- %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64() - ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64() + %call.nsz.arcp = notail call nsz arcp { double, double, double } @fmf_struct_f64() + ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double, double } @fmf_struct_f64() - %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64() - ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } @fmf_struct_v4f64() + %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double> } @fmf_struct_v4f64() + ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double> } @fmf_struct_v4f64() ret void } diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index 9d8056a768af2f..0af812564c0267 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -1566,6 +1566,13 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) { CallInst::Create(StructIFnTy, StructICallee, {}, "")); EXPECT_FALSE(isa<FPMathOperator>(StructICall)); + Type *EmptyStructTy = StructType::get(C); + FunctionType *EmptyStructFnTy = FunctionType::get(EmptyStructTy, {}); + Value *EmptyStructCallee = Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> EmptyStructCall( + CallInst::Create(EmptyStructFnTy, EmptyStructCallee, {}, "")); + EXPECT_FALSE(isa<FPMathOperator>(EmptyStructCall)); + Type *NamedStructFTy = StructType::create({FTy, FTy}, "AStruct"); FunctionType *NamedStructFFnTy = FunctionType::get(NamedStructFTy, {}); Value *NamedStructFCallee = Constant::getNullValue(PtrTy); @@ -1580,19 +1587,33 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) { CallInst::Create(MixedStructFnTy, MixedStructCallee, {}, "")); EXPECT_FALSE(isa<FPMathOperator>(MixedStructCall)); - Type *StructFTy = StructType::get(FTy, FTy); + Type *StructFTy = StructType::get(FTy, FTy, FTy); FunctionType *StructFFnTy = 
FunctionType::get(StructFTy, {}); Value *StructFCallee = Constant::getNullValue(PtrTy); std::unique_ptr<CallInst> StructFCall( CallInst::Create(StructFFnTy, StructFCallee, {}, "")); EXPECT_TRUE(isa<FPMathOperator>(StructFCall)); - Type *StructVFTy = StructType::get(VFTy, VFTy); + Type *StructVFTy = StructType::get(VFTy, VFTy, VFTy, VFTy); FunctionType *StructVFFnTy = FunctionType::get(StructVFTy, {}); Value *StructVFCallee = Constant::getNullValue(PtrTy); std::unique_ptr<CallInst> StructVFCall( CallInst::Create(StructVFFnTy, StructVFCallee, {}, "")); EXPECT_TRUE(isa<FPMathOperator>(StructVFCall)); + + Type *NestedStructFTy = StructType::get(StructFTy, StructFTy, StructFTy); + FunctionType *NestedStructFFnTy = FunctionType::get(NestedStructFTy, {}); + Value *NestedStructFCallee = Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> NestedStructFCall( + CallInst::Create(NestedStructFFnTy, NestedStructFCallee, {}, "")); + EXPECT_FALSE(isa<FPMathOperator>(NestedStructFCall)); + + Type *AStructFTy = ArrayType::get(StructFTy, 5); + FunctionType *AStructFFnTy = FunctionType::get(AStructFTy, {}); + Value *AStructFCallee = Constant::getNullValue(PtrTy); + std::unique_ptr<CallInst> AStructFCall( + CallInst::Create(AStructFFnTy, AStructFCallee, {}, "")); + EXPECT_FALSE(isa<FPMathOperator>(AStructFCall)); } TEST(InstructionsTest, FNegInstruction) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits