[clang] [llvm] [IR] Allow fast math flags on calls with homogeneous FP struct types (PR #110506)

Benjamin Maxwell via cfe-commits Mon, 30 Sep 2024 10:14:39 -0700

https://github.com/MacDue updated 
https://github.com/llvm/llvm-project/pull/110506


>From 328357f2300ebe55b8385c01f9c655f703933736 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxw...@arm.com>
Date: Mon, 30 Sep 2024 11:07:45 +0000
Subject: [PATCH 1/4] [IR] Allow fast math flags on calls with homogeneous FP
 struct types

This extends FPMathOperator to allow calls that return literal structs
of homogeneous floating-point or vector-of-floating-point types.

The intended use case for this is to support FP intrinsics that return
multiple values (such as `llvm.sincos`).
---
 llvm/docs/LangRef.rst                  | 19 ++++++------
 llvm/include/llvm/IR/DerivedTypes.h    |  4 +++
 llvm/include/llvm/IR/Operator.h        | 14 +++++++--
 llvm/lib/IR/Type.cpp                   | 13 +++++----
 llvm/test/Bitcode/compatibility.ll     | 20 +++++++++++++
 llvm/unittests/IR/InstructionsTest.cpp | 40 ++++++++++++++++++++++----
 6 files changed, 87 insertions(+), 23 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 3f39d58b322a4f..1eb2982385fda0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -12472,9 +12472,8 @@ instruction's return value on the same edge).
 The optional ``fast-math-flags`` marker indicates that the phi has one
 or more :ref:`fast-math-flags <fastmath>`. These are optimization hints
 to enable otherwise unsafe floating-point optimizations. Fast-math-flags
-are only valid for phis that return a floating-point scalar or vector
-type, or an array (nested to any depth) of floating-point scalar or vector
-types.
+are only valid for phis that return a floating-point scalar or vector type,
+possibly within an array (nested to any depth), or a homogeneous struct 
literal.
 
 Semantics:
 """"""""""
@@ -12523,8 +12522,8 @@ class <t_firstclass>` type.
 #. The optional ``fast-math flags`` marker indicates that the select has one 
or more
    :ref:`fast-math flags <fastmath>`. These are optimization hints to enable
    otherwise unsafe floating-point optimizations. Fast-math flags are only 
valid
-   for selects that return a floating-point scalar or vector type, or an array
-   (nested to any depth) of floating-point scalar or vector types.
+   for selects that return a floating-point scalar or vector type, possibly
+   within an array (nested to any depth), or a homogeneous struct literal.
 
 Semantics:
 """"""""""
@@ -12762,8 +12761,8 @@ This instruction requires several arguments:
 #. The optional ``fast-math flags`` marker indicates that the call has one or 
more
    :ref:`fast-math flags <fastmath>`, which are optimization hints to enable
    otherwise unsafe floating-point optimizations. Fast-math flags are only 
valid
-   for calls that return a floating-point scalar or vector type, or an array
-   (nested to any depth) of floating-point scalar or vector types.
+   for calls that return a floating-point scalar or vector type, possibly 
within
+   an array (nested to any depth), or a homogeneous struct literal.
 
 #. The optional "cconv" marker indicates which :ref:`calling
    convention <callingconv>` the call should use. If none is
@@ -20528,7 +20527,8 @@ the explicit vector length.
    more :ref:`fast-math flags <fastmath>`. These are optimization hints to
    enable otherwise unsafe floating-point optimizations. Fast-math flags are
    only valid for selects that return a floating-point scalar or vector type,
-   or an array (nested to any depth) of floating-point scalar or vector types.
+   possibly within an array (nested to any depth), or a homogeneous struct
+   literal.
 
 Semantics:
 """"""""""
@@ -20586,7 +20586,8 @@ is the pivot.
    more :ref:`fast-math flags <fastmath>`. These are optimization hints to
    enable otherwise unsafe floating-point optimizations. Fast-math flags are
    only valid for merges that return a floating-point scalar or vector type,
-   or an array (nested to any depth) of floating-point scalar or vector types.
+   possibly within an array (nested to any depth), or a homogeneous struct
+   literal.
 
 Semantics:
 """"""""""
diff --git a/llvm/include/llvm/IR/DerivedTypes.h 
b/llvm/include/llvm/IR/DerivedTypes.h
index 975c142f1a4572..a24801d8bdf834 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -301,6 +301,10 @@ class StructType : public Type {
   ///  {<vscale x 2 x i32>, <vscale x 4 x i64>}}
   bool containsHomogeneousScalableVectorTypes() const;
 
+  /// Return true if this struct is non-empty and all element types are the
+  /// same.
+  bool containsHomogeneousTypes() const;
+
   /// Return true if this is a named struct that has a non-empty name.
   bool hasName() const { return SymbolTableEntry != nullptr; }
 
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 88b9bfc0be4b15..22ffcc730e7b68 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -15,6 +15,7 @@
 #define LLVM_IR_OPERATOR_H
 
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/FMF.h"
 #include "llvm/IR/GEPNoWrapFlags.h"
@@ -351,8 +352,17 @@ class FPMathOperator : public Operator {
     case Instruction::Select:
     case Instruction::Call: {
       Type *Ty = V->getType();
-      while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
-        Ty = ArrTy->getElementType();
+      TypeSwitch<Type *>(Ty)
+          .Case([&](StructType *StructTy) {
+            if (!StructTy->isLiteral() || 
!StructTy->containsHomogeneousTypes())
+              return;
+            Ty = StructTy->elements().front();
+          })
+          .Case([&](ArrayType *ArrTy) {
+            do {
+              Ty = ArrTy->getElementType();
+            } while ((ArrTy = dyn_cast<ArrayType>(Ty)));
+          });
       return Ty->isFPOrFPVectorTy();
     }
     default:
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 3784ad28d7219d..f618263f79c313 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -430,13 +430,14 @@ bool StructType::containsScalableVectorType(
 }
 
 bool StructType::containsHomogeneousScalableVectorTypes() const {
-  Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr;
-  if (!FirstTy || !isa<ScalableVectorType>(FirstTy))
+  if (getNumElements() <= 0 || !isa<ScalableVectorType>(elements().front()))
     return false;
-  for (Type *Ty : elements())
-    if (Ty != FirstTy)
-      return false;
-  return true;
+  return containsHomogeneousTypes();
+}
+
+bool StructType::containsHomogeneousTypes() const {
+  ArrayRef<Type *> ElementTys = elements();
+  return !ElementTys.empty() && all_equal(ElementTys);
 }
 
 void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
diff --git a/llvm/test/Bitcode/compatibility.ll 
b/llvm/test/Bitcode/compatibility.ll
index ea29ff634a43bb..4fe9d9b11f8831 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1122,6 +1122,26 @@ define void @fastMathFlagsForArrayCalls([2 x float] %f, 
[2 x double] %d1, [2 x <
   ret void
 }
 
+declare { float, float } @fmf_struct_f32()
+declare { double, double } @fmf_struct_f64()
+declare { <4 x double>, <4 x double> } @fmf_struct_v4f64()
+
+; CHECK-LABEL: fastMathFlagsForStructCalls(
+define void @fastMathFlagsForStructCalls({ float, float } %f, { double, double 
} %d1, { <4 x double>, <4 x double> } %d2) {
+  %call.fast = call fast { float, float } @fmf_struct_f32()
+  ; CHECK: %call.fast = call fast { float, float } @fmf_struct_f32()
+
+  ; Throw in some other attributes to make sure those stay in the right places.
+
+  %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64()
+  ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double } 
@fmf_struct_f64()
+
+  %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } 
@fmf_struct_v4f64()
+  ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x 
double> } @fmf_struct_v4f64()
+
+  ret void
+}
+
 ;; Type System
 %opaquety = type opaque
 define void @typesystem() {
diff --git a/llvm/unittests/IR/InstructionsTest.cpp 
b/llvm/unittests/IR/InstructionsTest.cpp
index 481fe96607e48e..9d8056a768af2f 100644
--- a/llvm/unittests/IR/InstructionsTest.cpp
+++ b/llvm/unittests/IR/InstructionsTest.cpp
@@ -1559,12 +1559,40 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) {
       CallInst::Create(AVFFnTy, AVFCallee, {}, ""));
   EXPECT_TRUE(isa<FPMathOperator>(AVFCall));
 
-  Type *AAVFTy = ArrayType::get(AVFTy, 2);
-  FunctionType *AAVFFnTy = FunctionType::get(AAVFTy, {});
-  Value *AAVFCallee = Constant::getNullValue(PtrTy);
-  std::unique_ptr<CallInst> AAVFCall(
-      CallInst::Create(AAVFFnTy, AAVFCallee, {}, ""));
-  EXPECT_TRUE(isa<FPMathOperator>(AAVFCall));
+  Type *StructITy = StructType::get(ITy, ITy);
+  FunctionType *StructIFnTy = FunctionType::get(StructITy, {});
+  Value *StructICallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> StructICall(
+      CallInst::Create(StructIFnTy, StructICallee, {}, ""));
+  EXPECT_FALSE(isa<FPMathOperator>(StructICall));
+
+  Type *NamedStructFTy = StructType::create({FTy, FTy}, "AStruct");
+  FunctionType *NamedStructFFnTy = FunctionType::get(NamedStructFTy, {});
+  Value *NamedStructFCallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> NamedStructFCall(
+      CallInst::Create(NamedStructFFnTy, NamedStructFCallee, {}, ""));
+  EXPECT_FALSE(isa<FPMathOperator>(NamedStructFCall));
+
+  Type *MixedStructTy = StructType::get(FTy, ITy);
+  FunctionType *MixedStructFnTy = FunctionType::get(MixedStructTy, {});
+  Value *MixedStructCallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> MixedStructCall(
+      CallInst::Create(MixedStructFnTy, MixedStructCallee, {}, ""));
+  EXPECT_FALSE(isa<FPMathOperator>(MixedStructCall));
+
+  Type *StructFTy = StructType::get(FTy, FTy);
+  FunctionType *StructFFnTy = FunctionType::get(StructFTy, {});
+  Value *StructFCallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> StructFCall(
+      CallInst::Create(StructFFnTy, StructFCallee, {}, ""));
+  EXPECT_TRUE(isa<FPMathOperator>(StructFCall));
+
+  Type *StructVFTy = StructType::get(VFTy, VFTy);
+  FunctionType *StructVFFnTy = FunctionType::get(StructVFTy, {});
+  Value *StructVFCallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> StructVFCall(
+      CallInst::Create(StructVFFnTy, StructVFCallee, {}, ""));
+  EXPECT_TRUE(isa<FPMathOperator>(StructVFCall));
 }
 
 TEST(InstructionsTest, FNegInstruction) {

>From 688154050d02b3d118031d2fdd532a7f5d2500a8 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxw...@arm.com>
Date: Mon, 30 Sep 2024 15:06:11 +0000
Subject: [PATCH 2/4] Remove TypeSwitch

---
 llvm/include/llvm/IR/Operator.h | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index 22ffcc730e7b68..2cacc632f3a8cb 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -15,7 +15,6 @@
 #define LLVM_IR_OPERATOR_H
 
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/TypeSwitch.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/FMF.h"
 #include "llvm/IR/GEPNoWrapFlags.h"
@@ -352,17 +351,15 @@ class FPMathOperator : public Operator {
     case Instruction::Select:
     case Instruction::Call: {
       Type *Ty = V->getType();
-      TypeSwitch<Type *>(Ty)
-          .Case([&](StructType *StructTy) {
-            if (!StructTy->isLiteral() || 
!StructTy->containsHomogeneousTypes())
-              return;
-            Ty = StructTy->elements().front();
-          })
-          .Case([&](ArrayType *ArrTy) {
-            do {
-              Ty = ArrTy->getElementType();
-            } while ((ArrTy = dyn_cast<ArrayType>(Ty)));
-          });
+      if (StructType *StructTy = dyn_cast<StructType>(Ty)) {
+        if (!StructTy->isLiteral() || !StructTy->containsHomogeneousTypes())
+          return false;
+        Ty = StructTy->elements().front();
+      } else if (ArrayType *ArrayTy = dyn_cast<ArrayType>(Ty)) {
+        do {
+          Ty = ArrayTy->getElementType();
+        } while ((ArrayTy = dyn_cast<ArrayType>(Ty)));
+      }
       return Ty->isFPOrFPVectorTy();
     }
     default:

>From 5cc741f73aced83fc8826438e0a018776586a5e2 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxw...@arm.com>
Date: Mon, 30 Sep 2024 15:31:09 +0000
Subject: [PATCH 3/4] Update clang tests

---
 clang/test/CodeGen/X86/cx-complex-range.c |  2 +-
 clang/test/CodeGen/cx-complex-range.c     | 10 +++++-----
 clang/test/CodeGen/nofpclass.c            |  6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang/test/CodeGen/X86/cx-complex-range.c 
b/clang/test/CodeGen/X86/cx-complex-range.c
index 14887637d516ef..da580d54c9f618 100644
--- a/clang/test/CodeGen/X86/cx-complex-range.c
+++ b/clang/test/CodeGen/X86/cx-complex-range.c
@@ -1220,7 +1220,7 @@ _Complex _Float16 mulf16(_Complex _Float16 a, _Complex 
_Float16 b) {
 // FULL_FAST-NEXT:    [[C_IMAG:%.*]] = load half, ptr [[C_IMAGP]], align 2
 // FULL_FAST-NEXT:    [[CONV:%.*]] = fpext half [[C_REAL]] to x86_fp80
 // FULL_FAST-NEXT:    [[CONV1:%.*]] = fpext half [[C_IMAG]] to x86_fp80
-// FULL_FAST-NEXT:    [[CALL:%.*]] = call { x86_fp80, x86_fp80 } 
@__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef 
nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], 
x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]]
+// FULL_FAST-NEXT:    [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { 
x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], 
x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan 
inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR1]]
 // FULL_FAST-NEXT:    [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 0
 // FULL_FAST-NEXT:    [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 1
 // FULL_FAST-NEXT:    [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to half
diff --git a/clang/test/CodeGen/cx-complex-range.c 
b/clang/test/CodeGen/cx-complex-range.c
index d4fb62a7dfec35..b780d4d1767c12 100644
--- a/clang/test/CodeGen/cx-complex-range.c
+++ b/clang/test/CodeGen/cx-complex-range.c
@@ -1444,7 +1444,7 @@ _Complex float mulf(_Complex float a, _Complex float b) {
 // FULL_FAST-NEXT:    [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8
 // FULL_FAST-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds nuw { double, 
double }, ptr [[B]], i32 0, i32 1
 // FULL_FAST-NEXT:    [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8
-// FULL_FAST-NEXT:    [[CALL:%.*]] = call { double, double } @__divdc3(double 
noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) 
[[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef 
nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT:    [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { 
double, double } @__divdc3(double noundef nofpclass(nan inf) [[A_REAL]], double 
noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) 
[[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
 // FULL_FAST-NEXT:    [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0
 // FULL_FAST-NEXT:    [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 
1
 // FULL_FAST-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { 
double, double }, ptr [[RETVAL]], i32 0, i32 0
@@ -2003,7 +2003,7 @@ _Complex double divd(_Complex double a, _Complex double 
b) {
 // FULL_FAST-NEXT:    [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn 
uno double [[MUL_I]], [[MUL_I]]
 // FULL_FAST-NEXT:    br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], 
label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
 // FULL_FAST:       complex_mul_libcall:
-// FULL_FAST-NEXT:    [[CALL:%.*]] = call { double, double } @__muldc3(double 
noundef nofpclass(nan inf) [[A_REAL]], double noundef nofpclass(nan inf) 
[[A_IMAG]], double noundef nofpclass(nan inf) [[B_REAL]], double noundef 
nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT:    [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { 
double, double } @__muldc3(double noundef nofpclass(nan inf) [[A_REAL]], double 
noundef nofpclass(nan inf) [[A_IMAG]], double noundef nofpclass(nan inf) 
[[B_REAL]], double noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
 // FULL_FAST-NEXT:    [[TMP4:%.*]] = extractvalue { double, double } [[CALL]], 0
 // FULL_FAST-NEXT:    [[TMP5:%.*]] = extractvalue { double, double } [[CALL]], 
1
 // FULL_FAST-NEXT:    br label [[COMPLEX_MUL_CONT]]
@@ -2535,7 +2535,7 @@ _Complex double muld(_Complex double a, _Complex double 
b) {
 // FULL_FAST-NEXT:    [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16
 // FULL_FAST-NEXT:    [[B_IMAGP:%.*]] = getelementptr inbounds nuw { x86_fp80, 
x86_fp80 }, ptr [[B]], i32 0, i32 1
 // FULL_FAST-NEXT:    [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16
-// FULL_FAST-NEXT:    [[CALL:%.*]] = call { x86_fp80, x86_fp80 } 
@__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef 
nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], 
x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT:    [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { 
x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], 
x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan 
inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
 // FULL_FAST-NEXT:    [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 0
 // FULL_FAST-NEXT:    [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 1
 // FULL_FAST-NEXT:    [[RETVAL_REALP:%.*]] = getelementptr inbounds nuw { 
x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0
@@ -3028,7 +3028,7 @@ _Complex long double divld(_Complex long double a, 
_Complex long double b) {
 // FULL_FAST-NEXT:    [[ISNAN_CMP1:%.*]] = fcmp reassoc nnan ninf nsz arcp afn 
uno x86_fp80 [[MUL_I]], [[MUL_I]]
 // FULL_FAST-NEXT:    br i1 [[ISNAN_CMP1]], label [[COMPLEX_MUL_LIBCALL:%.*]], 
label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
 // FULL_FAST:       complex_mul_libcall:
-// FULL_FAST-NEXT:    [[CALL:%.*]] = call { x86_fp80, x86_fp80 } 
@__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], x86_fp80 noundef 
nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], 
x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
+// FULL_FAST-NEXT:    [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { 
x86_fp80, x86_fp80 } @__mulxc3(x86_fp80 noundef nofpclass(nan inf) [[A_REAL]], 
x86_fp80 noundef nofpclass(nan inf) [[A_IMAG]], x86_fp80 noundef nofpclass(nan 
inf) [[B_REAL]], x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]]) #[[ATTR2]]
 // FULL_FAST-NEXT:    [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 0
 // FULL_FAST-NEXT:    [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 1
 // FULL_FAST-NEXT:    br label [[COMPLEX_MUL_CONT]]
@@ -3753,7 +3753,7 @@ _Complex long double mulld(_Complex long double a, 
_Complex long double b) {
 // FULL_FAST-NEXT:    [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4
 // FULL_FAST-NEXT:    [[CONV:%.*]] = fpext float [[C_REAL]] to x86_fp80
 // FULL_FAST-NEXT:    [[CONV1:%.*]] = fpext float [[C_IMAG]] to x86_fp80
-// FULL_FAST-NEXT:    [[CALL:%.*]] = call { x86_fp80, x86_fp80 } 
@__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], x86_fp80 noundef 
nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan inf) [[CONV]], 
x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]]
+// FULL_FAST-NEXT:    [[CALL:%.*]] = call reassoc nnan ninf nsz arcp afn { 
x86_fp80, x86_fp80 } @__divxc3(x86_fp80 noundef nofpclass(nan inf) [[B_REAL]], 
x86_fp80 noundef nofpclass(nan inf) [[B_IMAG]], x86_fp80 noundef nofpclass(nan 
inf) [[CONV]], x86_fp80 noundef nofpclass(nan inf) [[CONV1]]) #[[ATTR2]]
 // FULL_FAST-NEXT:    [[TMP0:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 0
 // FULL_FAST-NEXT:    [[TMP1:%.*]] = extractvalue { x86_fp80, x86_fp80 } 
[[CALL]], 1
 // FULL_FAST-NEXT:    [[CONV2:%.*]] = fptrunc x86_fp80 [[TMP0]] to float
diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c
index 16a3823a5c02de..ca86e67a5d27c7 100644
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@@ -548,7 +548,7 @@ _Complex float defined_complex_func(_Complex float a, 
_Complex double b, _Comple
 // CFINITEONLY-NEXT:    [[ISNAN_CMP5:%.*]] = fcmp nnan ninf uno double 
[[MUL_I]], [[MUL_I]]
 // CFINITEONLY-NEXT:    br i1 [[ISNAN_CMP5]], label 
[[COMPLEX_MUL_LIBCALL:%.*]], label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
 // CFINITEONLY:       complex_mul_libcall:
-// CFINITEONLY-NEXT:    [[CALL:%.*]] = call { double, double } 
@__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef 
nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], 
double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// CFINITEONLY-NEXT:    [[CALL:%.*]] = call nnan ninf { double, double } 
@__muldc3(double noundef nofpclass(nan inf) [[C_REAL]], double noundef 
nofpclass(nan inf) [[C_IMAG]], double noundef nofpclass(nan inf) [[C_REAL2]], 
double noundef nofpclass(nan inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
 // CFINITEONLY-NEXT:    [[TMP2:%.*]] = extractvalue { double, double } 
[[CALL]], 0
 // CFINITEONLY-NEXT:    [[TMP3:%.*]] = extractvalue { double, double } 
[[CALL]], 1
 // CFINITEONLY-NEXT:    br label [[COMPLEX_MUL_CONT]]
@@ -605,7 +605,7 @@ _Complex float defined_complex_func(_Complex float a, 
_Complex double b, _Comple
 // NONANS-NEXT:    [[ISNAN_CMP5:%.*]] = fcmp nnan uno double [[MUL_I]], 
[[MUL_I]]
 // NONANS-NEXT:    br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], 
label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
 // NONANS:       complex_mul_libcall:
-// NONANS-NEXT:    [[CALL:%.*]] = call { double, double } @__muldc3(double 
noundef nofpclass(nan) [[C_REAL]], double noundef nofpclass(nan) [[C_IMAG]], 
double noundef nofpclass(nan) [[C_REAL2]], double noundef nofpclass(nan) 
[[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// NONANS-NEXT:    [[CALL:%.*]] = call nnan { double, double } 
@__muldc3(double noundef nofpclass(nan) [[C_REAL]], double noundef 
nofpclass(nan) [[C_IMAG]], double noundef nofpclass(nan) [[C_REAL2]], double 
noundef nofpclass(nan) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
 // NONANS-NEXT:    [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
 // NONANS-NEXT:    [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
 // NONANS-NEXT:    br label [[COMPLEX_MUL_CONT]]
@@ -649,7 +649,7 @@ _Complex float defined_complex_func(_Complex float a, 
_Complex double b, _Comple
 // NOINFS-NEXT:    [[ISNAN_CMP5:%.*]] = fcmp ninf uno double [[MUL_I]], 
[[MUL_I]]
 // NOINFS-NEXT:    br i1 [[ISNAN_CMP5]], label [[COMPLEX_MUL_LIBCALL:%.*]], 
label [[COMPLEX_MUL_CONT]], !prof [[PROF2]]
 // NOINFS:       complex_mul_libcall:
-// NOINFS-NEXT:    [[CALL:%.*]] = call { double, double } @__muldc3(double 
noundef nofpclass(inf) [[C_REAL]], double noundef nofpclass(inf) [[C_IMAG]], 
double noundef nofpclass(inf) [[C_REAL2]], double noundef nofpclass(inf) 
[[C_IMAG4]]) #[[ATTR7:[0-9]+]]
+// NOINFS-NEXT:    [[CALL:%.*]] = call ninf { double, double } 
@__muldc3(double noundef nofpclass(inf) [[C_REAL]], double noundef 
nofpclass(inf) [[C_IMAG]], double noundef nofpclass(inf) [[C_REAL2]], double 
noundef nofpclass(inf) [[C_IMAG4]]) #[[ATTR7:[0-9]+]]
 // NOINFS-NEXT:    [[TMP2:%.*]] = extractvalue { double, double } [[CALL]], 0
 // NOINFS-NEXT:    [[TMP3:%.*]] = extractvalue { double, double } [[CALL]], 1
 // NOINFS-NEXT:    br label [[COMPLEX_MUL_CONT]]

>From 8d3353c6c290c60c25abaea4b309f7ec4b525abf Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxw...@arm.com>
Date: Mon, 30 Sep 2024 16:15:01 +0000
Subject: [PATCH 4/4] Fixups

---
 llvm/docs/LangRef.rst                  | 39 +++++++++++++++-----------
 llvm/test/Bitcode/compatibility.ll     | 14 ++++-----
 llvm/unittests/IR/InstructionsTest.cpp | 25 +++++++++++++++--
 3 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 1eb2982385fda0..0462b5d7328737 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3685,9 +3685,9 @@ Fast-Math Flags
 
 LLVM IR floating-point operations (:ref:`fneg <i_fneg>`, :ref:`fadd <i_fadd>`,
 :ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
-:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`), :ref:`phi <i_phi>`,
-:ref:`select <i_select>` and :ref:`call <i_call>`
-may use the following flags to enable otherwise unsafe
+:ref:`frem <i_frem>`, :ref:`fcmp <i_fcmp>`), and :ref:`phi <i_phi>`,
+:ref:`select <i_select>`, or :ref:`call <i_call>` instructions that return
+floating-point types may use the following flags to enable otherwise unsafe
 floating-point transformations.
 
 ``fast``
@@ -3709,6 +3709,16 @@ floating-point transformations.
    argument or zero result as insignificant. This does not imply that -0.0
    is poison and/or guaranteed to not exist in the operation.
 
+.. _fastmath_return_types:
+
+Note: For :ref:`phi <i_phi>`, :ref:`select <i_select>`, and :ref:`call 
<i_call>`
+instructions, the following return types are considered to be floating-point
+types:
+
+- Floating-point scalar or vector types
+- Array types (nested to any depth) of floating-point scalar or vector types
+- Homogeneous literal struct types of floating-point scalar or vector types
+
 Rewrite-based flags
 ^^^^^^^^^^^^^^^^^^^
 
@@ -4343,7 +4353,7 @@ recursive, can be opaqued, and are never uniqued.
 :Examples:
 
 
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ``{ i32, i32, i32 }``        | A triple of three ``i32`` values              
                                                                                
                                                        |
+| ``{ i32, i32, i32 }``        | A triple of three ``i32`` values (this is a 
"homogeneous" struct as all element types are the same)                         
                                                          |
 
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 | ``{ float, ptr }``           | A pair, where the first element is a 
``float`` and the second element is a :ref:`pointer <t_pointer>`.               
                                                                 |
 
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
@@ -12472,8 +12482,8 @@ instruction's return value on the same edge).
 The optional ``fast-math-flags`` marker indicates that the phi has one
 or more :ref:`fast-math-flags <fastmath>`. These are optimization hints
 to enable otherwise unsafe floating-point optimizations. Fast-math-flags
-are only valid for phis that return a floating-point scalar or vector type,
-possibly within an array (nested to any depth), or a homogeneous struct 
literal.
+are only valid for phis that return :ref:`supported floating-point types
+<fastmath_return_types>`.
 
 Semantics:
 """"""""""
@@ -12522,8 +12532,8 @@ class <t_firstclass>` type.
 #. The optional ``fast-math flags`` marker indicates that the select has one 
or more
    :ref:`fast-math flags <fastmath>`. These are optimization hints to enable
    otherwise unsafe floating-point optimizations. Fast-math flags are only 
valid
-   for selects that return a floating-point scalar or vector type, possibly
-   within an array (nested to any depth), or a homogeneous struct literal.
+   for selects that return :ref:`supported floating-point types
+   <fastmath_return_types>`..
 
 Semantics:
 """"""""""
@@ -12761,8 +12771,7 @@ This instruction requires several arguments:
 #. The optional ``fast-math flags`` marker indicates that the call has one or 
more
    :ref:`fast-math flags <fastmath>`, which are optimization hints to enable
    otherwise unsafe floating-point optimizations. Fast-math flags are only 
valid
-   for calls that return a floating-point scalar or vector type, possibly 
within
-   an array (nested to any depth), or a homogeneous struct literal.
+   for calls that return :ref:`supported floating-point types 
<fastmath_return_types>`.
 
 #. The optional "cconv" marker indicates which :ref:`calling
    convention <callingconv>` the call should use. If none is
@@ -20526,9 +20535,8 @@ the explicit vector length.
 #. The optional ``fast-math flags`` marker indicates that the select has one or
    more :ref:`fast-math flags <fastmath>`. These are optimization hints to
    enable otherwise unsafe floating-point optimizations. Fast-math flags are
-   only valid for selects that return a floating-point scalar or vector type,
-   possibly within an array (nested to any depth), or a homogeneous struct
-   literal.
+   only valid for selects that return :ref:`supported floating-point types
+   <fastmath_return_types>`.
 
 Semantics:
 """"""""""
@@ -20585,9 +20593,8 @@ is the pivot.
 #. The optional ``fast-math flags`` marker indicates that the merge has one or
    more :ref:`fast-math flags <fastmath>`. These are optimization hints to
    enable otherwise unsafe floating-point optimizations. Fast-math flags are
-   only valid for merges that return a floating-point scalar or vector type,
-   possibly within an array (nested to any depth), or a homogeneous struct
-   literal.
+   only valid for merges that return :ref:`supported floating-point types
+   <fastmath_return_types>`.
 
 Semantics:
 """"""""""
diff --git a/llvm/test/Bitcode/compatibility.ll 
b/llvm/test/Bitcode/compatibility.ll
index 4fe9d9b11f8831..b6050944e637a3 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -1123,21 +1123,21 @@ define void @fastMathFlagsForArrayCalls([2 x float] %f, 
[2 x double] %d1, [2 x <
 }
 
 declare { float, float } @fmf_struct_f32()
-declare { double, double } @fmf_struct_f64()
-declare { <4 x double>, <4 x double> } @fmf_struct_v4f64()
+declare { double, double, double } @fmf_struct_f64()
+declare { <4 x double> } @fmf_struct_v4f64()
 
 ; CHECK-LABEL: fastMathFlagsForStructCalls(
-define void @fastMathFlagsForStructCalls({ float, float } %f, { double, double 
} %d1, { <4 x double>, <4 x double> } %d2) {
+define void @fastMathFlagsForStructCalls() {
   %call.fast = call fast { float, float } @fmf_struct_f32()
   ; CHECK: %call.fast = call fast { float, float } @fmf_struct_f32()
 
   ; Throw in some other attributes to make sure those stay in the right places.
 
-  %call.nsz.arcp = notail call nsz arcp { double, double } @fmf_struct_f64()
-  ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double } 
@fmf_struct_f64()
+  %call.nsz.arcp = notail call nsz arcp { double, double, double } 
@fmf_struct_f64()
+  ; CHECK: %call.nsz.arcp = notail call nsz arcp { double, double, double } 
@fmf_struct_f64()
 
-  %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x double> } 
@fmf_struct_v4f64()
-  ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double>, <4 x 
double> } @fmf_struct_v4f64()
+  %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double> } 
@fmf_struct_v4f64()
+  ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc { <4 x double> } 
@fmf_struct_v4f64()
 
   ret void
 }
diff --git a/llvm/unittests/IR/InstructionsTest.cpp 
b/llvm/unittests/IR/InstructionsTest.cpp
index 9d8056a768af2f..0af812564c0267 100644
--- a/llvm/unittests/IR/InstructionsTest.cpp
+++ b/llvm/unittests/IR/InstructionsTest.cpp
@@ -1566,6 +1566,13 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) {
       CallInst::Create(StructIFnTy, StructICallee, {}, ""));
   EXPECT_FALSE(isa<FPMathOperator>(StructICall));
 
+  Type *EmptyStructTy = StructType::get(C);
+  FunctionType *EmptyStructFnTy = FunctionType::get(EmptyStructTy, {});
+  Value *EmptyStructCallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> EmptyStructCall(
+      CallInst::Create(EmptyStructFnTy, EmptyStructCallee, {}, ""));
+  EXPECT_FALSE(isa<FPMathOperator>(EmptyStructCall));
+
   Type *NamedStructFTy = StructType::create({FTy, FTy}, "AStruct");
   FunctionType *NamedStructFFnTy = FunctionType::get(NamedStructFTy, {});
   Value *NamedStructFCallee = Constant::getNullValue(PtrTy);
@@ -1580,19 +1587,33 @@ TEST(InstructionsTest, FPCallIsFPMathOperator) {
       CallInst::Create(MixedStructFnTy, MixedStructCallee, {}, ""));
   EXPECT_FALSE(isa<FPMathOperator>(MixedStructCall));
 
-  Type *StructFTy = StructType::get(FTy, FTy);
+  Type *StructFTy = StructType::get(FTy, FTy, FTy);
   FunctionType *StructFFnTy = FunctionType::get(StructFTy, {});
   Value *StructFCallee = Constant::getNullValue(PtrTy);
   std::unique_ptr<CallInst> StructFCall(
       CallInst::Create(StructFFnTy, StructFCallee, {}, ""));
   EXPECT_TRUE(isa<FPMathOperator>(StructFCall));
 
-  Type *StructVFTy = StructType::get(VFTy, VFTy);
+  Type *StructVFTy = StructType::get(VFTy, VFTy, VFTy, VFTy);
   FunctionType *StructVFFnTy = FunctionType::get(StructVFTy, {});
   Value *StructVFCallee = Constant::getNullValue(PtrTy);
   std::unique_ptr<CallInst> StructVFCall(
       CallInst::Create(StructVFFnTy, StructVFCallee, {}, ""));
   EXPECT_TRUE(isa<FPMathOperator>(StructVFCall));
+
+  Type *NestedStructFTy = StructType::get(StructFTy, StructFTy, StructFTy);
+  FunctionType *NestedStructFFnTy = FunctionType::get(NestedStructFTy, {});
+  Value *NestedStructFCallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> NestedStructFCall(
+      CallInst::Create(NestedStructFFnTy, NestedStructFCallee, {}, ""));
+  EXPECT_FALSE(isa<FPMathOperator>(NestedStructFCall));
+
+  Type *AStructFTy = ArrayType::get(StructFTy, 5);
+  FunctionType *AStructFFnTy = FunctionType::get(AStructFTy, {});
+  Value *AStructFCallee = Constant::getNullValue(PtrTy);
+  std::unique_ptr<CallInst> AStructFCall(
+      CallInst::Create(AStructFFnTy, AStructFCallee, {}, ""));
+  EXPECT_FALSE(isa<FPMathOperator>(AStructFCall));
 }
 
 TEST(InstructionsTest, FNegInstruction) {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [IR] Allow fast math flags on calls with homogeneous FP struct types (PR #110506)

Reply via email to