https://github.com/andykaylor updated 
https://github.com/llvm/llvm-project/pull/105746

>From 94940279aea5c01f0e608a91dc5835859314d88b Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Thu, 22 Aug 2024 15:35:07 -0700
Subject: [PATCH 1/3] Honor pragmas with -ffp-contract=fast, deprecate
 fast-honor-pragmas

Change the handling of -ffp-contract=fast such that the contract
semantics described in the IR are always respected. The front end was
already generating IR that enabled or disabled contraction as requested
by relevant pragmas, but it was setting a TargetOption (AllowFPOpFusion)
that causes the LLVM code generator to disregard the IR restrictions.

This makes -ffp-contract=fast-honor-pragmas redundant, so it is now
reported as deprecated.

For more discussion, see: 
https://discourse.llvm.org/t/rfc-honor-pragmas-with-ffp-contract-fast/80797
---
 clang/docs/LanguageExtensions.rst             |  4 +--
 clang/docs/ReleaseNotes.rst                   |  4 +++
 clang/docs/UsersManual.rst                    |  9 +++----
 clang/include/clang/Basic/LangOptions.h       | 10 +------
 clang/include/clang/Options/Options.td        |  6 ++---
 clang/lib/Basic/LangOptions.cpp               | 14 ++--------
 clang/lib/CodeGen/BackendUtil.cpp             | 16 ++---------
 clang/lib/Driver/ToolChains/Clang.cpp         |  7 +++++
 clang/lib/Frontend/CompilerInvocation.cpp     |  6 ++---
 clang/lib/Sema/SemaAttr.cpp                   |  1 -
 .../ffp-contract-fast-honor-pramga-option.cpp |  2 +-
 .../ffp-contract-fhp-pragma-override.cpp      |  2 +-
 clang/test/CodeGen/fp-function-attrs.cpp      | 14 +++++++++-
 clang/test/CodeGenCUDA/fp-contract.cu         | 27 ++-----------------
 clang/test/Driver/fp-contract.c               |  7 ++---
 15 files changed, 47 insertions(+), 82 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index a3db3e5d356b3..16731bf8037c2 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -5733,9 +5733,7 @@ statements in C).
 
 The pragma can also be used with ``off`` which turns FP contraction off for a
 section of the code. This can be useful when fast contraction is otherwise
-enabled for the translation unit with the ``-ffp-contract=fast-honor-pragmas`` 
flag.
-Note that ``-ffp-contract=fast`` will override pragmas to fuse multiply and
-addition across statements regardless of any controlling pragmas.
+enabled for the translation unit with the ``-ffp-contract=fast`` flag.
 
 ``#pragma clang fp exceptions`` specifies floating point exception behavior. It
 may take one of the values: ``ignore``, ``maytrap`` or ``strict``. Meaning of
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7459127670cc3..2dcd66343ff7c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -341,6 +341,10 @@ Modified Compiler Flags
 - The `-gkey-instructions` compiler flag is now enabled by default when DWARF 
is emitted for plain C/C++ and optimizations are enabled. (#GH149509)
 - The `-fconstexpr-steps` compiler flag now accepts value `0` to opt out of 
this limit. (#GH160440)
 
+- The ``-ffp-contract`` option now honors pragmas by default when the ``fast``
+  argument is used. The ``fast-honor-pragmas`` option is now deprecated and 
acts
+  as an alias for ``fast``.
+
 Removed Compiler Flags
 -------------------------
 
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index d267eec9425b3..c3aa7914ddf0f 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -1557,7 +1557,7 @@ describes the various floating point semantic modes and 
the corresponding option
   "ffp-exception-behavior", "{ignore, strict, maytrap}",
   "fenv_access", "{off, on}", "(none)"
   "frounding-math", "{dynamic, tonearest, downward, upward, towardzero}"
-  "ffp-contract", "{on, off, fast, fast-honor-pragmas}"
+  "ffp-contract", "{on, off, fast}"
   "fdenormal-fp-math", "{IEEE, PreserveSign, PositiveZero}"
   "fdenormal-fp-math-fp32", "{IEEE, PreserveSign, PositiveZero}"
   "fmath-errno", "{on, off}"
@@ -1764,13 +1764,12 @@ for more details.
 
    Valid values are:
 
-   * ``fast``: enable fusion across statements disregarding pragmas, breaking
-     compliance with the C and C++ standards (default for CUDA).
+   * ``fast``: enable fusion across statements unless dictated by pragmas,
+     breaking compliance with the C and C++ standards (default for CUDA).
    * ``on``: enable C and C++ standard compliant fusion in the same statement
      unless dictated by pragmas (default for languages other than CUDA/HIP)
    * ``off``: disable fusion
-   * ``fast-honor-pragmas``: fuse across statements unless dictated by pragmas
-     (default for HIP)
+   * ``fast-honor-pragmas``: deprecated, aliases fast
 
 .. option:: -f[no-]honor-infinities
 
diff --git a/clang/include/clang/Basic/LangOptions.h 
b/clang/include/clang/Basic/LangOptions.h
index 8aa89d8c8c807..3eb53b18adf7e 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -217,11 +217,8 @@ class LangOptionsBase {
     // Enable the floating point pragma
     FPM_On,
 
-    // Aggressively fuse FP ops (E.g. FMA) disregarding pragmas.
-    FPM_Fast,
-
     // Aggressively fuse FP ops and honor pragmas.
-    FPM_FastHonorPragmas
+    FPM_Fast
   };
 
   /// Possible floating point exception behavior.
@@ -816,12 +813,7 @@ class FPOptions {
   }
   explicit FPOptions(const LangOptions &LO) {
     Value = 0;
-    // The language fp contract option FPM_FastHonorPragmas has the same effect
-    // as FPM_Fast in frontend. For simplicity, use FPM_Fast uniformly in
-    // frontend.
     auto LangOptContractMode = LO.getDefaultFPContractMode();
-    if (LangOptContractMode == LangOptions::FPM_FastHonorPragmas)
-      LangOptContractMode = LangOptions::FPM_Fast;
     setFPContractMode(LangOptContractMode);
     setRoundingMath(LO.RoundingMath);
     setConstRoundingMode(LangOptions::RoundingMode::Dynamic);
diff --git a/clang/include/clang/Options/Options.td 
b/clang/include/clang/Options/Options.td
index 2f7434d8afe11..4c6291d5eabf4 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -2831,11 +2831,11 @@ def fno_trapping_math : Flag<["-"], 
"fno-trapping-math">, Group<f_Group>;
 def ffp_contract : Joined<["-"], "ffp-contract=">, Group<f_Group>,
   Visibility<[ClangOption, CC1Option, FC1Option, FlangOption]>,
   DocBrief<"Form fused FP ops (e.g. FMAs):"
-  " fast (fuses across statements disregarding pragmas)"
+  " fast (fuses across statements unless dictated by pragmas)"
   " | on (only fuses in the same statement unless dictated by pragmas)"
   " | off (never fuses)"
-  " | fast-honor-pragmas (fuses across statements unless dictated by pragmas)."
-  " Default is 'fast' for CUDA, 'fast-honor-pragmas' for HIP, and 'on' 
otherwise.">,
+  " | fast-honor-pragmas (deprecated, aliases fast)."
+  " Default is 'fast' for CUDA or HIP, and 'on' otherwise.">,
   HelpText<"Form fused FP ops (e.g. FMAs)">,
   Values<"fast,on,off,fast-honor-pragmas">;
 
diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index 19b557603d135..1040c865614ad 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -192,18 +192,8 @@ void LangOptions::setLangDefaults(LangOptions &Opts, 
Language Lang,
 
   Opts.HIP = Lang == Language::HIP;
   Opts.CUDA = Lang == Language::CUDA || Opts.HIP;
-  if (Opts.HIP) {
-    // HIP toolchain does not support 'Fast' FPOpFusion in backends since it
-    // fuses multiplication/addition instructions without contract flag from
-    // device library functions in LLVM bitcode, which causes accuracy loss in
-    // certain math functions, e.g. tan(-1e20) becomes -0.933 instead of 
0.8446.
-    // For device library functions in bitcode to work, 'Strict' or 'Standard'
-    // FPOpFusion options in backends is needed. Therefore 'fast-honor-pragmas'
-    // FP contract option is used to allow fuse across statements in frontend
-    // whereas respecting contract flag in backend.
-    Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas);
-  } else if (Opts.CUDA) {
-    if (T.isSPIRV()) {
+  if (Opts.HIP || Opts.CUDA) {
+    if (Opts.CUDA && T.isSPIRV()) {
       // Emit OpenCL version metadata in LLVM IR when targeting SPIR-V.
       Opts.OpenCLVersion = 200;
     }
diff --git a/clang/lib/CodeGen/BackendUtil.cpp 
b/clang/lib/CodeGen/BackendUtil.cpp
index 6f63e6470270e..777b3e579d7cc 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -398,20 +398,8 @@ static bool initTargetOptions(const CompilerInstance &CI,
           .Default(llvm::FloatABI::Default);
 
   // Set FP fusion mode.
-  switch (LangOpts.getDefaultFPContractMode()) {
-  case LangOptions::FPM_Off:
-    // Preserve any contraction performed by the front-end.  (Strict performs
-    // splitting of the muladd intrinsic in the backend.)
-    Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
-    break;
-  case LangOptions::FPM_On:
-  case LangOptions::FPM_FastHonorPragmas:
-    Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
-    break;
-  case LangOptions::FPM_Fast:
-    Options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
-    break;
-  }
+  // All allowed fusion is indicated in the IR.
+  Options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
 
   Options.BinutilsVersion =
       llvm::TargetMachine::parseBinutilsVersion(CodeGenOpts.BinutilsVersion);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index c5d40c9825fab..50af376ab4814 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3000,6 +3000,13 @@ static void RenderFloatingPointOptions(const ToolChain 
&TC, const Driver &D,
       StringRef Val = A->getValue();
       if (Val == "fast" || Val == "on" || Val == "off" ||
           Val == "fast-honor-pragmas") {
+        // fast-honor-pragmas is deprecated -- replace it with fast
+        if (Val == "fast-honor-pragmas") {
+          D.Diag(diag::warn_drv_deprecated_arg)
+            << A->getAsString(Args) << /*hasReplacement=*/true
+            << "-ffp-contract=fast";
+          Val = "fast";
+        }
         if (Val != FPContract && LastFpContractOverrideOption != "") {
           D.Diag(clang::diag::warn_drv_overriding_option)
               << LastFpContractOverrideOption
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp 
b/clang/lib/Frontend/CompilerInvocation.cpp
index 54b302e829e1f..81fc47295e6d1 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3898,8 +3898,6 @@ void CompilerInvocationBase::GenerateLangArgs(const 
LangOptions &Opts,
     GenerateArg(Consumer, OPT_ffp_contract, "on");
   else if (Opts.DefaultFPContractMode == LangOptions::FPM_Off)
     GenerateArg(Consumer, OPT_ffp_contract, "off");
-  else if (Opts.DefaultFPContractMode == LangOptions::FPM_FastHonorPragmas)
-    GenerateArg(Consumer, OPT_ffp_contract, "fast-honor-pragmas");
 
   for (StringRef Sanitizer : serializeSanitizerKinds(Opts.Sanitize))
     GenerateArg(Consumer, OPT_fsanitize_EQ, Sanitizer);
@@ -4389,8 +4387,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, 
ArgList &Args,
       Opts.setDefaultFPContractMode(LangOptions::FPM_On);
     else if (Val == "off")
       Opts.setDefaultFPContractMode(LangOptions::FPM_Off);
-    else if (Val == "fast-honor-pragmas")
-      Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas);
+    else if (Val == "fast-honor-pragmas") // Deprecated
+      Opts.setDefaultFPContractMode(LangOptions::FPM_Fast);
     else
       Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val;
   }
diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp
index 8411a3da8322d..3c1c000b544dc 100644
--- a/clang/lib/Sema/SemaAttr.cpp
+++ b/clang/lib/Sema/SemaAttr.cpp
@@ -1386,7 +1386,6 @@ void Sema::ActOnPragmaFPContract(SourceLocation Loc,
     NewFPFeatures.setAllowFPContractWithinStatement();
     break;
   case LangOptions::FPM_Fast:
-  case LangOptions::FPM_FastHonorPragmas:
     NewFPFeatures.setAllowFPContractAcrossStatement();
     break;
   case LangOptions::FPM_Off:
diff --git a/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp 
b/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp
index fef4da1edf1fc..c3d8909c33bd6 100644
--- a/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp
+++ b/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -O3 -ffp-contract=fast-honor-pragmas -triple 
%itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple %itanium_abi_triple 
-emit-llvm -o - %s | FileCheck %s
 
 float fp_contract_1(float a, float b, float c) {
   // CHECK-LABEL: fp_contract_1fff(
diff --git a/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp 
b/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp
index ff35c9204c79c..fe4cf21861f00 100644
--- a/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp
+++ b/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -O3 -ffp-contract=fast-honor-pragmas -triple 
%itanium_abi_triple -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple %itanium_abi_triple 
-emit-llvm -o - %s | FileCheck %s
 
 float fp_contract_on_1(float a, float b, float c) {
   // CHECK-LABEL: fp_contract_on_1fff(
diff --git a/clang/test/CodeGen/fp-function-attrs.cpp 
b/clang/test/CodeGen/fp-function-attrs.cpp
index 3775bd5452d78..2e62875bc9f34 100644
--- a/clang/test/CodeGen/fp-function-attrs.cpp
+++ b/clang/test/CodeGen/fp-function-attrs.cpp
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast 
-emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math 
-ffp-contract=fast-honor-pragmas -emit-llvm -o - %s | FileCheck %s
 
 float test_default(float a, float b, float c) {
   float tmp = a;
@@ -53,5 +52,18 @@ float test_contract_on_pragma(float a, float b, float c) {
 // CHECK: fmul fast float {{%.+}}, {{%.+}}
 // CHECK: fadd reassoc nnan ninf nsz arcp afn float {{%.+}}, {{%.+}}
 
+float test_contract_off_pragma(float a, float b, float c) {
+  float tmp = a * b;
+  {
+    #pragma clang fp contract(off)
+    tmp += c;
+  }
+  return tmp;
+}
+
+// CHECK: define{{.*}} float @_Z24test_contract_off_pragmafff(float noundef 
nofpclass(nan inf) %a, float noundef nofpclass(nan inf) %b, float noundef 
nofpclass(nan inf) %c)
+// CHECK: fmul fast float {{%.+}}, {{%.+}}
+// CHECK: fadd reassoc nnan ninf nsz arcp afn float {{%.+}}, {{%.+}}
+
 // CHECK: attributes [[FAST_ATTRS]] = { {{.*}}"no-infs-fp-math"="true" 
{{.*}}"no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true"{{.*}} }
 // CHECK: attributes [[PRECISE_ATTRS]] = { {{.*}}"no-infs-fp-math"="false" 
{{.*}}"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false"{{.*}} }
diff --git a/clang/test/CodeGenCUDA/fp-contract.cu 
b/clang/test/CodeGenCUDA/fp-contract.cu
index d6c796a817cbf..c86774ac451f7 100644
--- a/clang/test/CodeGenCUDA/fp-contract.cu
+++ b/clang/test/CodeGenCUDA/fp-contract.cu
@@ -1,6 +1,6 @@
 // REQUIRES: x86-registered-target, nvptx-registered-target, 
amdgpu-registered-target
 
-// By default CUDA uses -ffp-contract=fast, HIP uses 
-ffp-contract=fast-honor-pragmas.
+// By default CUDA and HIP use -ffp-contract=fast.
 // we should fuse multiply/add into fma instruction.
 // In IR, fmul/fadd instructions with contract flag are emitted.
 // In backend
@@ -68,35 +68,12 @@
 // RUN:   -O3 -target-cpu gfx906 -o - -x ir %t.ll \
 // RUN:   | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s
 
-// Explicit -ffp-contract=fast-honor-pragmas
-// In IR, fmul/fadd instructions with contract flag are emitted.
-// In backend
-//    nvptx/amdgcn - assumes standard fp fuse option, which only
-//                   fuses mult/add insts with contract flag or
-//                   llvm.fmuladd intrinsics.
-
-// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
-// RUN:   -ffp-contract=fast-honor-pragmas -disable-llvm-passes -o - %s \
-// RUN:   | FileCheck -check-prefixes=COMMON,NV-ON %s
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \
-// RUN:   -target-cpu gfx906 -disable-llvm-passes -o - -x hip %s \
-// RUN:   -ffp-contract=fast-honor-pragmas \
-// RUN:   | FileCheck -check-prefixes=COMMON,AMD-ON %s
-// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
-// RUN:   -O3 -o - %s \
-// RUN:   -ffp-contract=fast-honor-pragmas \
-// RUN:   | FileCheck -check-prefixes=COMMON,NV-OPT-FASTSTD %s
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \
-// RUN:   -O3 -target-cpu gfx906 -o - -x hip %s \
-// RUN:   -ffp-contract=fast-honor-pragmas \
-// RUN:   | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s
-
 // Check separate compile/backend steps corresponding to -save-temps.
 // When input is IR, -ffp-contract has no effect. Backend uses default
 // default FP fuse option.
 
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -ffp-contract=fast-honor-pragmas \
+// RUN:   -ffp-contract=fast \
 // RUN:   -O3 -disable-llvm-passes -target-cpu gfx906 -o %t.ll -x hip %s
 // RUN: cat %t.ll  | FileCheck -check-prefixes=COMMON,AMD-OPT-FAST-IR %s
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S \
diff --git a/clang/test/Driver/fp-contract.c b/clang/test/Driver/fp-contract.c
index cab63683ee813..06241caa2fbd5 100644
--- a/clang/test/Driver/fp-contract.c
+++ b/clang/test/Driver/fp-contract.c
@@ -6,7 +6,6 @@
 // before the drive options that are checked below the run lines.
 // WARN_FM_OFF: warning: overriding '-ffast-math' option with 
'-ffp-contract=off'
 // WARN_FM_ON: warning: overriding '-ffast-math' option with '-ffp-contract=on'
-// WARN_FM_FHP: warning: overriding '-ffast-math' option with 
'-ffp-contract=fast-honor-pragmas'
 // WARN_UM_OFF: warning: overriding '-funsafe-math-optimizations' option with 
'-ffp-contract=off'
 // WARN_UM_ON: warning: overriding '-funsafe-math-optimizations' option with 
'-ffp-contract=on'
 
@@ -30,8 +29,10 @@
 // RUN:   | FileCheck --check-prefix=CHECK-FPC-FAST %s
 
 // RUN: %clang -### -ffast-math -ffp-contract=fast-honor-pragmas -c %s 2>&1 \
-// RUN:   | FileCheck --check-prefixes=CHECK-FPC-FAST-HONOR,WARN_FM_FHP %s
-// CHECK-FPC-FAST-HONOR:     "-ffp-contract=fast-honor-pragmas"
+// RUN:   | FileCheck 
--check-prefixes=CHECK-FPC-FAST-HONOR,WARN_FHP_DEPRECATED %s
+// WARN_FHP_DEPRECATED: clang: warning: argument 
'-ffp-contract=fast-honor-pragmas' is deprecated, use '-ffp-contract=fast' 
instead [-Wdeprecated]
+// CHECK-FPC-FAST-HONOR:     "-ffp-contract=fast"
+// CHECK-FPC-FAST-HONOR-NOT: "-honor-pragmas"
 
 // RUN: %clang -### -Werror -ffp-contract=fast -ffast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-FPC-FAST %s

>From 7a06cfd62f425c73d596d4ea646f6818405d9550 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Tue, 27 Aug 2024 18:04:25 -0700
Subject: [PATCH 2/3] Update fp-contract.cu test

---
 clang/test/CodeGenCUDA/fp-contract.cu | 39 ++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/clang/test/CodeGenCUDA/fp-contract.cu 
b/clang/test/CodeGenCUDA/fp-contract.cu
index c86774ac451f7..dd7e619a262c1 100644
--- a/clang/test/CodeGenCUDA/fp-contract.cu
+++ b/clang/test/CodeGenCUDA/fp-contract.cu
@@ -68,6 +68,29 @@
 // RUN:   -O3 -target-cpu gfx906 -o - -x ir %t.ll \
 // RUN:   | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s
 
+// Explicit -ffp-contract=fast (was fast-honor-pragmas)
+// In IR, fmul/fadd instructions with contract flag are emitted.
+// In backend
+//    nvptx/amdgcn - assumes standard fp fuse option, which only
+//                   fuses mult/add insts with contract flag or
+//                   llvm.fmuladd intrinsics.
+
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
+// RUN:   -ffp-contract=fast -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefixes=COMMON,NV-ON %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \
+// RUN:   -target-cpu gfx906 -disable-llvm-passes -o - -x hip %s \
+// RUN:   -ffp-contract=fast \
+// RUN:   | FileCheck -check-prefixes=COMMON,AMD-ON %s
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \
+// RUN:   -O3 -o - %s \
+// RUN:   -ffp-contract=fast \
+// RUN:   | FileCheck -check-prefixes=COMMON,NV-OPT-FASTSTD %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \
+// RUN:   -O3 -target-cpu gfx906 -o - -x hip %s \
+// RUN:   -ffp-contract=fast \
+// RUN:   | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s
+
 // Check separate compile/backend steps corresponding to -save-temps.
 // When input is IR, -ffp-contract has no effect. Backend uses default
 // default FP fuse option.
@@ -231,19 +254,16 @@ __host__ __device__ float func2(float a, float b, float 
c) {
 
 // Test multiply/add in the different statements, which is forced
 // to be compiled with fp contract on. fmul/fadd without contract
-// flags are emitted in IR. In nvptx, they are emitted as FMA in
-// fp-contract is fast but not on, as nvptx backend uses the same
-// fp fuse option as front end, whereas fast fp fuse option in
-// backend fuses fadd/fmul disregarding contract flag. In amdgcn
-// they are not fused as amdgcn always use standard fp fusion
-// option which respects contract flag.
-  __host__ __device__ float func3(float a, float b, float c) {
+// flags are emitted in IR. The operations should not be fused
+// because the mul and add occur in different statements.
+__host__ __device__ float func3(float a, float b, float c) {
 #pragma clang fp contract(on)
   float t = b * c;
   return t + a;
 }
 // COMMON-LABEL: _Z5func3fff
-// NV-OPT-FAST: fma.rn.f32
+// NV-OPT-FAST: mul.rn.f32
+// NV-OPT-FAST: add.rn.f32
 // NV-OPT-FAST-NEXT: st.param.b32
 // NV-OPT-FASTSTD: mul.rn.f32
 // NV-OPT-FASTSTD: add.rn.f32
@@ -262,7 +282,8 @@ __host__ __device__ float func2(float a, float b, float c) {
 // AMD-OPT-OFF-IR: fmul float
 // AMD-OPT-OFF-IR: fadd float
 
-// AMD-OPT-FAST: v_fmac_f32_e32
+// AMD-OPT-FAST: v_mul_f32_e32
+// AMD-OPT-FAST-NEXT: v_add_f32_e32
 // AMD-OPT-FAST-NEXT: s_setpc_b64
 // AMD-OPT-FASTSTD: v_mul_f32_e32
 // AMD-OPT-FASTSTD-NEXT: v_add_f32_e32

>From cf21d97c81fd1d1424531995da8a295d8cb8f06e Mon Sep 17 00:00:00 2001
From: Andy Kaylor <[email protected]>
Date: Tue, 18 Nov 2025 10:57:27 -0800
Subject: [PATCH 3/3] Fix formatting

---
 clang/lib/Driver/ToolChains/Clang.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp
index 50af376ab4814..8a39ea411a102 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -3003,8 +3003,8 @@ static void RenderFloatingPointOptions(const ToolChain 
&TC, const Driver &D,
         // fast-honor-pragmas is deprecated -- replace it with fast
         if (Val == "fast-honor-pragmas") {
           D.Diag(diag::warn_drv_deprecated_arg)
-            << A->getAsString(Args) << /*hasReplacement=*/true
-            << "-ffp-contract=fast";
+              << A->getAsString(Args) << /*hasReplacement=*/true
+              << "-ffp-contract=fast";
           Val = "fast";
         }
         if (Val != FPContract && LastFpContractOverrideOption != "") {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to