spatel created this revision.
spatel added reviewers: jgorbe, chandlerc, scanon, hans, echristo.
Herald added a subscriber: mcrosier.
As discussed in the post-commit thread for:
https://reviews.llvm.org/rL330437 (
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20180423/545906.html )
We need a way to opt-out of a float-to-int-to-float cast optimization because
too much existing code relies on the platform-specific undefined result of
those casts when the float-to-int overflows.
I speculatively committed the LLVM changes associated with adding this function
attribute, but I'll change the name/implementation if there's a better
alternative:
https://reviews.llvm.org/rL330947
https://reviews.llvm.org/rL330950
https://reviews.llvm.org/rL330951
Also as suggested, I changed the LLVM doc to mention the specific sanitizer
flag that catches this problem:
https://reviews.llvm.org/rL330958
I tested the end-to-end results on x86 and see the expected outcome: 'roundss'
is no longer produced in place of cvttss2si + cvtsi2ss with default
optimization.
https://reviews.llvm.org/D46135
Files:
docs/UsersManual.rst
include/clang/Driver/Options.td
include/clang/Frontend/CodeGenOptions.def
lib/CodeGen/CGCall.cpp
lib/Driver/ToolChains/Clang.cpp
lib/Frontend/CompilerInvocation.cpp
test/CodeGen/no-junk-ftrunc.c
test/Driver/fast-math.c
Index: test/Driver/fast-math.c
===================================================================
--- test/Driver/fast-math.c
+++ test/Driver/fast-math.c
@@ -287,3 +287,27 @@
// RUN: %clang -### -ftrapping-math -fno-trapping-math -c %s 2>&1 \
// RUN: | FileCheck --check-prefix=CHECK-NO-TRAPPING-MATH %s
// CHECK-NO-TRAPPING-MATH: "-fno-trapping-math"
+
+// This isn't fast-math, but the option is handled in the same place as other FP params.
+// Last option wins, and the flag is passed by default (FIXME).
+
+// RUN: %clang -### -ffp-cast-overflow-workaround -c %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-FPOV-WORKAROUND %s
+// CHECK-FPOV-WORKAROUND: "-cc1"
+// CHECK-FPOV-WORKAROUND: "-ffp-cast-overflow-workaround"
+
+// RUN: %clang -### -c %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-FPOV-WORKAROUND-DEFAULT %s
+// CHECK-FPOV-WORKAROUND-DEFAULT: "-cc1"
+// CHECK-FPOV-WORKAROUND-DEFAULT: "-ffp-cast-overflow-workaround"
+
+// RUN: %clang -### -fno-fp-cast-overflow-workaround -c %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-NO-FPOV-WORKAROUND %s
+// CHECK-NO-FPOV-WORKAROUND: "-cc1"
+// CHECK-NO-FPOV-WORKAROUND-NOT: "-ffp-cast-overflow-workaround"
+
+// RUN: %clang -### -ffp-cast-overflow-workaround -fno-fp-cast-overflow-workaround -c %s 2>&1 \
+// RUN: | FileCheck --check-prefix=CHECK-NO-FPOV-WORKAROUND-OVERRIDE %s
+// CHECK-NO-FPOV-WORKAROUND-OVERRIDE: "-cc1"
+// CHECK-NO-FPOV-WORKAROUND-OVERRIDE-NOT: "-ffp-cast-overflow-workaround"
+
Index: test/CodeGen/no-junk-ftrunc.c
===================================================================
--- test/CodeGen/no-junk-ftrunc.c
+++ test/CodeGen/no-junk-ftrunc.c
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -S -ffp-cast-overflow-workaround %s -emit-llvm -o - | FileCheck %s
+
+// CHECK-LABEL: main
+// CHECK: attributes #0 = {{.*}}"fp-cast-overflow-workaround"="true"{{.*}}
+
+int main() {
+ return 0;
+}
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -699,6 +699,8 @@
Opts.Reciprocals = Args.getAllArgValues(OPT_mrecip_EQ);
Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math);
Opts.NoTrappingMath = Args.hasArg(OPT_fno_trapping_math);
+ Opts.FPCastOverflowWorkaround = Args.hasArg(OPT_ffp_cast_overflow_workaround);
+
Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss);
Opts.NumRegisterParameters = getLastArgIntValue(Args, OPT_mregparm, 0, Diags);
Opts.NoExecStack = Args.hasArg(OPT_mno_exec_stack);
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -2241,6 +2241,14 @@
CmdArgs.push_back("-mfpmath");
CmdArgs.push_back(A->getValue());
}
+
+ // Disable a codegen optimization for floating-point casts. This is only
+ // intended to be temporarily defaulted to 'true' to give users time to fix
+ // their code.
+ if (Args.hasFlag(options::OPT_ffp_cast_overflow_workaround,
+ options::OPT_fno_fp_cast_overflow_workaround,
+ true))
+ CmdArgs.push_back("-ffp-cast-overflow-workaround");
}
static void RenderAnalyzerOptions(const ArgList &Args, ArgStringList &CmdArgs,
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1727,6 +1727,9 @@
FuncAttrs.addAttribute("no-trapping-math",
llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+ if (CodeGenOpts.FPCastOverflowWorkaround)
+ FuncAttrs.addAttribute("fp-cast-overflow-workaround", "true");
+
// TODO: Are these all needed?
// unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
FuncAttrs.addAttribute("no-infs-fp-math",
Index: include/clang/Frontend/CodeGenOptions.def
===================================================================
--- include/clang/Frontend/CodeGenOptions.def
+++ include/clang/Frontend/CodeGenOptions.def
@@ -136,6 +136,11 @@
CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN.
CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero
CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt
+
+/// Disable a float-to-int-to-float cast optimization to preserve behavior for
+/// source code that depends on existing -- but undefined -- behavior.
+CODEGENOPT(FPCastOverflowWorkaround, 1, 0)
+
CODEGENOPT(UniformWGSize , 1, 0) ///< -cl-uniform-work-group-size
CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss.
/// \brief Method of Objective-C dispatch to use.
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -1029,6 +1029,11 @@
Flags<[CC1Option]>, HelpText<"Form fused FP ops (e.g. FMAs): fast (everywhere)"
" | on (according to FP_CONTRACT pragma, default) | off (never fuse)">, Values<"fast,on,off">;
+def ffp_cast_overflow_workaround : Flag<["-"],
+ "ffp-cast-overflow-workaround">, Group<f_Group>, Flags<[CC1Option]>;
+def fno_fp_cast_overflow_workaround : Flag<["-"],
+ "fno-fp-cast-overflow-workaround">, Group<f_Group>, Flags<[CC1Option]>;
+
def ffor_scope : Flag<["-"], "ffor-scope">, Group<f_Group>;
def fno_for_scope : Flag<["-"], "fno-for-scope">, Group<f_Group>;
Index: docs/UsersManual.rst
===================================================================
--- docs/UsersManual.rst
+++ docs/UsersManual.rst
@@ -1255,6 +1255,15 @@
flushed-to-zero number is preserved in the sign of 0, denormals are
flushed to positive zero, respectively.
+.. option:: -f[no-]fp-cast-overflow-workaround
+
+ Enable or disable a code generation optimization that may convert a
+ cast of a floating-point value to integer and back to floating-point
+ into the equivalent of the math libary's 'trunc()' function. This
+ optimization is disabled by default to avoid problems associated
+ with relying on the undefined behavior of an overflowing cast, but
+ that limitation should be considered temporary.
+
.. option:: -fwhole-program-vtables
Enable whole-program vtable optimizations, such as single-implementation
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits