yaxunl created this revision.
yaxunl added a reviewer: tra.
Herald added a subscriber: dang.
yaxunl requested review of this revision.

Choose optimized device lib bitcode by fp options
for performance.


https://reviews.llvm.org/D101654

Files:
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/test/Driver/hip-device-libs.hip

Index: clang/test/Driver/hip-device-libs.hip
===================================================================
--- clang/test/Driver/hip-device-libs.hip
+++ clang/test/Driver/hip-device-libs.hip
@@ -113,6 +113,38 @@
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST
 
+// Test -fast-math
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=gfx900 \
+// RUN:   -ffast-math \
+// RUN:   --rocm-path=%S/Inputs/rocm \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=FAST
+
+// Test -ffinite-math-only
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=gfx900 \
+// RUN:   -ffinite-math-only \
+// RUN:   --rocm-path=%S/Inputs/rocm \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=FINITE
+
+// Test -funsafe-math-optimizations
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=gfx900 \
+// RUN:   -funsafe-math-optimizations \
+// RUN:   --rocm-path=%S/Inputs/rocm \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=UNSAFE
+
+// Test -fno-hip-fp32-correctly-rounded-divide-sqrt
+// RUN: %clang -### -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=gfx900 \
+// RUN:   -fno-hip-fp32-correctly-rounded-divide-sqrt \
+// RUN:   --rocm-path=%S/Inputs/rocm \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=DIVSQRT
+
 // ALL-NOT: error:
 // ALL: {{"[^"]*clang[^"]*"}}
 // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc"
@@ -128,3 +160,23 @@
 // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc"
 // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc"
 // INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc"
+
+// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
+// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
+// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
+
+// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
+// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc"
+// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
+
+// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc"
+// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
+// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc"
+
+// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc"
+// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc"
+// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc"
+// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_off.bc"
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -404,11 +404,17 @@
     bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
                                   options::OPT_fno_gpu_flush_denormals_to_zero,
                                   getDefaultDenormsAreZeroForTarget(Kind));
-    // TODO: Check standard C++ flags?
-    bool FiniteOnly = false;
-    bool UnsafeMathOpt = false;
-    bool FastRelaxedMath = false;
-    bool CorrectSqrt = true;
+    bool FiniteOnly =
+        DriverArgs.hasFlag(options::OPT_ffinite_math_only,
+                           options::OPT_fno_finite_math_only, false);
+    bool UnsafeMathOpt =
+        DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
+                           options::OPT_fno_unsafe_math_optimizations, false);
+    bool FastRelaxedMath = DriverArgs.hasFlag(
+        options::OPT_ffast_math, options::OPT_fno_fast_math, false);
+    bool CorrectSqrt = DriverArgs.hasFlag(
+        options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
+        options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt);
     bool Wave64 = isWave64(DriverArgs, Kind);
 
     if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
Index: clang/lib/CodeGen/CGExprScalar.cpp
===================================================================
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -3216,7 +3216,8 @@
     llvm::Value *Val;
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
     Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
-    if (CGF.getLangOpts().OpenCL &&
+    if ((CGF.getLangOpts().OpenCL ||
+         (CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice)) &&
         !CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) {
       // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
       // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -939,6 +939,13 @@
   LangOpts<"HIPUseNewLaunchAPI">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option], "Use">, NegFlag<SetFalse, [], "Don't use">,
   BothFlags<[], " new kernel launching API for HIP">>;
+defm hip_fp32_correctly_rounded_divide_sqrt : BoolFOption<"hip-fp32-correctly-rounded-divide-sqrt",
+  CodeGenOpts<"CorrectlyRoundedDivSqrt">, DefaultTrue,
+  PosFlag<SetTrue, [], "Specify">,
+  NegFlag<SetFalse, [CC1Option], "Don't specify">,
+  BothFlags<[], " that single precision floating-point divide and sqrt used in "
+  "the program source are correctly rounded (HIP device compilation only)">>,
+  ShouldParseIf<hip.KeyPath>;
 defm gpu_allow_device_init : BoolFOption<"gpu-allow-device-init",
   LangOpts<"GPUAllowDeviceInit">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option], "Allow">, NegFlag<SetFalse, [], "Don't allow">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to