tra updated this revision to Diff 57541. tra added a comment. Added test case.
Is there a better way to test that the correct options are passed to the back-end? This test resorts to checking the assembly generated by the back-end, which is way too far away from what actually needs testing. http://reviews.llvm.org/D20341 Files: lib/Frontend/CompilerInvocation.cpp test/CodeGenCUDA/fp-contract.cu Index: test/CodeGenCUDA/fp-contract.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/fp-contract.cu @@ -0,0 +1,32 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// By default we should fuse multiply/add into fma instruction. +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s + +// Explicit -ffp-contract=fast +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -ffp-contract=fast -disable-llvm-passes -o - %s \ +// RUN: | FileCheck -check-prefix ENABLED %s + +// Explicit -ffp-contract=on -- fusing by front-end (disabled). +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -ffp-contract=on -disable-llvm-passes -o - %s \ +// RUN: | FileCheck -check-prefix DISABLED %s + +// Explicit -ffp-contract=off should disable instruction fusing. 
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -ffp-contract=off -disable-llvm-passes -o - %s \ +// RUN: | FileCheck -check-prefix DISABLED %s + + +#include "Inputs/cuda.h" + +__host__ __device__ float func(float a, float b, float c) { return a + b * c; } +// ENABLED: fma.rn.f32 +// ENABLED-NEXT: st.param.f32 + +// DISABLED: mul.rn.f32 +// DISABLED-NEXT: add.rn.f32 +// DISABLED-NEXT: st.param.f32 Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -2212,10 +2212,15 @@ LangOpts.ObjCExceptions = 1; } - // During CUDA device-side compilation, the aux triple is the triple used for - // host compilation. - if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { - Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; + if (LangOpts.CUDA) { + // During CUDA device-side compilation, the aux triple is the + // triple used for host compilation. + if (LangOpts.CUDAIsDevice) + Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; + + // Set default FP_CONTRACT to FAST. + if (!Args.hasArg(OPT_ffp_contract)) + Res.getCodeGenOpts().setFPContractMode(CodeGenOptions::FPC_Fast); } // FIXME: Override value name discarding when asan or msan is used because the
Index: test/CodeGenCUDA/fp-contract.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/fp-contract.cu @@ -0,0 +1,32 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// By default we should fuse multiply/add into fma instruction. +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s + +// Explicit -ffp-contract=fast +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -ffp-contract=fast -disable-llvm-passes -o - %s \ +// RUN: | FileCheck -check-prefix ENABLED %s + +// Explicit -ffp-contract=on -- fusing by front-end (disabled). +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -ffp-contract=on -disable-llvm-passes -o - %s \ +// RUN: | FileCheck -check-prefix DISABLED %s + +// Explicit -ffp-contract=off should disable instruction fusing. +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ +// RUN: -ffp-contract=off -disable-llvm-passes -o - %s \ +// RUN: | FileCheck -check-prefix DISABLED %s + + +#include "Inputs/cuda.h" + +__host__ __device__ float func(float a, float b, float c) { return a + b * c; } +// ENABLED: fma.rn.f32 +// ENABLED-NEXT: st.param.f32 + +// DISABLED: mul.rn.f32 +// DISABLED-NEXT: add.rn.f32 +// DISABLED-NEXT: st.param.f32 Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -2212,10 +2212,15 @@ LangOpts.ObjCExceptions = 1; } - // During CUDA device-side compilation, the aux triple is the triple used for - // host compilation. - if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { - Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; + if (LangOpts.CUDA) { + // During CUDA device-side compilation, the aux triple is the + // triple used for host compilation. 
+ if (LangOpts.CUDAIsDevice) + Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; + + // Set default FP_CONTRACT to FAST. + if (!Args.hasArg(OPT_ffp_contract)) + Res.getCodeGenOpts().setFPContractMode(CodeGenOptions::FPC_Fast); } // FIXME: Override value name discarding when asan or msan is used because the
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits