Author: Joseph Huber Date: 2021-10-07T22:23:09-04:00 New Revision: 9efdca87c78256bb00ed51521272dec2deed7f23
URL: https://github.com/llvm/llvm-project/commit/9efdca87c78256bb00ed51521272dec2deed7f23 DIFF: https://github.com/llvm/llvm-project/commit/9efdca87c78256bb00ed51521272dec2deed7f23.diff LOG: [OpenMP] Introduce new flags to assert thread and team usage in the runtime This patch adds two flags to be supported for the new runtime. The flags are `-fopenmp-assume-threads-oversubscription` and -fopenmp-assume-teams-oversubscription`. These add global values that can be checked by the work sharing runtime functions to make better judgements about how to distribute work between the threads. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D111348 Added: clang/test/OpenMP/target_globals_codegen.cpp Modified: clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Frontend/CompilerInvocation.cpp llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp Removed: clang/test/OpenMP/target_debug_codegen.cpp ################################################################################ diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 598c847597108..e21d2fcea621e 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -244,6 +244,8 @@ LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records i LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading") LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL") LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.") +LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.") +LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.") LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 7d1e6d181f973..5a87f1276d1f3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2427,6 +2427,14 @@ def fopenmp_target_debug : Flag<["-"], "fopenmp-target-debug">, Group<f_Group>, HelpText<"Enable debugging in the OpenMP offloading device RTL">; def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>; def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse, PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 16f1b0b00b095..be47ccbbbc58a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1200,8 +1200,14 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) llvm_unreachable("OpenMP NVPTX can only handle device code."); llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); - if (CGM.getLangOpts().OpenMPTargetNewRuntime) - OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug); + if (CGM.getLangOpts().OpenMPTargetNewRuntime) { + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug, + "__omp_rtl_debug_kind"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription, + "__omp_rtl_assume_teams_oversubscription"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription, + "__omp_rtl_assume_threads_oversubscription"); + } } void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 369c12aea5231..3f98914bd1904 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5815,6 +5815,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_openmp_cuda_force_full_runtime, /*Default=*/false)) CmdArgs.push_back("-fopenmp-cuda-force-full-runtime"); + + // When in OpenMP offloading mode, forward assumptions information about + // thread and team counts in the device. + if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription, + options::OPT_fno_openmp_assume_teams_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-teams-oversubscription"); + if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription, + options::OPT_fno_openmp_assume_threads_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index e9eed36f36a96..34cbb7aab2666 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3486,6 +3486,12 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, if (Opts.OpenMPTargetNewRuntime) GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA); + if (Opts.OpenMPThreadSubscription) + GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA); + + if (Opts.OpenMPTeamSubscription) + GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA); + if (Opts.OpenMPTargetDebug != 0) GenerateArg(Args, OPT_fopenmp_target_debug_EQ, Twine(Opts.OpenMPTargetDebug), SA); @@ -3928,6 +3934,13 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, } } + if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) { + if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription)) + Opts.OpenMPTeamSubscription = true; + if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription)) + Opts.OpenMPThreadSubscription = true; + } + // Get the OpenMP target triples if any. if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit }; diff --git a/clang/test/OpenMP/target_debug_codegen.cpp b/clang/test/OpenMP/target_debug_codegen.cpp deleted file mode 100644 index 5932e12a4d5df..0000000000000 --- a/clang/test/OpenMP/target_debug_codegen.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_debug_kind" -// Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -//. -// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1 -//. -// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111 -//. -// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0 -//. -void foo() { -#pragma omp target - { } -} - -#endif diff --git a/clang/test/OpenMP/target_globals_codegen.cpp b/clang/test/OpenMP/target_globals_codegen.cpp new file mode 100644 index 0000000000000..1264266340729 --- /dev/null +++ b/clang/test/OpenMP/target_globals_codegen.cpp @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_" +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +//. +// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1 +// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111 +// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 1 +//. +// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 1 +// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +void foo() { +#pragma omp target + { } +} + +#endif diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 2496714bdfa0c..563e0eed17629 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -683,9 +683,8 @@ class OpenMPIRBuilder { omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); - /// Create a global value containing the \p DebugLevel to control debuggin in - /// the module. - GlobalValue *createDebugKind(unsigned DebugLevel); + /// Create a global flag \p Namein the module with initial value \p Value. + GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); /// Generate control flow and cleanup for cancellation. /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 36f6fa68ce242..20e36c8143704 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -245,12 +245,12 @@ OpenMPIRBuilder::~OpenMPIRBuilder() { assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } -GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) { +GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) { IntegerType *I32Ty = Type::getInt32Ty(M.getContext()); - auto *GV = new GlobalVariable( - M, I32Ty, - /* isConstant = */ true, GlobalValue::WeakODRLinkage, - ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind"); + auto *GV = + new GlobalVariable(M, I32Ty, + /* isConstant = */ true, GlobalValue::WeakODRLinkage, + ConstantInt::get(I32Ty, Value), Name); return GV; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits