jhuber6 created this revision. jhuber6 added a reviewer: jdoerfert. Herald added subscribers: dexonsmith, dang, guansong, hiraditya, yaxunl. jhuber6 requested review of this revision. Herald added subscribers: llvm-commits, cfe-commits, sstefan1. Herald added projects: clang, LLVM.
This patch adds two flags to be supported for the new runtime. The flags are `-fopenmp-assume-threads-oversubscription` and -fopenmp-assume-teams-oversubscription`. These add global values that can be checked by the work sharing runtime functions to make better judgements about how to distribute work between the threads. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D111348 Files: clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/OpenMP/target_debug_codegen.cpp clang/test/OpenMP/target_globals_codegen.cpp llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -245,12 +245,12 @@ assert(OutlineInfos.empty() && "There must be no outstanding outlinings"); } -GlobalValue *OpenMPIRBuilder::createDebugKind(unsigned DebugKind) { +GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) { IntegerType *I32Ty = Type::getInt32Ty(M.getContext()); - auto *GV = new GlobalVariable( - M, I32Ty, - /* isConstant = */ true, GlobalValue::WeakODRLinkage, - ConstantInt::get(I32Ty, DebugKind), "__omp_rtl_debug_kind"); + auto *GV = + new GlobalVariable(M, I32Ty, + /* isConstant = */ true, GlobalValue::WeakODRLinkage, + ConstantInt::get(I32Ty, Value), Name); return GV; } Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -683,9 +683,8 @@ omp::IdentFlag Flags = omp::IdentFlag(0), unsigned Reserve2Flags = 0); - /// Create a global value containing the \p DebugLevel to control debuggin in - /// the module. - GlobalValue *createDebugKind(unsigned DebugLevel); + /// Create a global flag \p Namein the module with initial value \p Value. + GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); /// Generate control flow and cleanup for cancellation. /// Index: clang/test/OpenMP/target_globals_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/target_globals_codegen.cpp @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_" +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-threads-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-THREADS +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-assume-teams-oversubscription -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-TEAMS +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +//. +// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1 +// CHECK: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111 +// CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +// CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 0 +// CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 1 +//. +// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr constant i32 0 +// CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr constant i32 1 +// CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr constant i32 0 +//. +void foo() { +#pragma omp target + { } +} + +#endif Index: clang/test/OpenMP/target_debug_codegen.cpp =================================================================== --- clang/test/OpenMP/target_debug_codegen.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --global-value-regex "__omp_rtl_debug_kind" -// Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-target-debug=111 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-EQ -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-new-runtime -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK-DEFAULT -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -//. -// CHECK: @__omp_rtl_debug_kind = weak_odr constant i32 1 -//. -// CHECK-EQ: @__omp_rtl_debug_kind = weak_odr constant i32 111 -//. -// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr constant i32 0 -//. -void foo() { -#pragma omp target - { } -} - -#endif Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -3486,6 +3486,12 @@ if (Opts.OpenMPTargetNewRuntime) GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA); + if (Opts.OpenMPThreadSubscription) + GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA); + + if (Opts.OpenMPTeamSubscription) + GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA); + if (Opts.OpenMPTargetDebug != 0) GenerateArg(Args, OPT_fopenmp_target_debug_EQ, Twine(Opts.OpenMPTargetDebug), SA); @@ -3928,6 +3934,13 @@ } } + if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) { + if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription)) + Opts.OpenMPTeamSubscription = true; + if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription)) + Opts.OpenMPThreadSubscription = true; + } + // Get the OpenMP target triples if any. if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit }; Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -5815,6 +5815,17 @@ options::OPT_fno_openmp_cuda_force_full_runtime, /*Default=*/false)) CmdArgs.push_back("-fopenmp-cuda-force-full-runtime"); + + // When in OpenMP offloading mode, forward assumptions information about + // thread and team counts in the device. + if (Args.hasFlag(options::OPT_fopenmp_assume_teams_oversubscription, + options::OPT_fno_openmp_assume_teams_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-teams-oversubscription"); + if (Args.hasFlag(options::OPT_fopenmp_assume_threads_oversubscription, + options::OPT_fno_openmp_assume_threads_oversubscription, + /*Default=*/false)) + CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1200,8 +1200,14 @@ llvm_unreachable("OpenMP NVPTX can only handle device code."); llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder(); - if (CGM.getLangOpts().OpenMPTargetNewRuntime) - OMPBuilder.createDebugKind(CGM.getLangOpts().OpenMPTargetDebug); + if (CGM.getLangOpts().OpenMPTargetNewRuntime) { + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTargetDebug, + "__omp_rtl_debug_kind"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPTeamSubscription, + "__omp_rtl_assume_teams_oversubscription"); + OMPBuilder.createGlobalFlag(CGM.getLangOpts().OpenMPThreadSubscription, + "__omp_rtl_assume_threads_oversubscription"); + } } void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF, Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2427,6 +2427,14 @@ HelpText<"Enable debugging in the OpenMP offloading device RTL">; def fno_openmp_target_debug : Flag<["-"], "fno-openmp-target-debug">, Group<f_Group>, Flags<[NoArgumentUnused]>; def fopenmp_target_debug_EQ : Joined<["-"], "fopenmp-target-debug=">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_assume_teams_oversubscription : Flag<["-"], "fopenmp-assume-teams-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fopenmp_assume_threads_oversubscription : Flag<["-"], "fopenmp-assume-threads-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fno_openmp_assume_teams_oversubscription : Flag<["-"], "fno-openmp-assume-teams-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; +def fno_openmp_assume_threads_oversubscription : Flag<["-"], "fno-openmp-assume-threads-oversubscription">, + Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse, PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">, Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -244,6 +244,8 @@ LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading") LANGOPT(OpenMPTargetDebug , 32, 0, "Enable debugging in the OpenMP offloading device RTL") LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.") +LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume that the number of threads is greater than the loop tripcount.") +LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume that the number of teams is greater than the loop tripcount.") LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits