jhuber6 updated this revision to Diff 410846. jhuber6 added a comment. Adding test case to check `if` codegen for unreachables, and an extra function to show that it is not created for the host while the other is. Also added an error message when the user specified offloading is mandatory but couldn't be created due to `if(0)` or a lack of triples.
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D120353/new/ https://reviews.llvm.org/D120353 Files: clang/include/clang/Basic/LangOptions.def clang/include/clang/Driver/Options.td clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGStmtOpenMP.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Sema/SemaOpenMP.cpp clang/test/OpenMP/target_offload_mandatory_codegen.cpp
Index: clang/test/OpenMP/target_offload_mandatory_codegen.cpp =================================================================== --- /dev/null +++ clang/test/OpenMP/target_offload_mandatory_codegen.cpp @@ -0,0 +1,66 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-offload-mandatory -emit-llvm %s -o - | FileCheck %s --check-prefix=MANDATORY +// expected-no-diagnostics + +void foo() {} +#pragma omp declare target(foo) + +void bar() {} +#pragma omp declare target device_type(nohost) to(bar) + +void host() { +#pragma omp target + { bar(); } +} + +void host_if(bool cond) { +#pragma omp target if(cond) + { bar(); } +} +// MANDATORY-LABEL: define {{[^@]+}}@_Z3foov +// MANDATORY-SAME: () #[[ATTR0:[0-9]+]] { +// MANDATORY-NEXT: entry: +// MANDATORY-NEXT: ret void +// +// +// MANDATORY-LABEL: define {{[^@]+}}@_Z4hostv +// MANDATORY-SAME: () #[[ATTR0]] { +// MANDATORY-NEXT: entry: +// MANDATORY-NEXT: [[TMP0:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4hostv_l12.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null) +// MANDATORY-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +// MANDATORY-NEXT: br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// MANDATORY: omp_offload.failed: +// MANDATORY-NEXT: unreachable +// MANDATORY: omp_offload.cont: +// MANDATORY-NEXT: ret void +// +// +// MANDATORY-LABEL: define {{[^@]+}}@_Z7host_ifb +// MANDATORY-SAME: (i1 noundef zeroext [[COND:%.*]]) #[[ATTR0]] { +// MANDATORY-NEXT: entry: +// MANDATORY-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1 +// MANDATORY-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND]] to i8 +// MANDATORY-NEXT: store i8 [[FROMBOOL]], i8* [[COND_ADDR]], align 1 +// MANDATORY-NEXT: [[TMP0:%.*]] = load i8, i8* [[COND_ADDR]], align 1 +// MANDATORY-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// MANDATORY-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// MANDATORY: omp_if.then: +// MANDATORY-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7host_ifb_l17.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null) +// MANDATORY-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 +// MANDATORY-NEXT: br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// MANDATORY: omp_offload.failed: +// MANDATORY-NEXT: unreachable +// MANDATORY: omp_offload.cont: +// MANDATORY-NEXT: br label [[OMP_IF_END:%.*]] +// MANDATORY: omp_if.else: +// MANDATORY-NEXT: unreachable +// MANDATORY: omp_if.end: +// MANDATORY-NEXT: ret void +// +// +// MANDATORY-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// MANDATORY-SAME: () #[[ATTR3:[0-9]+]] { +// MANDATORY-NEXT: entry: +// MANDATORY-NEXT: call void @__tgt_register_requires(i64 1) +// MANDATORY-NEXT: ret void +// Index: clang/lib/Sema/SemaOpenMP.cpp =================================================================== --- clang/lib/Sema/SemaOpenMP.cpp +++ clang/lib/Sema/SemaOpenMP.cpp @@ -2517,7 +2517,7 @@ << HostDevTy; return; } - if (!LangOpts.OpenMPIsDevice && DevTy && + if (!LangOpts.OpenMPIsDevice && !LangOpts.OpenMPOffloadMandatory && DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) { // Diagnose nohost function called during host codegen. StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName( Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -5997,6 +5997,8 @@ CmdArgs.push_back("-fopenmp-assume-threads-oversubscription"); if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state)) CmdArgs.push_back("-fopenmp-assume-no-thread-state"); + if (Args.hasArg(options::OPT_fopenmp_offload_mandatory)) + CmdArgs.push_back("-fopenmp-offload-mandatory"); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -6289,6 +6289,13 @@ if (CGM.getLangOpts().OMPTargetTriples.empty()) IsOffloadEntry = false; + if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "No offloading entry generated while offloading is mandatory."); + CGM.getDiags().Report(DiagID); + } + assert(CGF.CurFuncDecl && "No parent declaration for target region!"); StringRef ParentName; // In case we have Ctors/Dtors we use the complete type variant to produce Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6538,6 +6538,8 @@ // mangled name of the function that encloses the target region and BB is the // line number of the target region. + const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice || + !CGM.getLangOpts().OpenMPOffloadMandatory; unsigned DeviceID; unsigned FileID; unsigned Line; @@ -6556,7 +6558,8 @@ CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + if (BuildOutlinedFn) + OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); // If this target outline function is not an offload entry, we don't need to // register it. @@ -6588,9 +6591,20 @@ llvm::Constant::getNullValue(CGM.Int8Ty), Name); } + // If we do not allow host fallback we still need a named address to use. + llvm::Constant *TargetRegionEntryAddr = OutlinedFn; + if (!BuildOutlinedFn) { + assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) && + "Named kernel already exists?"); + TargetRegionEntryAddr = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName); + } + // Register the information for the entry associated with this target region. OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, + DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); // Add NumTeams and ThreadLimit attributes to the outlined GPU function @@ -6607,7 +6621,8 @@ std::to_string(DefaultValThreads)); } - CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); + if (BuildOutlinedFn) + CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); } /// Checks if the expression is constant or does not have non-trivial function @@ -10324,7 +10339,10 @@ if (!CGF.HaveInsertPoint()) return; - assert(OutlinedFn && "Invalid outlined function!"); + const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice && + CGM.getLangOpts().OpenMPOffloadMandatory; + + assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); @@ -10339,18 +10357,28 @@ CodeGenFunction::OMPTargetDataInfo InputInfo; llvm::Value *MapTypesArray = nullptr; llvm::Value *MapNamesArray = nullptr; - // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, - &CapturedVars, - SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - if (Device.getInt() == OMPC_DEVICE_ancestor) { - // Reverse offloading is not supported, so just execute on the host. + // Generate code for the host fallback function. + auto &&FallbackGen = [this, OutlinedFn, OutlinedFnID, &D, &CapturedVars, + RequiresOuterTask, &CS, + OffloadingMandatory](CodeGenFunction &CGF) { + if (OffloadingMandatory) { + CGF.Builder.CreateUnreachable(); + } else { if (RequiresOuterTask) { CapturedVars.clear(); CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); + } + }; + // Fill up the pointer arrays and transfer execution to the device. + auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, + &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, + &CapturedVars, SizeEmitter, + FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { + if (Device.getInt() == OMPC_DEVICE_ancestor) { + // Reverse offloading is not supported, so just execute on the host. + FallbackGen(CGF); return; } @@ -10494,25 +10522,17 @@ CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); + FallbackGen(CGF); + CGF.EmitBranch(OffloadContBlock); CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); }; // Notify that the host version must be executed. - auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, - RequiresOuterTask](CodeGenFunction &CGF, - PrePostActionTy &) { - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); + auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, RequiresOuterTask, + FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) { + FallbackGen(CGF); }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2477,6 +2477,10 @@ Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, HelpText<"Assert no thread in a parallel region modifies an ICV">, MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>; +def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<f_Group>, + Flags<[CC1Option, NoArgumentUnused]>, + HelpText<"Do not create a host fallback if offloading to the device fails.">, + MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>; defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime", LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue, PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">, Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -247,6 +247,7 @@ LANGOPT(OpenMPThreadSubscription , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.") LANGOPT(OpenMPTeamSubscription , 1, 0, "Assume distributed loops do not have more iterations than participating teams.") LANGOPT(OpenMPNoThreadState , 1, 0, "Assume that no thread in a parallel region will modify an ICV.") +LANGOPT(OpenMPOffloadMandatory , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.") LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits