jhuber6 updated this revision to Diff 410846.
jhuber6 added a comment.

Adding test case to check `if` codegen for unreachables, and an extra function 
to show that it is not created for the host while the other is. Also added an 
error message when the user specified offloading is mandatory but couldn't be 
created due to `if(0)` or a lack of triples.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120353/new/

https://reviews.llvm.org/D120353

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/target_offload_mandatory_codegen.cpp

Index: clang/test/OpenMP/target_offload_mandatory_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_offload_mandatory_codegen.cpp
@@ -0,0 +1,66 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+"
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-offload-mandatory -emit-llvm %s -o - | FileCheck %s --check-prefix=MANDATORY
+// expected-no-diagnostics
+
+void foo() {}
+#pragma omp declare target(foo)
+
+void bar() {}
+#pragma omp declare target device_type(nohost) to(bar)
+
+void host() {
+#pragma omp target
+  { bar(); }
+}
+
+void host_if(bool cond) {
+#pragma omp target if(cond)
+  { bar(); }
+}
+// MANDATORY-LABEL: define {{[^@]+}}@_Z3foov
+// MANDATORY-SAME: () #[[ATTR0:[0-9]+]] {
+// MANDATORY-NEXT:  entry:
+// MANDATORY-NEXT:    ret void
+//
+//
+// MANDATORY-LABEL: define {{[^@]+}}@_Z4hostv
+// MANDATORY-SAME: () #[[ATTR0]] {
+// MANDATORY-NEXT:  entry:
+// MANDATORY-NEXT:    [[TMP0:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4hostv_l12.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
+// MANDATORY-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
+// MANDATORY-NEXT:    br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// MANDATORY:       omp_offload.failed:
+// MANDATORY-NEXT:    unreachable
+// MANDATORY:       omp_offload.cont:
+// MANDATORY-NEXT:    ret void
+//
+//
+// MANDATORY-LABEL: define {{[^@]+}}@_Z7host_ifb
+// MANDATORY-SAME: (i1 noundef zeroext [[COND:%.*]]) #[[ATTR0]] {
+// MANDATORY-NEXT:  entry:
+// MANDATORY-NEXT:    [[COND_ADDR:%.*]] = alloca i8, align 1
+// MANDATORY-NEXT:    [[FROMBOOL:%.*]] = zext i1 [[COND]] to i8
+// MANDATORY-NEXT:    store i8 [[FROMBOOL]], i8* [[COND_ADDR]], align 1
+// MANDATORY-NEXT:    [[TMP0:%.*]] = load i8, i8* [[COND_ADDR]], align 1
+// MANDATORY-NEXT:    [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1
+// MANDATORY-NEXT:    br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// MANDATORY:       omp_if.then:
+// MANDATORY-NEXT:    [[TMP1:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7host_ifb_l17.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null)
+// MANDATORY-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+// MANDATORY-NEXT:    br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// MANDATORY:       omp_offload.failed:
+// MANDATORY-NEXT:    unreachable
+// MANDATORY:       omp_offload.cont:
+// MANDATORY-NEXT:    br label [[OMP_IF_END:%.*]]
+// MANDATORY:       omp_if.else:
+// MANDATORY-NEXT:    unreachable
+// MANDATORY:       omp_if.end:
+// MANDATORY-NEXT:    ret void
+//
+//
+// MANDATORY-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// MANDATORY-SAME: () #[[ATTR3:[0-9]+]] {
+// MANDATORY-NEXT:  entry:
+// MANDATORY-NEXT:    call void @__tgt_register_requires(i64 1)
+// MANDATORY-NEXT:    ret void
+//
Index: clang/lib/Sema/SemaOpenMP.cpp
===================================================================
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -2517,7 +2517,7 @@
         << HostDevTy;
     return;
   }
-  if (!LangOpts.OpenMPIsDevice && DevTy &&
+  if (!LangOpts.OpenMPIsDevice && !LangOpts.OpenMPOffloadMandatory && DevTy &&
       *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) {
     // Diagnose nohost function called during host codegen.
     StringRef NoHostDevTy = getOpenMPSimpleClauseTypeName(
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -5997,6 +5997,8 @@
         CmdArgs.push_back("-fopenmp-assume-threads-oversubscription");
       if (Args.hasArg(options::OPT_fopenmp_assume_no_thread_state))
         CmdArgs.push_back("-fopenmp-assume-no-thread-state");
+      if (Args.hasArg(options::OPT_fopenmp_offload_mandatory))
+        CmdArgs.push_back("-fopenmp-offload-mandatory");
       break;
     default:
       // By default, if Clang doesn't know how to generate useful OpenMP code
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6289,6 +6289,13 @@
   if (CGM.getLangOpts().OMPTargetTriples.empty())
     IsOffloadEntry = false;
 
+  if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
+    unsigned DiagID = CGM.getDiags().getCustomDiagID(
+        DiagnosticsEngine::Error,
+        "No offloading entry generated while offloading is mandatory.");
+    CGM.getDiags().Report(DiagID);
+  }
+
   assert(CGF.CurFuncDecl && "No parent declaration for target region!");
   StringRef ParentName;
   // In case we have Ctors/Dtors we use the complete type variant to produce
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6538,6 +6538,8 @@
   // mangled name of the function that encloses the target region and BB is the
   // line number of the target region.
 
+  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
+                               !CGM.getLangOpts().OpenMPOffloadMandatory;
   unsigned DeviceID;
   unsigned FileID;
   unsigned Line;
@@ -6556,7 +6558,8 @@
   CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
 
-  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
+  if (BuildOutlinedFn)
+    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
 
   // If this target outline function is not an offload entry, we don't need to
   // register it.
@@ -6588,9 +6591,20 @@
         llvm::Constant::getNullValue(CGM.Int8Ty), Name);
   }
 
+  // If we do not allow host fallback we still need a named address to use.
+  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
+  if (!BuildOutlinedFn) {
+    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
+           "Named kernel already exists?");
+    TargetRegionEntryAddr = new llvm::GlobalVariable(
+        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
+        llvm::GlobalValue::InternalLinkage,
+        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
+  }
+
   // Register the information for the entry associated with this target region.
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
-      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
+      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
       OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
 
   // Add NumTeams and ThreadLimit attributes to the outlined GPU function
@@ -6607,7 +6621,8 @@
                           std::to_string(DefaultValThreads));
   }
 
-  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
+  if (BuildOutlinedFn)
+    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
 }
 
 /// Checks if the expression is constant or does not have non-trivial function
@@ -10324,7 +10339,10 @@
   if (!CGF.HaveInsertPoint())
     return;
 
-  assert(OutlinedFn && "Invalid outlined function!");
+  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
+                                   CGM.getLangOpts().OpenMPOffloadMandatory;
+
+  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
 
   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                  D.hasClausesOfKind<OMPNowaitClause>();
@@ -10339,18 +10357,28 @@
   CodeGenFunction::OMPTargetDataInfo InputInfo;
   llvm::Value *MapTypesArray = nullptr;
   llvm::Value *MapNamesArray = nullptr;
-  // Fill up the pointer arrays and transfer execution to the device.
-  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
-                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
-                    &CapturedVars,
-                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
-    if (Device.getInt() == OMPC_DEVICE_ancestor) {
-      // Reverse offloading is not supported, so just execute on the host.
+  // Generate code for the host fallback function.
+  auto &&FallbackGen = [this, OutlinedFn, OutlinedFnID, &D, &CapturedVars,
+                        RequiresOuterTask, &CS,
+                        OffloadingMandatory](CodeGenFunction &CGF) {
+    if (OffloadingMandatory) {
+      CGF.Builder.CreateUnreachable();
+    } else {
       if (RequiresOuterTask) {
         CapturedVars.clear();
         CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
       }
       emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
+    }
+  };
+  // Fill up the pointer arrays and transfer execution to the device.
+  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
+                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
+                    &CapturedVars, SizeEmitter,
+                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
+    if (Device.getInt() == OMPC_DEVICE_ancestor) {
+      // Reverse offloading is not supported, so just execute on the host.
+      FallbackGen(CGF);
       return;
     }
 
@@ -10494,25 +10522,17 @@
     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
 
     CGF.EmitBlock(OffloadFailedBlock);
-    if (RequiresOuterTask) {
-      CapturedVars.clear();
-      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
-    }
-    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
+    FallbackGen(CGF);
+
     CGF.EmitBranch(OffloadContBlock);
 
     CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
   };
 
   // Notify that the host version must be executed.
-  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
-                    RequiresOuterTask](CodeGenFunction &CGF,
-                                       PrePostActionTy &) {
-    if (RequiresOuterTask) {
-      CapturedVars.clear();
-      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
-    }
-    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
+  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, RequiresOuterTask,
+                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
+    FallbackGen(CGF);
   };
 
   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2477,6 +2477,10 @@
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, 
   HelpText<"Assert no thread in a parallel region modifies an ICV">,
   MarshallingInfoFlag<LangOpts<"OpenMPNoThreadState">>;
+def fopenmp_offload_mandatory : Flag<["-"], "fopenmp-offload-mandatory">, Group<f_Group>, 
+  Flags<[CC1Option, NoArgumentUnused]>, 
+  HelpText<"Do not create a host fallback if offloading to the device fails.">,
+  MarshallingInfoFlag<LangOpts<"OpenMPOffloadMandatory">>;
 defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
   LangOpts<"OpenMPTargetNewRuntime">, DefaultTrue,
   PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -247,6 +247,7 @@
 LANGOPT(OpenMPThreadSubscription  , 1, 0, "Assume work-shared loops do not have more iterations than participating threads.")
 LANGOPT(OpenMPTeamSubscription  , 1, 0, "Assume distributed loops do not have more iterations than participating teams.")
 LANGOPT(OpenMPNoThreadState  , 1, 0, "Assume that no thread in a parallel region will modify an ICV.")
+LANGOPT(OpenMPOffloadMandatory  , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
 
 LANGOPT(CUDAIsDevice      , 1, 0, "compiling for CUDA device")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to