jhuber6 updated this revision to Diff 361667.
jhuber6 added a comment.

Added a test for the new `-fopenmp-target-new-runtime` flag.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D106793/new/

https://reviews.llvm.org/D106793

Files:
  clang/include/clang/Basic/LangOptions.def
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/lib/Driver/ToolChains/Cuda.cpp
  clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc
  clang/test/Driver/openmp-offload-gpu.c


Index: clang/test/Driver/openmp-offload-gpu.c
===================================================================
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -154,6 +154,11 @@
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB %s
+/// Check with the new runtime enabled
+// RUN:   env LIBRARY_PATH=%S/Inputs/libomptarget %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:   -fopenmp-relocatable-target -fopenmp-target-new-runtime -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-BCLIB-NEW %s
 /// The user can override default detection using --libomptarget-nvptx-bc-path=.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
@@ -162,6 +167,7 @@
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB-USER %s
 
 // CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-sm_35.bc
+// CHK-BCLIB-NEW: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-new-nvptx-sm_35.bc
 // CHK-BCLIB-USER: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
 // CHK-BCLIB-NOT: {{error:|warning:}}
 
Index: clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc
===================================================================
--- /dev/null
+++ clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc
@@ -0,0 +1 @@
+
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -751,7 +751,13 @@
       return;
     }
 
-    std::string BitcodeSuffix = "nvptx-" + GpuArch.str();
+    std::string BitcodeSuffix;
+    if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
+                           options::OPT_fno_openmp_target_new_runtime, false))
+      BitcodeSuffix = "new-nvptx-" + GpuArch.str();
+    else
+      BitcodeSuffix = "nvptx-" + GpuArch.str();
+
     addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
                        getTriple());
   }
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -222,7 +222,14 @@
 
   if (DriverArgs.hasArg(options::OPT_nogpulib))
     return;
-  std::string BitcodeSuffix = "amdgcn-" + GPUArch;
+
+  std::string BitcodeSuffix;
+  if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
+                         options::OPT_fno_openmp_target_new_runtime, false))
+    BitcodeSuffix = "new-amdgcn-" + GPUArch;
+  else
+    BitcodeSuffix = "amdgcn-" + GPUArch;
+
   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
                      getTriple());
 }
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2373,6 +2373,10 @@
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
+  LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
+  PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
+  NegFlag<SetFalse>>;
 defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
   LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>;
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -240,6 +240,7 @@
 LANGOPT(OpenMPCUDANumSMs  , 32, 0, "Number of SMs for CUDA devices.")
 LANGOPT(OpenMPCUDABlocksPerSM  , 32, 0, "Number of blocks per SM for CUDA devices.")
 LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.")
+LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
 LANGOPT(OpenMPOptimisticCollapse  , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
 


Index: clang/test/Driver/openmp-offload-gpu.c
===================================================================
--- clang/test/Driver/openmp-offload-gpu.c
+++ clang/test/Driver/openmp-offload-gpu.c
@@ -154,6 +154,11 @@
 // RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
 // RUN:   -fopenmp-relocatable-target -save-temps -no-canonical-prefixes %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB %s
+/// Check with the new runtime enabled
+// RUN:   env LIBRARY_PATH=%S/Inputs/libomptarget %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
+// RUN:   -Xopenmp-target -march=sm_35 --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \
+// RUN:   -fopenmp-relocatable-target -fopenmp-target-new-runtime -save-temps -no-canonical-prefixes %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHK-BCLIB-NEW %s
 /// The user can override default detection using --libomptarget-nvptx-bc-path=.
 // RUN:   %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda \
 // RUN:   --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc \
@@ -162,6 +167,7 @@
 // RUN:   | FileCheck -check-prefix=CHK-BCLIB-USER %s
 
 // CHK-BCLIB: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-sm_35.bc
+// CHK-BCLIB-NEW: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-new-nvptx-sm_35.bc
 // CHK-BCLIB-USER: clang{{.*}}-triple{{.*}}nvptx64-nvidia-cuda{{.*}}-mlink-builtin-bitcode{{.*}}libomptarget-nvptx-test.bc
 // CHK-BCLIB-NOT: {{error:|warning:}}
 
Index: clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc
===================================================================
--- /dev/null
+++ clang/test/Driver/Inputs/libomptarget/libomptarget-new-nvptx-sm_35.bc
@@ -0,0 +1 @@
+
Index: clang/lib/Driver/ToolChains/Cuda.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Cuda.cpp
+++ clang/lib/Driver/ToolChains/Cuda.cpp
@@ -751,7 +751,13 @@
       return;
     }
 
-    std::string BitcodeSuffix = "nvptx-" + GpuArch.str();
+    std::string BitcodeSuffix;
+    if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
+                           options::OPT_fno_openmp_target_new_runtime, false))
+      BitcodeSuffix = "new-nvptx-" + GpuArch.str();
+    else
+      BitcodeSuffix = "nvptx-" + GpuArch.str();
+
     addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
                        getTriple());
   }
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -222,7 +222,14 @@
 
   if (DriverArgs.hasArg(options::OPT_nogpulib))
     return;
-  std::string BitcodeSuffix = "amdgcn-" + GPUArch;
+
+  std::string BitcodeSuffix;
+  if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
+                         options::OPT_fno_openmp_target_new_runtime, false))
+    BitcodeSuffix = "new-amdgcn-" + GPUArch;
+  else
+    BitcodeSuffix = "amdgcn-" + GPUArch;
+
   addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
                      getTriple());
 }
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2373,6 +2373,10 @@
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group<f_Group>,
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
+defm openmp_target_new_runtime: BoolFOption<"openmp-target-new-runtime",
+  LangOpts<"OpenMPTargetNewRuntime">, DefaultFalse,
+  PosFlag<SetTrue, [CC1Option], "Use the new bitcode library for OpenMP offloading">,
+  NegFlag<SetFalse>>;
 defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse",
   LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse,
   PosFlag<SetTrue, [CC1Option]>, NegFlag<SetFalse>, BothFlags<[NoArgumentUnused, HelpHidden]>>;
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -240,6 +240,7 @@
 LANGOPT(OpenMPCUDANumSMs  , 32, 0, "Number of SMs for CUDA devices.")
 LANGOPT(OpenMPCUDABlocksPerSM  , 32, 0, "Number of blocks per SM for CUDA devices.")
 LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.")
+LANGOPT(OpenMPTargetNewRuntime , 1, 0, "Use the new bitcode library for OpenMP offloading")
 LANGOPT(OpenMPOptimisticCollapse  , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.")
 LANGOPT(RenderScript      , 1, 0, "RenderScript")
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to