JonChesterfield updated this revision to Diff 382433.
JonChesterfield added a comment.

- delete the extern debug_kind variable


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D112227/new/

https://reviews.llvm.org/D112227

Files:
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  openmp/libomptarget/DeviceRTL/CMakeLists.txt
  openmp/libomptarget/DeviceRTL/src/Configuration.cpp
  openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
  openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
  openmp/libomptarget/test/mapping/data_member_ref.cpp
  openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
  openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
  openmp/libomptarget/test/mapping/delete_inf_refcount.c
  openmp/libomptarget/test/mapping/lambda_by_value.cpp
  openmp/libomptarget/test/mapping/ompx_hold/struct.c
  openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
  openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
  openmp/libomptarget/test/offloading/bug49021.cpp
  openmp/libomptarget/test/offloading/bug49334.cpp
  openmp/libomptarget/test/offloading/bug50022.cpp
  openmp/libomptarget/test/offloading/global_constructor.cpp
  openmp/libomptarget/test/offloading/host_as_target.c
  openmp/libomptarget/test/unified_shared_memory/api.c
  openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
  openmp/libomptarget/test/unified_shared_memory/close_modifier.c
  openmp/libomptarget/test/unified_shared_memory/shared_update.c

Index: openmp/libomptarget/test/unified_shared_memory/shared_update.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/shared_update.c
+++ openmp/libomptarget/test/unified_shared_memory/shared_update.c
@@ -4,6 +4,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>
Index: openmp/libomptarget/test/unified_shared_memory/close_modifier.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/close_modifier.c
+++ openmp/libomptarget/test/unified_shared_memory/close_modifier.c
@@ -5,6 +5,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <omp.h>
 #include <stdio.h>
Index: openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
+++ openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
@@ -5,6 +5,7 @@
 
 // Fails on amdgcn with error: GPU Memory Error
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <omp.h>
 #include <stdio.h>
Index: openmp/libomptarget/test/unified_shared_memory/api.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/api.c
+++ openmp/libomptarget/test/unified_shared_memory/api.c
@@ -4,6 +4,7 @@
 
 // Fails on amdgcn with error: GPU Memory Error
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>
Index: openmp/libomptarget/test/offloading/host_as_target.c
===================================================================
--- openmp/libomptarget/test/offloading/host_as_target.c
+++ openmp/libomptarget/test/offloading/host_as_target.c
@@ -9,6 +9,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>
Index: openmp/libomptarget/test/offloading/global_constructor.cpp
===================================================================
--- openmp/libomptarget/test/offloading/global_constructor.cpp
+++ openmp/libomptarget/test/offloading/global_constructor.cpp
@@ -2,6 +2,7 @@
 
 // Fails in DAGToDAG on an address space problem
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <cmath>
 #include <cstdio>
Index: openmp/libomptarget/test/offloading/bug50022.cpp
===================================================================
--- openmp/libomptarget/test/offloading/bug50022.cpp
+++ openmp/libomptarget/test/offloading/bug50022.cpp
@@ -1,6 +1,7 @@
 // RUN: %libomptarget-compilexx-and-run-generic
 
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <cassert>
 #include <iostream>
Index: openmp/libomptarget/test/offloading/bug49334.cpp
===================================================================
--- openmp/libomptarget/test/offloading/bug49334.cpp
+++ openmp/libomptarget/test/offloading/bug49334.cpp
@@ -2,7 +2,7 @@
 
 // Currently hangs on amdgpu
 // UNSUPPORTED: amdgcn-amd-amdhsa
-
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 // UNSUPPORTED: x86_64-pc-linux-gnu
 
 #include <cassert>
Index: openmp/libomptarget/test/offloading/bug49021.cpp
===================================================================
--- openmp/libomptarget/test/offloading/bug49021.cpp
+++ openmp/libomptarget/test/offloading/bug49021.cpp
@@ -2,6 +2,7 @@
 
 // Wrong results on amdgcn
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <iostream>
 
Index: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
===================================================================
--- openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
+++ openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 
Index: openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
===================================================================
--- openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
+++ openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 
Index: openmp/libomptarget/test/mapping/ompx_hold/struct.c
===================================================================
--- openmp/libomptarget/test/mapping/ompx_hold/struct.c
+++ openmp/libomptarget/test/mapping/ompx_hold/struct.c
@@ -3,6 +3,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <omp.h>
 #include <stdio.h>
Index: openmp/libomptarget/test/mapping/lambda_by_value.cpp
===================================================================
--- openmp/libomptarget/test/mapping/lambda_by_value.cpp
+++ openmp/libomptarget/test/mapping/lambda_by_value.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <stdint.h>
Index: openmp/libomptarget/test/mapping/delete_inf_refcount.c
===================================================================
--- openmp/libomptarget/test/mapping/delete_inf_refcount.c
+++ openmp/libomptarget/test/mapping/delete_inf_refcount.c
@@ -2,6 +2,7 @@
 
 // fails with error message 'Unable to generate target entries' on amdgcn
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 #include <omp.h>
Index: openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
===================================================================
--- openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
+++ openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <cstdio>
 #include <cstdlib>
Index: openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
===================================================================
--- openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
+++ openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <cstdio>
 #include <cstdlib>
Index: openmp/libomptarget/test/mapping/data_member_ref.cpp
===================================================================
--- openmp/libomptarget/test/mapping/data_member_ref.cpp
+++ openmp/libomptarget/test/mapping/data_member_ref.cpp
@@ -2,6 +2,7 @@
 
 // amdgcn does not have printf definition
 // XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
 
 #include <stdio.h>
 
Index: openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
===================================================================
--- openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -122,3 +122,4 @@
 
 # Report to the parent scope that we are building a plugin for amdgpu
 set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
+set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa-newRTL" PARENT_SCOPE)
Index: openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -68,8 +68,23 @@
 ///{
 #pragma omp begin declare variant match(device = {arch(amdgcn)})
 
-uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
-  return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, "");
+uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
+  // builtin_amdgcn_atomic_inc32 should expand to this switch when
+  // passed a runtime value, but does not do so yet. Workaround here.
+  switch (Ordering) {
+  default:
+    __builtin_unreachable();
+  case __ATOMIC_RELAXED:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
+  }
 }
 
 uint32_t SHARED(namedBarrierTracker);
@@ -126,17 +141,64 @@
   fence::team(__ATOMIC_RELEASE);
 }
 
+// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
+// so that it is usable within a template environment and so that a runtime
+// value of the memory order is expanded to this switch within clang/llvm.
+void fenceTeam(int Ordering) {
+  switch (Ordering) {
+  default:
+    __builtin_unreachable();
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+  }
+}
+void fenceKernel(int Ordering) {
+  switch (Ordering) {
+  default:
+    __builtin_unreachable();
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+  }
+}
+void fenceSystem(int Ordering) {
+  switch (Ordering) {
+  default:
+    __builtin_unreachable();
+  case __ATOMIC_ACQUIRE:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
+  case __ATOMIC_RELEASE:
+    return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
+  case __ATOMIC_ACQ_REL:
+    return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
+  case __ATOMIC_SEQ_CST:
+    return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+  }
+}
+
 void syncWarp(__kmpc_impl_lanemask_t) {
   // AMDGCN doesn't need to sync threads in a warp
 }
 
 void syncThreads() { __builtin_amdgcn_s_barrier(); }
 
-void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); }
-
-void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); }
-
-void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); }
+// TODO: Don't have wavefront lane locks. Possibly can't have them.
+void unsetLock(omp_lock_t *) { __builtin_trap(); }
+int testLock(omp_lock_t *) { __builtin_trap(); }
+void initLock(omp_lock_t *) { __builtin_trap(); }
+void destroyLock(omp_lock_t *) { __builtin_trap(); }
+void setLock(omp_lock_t *) { __builtin_trap(); }
 
 #pragma omp end declare variant
 ///}
@@ -238,7 +300,7 @@
 }
 
 void atomic::store(uint32_t *Addr, uint32_t V, int Ordering) {
-   impl::atomicStore(Addr, V, Ordering);
+  impl::atomicStore(Addr, V, Ordering);
 }
 
 uint32_t atomic::inc(uint32_t *Addr, uint32_t V, int Ordering) {
Index: openmp/libomptarget/DeviceRTL/src/Configuration.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -20,14 +20,12 @@
 
 #pragma omp declare target
 
-extern uint32_t __omp_rtl_debug_kind;
-
-// TOOD: We want to change the name as soon as the old runtime is gone.
+// TODO: We want to change the name as soon as the old runtime is gone.
 DeviceEnvironmentTy CONSTANT(omptarget_device_environment)
     __attribute__((used));
 
 uint32_t config::getDebugKind() {
-  return __omp_rtl_debug_kind & omptarget_device_environment.DebugKind;
+  return omptarget_device_environment.DebugKind;
 }
 
 uint32_t config::getNumDevices() {
Index: openmp/libomptarget/DeviceRTL/CMakeLists.txt
===================================================================
--- openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -29,7 +29,7 @@
   find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR}
     NO_DEFAULT_PATH)
   find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
-  libomptarget_say("Building Device RTL. Using clang: ${CLANG_TOOL}")
+  libomptarget_say("Building DeviceRTL. Using clang: ${CLANG_TOOL}")
 elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDALONE_BUILD)
   # LLVM in-tree builds may use CMake target names to discover the tools.
   set(CLANG_TOOL $<TARGET_FILE:clang>)
@@ -63,7 +63,7 @@
 set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80)
 
 set(LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES ${all_capabilities} CACHE STRING
-  "List of CUDA Compute Capabilities to be used to compile the NVPTX device RTL.")
+  "List of CUDA Compute Capabilities to be used to compile the NVPTX DeviceRTL.")
 string(TOLOWER ${LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES} LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES)
 
 if (LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES STREQUAL "all")
@@ -80,7 +80,7 @@
 # If user set LIBOMPTARGET_NVPTX_COMPUTE_CAPABILITIES to empty, we disable the
 # build.
 if (NOT nvptx_sm_list)
-  libomptarget_say("Not building CUDA offloading device RTL: empty compute capability list")
+  libomptarget_say("Not building CUDA offloading DeviceRTL: empty compute capability list")
   return()
 endif()
 
@@ -91,6 +91,12 @@
   endif()
 endforeach()
 
+set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx1010 gfx1030 gfx1031)
+if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
+  set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
+endif()
+
+
 # Activate RTL message dumps if requested by the user.
 set(LIBOMPTARGET_DEVICE_DEBUG FALSE CACHE BOOL
   "Activate DeviceRTL debug messages.")
@@ -121,11 +127,9 @@
 # Set flags for LLVM Bitcode compilation.
 set(bc_flags -S -x c++ -std=c++17
               ${clang_opt_flags}
-             -target nvptx64
              -Xclang -emit-llvm-bc
              -Xclang -aux-triple -Xclang ${aux_triple}
              -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
-             -Xclang -target-feature -Xclang +ptx61
              -I${include_directory}
              -I${devicertl_base_directory}/../include
              ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
@@ -137,23 +141,21 @@
   list(APPEND bc_flags -DOMPTARGET_DEBUG=0)
 endif()
 
-# Create target to build all Bitcode libraries.
-add_custom_target(omptarget-new-nvptx-bc)
-add_dependencies(omptarget-new-nvptx-bc opt llvm-link)
+function(compileDeviceRTLLibrary target_cpu target_name)
+  set(target_bc_flags ${ARGN})
 
-# Generate a Bitcode library for all the compute capabilities the user requested
-foreach(sm ${nvptx_sm_list})
-  # TODO: replace this with declare variant and isa selector.
-  set(cuda_flags -Xclang -target-cpu -Xclang sm_${sm} "-D__CUDA_ARCH__=${sm}0")
   set(bc_files "")
   foreach(src ${src_files})
     get_filename_component(infile ${src} ABSOLUTE)
     get_filename_component(outfile ${src} NAME)
-    set(outfile "${outfile}-sm_${sm}.bc")
+    set(outfile "${outfile}-${target_cpu}.bc")
 
     add_custom_command(OUTPUT ${outfile}
-      COMMAND ${CLANG_TOOL} ${bc_flags}
-        ${cuda_flags} ${infile} -o ${outfile}
+      COMMAND ${CLANG_TOOL}
+      ${bc_flags}
+      -Xclang -target-cpu -Xclang ${target_cpu}
+      ${target_bc_flags}
+      ${infile} -o ${outfile}
       DEPENDS ${infile}
       IMPLICIT_DEPENDS CXX ${infile}
       COMMENT "Building LLVM bitcode ${outfile}"
@@ -173,43 +175,41 @@
     list(APPEND bc_files ${outfile})
   endforeach()
 
-  set(bclib_name "libomptarget-new-nvptx-sm_${sm}.bc")
+  set(bclib_name "libomptarget-new-${target_name}-${target_cpu}.bc")
 
   # Link to a bitcode library.
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
       COMMAND ${LINK_TOOL}
-        -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} ${bc_files}
+        -o ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name} ${bc_files}
       DEPENDS ${bc_files}
       COMMENT "Linking LLVM bitcode ${bclib_name}"
   )
 
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt
-      COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+      COMMAND ${OPT_TOOL} ${link_opt_flags} ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
                       -o ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
-      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
       COMMENT "Optimizing LLVM bitcode ${bclib_name}"
   )
 
   # Add a file-level dependency to ensure that llvm-link and opt are up-to-date.
   # By default, add_custom_command only builds the tool if the executable is missing
   if("${LINK_TOOL}" STREQUAL "$<TARGET_FILE:llvm-link>")
-    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/linked_${bclib_name}
       DEPENDS llvm-link
       APPEND)
   endif()
   if("${OPT_TOOL}" STREQUAL "$<TARGET_FILE:opt>")
-    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt
+    add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
       DEPENDS opt
       APPEND)
   endif()
 
   set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${bclib_name})
 
-  set(bclib_target_name "omptarget-new-nvptx-sm_${sm}-bc")
+  set(bclib_target_name "omptarget-new-${target_name}-${target_cpu}-bc")
 
-  add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}_opt)
-  add_dependencies(omptarget-new-nvptx-bc ${bclib_target_name})
-  add_dependencies(${bclib_target_name} opt llvm-link)
+  add_custom_target(${bclib_target_name} ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name})
 
   # Copy library to destination.
   add_custom_command(TARGET ${bclib_target_name} POST_BUILD
@@ -218,4 +218,13 @@
 
   # Install bitcode library under the lib destination folder.
   install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name} DESTINATION "${OPENMP_INSTALL_LIBDIR}")
+endfunction()
+
+# Generate a Bitcode library for all the compute capabilities the user requested
+foreach(sm ${nvptx_sm_list})
+  compileDeviceRTLLibrary(sm_${sm} nvptx -target nvptx64 -Xclang -target-feature -Xclang +ptx61 "-D__CUDA_ARCH__=${sm}0")
+endforeach()
+
+foreach(mcpu ${amdgpu_mcpus})
+  compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa "-D__AMDGCN__" -fvisibility=default -nogpulib)
 endforeach()
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -252,7 +252,7 @@
   std::string BitcodeSuffix;
   if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
                          options::OPT_fno_openmp_target_new_runtime, false))
-    BitcodeSuffix = "new-amdgcn-" + GPUArch;
+    BitcodeSuffix = "new-amdgpu-" + GPUArch;
   else
     BitcodeSuffix = "amdgcn-" + GPUArch;
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to