JonChesterfield updated this revision to Diff 382441.
JonChesterfield added a comment.
- rebase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D112227/new/
https://reviews.llvm.org/D112227
Files:
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
openmp/libomptarget/DeviceRTL/CMakeLists.txt
openmp/libomptarget/DeviceRTL/src/Configuration.cpp
openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
openmp/libomptarget/test/mapping/data_member_ref.cpp
openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
openmp/libomptarget/test/mapping/delete_inf_refcount.c
openmp/libomptarget/test/mapping/lambda_by_value.cpp
openmp/libomptarget/test/mapping/ompx_hold/struct.c
openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
openmp/libomptarget/test/offloading/bug49021.cpp
openmp/libomptarget/test/offloading/bug49334.cpp
openmp/libomptarget/test/offloading/bug50022.cpp
openmp/libomptarget/test/offloading/global_constructor.cpp
openmp/libomptarget/test/offloading/host_as_target.c
openmp/libomptarget/test/unified_shared_memory/api.c
openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
openmp/libomptarget/test/unified_shared_memory/close_modifier.c
openmp/libomptarget/test/unified_shared_memory/shared_update.c
Index: openmp/libomptarget/test/unified_shared_memory/shared_update.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/shared_update.c
+++ openmp/libomptarget/test/unified_shared_memory/shared_update.c
@@ -4,6 +4,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>
Index: openmp/libomptarget/test/unified_shared_memory/close_modifier.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/close_modifier.c
+++ openmp/libomptarget/test/unified_shared_memory/close_modifier.c
@@ -5,6 +5,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <omp.h>
#include <stdio.h>
Index: openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
+++ openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
@@ -5,6 +5,7 @@
// Fails on amdgcn with error: GPU Memory Error
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <omp.h>
#include <stdio.h>
Index: openmp/libomptarget/test/unified_shared_memory/api.c
===================================================================
--- openmp/libomptarget/test/unified_shared_memory/api.c
+++ openmp/libomptarget/test/unified_shared_memory/api.c
@@ -4,6 +4,7 @@
// Fails on amdgcn with error: GPU Memory Error
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>
Index: openmp/libomptarget/test/offloading/host_as_target.c
===================================================================
--- openmp/libomptarget/test/offloading/host_as_target.c
+++ openmp/libomptarget/test/offloading/host_as_target.c
@@ -9,6 +9,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>
Index: openmp/libomptarget/test/offloading/global_constructor.cpp
===================================================================
--- openmp/libomptarget/test/offloading/global_constructor.cpp
+++ openmp/libomptarget/test/offloading/global_constructor.cpp
@@ -2,6 +2,7 @@
// Fails in DAGToDAG on an address space problem
// UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <cmath>
#include <cstdio>
Index: openmp/libomptarget/test/offloading/bug50022.cpp
===================================================================
--- openmp/libomptarget/test/offloading/bug50022.cpp
+++ openmp/libomptarget/test/offloading/bug50022.cpp
@@ -1,6 +1,7 @@
// RUN: %libomptarget-compilexx-and-run-generic
// UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <cassert>
#include <iostream>
Index: openmp/libomptarget/test/offloading/bug49334.cpp
===================================================================
--- openmp/libomptarget/test/offloading/bug49334.cpp
+++ openmp/libomptarget/test/offloading/bug49334.cpp
@@ -2,7 +2,7 @@
// Currently hangs on amdgpu
// UNSUPPORTED: amdgcn-amd-amdhsa
-
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
// UNSUPPORTED: x86_64-pc-linux-gnu
#include <cassert>
Index: openmp/libomptarget/test/offloading/bug49021.cpp
===================================================================
--- openmp/libomptarget/test/offloading/bug49021.cpp
+++ openmp/libomptarget/test/offloading/bug49021.cpp
@@ -2,6 +2,7 @@
// Wrong results on amdgcn
// UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <iostream>
Index: openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
===================================================================
--- openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
+++ openmp/libomptarget/test/mapping/reduction_implicit_map.cpp
@@ -2,6 +2,7 @@
// amdgcn does not have printf definition
// UNSUPPORTED: amdgcn-amd-amdhsa
+// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
Index: openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
===================================================================
--- openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
+++ openmp/libomptarget/test/mapping/ptr_and_obj_motion.c
@@ -2,6 +2,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
Index: openmp/libomptarget/test/mapping/ompx_hold/struct.c
===================================================================
--- openmp/libomptarget/test/mapping/ompx_hold/struct.c
+++ openmp/libomptarget/test/mapping/ompx_hold/struct.c
@@ -3,6 +3,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <omp.h>
#include <stdio.h>
Index: openmp/libomptarget/test/mapping/lambda_by_value.cpp
===================================================================
--- openmp/libomptarget/test/mapping/lambda_by_value.cpp
+++ openmp/libomptarget/test/mapping/lambda_by_value.cpp
@@ -2,6 +2,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <stdint.h>
Index: openmp/libomptarget/test/mapping/delete_inf_refcount.c
===================================================================
--- openmp/libomptarget/test/mapping/delete_inf_refcount.c
+++ openmp/libomptarget/test/mapping/delete_inf_refcount.c
@@ -2,6 +2,7 @@
// fails with error message 'Unable to generate target entries' on amdgcn
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>
Index: openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
===================================================================
--- openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
+++ openmp/libomptarget/test/mapping/declare_mapper_nested_mappers.cpp
@@ -2,6 +2,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <cstdio>
#include <cstdlib>
Index: openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
===================================================================
--- openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
+++ openmp/libomptarget/test/mapping/declare_mapper_nested_default_mappers.cpp
@@ -2,6 +2,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <cstdio>
#include <cstdlib>
Index: openmp/libomptarget/test/mapping/data_member_ref.cpp
===================================================================
--- openmp/libomptarget/test/mapping/data_member_ref.cpp
+++ openmp/libomptarget/test/mapping/data_member_ref.cpp
@@ -2,6 +2,7 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
+// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
Index: openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
===================================================================
--- openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -122,3 +122,4 @@
# Report to the parent scope that we are building a plugin for amdgpu
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
+# set(... PARENT_SCOPE) leaves the variable in this scope unchanged, so a second
+# call built from ${LIBOMPTARGET_SYSTEM_TARGETS} would overwrite the value set
+# above. List both targets here so that neither is lost in the parent scope.
+set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa amdgcn-amd-amdhsa-newRTL" PARENT_SCOPE)
Index: openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -68,8 +68,23 @@
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})
-uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
- return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, "");
+uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
+ // __builtin_amdgcn_atomic_inc32 should expand to this switch when passed a
+ // runtime value for the ordering, but does not do so yet. Work around it here.
+ switch (Ordering) {
+ default:
+ __builtin_unreachable();
+ case __ATOMIC_RELAXED:
+ return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
+ case __ATOMIC_ACQUIRE:
+ return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
+ case __ATOMIC_RELEASE:
+ return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
+ case __ATOMIC_ACQ_REL:
+ return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
+ case __ATOMIC_SEQ_CST:
+ return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
+ }
}
uint32_t SHARED(namedBarrierTracker);
@@ -126,17 +141,64 @@
fence::team(__ATOMIC_RELEASE);
}
+// Sema checking of __builtin_amdgcn_fence is aggressive. The intention is to
+// patch clang so that the builtin is usable within a template environment and
+// so that a runtime memory order is expanded to this switch within clang/llvm.
+void fenceTeam(int Ordering) {
+ switch (Ordering) {
+ default:
+ __builtin_unreachable();
+ case __ATOMIC_ACQUIRE:
+ return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
+ case __ATOMIC_RELEASE:
+ return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
+ case __ATOMIC_ACQ_REL:
+ return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
+ case __ATOMIC_SEQ_CST:
+ return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
+ }
+}
+void fenceKernel(int Ordering) {
+ switch (Ordering) {
+ default:
+ __builtin_unreachable();
+ case __ATOMIC_ACQUIRE:
+ return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
+ case __ATOMIC_RELEASE:
+ return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
+ case __ATOMIC_ACQ_REL:
+ return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
+ case __ATOMIC_SEQ_CST:
+ return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
+ }
+}
+void fenceSystem(int Ordering) {
+ switch (Ordering) {
+ default:
+ __builtin_unreachable();
+ case __ATOMIC_ACQUIRE:
+ return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
+ case __ATOMIC_RELEASE:
+ return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
+ case __ATOMIC_ACQ_REL:
+ return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
+ case __ATOMIC_SEQ_CST:
+ return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
+ }
+}
+
void syncWarp(__kmpc_impl_lanemask_t) {
// AMDGCN doesn't need to sync threads in a warp
}
void syncThreads() { __builtin_amdgcn_s_barrier(); }
-void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); }
-
-void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); }
-
-void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); }
+// TODO: Don't have wavefront lane locks. Possibly can't have them.
+void unsetLock(omp_lock_t *) { __builtin_trap(); }
+int testLock(omp_lock_t *) { __builtin_trap(); }
+void initLock(omp_lock_t *) { __builtin_trap(); }
+void destroyLock(omp_lock_t *) { __builtin_trap(); }
+void setLock(omp_lock_t *) { __builtin_trap(); }
#pragma omp end declare variant
///}
@@ -238,7 +300,7 @@
}
void atomic::store(uint32_t *Addr, uint32_t V, int Ordering) {
- impl::atomicStore(Addr, V, Ordering);
+ impl::atomicStore(Addr, V, Ordering);
}
uint32_t atomic::inc(uint32_t *Addr, uint32_t V, int Ordering) {
Index: openmp/libomptarget/DeviceRTL/src/Configuration.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -20,14 +20,12 @@
#pragma omp declare target
-extern uint32_t __omp_rtl_debug_kind;
-
-// TOOD: We want to change the name as soon as the old runtime is gone.
+// TODO: We want to change the name as soon as the old runtime is gone.
DeviceEnvironmentTy CONSTANT(omptarget_device_environment)
__attribute__((used));
uint32_t config::getDebugKind() {
- return __omp_rtl_debug_kind & omptarget_device_environment.DebugKind;
+ return omptarget_device_environment.DebugKind;
}
uint32_t config::getNumDevices() {
Index: openmp/libomptarget/DeviceRTL/CMakeLists.txt
===================================================================
--- openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -226,6 +226,5 @@
endforeach()
foreach(mcpu ${amdgpu_mcpus})
- # require D112227 or similar to enable the compilation for amdgpu
- # compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
+ compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
endforeach()
Index: clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -252,7 +252,7 @@
std::string BitcodeSuffix;
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
options::OPT_fno_openmp_target_new_runtime, false))
- BitcodeSuffix = "new-amdgcn-" + GPUArch;
+ BitcodeSuffix = "new-amdgpu-" + GPUArch;
else
BitcodeSuffix = "amdgcn-" + GPUArch;
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits