https://github.com/wenju-he created
https://github.com/llvm/llvm-project/pull/168325
Main changes:
* OpenCL legacy atom/atomic builtins now call CLC atomic functions (which use
Clang __scoped_atomic_*), replacing previous Clang __sync_* functions.
* Change memory order from seq_cst to relaxed; keep device scope (spec permits
broader than workgroup). LLVM IR for _Z8atom_decPU3AS1Vi:
Before:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1 syncscope("agent")
seq_cst
After:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1 syncscope("agent")
monotonic
* Also adds OpenCL 1.0 atom_* variants without volatile on the pointer. They
are added for backward compatibility.
>From d95e51c48de66f635c6a118cdcfcbee92495f94d Mon Sep 17 00:00:00 2001
From: Wenju He <[email protected]>
Date: Mon, 17 Nov 2025 07:29:19 +0100
Subject: [PATCH] [libclc] Use CLC atomic functions for legacy OpenCL
atom/atomic builtins
Main changes:
* OpenCL legacy atom/atomic builtins now call CLC atomic functions (which
use Clang __scoped_atomic_*), replacing previous Clang __sync_* functions.
* Change memory order from seq_cst to relaxed; keep device scope (spec
permits broader than workgroup). LLVM IR for _Z8atom_decPU3AS1Vi:
Before:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1 syncscope("agent")
seq_cst
After:
%2 = atomicrmw volatile sub ptr addrspace(1) %0, i32 1 syncscope("agent")
monotonic
* Also adds OpenCL 1.0 atom_* variants without volatile on the pointer.
They are added for backward compatibility.
---
.../clc/opencl/atomic/atom_decl_int32.inc | 3 -
.../clc/opencl/atomic/atom_decl_int64.inc | 3 -
libclc/opencl/lib/amdgcn/SOURCES | 1 -
.../minmax_helpers.ll | 55 -------------------
libclc/opencl/lib/generic/atomic/atom_add.cl | 29 +++++-----
libclc/opencl/lib/generic/atomic/atom_and.cl | 29 +++++-----
.../opencl/lib/generic/atomic/atom_cmpxchg.cl | 20 +++----
libclc/opencl/lib/generic/atomic/atom_dec.cl | 18 +++---
libclc/opencl/lib/generic/atomic/atom_inc.cl | 18 +++---
libclc/opencl/lib/generic/atomic/atom_max.cl | 45 +++++++--------
libclc/opencl/lib/generic/atomic/atom_min.cl | 45 +++++++--------
libclc/opencl/lib/generic/atomic/atom_or.cl | 29 +++++-----
libclc/opencl/lib/generic/atomic/atom_sub.cl | 29 +++++-----
libclc/opencl/lib/generic/atomic/atom_xchg.cl | 29 +++++-----
libclc/opencl/lib/generic/atomic/atom_xor.cl | 29 +++++-----
.../opencl/lib/generic/atomic/atomic_add.cl | 4 +-
.../opencl/lib/generic/atomic/atomic_and.cl | 4 +-
.../lib/generic/atomic/atomic_cmpxchg.cl | 5 +-
.../opencl/lib/generic/atomic/atomic_max.cl | 4 +-
.../opencl/lib/generic/atomic/atomic_min.cl | 4 +-
libclc/opencl/lib/generic/atomic/atomic_or.cl | 4 +-
.../opencl/lib/generic/atomic/atomic_sub.cl | 4 +-
.../opencl/lib/generic/atomic/atomic_xchg.cl | 15 ++---
.../opencl/lib/generic/atomic/atomic_xor.cl | 4 +-
24 files changed, 189 insertions(+), 241 deletions(-)
delete mode 100644
libclc/opencl/lib/amdgcn/cl_khr_int64_extended_atomics/minmax_helpers.ll
diff --git a/libclc/opencl/include/clc/opencl/atomic/atom_decl_int32.inc
b/libclc/opencl/include/clc/opencl/atomic/atom_decl_int32.inc
index 866d8903db816..8a0ec9481c595 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atom_decl_int32.inc
+++ b/libclc/opencl/include/clc/opencl/atomic/atom_decl_int32.inc
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcfunc.h>
-#include <clc/clctypes.h>
-
#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE)
\
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION(volatile ADDRSPACE TYPE *, TYPE);
diff --git a/libclc/opencl/include/clc/opencl/atomic/atom_decl_int64.inc
b/libclc/opencl/include/clc/opencl/atomic/atom_decl_int64.inc
index 146de3412fc2e..45489df0609ca 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atom_decl_int64.inc
+++ b/libclc/opencl/include/clc/opencl/atomic/atom_decl_int64.inc
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/clcfunc.h>
-#include <clc/clctypes.h>
-
#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE)
\
_CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION(volatile ADDRSPACE TYPE *, TYPE);
diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index 213f62cc73a74..0522e13f5d3db 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -1,4 +1,3 @@
-cl_khr_int64_extended_atomics/minmax_helpers.ll
mem_fence/fence.cl
synchronization/barrier.cl
workitem/get_global_offset.cl
diff --git
a/libclc/opencl/lib/amdgcn/cl_khr_int64_extended_atomics/minmax_helpers.ll
b/libclc/opencl/lib/amdgcn/cl_khr_int64_extended_atomics/minmax_helpers.ll
deleted file mode 100644
index 3ed5e99be3149..0000000000000
--- a/libclc/opencl/lib/amdgcn/cl_khr_int64_extended_atomics/minmax_helpers.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-;;===----------------------------------------------------------------------===;;
-;
-; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-; See https://llvm.org/LICENSE.txt for license information.
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-;
-;;===----------------------------------------------------------------------===;;
-
-define i64 @__clc__sync_fetch_and_min_global_8(i64 addrspace(1)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile min i64 addrspace(1)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
-
-define i64 @__clc__sync_fetch_and_umin_global_8(i64 addrspace(1)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile umin i64 addrspace(1)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
-
-define i64 @__clc__sync_fetch_and_min_local_8(i64 addrspace(3)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile min i64 addrspace(3)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
-
-define i64 @__clc__sync_fetch_and_umin_local_8(i64 addrspace(3)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile umin i64 addrspace(3)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
-
-define i64 @__clc__sync_fetch_and_max_global_8(i64 addrspace(1)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile max i64 addrspace(1)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
-
-define i64 @__clc__sync_fetch_and_umax_global_8(i64 addrspace(1)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile umax i64 addrspace(1)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
-
-define i64 @__clc__sync_fetch_and_max_local_8(i64 addrspace(3)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile max i64 addrspace(3)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
-
-define i64 @__clc__sync_fetch_and_umax_local_8(i64 addrspace(3)* nocapture
%ptr, i64 %value) nounwind alwaysinline {
-entry:
- %0 = atomicrmw volatile umax i64 addrspace(3)* %ptr, i64 %value seq_cst
- ret i64 %0
-}
diff --git a/libclc/opencl/lib/generic/atomic/atom_add.cl
b/libclc/opencl/lib/generic/atomic/atom_add.cl
index 08fb3fecd5bc9..7962b1d68a8ad 100644
--- a/libclc/opencl/lib/generic/atomic/atom_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_add.cl
@@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_add.h>
#include <clc/opencl/atomic/atom_add.h>
-#include <clc/opencl/atomic/atomic_add.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_add(AS TYPE *p, TYPE val) {
\
+ return atom_add((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_base_atomics
-#define __CLC_ATOMIC_OP add
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
-#define __CLC_ATOMIC_OP add
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_add_8(p, val);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_and.cl
b/libclc/opencl/lib/generic/atomic/atom_and.cl
index 1dddd8e72f305..38f410f8f9705 100644
--- a/libclc/opencl/lib/generic/atomic/atom_and.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_and.cl
@@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_and.h>
#include <clc/opencl/atomic/atom_and.h>
-#include <clc/opencl/atomic/atomic_and.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_and(AS TYPE *p, TYPE val) {
\
+ return atom_and((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_extended_atomics
-#define __CLC_ATOMIC_OP and
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
-#define __CLC_ATOMIC_OP and
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_and_8(p, val);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl
b/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl
index 5ae6aa30a8358..652ab335165e0 100644
--- a/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl
@@ -6,13 +6,20 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_compare_exchange.h>
#include <clc/opencl/atomic/atom_cmpxchg.h>
-#include <clc/opencl/atomic/atomic_cmpxchg.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE)
\
_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp,
\
TYPE val) {
\
- return atomic_cmpxchg(p, cmp, val);
\
+ return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED,
\
+ __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(AS TYPE *p, TYPE cmp, TYPE val) {
\
+ return atom_cmpxchg((volatile AS TYPE *)p, cmp, val);
\
}
#ifdef cl_khr_global_int32_base_atomics
@@ -24,20 +31,11 @@ __CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
-#undef __CLC_IMPL
-
#ifdef cl_khr_int64_base_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp,
\
- TYPE val) {
\
- return __sync_val_compare_and_swap_8(p, cmp, val);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_dec.cl
b/libclc/opencl/lib/generic/atomic/atom_dec.cl
index af811042d307a..69391d4fe7382 100644
--- a/libclc/opencl/lib/generic/atomic/atom_dec.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_dec.cl
@@ -6,13 +6,17 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_dec.h>
#include <clc/opencl/atomic/atom_dec.h>
-#include <clc/opencl/atomic/atom_sub.h>
-#include <clc/opencl/atomic/atomic_dec.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE)
\
_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) {
\
- return atomic_dec(p);
\
+ return __clc_atomic_dec(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(AS TYPE *p) {
\
+ return atom_dec((volatile AS TYPE *)p);
\
}
#ifdef cl_khr_global_int32_base_atomics
@@ -24,19 +28,11 @@ __CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
-#undef __CLC_IMPL
-
#ifdef cl_khr_int64_base_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) {
\
- return atom_sub(p, (TYPE)1);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_inc.cl
b/libclc/opencl/lib/generic/atomic/atom_inc.cl
index f881b3a3caa6d..6ce157b757376 100644
--- a/libclc/opencl/lib/generic/atomic/atom_inc.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_inc.cl
@@ -6,13 +6,17 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/opencl/atomic/atom_add.h>
+#include <clc/atomic/clc_atomic_inc.h>
#include <clc/opencl/atomic/atom_inc.h>
-#include <clc/opencl/atomic/atomic_inc.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
#define __CLC_IMPL(AS, TYPE)
\
_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) {
\
- return atomic_inc(p);
\
+ return __clc_atomic_inc(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(AS TYPE *p) {
\
+ return atom_inc((volatile AS TYPE *)p);
\
}
#ifdef cl_khr_global_int32_base_atomics
@@ -24,19 +28,11 @@ __CLC_IMPL(local, int)
__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
-#undef __CLC_IMPL
-
#ifdef cl_khr_int64_base_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) {
\
- return atom_add(p, (TYPE)1);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_max.cl
b/libclc/opencl/lib/generic/atomic/atom_max.cl
index 83b532ac19a1e..0bb99841ee61a 100644
--- a/libclc/opencl/lib/generic/atomic/atom_max.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_max.cl
@@ -6,40 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_max.h>
#include <clc/opencl/atomic/atom_max.h>
-#include <clc/opencl/atomic/atomic_max.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_max(AS TYPE *p, TYPE val) {
\
+ return atom_max((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_extended_atomics
-#define __CLC_ATOMIC_OP max
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
-#define __CLC_ATOMIC_OP max
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
-unsigned long __clc__sync_fetch_and_max_local_8(volatile local long *, long);
-unsigned long __clc__sync_fetch_and_max_global_8(volatile global long *, long);
-unsigned long __clc__sync_fetch_and_umax_local_8(volatile local unsigned long
*,
- unsigned long);
-unsigned long
-__clc__sync_fetch_and_umax_global_8(volatile global unsigned long *,
- unsigned long);
-
-#define __CLC_IMPL(AS, TYPE, OP)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) {
\
- return __clc__sync_fetch_and_##OP##_##AS##_8(p, val);
\
- }
-
-__CLC_IMPL(global, long, max)
-__CLC_IMPL(global, unsigned long, umax)
-__CLC_IMPL(local, long, max)
-__CLC_IMPL(local, unsigned long, umax)
-#undef __CLC_IMPL
+__CLC_IMPL(global, long)
+__CLC_IMPL(global, unsigned long)
+__CLC_IMPL(local, long)
+__CLC_IMPL(local, unsigned long)
#endif // cl_khr_int64_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_min.cl
b/libclc/opencl/lib/generic/atomic/atom_min.cl
index b52e34769cdd8..5e210ca8fa46b 100644
--- a/libclc/opencl/lib/generic/atomic/atom_min.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_min.cl
@@ -6,40 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_min.h>
#include <clc/opencl/atomic/atom_min.h>
-#include <clc/opencl/atomic/atomic_min.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_min(AS TYPE *p, TYPE val) {
\
+ return atom_min((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_extended_atomics
-#define __CLC_ATOMIC_OP min
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
-#define __CLC_ATOMIC_OP min
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
-unsigned long __clc__sync_fetch_and_min_local_8(volatile local long *, long);
-unsigned long __clc__sync_fetch_and_min_global_8(volatile global long *, long);
-unsigned long __clc__sync_fetch_and_umin_local_8(volatile local unsigned long
*,
- unsigned long);
-unsigned long
-__clc__sync_fetch_and_umin_global_8(volatile global unsigned long *,
- unsigned long);
-
-#define __CLC_IMPL(AS, TYPE, OP)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) {
\
- return __clc__sync_fetch_and_##OP##_##AS##_8(p, val);
\
- }
-
-__CLC_IMPL(global, long, min)
-__CLC_IMPL(global, unsigned long, umin)
-__CLC_IMPL(local, long, min)
-__CLC_IMPL(local, unsigned long, umin)
-#undef __CLC_IMPL
+__CLC_IMPL(global, long)
+__CLC_IMPL(global, unsigned long)
+__CLC_IMPL(local, long)
+__CLC_IMPL(local, unsigned long)
#endif // cl_khr_int64_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_or.cl
b/libclc/opencl/lib/generic/atomic/atom_or.cl
index fa9737f5f28ee..f0e8d93e38ce7 100644
--- a/libclc/opencl/lib/generic/atomic/atom_or.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_or.cl
@@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_or.h>
#include <clc/opencl/atomic/atom_or.h>
-#include <clc/opencl/atomic/atomic_or.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_or(AS TYPE *p, TYPE val) {
\
+ return atom_or((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_extended_atomics
-#define __CLC_ATOMIC_OP or
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
-#define __CLC_ATOMIC_OP or
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_or_8(p, val);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_sub.cl
b/libclc/opencl/lib/generic/atomic/atom_sub.cl
index 9a8acfa9116b8..0061554d19d9f 100644
--- a/libclc/opencl/lib/generic/atomic/atom_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_sub.cl
@@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_sub.h>
#include <clc/opencl/atomic/atom_sub.h>
-#include <clc/opencl/atomic/atomic_sub.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(AS TYPE *p, TYPE val) {
\
+ return atom_sub((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_base_atomics
-#define __CLC_ATOMIC_OP sub
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
-#define __CLC_ATOMIC_OP sub
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_sub_8(p, val);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_xchg.cl
b/libclc/opencl/lib/generic/atomic/atom_xchg.cl
index 03f8a9c466c5f..2001245f50fd5 100644
--- a/libclc/opencl/lib/generic/atomic/atom_xchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_xchg.cl
@@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_exchange.h>
#include <clc/opencl/atomic/atom_xchg.h>
-#include <clc/opencl/atomic/atomic_xchg.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(AS TYPE *p, TYPE val) {
\
+ return atom_xchg((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_base_atomics
-#define __CLC_ATOMIC_OP xchg
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_base_atomics
#ifdef cl_khr_local_int32_base_atomics
-#define __CLC_ATOMIC_OP xchg
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_base_atomics
#ifdef cl_khr_int64_base_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) {
\
- return __sync_swap_8(p, val);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_xor.cl
b/libclc/opencl/lib/generic/atomic/atom_xor.cl
index 392a4b794c698..359a1f4d4a932 100644
--- a/libclc/opencl/lib/generic/atomic/atom_xor.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_xor.cl
@@ -6,32 +6,35 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_xor.h>
#include <clc/opencl/atomic/atom_xor.h>
-#include <clc/opencl/atomic/atomic_xor.h>
+
+// Non-volatile overloads are for backward compatibility with OpenCL 1.0.
+
+#define __CLC_IMPL(AS, TYPE)
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) {
\
+ return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
+ }
\
+ _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(AS TYPE *p, TYPE val) {
\
+ return atom_xor((volatile AS TYPE *)p, val);
\
+ }
#ifdef cl_khr_global_int32_extended_atomics
-#define __CLC_ATOMIC_OP xor
-#define __CLC_ATOMIC_ADDRESS_SPACE global
-#include "atom_int32_binary.inc"
+__CLC_IMPL(global, int)
+__CLC_IMPL(global, unsigned int)
#endif // cl_khr_global_int32_extended_atomics
#ifdef cl_khr_local_int32_extended_atomics
-#define __CLC_ATOMIC_OP xor
-#define __CLC_ATOMIC_ADDRESS_SPACE local
-#include "atom_int32_binary.inc"
+__CLC_IMPL(local, int)
+__CLC_IMPL(local, unsigned int)
#endif // cl_khr_local_int32_extended_atomics
#ifdef cl_khr_int64_extended_atomics
-#define __CLC_IMPL(AS, TYPE)
\
- _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_xor_8(p, val);
\
- }
-
__CLC_IMPL(global, long)
__CLC_IMPL(global, unsigned long)
__CLC_IMPL(local, long)
__CLC_IMPL(local, unsigned long)
-#undef __CLC_IMPL
#endif // cl_khr_int64_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atomic_add.cl
b/libclc/opencl/lib/generic/atomic/atomic_add.cl
index d005c1dd6ac51..5501d30544e7c 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_add.cl
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_add.h>
#include <clc/opencl/atomic/atomic_add.h>
#define __CLC_IMPL(TYPE, AS)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_add(p, val);
\
+ return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_and.cl
b/libclc/opencl/lib/generic/atomic/atomic_and.cl
index 12558568b0e4e..ce1adbb6f8235 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_and.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_and.cl
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_and.h>
#include <clc/opencl/atomic/atomic_and.h>
#define __CLC_IMPL(TYPE, AS)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_and(p, val);
\
+ return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl
b/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl
index 1045020a553fc..16a8db43e9374 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl
@@ -6,12 +6,15 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_compare_exchange.h>
#include <clc/opencl/atomic/atomic_cmpxchg.h>
#define __CLC_IMPL(TYPE, AS)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp,
\
TYPE val) {
\
- return __sync_val_compare_and_swap(p, cmp, val);
\
+ return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED,
\
+ __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_max.cl
b/libclc/opencl/lib/generic/atomic/atomic_max.cl
index aa482a8f46397..362a0ed90ca0e 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_max.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_max.cl
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_max.h>
#include <clc/opencl/atomic/atomic_max.h>
#define __CLC_IMPL(TYPE, AS, OP)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_##OP(p, val);
\
+ return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global, max)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_min.cl
b/libclc/opencl/lib/generic/atomic/atomic_min.cl
index 7f39e94316846..1976be0014d70 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_min.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_min.cl
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_min.h>
#include <clc/opencl/atomic/atomic_min.h>
#define __CLC_IMPL(TYPE, AS, OP)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_##OP(p, val);
\
+ return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global, min)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_or.cl
b/libclc/opencl/lib/generic/atomic/atomic_or.cl
index ad14cd2178555..ef8bc00f45593 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_or.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_or.cl
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_or.h>
#include <clc/opencl/atomic/atomic_or.h>
#define __CLC_IMPL(TYPE, AS)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_or(p, val);
\
+ return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_sub.cl
b/libclc/opencl/lib/generic/atomic/atomic_sub.cl
index 2e51c4c2ce02f..397737d113c0d 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_sub.cl
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_sub.h>
#include <clc/opencl/atomic/atomic_sub.h>
#define __CLC_IMPL(TYPE, AS)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_sub(p, val);
\
+ return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_xchg.cl
b/libclc/opencl/lib/generic/atomic/atomic_xchg.cl
index 2585a5427392e..2b4bbf06d9400 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_xchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_xchg.cl
@@ -6,24 +6,19 @@
//
//===----------------------------------------------------------------------===//
-#include <clc/opencl/as_type.h>
+#include <clc/atomic/clc_atomic_exchange.h>
#include <clc/opencl/atomic/atomic_xchg.h>
-_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) {
- return as_float(atomic_xchg((volatile global uint *)p, as_uint(val)));
-}
-
-_CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) {
- return as_float(atomic_xchg((volatile local uint *)p, as_uint(val)));
-}
-
#define __CLC_IMPL(TYPE, AS)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) {
\
- return __sync_swap_4(p, val);
\
+ return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global)
__CLC_IMPL(unsigned int, global)
+__CLC_IMPL(float, global)
__CLC_IMPL(int, local)
__CLC_IMPL(unsigned int, local)
+__CLC_IMPL(float, local)
#undef __CLC_IMPL
diff --git a/libclc/opencl/lib/generic/atomic/atomic_xor.cl
b/libclc/opencl/lib/generic/atomic/atomic_xor.cl
index 0228134397464..1f200c58edbff 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_xor.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_xor.cl
@@ -6,11 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include <clc/atomic/clc_atomic_fetch_xor.h>
#include <clc/opencl/atomic/atomic_xor.h>
#define __CLC_IMPL(TYPE, AS)
\
_CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) {
\
- return __sync_fetch_and_xor(p, val);
\
+ return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED,
\
+ __MEMORY_SCOPE_DEVICE);
\
}
__CLC_IMPL(int, global)
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits