https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/185368
>From 2ddac580213229bbd43f19b26644c58e8981a374 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Sun, 8 Mar 2026 17:35:55 +0100
Subject: [PATCH] libclc: Add work_group_reduce_* functions

---
 .../clc/collective/clc_work_group_reduce.h    |  20 ++++
 .../clc/collective/clc_work_group_reduce.inc  |  19 ++++
 libclc/clc/lib/generic/CMakeLists.txt         |   1 +
 .../collective/clc_work_group_reduce.cl       |  32 ++++++
 .../collective/clc_work_group_reduce.inc      | 107 ++++++++++++++++++
 libclc/opencl/lib/generic/CMakeLists.txt      |   1 +
 .../generic/collective/work_group_reduce.cl   |  15 +++
 .../generic/collective/work_group_reduce.inc  |  25 ++++
 8 files changed, 220 insertions(+)
 create mode 100644 libclc/clc/include/clc/collective/clc_work_group_reduce.h
 create mode 100644 libclc/clc/include/clc/collective/clc_work_group_reduce.inc
 create mode 100644 libclc/clc/lib/generic/collective/clc_work_group_reduce.cl
 create mode 100644 libclc/clc/lib/generic/collective/clc_work_group_reduce.inc
 create mode 100644 libclc/opencl/lib/generic/collective/work_group_reduce.cl
 create mode 100644 libclc/opencl/lib/generic/collective/work_group_reduce.inc

diff --git a/libclc/clc/include/clc/collective/clc_work_group_reduce.h b/libclc/clc/include/clc/collective/clc_work_group_reduce.h
new file mode 100644
index 0000000000000..5b78ead50370b
--- /dev/null
+++ b/libclc/clc/include/clc/collective/clc_work_group_reduce.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_COLLECTIVE_CLC_WORK_GROUP_REDUCE_H__
+#define __CLC_COLLECTIVE_CLC_WORK_GROUP_REDUCE_H__
+
+#include "clc/internal/clc.h"
+
+#define __CLC_BODY <clc/collective/clc_work_group_reduce.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc/collective/clc_work_group_reduce.inc>
+#include <clc/math/gentype.inc>
+
+#endif // __CLC_COLLECTIVE_CLC_WORK_GROUP_REDUCE_H__
diff --git a/libclc/clc/include/clc/collective/clc_work_group_reduce.inc b/libclc/clc/include/clc/collective/clc_work_group_reduce.inc
new file mode 100644
index 0000000000000..a95112ca51831
--- /dev/null
+++ b/libclc/clc/include/clc/collective/clc_work_group_reduce.inc
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR) &&                                                   \
+    (defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64)
+// Not _CLC_CONST: the implementation synchronizes the work-group with barriers
+// and combines partial results through __local memory.
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__clc_work_group_reduce_add(__CLC_GENTYPE x);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__clc_work_group_reduce_min(__CLC_GENTYPE x);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__clc_work_group_reduce_max(__CLC_GENTYPE x);
+#endif
diff --git a/libclc/clc/lib/generic/CMakeLists.txt b/libclc/clc/lib/generic/CMakeLists.txt
index ac4799c035f73..ce7614726e21e 100644
--- a/libclc/clc/lib/generic/CMakeLists.txt
+++ b/libclc/clc/lib/generic/CMakeLists.txt
@@ -18,6 +18,7 @@ libclc_configure_source_list(CLC_GENERIC_SOURCES
   atomic/clc_atomic_store.cl
   collective/clc_work_group_any_all.cl
   collective/clc_work_group_broadcast.cl
+  collective/clc_work_group_reduce.cl
   common/clc_degrees.cl
   common/clc_radians.cl
   common/clc_sign.cl
diff --git a/libclc/clc/lib/generic/collective/clc_work_group_reduce.cl b/libclc/clc/lib/generic/collective/clc_work_group_reduce.cl
new file mode 100644
index 0000000000000..a28b5cf1c2c58
--- /dev/null
+++ b/libclc/clc/lib/generic/collective/clc_work_group_reduce.cl
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/atomic/clc_atomic_load.h"
+#include "clc/atomic/clc_atomic_store.h"
+#include "clc/collective/clc_work_group_reduce.h"
+#include "clc/subgroup/clc_sub_group_reduce.h"
+#include "clc/synchronization/clc_work_group_barrier.h"
+#include "clc/workitem/clc_get_num_sub_groups.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+#include "clc/workitem/clc_get_sub_group_local_id.h"
+
+// The included implementation declares a function-scope __local variable.
+#pragma OPENCL EXTENSION __cl_clang_function_scope_local_variables : enable
+
+// Selects the reduction performed by the helpers in clc_work_group_reduce.inc.
+enum __CLC_WORK_GROUP_REDUCE_OP {
+  __CLC_WORK_GROUP_REDUCE_ADD,
+  __CLC_WORK_GROUP_REDUCE_MIN,
+  __CLC_WORK_GROUP_REDUCE_MAX
+};
+
+#define __CLC_BODY <clc_work_group_reduce.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <clc_work_group_reduce.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/collective/clc_work_group_reduce.inc b/libclc/clc/lib/generic/collective/clc_work_group_reduce.inc
new file mode 100644
index 0000000000000..08870b7460fb5
--- /dev/null
+++ b/libclc/clc/lib/generic/collective/clc_work_group_reduce.inc
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR) &&                                                   \
+    (defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64)
+
+// Reduce x across the calling sub-group with the operation selected by opcode.
+_CLC_OVERLOAD _CLC_CONST static __CLC_GENTYPE
+sub_group_reduce_op(__CLC_GENTYPE x, enum __CLC_WORK_GROUP_REDUCE_OP opcode) {
+  switch (opcode) {
+  case __CLC_WORK_GROUP_REDUCE_ADD:
+    return __clc_sub_group_reduce_add(x);
+  case __CLC_WORK_GROUP_REDUCE_MIN:
+    return __clc_sub_group_reduce_min(x);
+  case __CLC_WORK_GROUP_REDUCE_MAX:
+    return __clc_sub_group_reduce_max(x);
+  }
+  // All enumerators return above; never fall off the end of a non-void
+  // function.
+  __builtin_unreachable();
+}
+
+// Atomically combine x into *ptr with the operation selected by opcode, using
+// relaxed ordering at work-group scope, and return the builtin's result.
+_CLC_OVERLOAD
+static __CLC_GENTYPE atomic_reduce_op(__local __CLC_GENTYPE *ptr,
+                                      __CLC_GENTYPE x,
+                                      enum __CLC_WORK_GROUP_REDUCE_OP opcode) {
+  switch (opcode) {
+  case __CLC_WORK_GROUP_REDUCE_ADD:
+    return __scoped_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED,
+                                     __MEMORY_SCOPE_WRKGRP);
+  case __CLC_WORK_GROUP_REDUCE_MIN:
+    return __scoped_atomic_fetch_min(ptr, x, __ATOMIC_RELAXED,
+                                     __MEMORY_SCOPE_WRKGRP);
+  case __CLC_WORK_GROUP_REDUCE_MAX:
+    return __scoped_atomic_fetch_max(ptr, x, __ATOMIC_RELAXED,
+                                     __MEMORY_SCOPE_WRKGRP);
+  }
+  // All enumerators return above; never fall off the end of a non-void
+  // function.
+  __builtin_unreachable();
+}
+
+// Shared implementation of the work-group reductions.
+//
+// Each sub-group first reduces its own values. When the work-group has more
+// than one sub-group, the partial results are combined in a __local scratch
+// variable: sub-group 0 stores its result and the other sub-groups merge
+// theirs in atomically.
+_CLC_OVERLOAD
+static __CLC_GENTYPE
+__clc_work_group_reduce_impl(__CLC_GENTYPE a,
+                             enum __CLC_WORK_GROUP_REDUCE_OP opcode) {
+  uint n = __clc_get_num_sub_groups();
+  a = sub_group_reduce_op(a, opcode);
+  if (n == 1)
+    return a;
+
+  __local __CLC_GENTYPE scratch;
+  uint l = __clc_get_sub_group_local_id();
+  uint i = __clc_get_sub_group_id();
+
+  // Work-item 0 of sub-group 0 seeds the accumulator.
+  if ((i == 0) & (l == 0)) {
+    __scoped_atomic_store_n(&scratch, a, __ATOMIC_RELAXED,
+                            __MEMORY_SCOPE_WRKGRP);
+  }
+
+  // After the seed is stored, work-item 0 of every other sub-group merges its
+  // partial result.
+  __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL);
+  if ((i != 0) & (l == 0)) {
+    atomic_reduce_op(&scratch, a, opcode);
+  }
+
+  __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL);
+  __CLC_GENTYPE ret =
+      __scoped_atomic_load_n(&scratch, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
+  // Presumably keeps scratch unchanged until every work-item has loaded it,
+  // since a following reduction writes the same variable.
+  __clc_work_group_barrier(__MEMORY_SCOPE_WRKGRP, __CLC_MEMORY_LOCAL);
+  return ret;
+}
+
+// Public entry points: one thin wrapper per reduction operation.
+_CLC_OVERLOAD
+__CLC_GENTYPE __clc_work_group_reduce_add(__CLC_GENTYPE a) {
+  return __clc_work_group_reduce_impl(a, __CLC_WORK_GROUP_REDUCE_ADD);
+}
+
+_CLC_OVERLOAD
+__CLC_GENTYPE __clc_work_group_reduce_min(__CLC_GENTYPE a) {
+  return __clc_work_group_reduce_impl(a, __CLC_WORK_GROUP_REDUCE_MIN);
+}
+
+_CLC_OVERLOAD
+__CLC_GENTYPE __clc_work_group_reduce_max(__CLC_GENTYPE a) {
+  return __clc_work_group_reduce_impl(a, __CLC_WORK_GROUP_REDUCE_MAX);
+}
+
+#endif
diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt
index d380b8b6becfa..ca95e6d5cfe23 100644
--- a/libclc/opencl/lib/generic/CMakeLists.txt
+++ b/libclc/opencl/lib/generic/CMakeLists.txt
@@ -45,6 +45,7 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   atomic/atomic_work_item_fence.cl
   collective/work_group_any_all.cl
   collective/work_group_broadcast.cl
+  collective/work_group_reduce.cl
   common/degrees.cl
   common/mix.cl
   common/radians.cl
diff --git a/libclc/opencl/lib/generic/collective/work_group_reduce.cl b/libclc/opencl/lib/generic/collective/work_group_reduce.cl
new file mode 100644
index 0000000000000..219106432cfc6
--- /dev/null
+++ b/libclc/opencl/lib/generic/collective/work_group_reduce.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/collective/clc_work_group_reduce.h"
+
+#define __CLC_BODY <work_group_reduce.inc>
+#include <clc/integer/gentype.inc>
+
+#define __CLC_BODY <work_group_reduce.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/opencl/lib/generic/collective/work_group_reduce.inc b/libclc/opencl/lib/generic/collective/work_group_reduce.inc
new file mode 100644
index 0000000000000..8a3993ef68587
--- /dev/null
+++ b/libclc/opencl/lib/generic/collective/work_group_reduce.inc
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR) &&                                                   \
+    (defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64)
+
+// OpenCL work_group_reduce_* builtins; each forwards to its __clc_ counterpart.
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE work_group_reduce_add(__CLC_GENTYPE a) {
+  return __clc_work_group_reduce_add(a);
+}
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE work_group_reduce_min(__CLC_GENTYPE a) {
+  return __clc_work_group_reduce_min(a);
+}
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE work_group_reduce_max(__CLC_GENTYPE a) {
+  return __clc_work_group_reduce_max(a);
+}
+
+#endif
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
