https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/137932
>From cafb374de8d77c82fa450b732a122663090f6e34 Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Wed, 30 Apr 2025 00:44:50 -0700 Subject: [PATCH 1/3] [libclc] Add v3 variants of async_work_group_copy/async_work_group_strided_copy/prefetch 3-component vector type is supported for them per OpenCL spec. --- libclc/generic/include/clc/async/gentype.inc | 44 ++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/libclc/generic/include/clc/async/gentype.inc b/libclc/generic/include/clc/async/gentype.inc index 1114883e1ad35..e023c8bbd97d2 100644 --- a/libclc/generic/include/clc/async/gentype.inc +++ b/libclc/generic/include/clc/async/gentype.inc @@ -14,6 +14,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE char3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE char4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -34,6 +38,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE uchar3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE uchar4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -54,6 +62,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE short3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE short4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -74,6 +86,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE ushort3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE ushort4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -94,6 +110,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE int3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE int4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -114,6 +134,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE uint3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE uint4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -134,6 +158,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE float3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE float4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -154,6 +182,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE long3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE long4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -174,6 +206,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE ulong3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE ulong4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -197,6 +233,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE double3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE double4 #include __CLC_BODY #undef __CLC_GENTYPE @@ -222,6 +262,10 @@ #include __CLC_BODY #undef __CLC_GENTYPE +#define __CLC_GENTYPE half3 +#include __CLC_BODY +#undef __CLC_GENTYPE + #define __CLC_GENTYPE half4 #include __CLC_BODY #undef __CLC_GENTYPE >From 1def8b25f6cbd5e3128f697393fa52ff1d97e90b Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Wed, 30 Apr 2025 02:32:40 -0700 Subject: [PATCH 2/3] delete async gentype.inc, use integer and math gentype.inc --- .../include/clc/async/async_work_group_copy.h | 20 +- .../clc/async/async_work_group_strided_copy.h | 20 +- libclc/generic/include/clc/async/gentype.inc | 283 ------------------ libclc/generic/include/clc/async/prefetch.h | 6 +- .../lib/async/async_work_group_copy.cl | 7 +- .../async/async_work_group_strided_copy.cl | 7 +- libclc/generic/lib/async/prefetch.cl | 7 +- 7 files changed, 59 insertions(+), 291 deletions(-) delete mode 100644 libclc/generic/include/clc/async/gentype.inc diff --git a/libclc/generic/include/clc/async/async_work_group_copy.h b/libclc/generic/include/clc/async/async_work_group_copy.h index a2c4e353ce469..1af31056e62f3 100644 --- a/libclc/generic/include/clc/async/async_work_group_copy.h +++ b/libclc/generic/include/clc/async/async_work_group_copy.h @@ -9,7 +9,23 @@ #define __CLC_DST_ADDR_SPACE local #define __CLC_SRC_ADDR_SPACE global #define __CLC_BODY <clc/async/async_work_group_copy.inc> -#include <clc/async/gentype.inc> +#include <clc/integer/gentype.inc> +#undef __CLC_DST_ADDR_SPACE +#undef __CLC_SRC_ADDR_SPACE +#undef __CLC_BODY + +#define __CLC_DST_ADDR_SPACE local +#define __CLC_SRC_ADDR_SPACE global +#define __CLC_BODY <clc/async/async_work_group_copy.inc> +#include <clc/math/gentype.inc> +#undef __CLC_DST_ADDR_SPACE +#undef __CLC_SRC_ADDR_SPACE +#undef __CLC_BODY + +#define __CLC_DST_ADDR_SPACE global +#define __CLC_SRC_ADDR_SPACE local +#define __CLC_BODY <clc/async/async_work_group_copy.inc> +#include <clc/integer/gentype.inc> #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY @@ -17,7 +33,7 @@ #define __CLC_DST_ADDR_SPACE global #define __CLC_SRC_ADDR_SPACE local #define __CLC_BODY <clc/async/async_work_group_copy.inc> -#include <clc/async/gentype.inc> +#include <clc/math/gentype.inc> #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.h b/libclc/generic/include/clc/async/async_work_group_strided_copy.h index ab97d203d3100..9289ba0a4ff65 100644 --- a/libclc/generic/include/clc/async/async_work_group_strided_copy.h +++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.h @@ -9,7 +9,23 @@ #define __CLC_DST_ADDR_SPACE local #define __CLC_SRC_ADDR_SPACE global #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> -#include <clc/async/gentype.inc> +#include <clc/integer/gentype.inc> +#undef __CLC_DST_ADDR_SPACE +#undef __CLC_SRC_ADDR_SPACE +#undef __CLC_BODY + +#define __CLC_DST_ADDR_SPACE local +#define __CLC_SRC_ADDR_SPACE global +#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> +#include <clc/math/gentype.inc> +#undef __CLC_DST_ADDR_SPACE +#undef __CLC_SRC_ADDR_SPACE +#undef __CLC_BODY + +#define __CLC_DST_ADDR_SPACE global +#define __CLC_SRC_ADDR_SPACE local +#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> +#include <clc/integer/gentype.inc> #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY @@ -17,7 +33,7 @@ #define __CLC_DST_ADDR_SPACE global #define __CLC_SRC_ADDR_SPACE local #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> -#include <clc/async/gentype.inc> +#include <clc/math/gentype.inc> #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY diff --git a/libclc/generic/include/clc/async/gentype.inc b/libclc/generic/include/clc/async/gentype.inc deleted file mode 100644 index e023c8bbd97d2..0000000000000 --- a/libclc/generic/include/clc/async/gentype.inc +++ /dev/null @@ -1,283 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define __CLC_GENTYPE char -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE char2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE char3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE char4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE char8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE char16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uchar -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uchar2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uchar3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uchar4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uchar8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uchar16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE short -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE short2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE short3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE short4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE short8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE short16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ushort -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ushort2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ushort3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ushort4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ushort8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ushort16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE int -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE int2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE int3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE int4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE int8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE int16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uint -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uint2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uint3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uint4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uint8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE uint16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE float -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE float2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE float3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE float4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE float8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE float16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE long -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE long2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE long3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE long4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE long8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE long16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ulong -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ulong2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ulong3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ulong4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ulong8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE ulong16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -#define __CLC_GENTYPE double -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE double2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE double3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE double4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE double8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE double16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16: enable - -#define __CLC_GENTYPE half -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE half2 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE half3 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE half4 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE half8 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#define __CLC_GENTYPE half16 -#include __CLC_BODY -#undef __CLC_GENTYPE - -#endif - -#undef __CLC_BODY diff --git a/libclc/generic/include/clc/async/prefetch.h b/libclc/generic/include/clc/async/prefetch.h index 50cbc05a2c7e7..9cc32b267111b 100644 --- a/libclc/generic/include/clc/async/prefetch.h +++ b/libclc/generic/include/clc/async/prefetch.h @@ -7,5 +7,9 @@ //===----------------------------------------------------------------------===// #define __CLC_BODY <clc/async/prefetch.inc> -#include <clc/async/gentype.inc> +#include <clc/integer/gentype.inc> +#undef __CLC_BODY + +#define __CLC_BODY <clc/async/prefetch.inc> +#include <clc/math/gentype.inc> #undef __CLC_BODY diff --git a/libclc/generic/lib/async/async_work_group_copy.cl b/libclc/generic/lib/async/async_work_group_copy.cl index 8c591562f499d..94452ef51c892 100644 --- a/libclc/generic/lib/async/async_work_group_copy.cl +++ b/libclc/generic/lib/async/async_work_group_copy.cl @@ -9,4 +9,9 @@ #include <clc/clc.h> #define __CLC_BODY <async_work_group_copy.inc> -#include <clc/async/gentype.inc> +#include <clc/integer/gentype.inc> +#undef __CLC_BODY + +#define __CLC_BODY <async_work_group_copy.inc> +#include <clc/math/gentype.inc> +#undef __CLC_BODY diff --git a/libclc/generic/lib/async/async_work_group_strided_copy.cl b/libclc/generic/lib/async/async_work_group_strided_copy.cl index 2083b357aac66..79aaf1f1c1337 100644 --- a/libclc/generic/lib/async/async_work_group_strided_copy.cl +++ b/libclc/generic/lib/async/async_work_group_strided_copy.cl @@ -9,4 +9,9 @@ #include <clc/clc.h> #define __CLC_BODY <async_work_group_strided_copy.inc> -#include <clc/async/gentype.inc> +#include <clc/integer/gentype.inc> +#undef __CLC_BODY + +#define __CLC_BODY <async_work_group_strided_copy.inc> +#include <clc/math/gentype.inc> +#undef __CLC_BODY diff --git a/libclc/generic/lib/async/prefetch.cl b/libclc/generic/lib/async/prefetch.cl index 58b1cb015fd71..116cde0360879 100644 --- a/libclc/generic/lib/async/prefetch.cl +++ b/libclc/generic/lib/async/prefetch.cl @@ -9,4 +9,9 @@ #include <clc/clc.h> #define __CLC_BODY <prefetch.inc> -#include <clc/async/gentype.inc> +#include <clc/integer/gentype.inc> +#undef __CLC_BODY + +#define __CLC_BODY <prefetch.inc> +#include <clc/math/gentype.inc> +#undef __CLC_BODY >From 6c412bedc4045ea59a636f1aa9a44a4777495490 Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Wed, 30 Apr 2025 03:36:39 -0700 Subject: [PATCH 3/3] remove redundant undef __CLC_DST_ADDR_SPACE __CLC_SRC_ADDR_SPACE --- .../include/clc/async/async_work_group_copy.h | 14 ++------------ .../clc/async/async_work_group_strided_copy.h | 14 ++------------ 2 files changed, 4 insertions(+), 24 deletions(-) diff --git a/libclc/generic/include/clc/async/async_work_group_copy.h b/libclc/generic/include/clc/async/async_work_group_copy.h index 1af31056e62f3..e0d63e2ce34c6 100644 --- a/libclc/generic/include/clc/async/async_work_group_copy.h +++ b/libclc/generic/include/clc/async/async_work_group_copy.h @@ -10,30 +10,20 @@ #define __CLC_SRC_ADDR_SPACE global #define __CLC_BODY <clc/async/async_work_group_copy.inc> #include <clc/integer/gentype.inc> -#undef __CLC_DST_ADDR_SPACE -#undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY - -#define __CLC_DST_ADDR_SPACE local -#define __CLC_SRC_ADDR_SPACE global #define __CLC_BODY <clc/async/async_work_group_copy.inc> #include <clc/math/gentype.inc> +#undef __CLC_BODY #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE -#undef __CLC_BODY #define __CLC_DST_ADDR_SPACE global #define __CLC_SRC_ADDR_SPACE local #define __CLC_BODY <clc/async/async_work_group_copy.inc> #include <clc/integer/gentype.inc> -#undef __CLC_DST_ADDR_SPACE -#undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY - -#define __CLC_DST_ADDR_SPACE global -#define __CLC_SRC_ADDR_SPACE local #define __CLC_BODY <clc/async/async_work_group_copy.inc> #include <clc/math/gentype.inc> +#undef __CLC_BODY #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE -#undef __CLC_BODY diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.h b/libclc/generic/include/clc/async/async_work_group_strided_copy.h index 9289ba0a4ff65..a0a0c7bb425a2 100644 --- a/libclc/generic/include/clc/async/async_work_group_strided_copy.h +++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.h @@ -10,30 +10,20 @@ #define __CLC_SRC_ADDR_SPACE global #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> #include <clc/integer/gentype.inc> -#undef __CLC_DST_ADDR_SPACE -#undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY - -#define __CLC_DST_ADDR_SPACE local -#define __CLC_SRC_ADDR_SPACE global #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> #include <clc/math/gentype.inc> +#undef __CLC_BODY #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE -#undef __CLC_BODY #define __CLC_DST_ADDR_SPACE global #define __CLC_SRC_ADDR_SPACE local #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> #include <clc/integer/gentype.inc> -#undef __CLC_DST_ADDR_SPACE -#undef __CLC_SRC_ADDR_SPACE #undef __CLC_BODY - -#define __CLC_DST_ADDR_SPACE global -#define __CLC_SRC_ADDR_SPACE local #define __CLC_BODY <clc/async/async_work_group_strided_copy.inc> #include <clc/math/gentype.inc> +#undef __CLC_BODY #undef __CLC_DST_ADDR_SPACE #undef __CLC_SRC_ADDR_SPACE -#undef __CLC_BODY _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits