Author: Fraser Cormack Date: 2025-04-09T15:52:25+01:00 New Revision: b0338c3d6c6d9787232e07ceff56ef161c357602
URL: https://github.com/llvm/llvm-project/commit/b0338c3d6c6d9787232e07ceff56ef161c357602 DIFF: https://github.com/llvm/llvm-project/commit/b0338c3d6c6d9787232e07ceff56ef161c357602.diff LOG: [libclc] Move shuffle/shuffle2 to the CLC library (#135000) This commit moves the shuffle and shuffle2 builtins to the CLC library. In so doing, it makes the headers simpler and re-usable, so that other builtin layers can hook into the CLC functions if they wish. An additional gentype utility, __CLC_VECSIZE_OR_1, has been made available; it expands to the vector size for vector types and to 1 for scalar types. For scalar types the existing __CLC_VECSIZE is defined but empty, which is useful in certain applications, such as concatenation with a type name to make a correctly sized scalar or vector type. However, __CLC_VECSIZE cannot be used in preprocessor conditionals that check for specific vector sizes: for scalar types, e.g., '__CLC_VECSIZE == 2' expands to '== 2', which is invalid. In local testing the new macro is also useful for the geometric builtins, which are only available for scalar types and vector types of 2, 3, or 4 elements. No codegen changes are observed, except that the internal shuffle/shuffle2 utility functions are no longer made publicly available. 
Added: libclc/clc/include/clc/misc/clc_shuffle.h libclc/clc/include/clc/misc/clc_shuffle2.h libclc/clc/include/clc/misc/shuffle2_decl.inc libclc/clc/include/clc/misc/shuffle2_def.inc libclc/clc/include/clc/misc/shuffle_decl.inc libclc/clc/include/clc/misc/shuffle_def.inc libclc/clc/lib/generic/misc/clc_shuffle.cl libclc/clc/lib/generic/misc/clc_shuffle2.cl Modified: libclc/clc/include/clc/integer/gentype.inc libclc/clc/include/clc/math/gentype.inc libclc/clc/lib/generic/SOURCES libclc/generic/include/clc/misc/shuffle.h libclc/generic/lib/misc/shuffle.cl libclc/generic/lib/misc/shuffle2.cl Removed: ################################################################################ diff --git a/libclc/clc/include/clc/integer/gentype.inc b/libclc/clc/include/clc/integer/gentype.inc index 000e75cede6f7..b2b603e272573 100644 --- a/libclc/clc/include/clc/integer/gentype.inc +++ b/libclc/clc/include/clc/integer/gentype.inc @@ -27,15 +27,19 @@ #define __CLC_GENTYPE char #define __CLC_U_GENTYPE uchar #define __CLC_S_GENTYPE char -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE char2 #define __CLC_U_GENTYPE uchar2 #define __CLC_S_GENTYPE char2 @@ -85,6 +89,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_SCALAR_GENTYPE #define __CLC_SCALAR_GENTYPE uchar @@ -92,15 +97,19 @@ #define __CLC_GENTYPE uchar #define __CLC_U_GENTYPE uchar #define __CLC_S_GENTYPE char -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + 
#define __CLC_GENTYPE uchar2 #define __CLC_U_GENTYPE uchar2 #define __CLC_S_GENTYPE char2 @@ -150,6 +159,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_GENSIZE #define __CLC_GENSIZE 16 @@ -159,15 +169,19 @@ #define __CLC_GENTYPE short #define __CLC_U_GENTYPE ushort #define __CLC_S_GENTYPE short -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE short2 #define __CLC_U_GENTYPE ushort2 #define __CLC_S_GENTYPE short2 @@ -217,6 +231,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_SCALAR_GENTYPE #define __CLC_SCALAR_GENTYPE ushort @@ -224,15 +239,19 @@ #define __CLC_GENTYPE ushort #define __CLC_U_GENTYPE ushort #define __CLC_S_GENTYPE short -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE ushort2 #define __CLC_U_GENTYPE ushort2 #define __CLC_S_GENTYPE short2 @@ -282,6 +301,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_GENSIZE #define __CLC_GENSIZE 32 @@ -291,15 +311,19 @@ #define __CLC_GENTYPE int #define __CLC_U_GENTYPE uint #define __CLC_S_GENTYPE int -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define 
__CLC_GENTYPE int2 #define __CLC_U_GENTYPE uint2 #define __CLC_S_GENTYPE int2 @@ -349,6 +373,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_SCALAR_GENTYPE #define __CLC_SCALAR_GENTYPE uint @@ -356,15 +381,19 @@ #define __CLC_GENTYPE uint #define __CLC_U_GENTYPE uint #define __CLC_S_GENTYPE int -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE uint2 #define __CLC_U_GENTYPE uint2 #define __CLC_S_GENTYPE int2 @@ -414,6 +443,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_GENSIZE #define __CLC_GENSIZE 64 @@ -423,15 +453,19 @@ #define __CLC_GENTYPE long #define __CLC_U_GENTYPE ulong #define __CLC_S_GENTYPE long -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE long2 #define __CLC_U_GENTYPE ulong2 #define __CLC_S_GENTYPE long2 @@ -481,6 +515,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_SCALAR_GENTYPE #define __CLC_SCALAR_GENTYPE ulong @@ -488,15 +523,20 @@ #define __CLC_GENTYPE ulong #define __CLC_U_GENTYPE ulong #define __CLC_S_GENTYPE long -#define __CLC_SCALAR 1 +#define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#define __CLC_VECSIZE_OR_1 
__CLC_VECSIZE + #define __CLC_GENTYPE ulong2 #define __CLC_U_GENTYPE ulong2 #define __CLC_S_GENTYPE long2 @@ -546,6 +586,7 @@ #undef __CLC_GENTYPE #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE +#undef __CLC_VECSIZE_OR_1 #undef __CLC_GENSIZE #undef __CLC_SCALAR_GENTYPE diff --git a/libclc/clc/include/clc/math/gentype.inc b/libclc/clc/include/clc/math/gentype.inc index f65ccc2d42f71..0c7dd5b220b9c 100644 --- a/libclc/clc/include/clc/math/gentype.inc +++ b/libclc/clc/include/clc/math/gentype.inc @@ -81,12 +81,16 @@ #define __CLC_BIT_INTN int #define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_GENTYPE #undef __CLC_BIT_INTN #undef __CLC_SCALAR +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE float2 #define __CLC_BIT_INTN int2 #define __CLC_VECSIZE 2 @@ -127,6 +131,7 @@ #undef __CLC_GENTYPE #undef __CLC_BIT_INTN +#undef __CLC_VECSIZE_OR_1 #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE #undef __CLC_GENTYPE_NAN @@ -150,14 +155,18 @@ #define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #define __CLC_GENTYPE double #define __CLC_BIT_INTN long #include __CLC_BODY +#undef __CLC_VECSIZE_OR_1 #undef __CLC_GENTYPE #undef __CLC_BIT_INTN #undef __CLC_VECSIZE #undef __CLC_SCALAR +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE double2 #define __CLC_BIT_INTN long2 #define __CLC_VECSIZE 2 @@ -198,6 +207,7 @@ #undef __CLC_GENTYPE #undef __CLC_BIT_INTN +#undef __CLC_VECSIZE_OR_1 #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE #undef __CLC_GENTYPE_NAN @@ -221,14 +231,18 @@ #define __CLC_SCALAR #define __CLC_VECSIZE +#define __CLC_VECSIZE_OR_1 1 #define __CLC_GENTYPE half #define __CLC_BIT_INTN short #include __CLC_BODY #undef __CLC_GENTYPE #undef __CLC_BIT_INTN +#undef __CLC_VECSIZE_OR_1 #undef __CLC_VECSIZE #undef __CLC_SCALAR +#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE + #define __CLC_GENTYPE half2 #define __CLC_BIT_INTN short2 #define 
__CLC_VECSIZE 2 @@ -269,6 +283,7 @@ #undef __CLC_GENTYPE #undef __CLC_BIT_INTN +#undef __CLC_VECSIZE_OR_1 #undef __CLC_U_GENTYPE #undef __CLC_S_GENTYPE #undef __CLC_GENTYPE_NAN diff --git a/libclc/clc/include/clc/misc/clc_shuffle.h b/libclc/clc/include/clc/misc/clc_shuffle.h new file mode 100644 index 0000000000000..ba3d7a2bddd62 --- /dev/null +++ b/libclc/clc/include/clc/misc/clc_shuffle.h @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MISC_CLC_SHUFFLE_H__ +#define __CLC_MISC_CLC_SHUFFLE_H__ + +#define __CLC_FUNCTION __clc_shuffle + +// Integer-type decls +#define __CLC_BODY <clc/misc/shuffle_decl.inc> +#include <clc/integer/gentype.inc> + +// Floating-point decls +#define __CLC_BODY <clc/misc/shuffle_decl.inc> +#include <clc/math/gentype.inc> + +#undef __CLC_FUNCTION + +#endif // __CLC_MISC_CLC_SHUFFLE_H__ diff --git a/libclc/clc/include/clc/misc/clc_shuffle2.h b/libclc/clc/include/clc/misc/clc_shuffle2.h new file mode 100644 index 0000000000000..84bafba12b211 --- /dev/null +++ b/libclc/clc/include/clc/misc/clc_shuffle2.h @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_MISC_CLC_SHUFFLE2_H__ +#define __CLC_MISC_CLC_SHUFFLE2_H__ + +#define __CLC_FUNCTION __clc_shuffle2 + +// Integer-type decls +#define __CLC_BODY <clc/misc/shuffle2_decl.inc> +#include <clc/integer/gentype.inc> + +// Floating-point decls +#define __CLC_BODY <clc/misc/shuffle2_decl.inc> +#include <clc/math/gentype.inc> + +#undef __CLC_FUNCTION + +#endif // __CLC_MISC_CLC_SHUFFLE2_H__ diff --git a/libclc/clc/include/clc/misc/shuffle2_decl.inc b/libclc/clc/include/clc/misc/shuffle2_decl.inc new file mode 100644 index 0000000000000..2fc992ef2c177 --- /dev/null +++ b/libclc/clc/include/clc/misc/shuffle2_decl.inc @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \ + __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16) + +// The return type is same base type as the input type, with the same vector +// size as the mask. Elements in the mask must be the same size (number of bits) +// as the input value., e.g. 
char8 ret = shuffle(char2 x, uchar8 mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask); + +#endif diff --git a/libclc/clc/include/clc/misc/shuffle2_def.inc b/libclc/clc/include/clc/misc/shuffle2_def.inc new file mode 100644 index 0000000000000..099a175d665dd --- /dev/null +++ b/libclc/clc/include/clc/misc/shuffle2_def.inc @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \ + __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16) + +#include <clc/utils.h> + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +// The return type is same base type as the input type, with the same vector +// size as the mask. Elements in the mask must be the same size (number of bits) +// as the input value., e.g. 
char8 ret = shuffle(char2 x, uchar8 mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, y, mask); +} +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, y, mask); +} +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, y, mask); +} +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, + __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, y, mask); +} + +#endif diff --git a/libclc/clc/include/clc/misc/shuffle_decl.inc b/libclc/clc/include/clc/misc/shuffle_decl.inc new file mode 100644 index 0000000000000..5e7e5b24c2873 --- /dev/null +++ b/libclc/clc/include/clc/misc/shuffle_decl.inc @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \ + __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16) + +// The return type is same base type as the input type, with the same vector +// size as the mask. Elements in the mask must be the same size (number of bits) +// as the input value., e.g. 
char8 ret = shuffle(char2 x, uchar8 mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask); + +#endif diff --git a/libclc/clc/include/clc/misc/shuffle_def.inc b/libclc/clc/include/clc/misc/shuffle_def.inc new file mode 100644 index 0000000000000..84c873e6458ec --- /dev/null +++ b/libclc/clc/include/clc/misc/shuffle_def.inc @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 || \ + __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16) + +#include <clc/utils.h> + +#ifndef __CLC_FUNCTION +#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x) +#endif + +// The return type is same base type as the input type, with the same vector +// size as the mask. Elements in the mask must be the same size (number of bits) +// as the input value., e.g. 
char8 ret = shuffle(char2 x, uchar8 mask); +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, mask); +} +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, mask); +} +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, mask); +} +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE +FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask) { + return __CLC_FUNCTION(FUNCTION)(x, mask); +} + +#endif diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 2e9878e6eaa4e..4503a20ad9848 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -89,6 +89,8 @@ math/clc_tanh.cl math/clc_tanpi.cl math/clc_tgamma.cl math/clc_trunc.cl +misc/clc_shuffle.cl +misc/clc_shuffle2.cl relational/clc_all.cl relational/clc_any.cl relational/clc_bitselect.cl diff --git a/libclc/clc/lib/generic/misc/clc_shuffle.cl b/libclc/clc/lib/generic/misc/clc_shuffle.cl new file mode 100644 index 0000000000000..626a94df08131 --- /dev/null +++ b/libclc/clc/lib/generic/misc/clc_shuffle.cl @@ -0,0 +1,173 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/internal/clc.h> + +#define _CLC_ELEMENT_CASES2(VAR) \ + case 0: \ + return VAR.s0; \ + case 1: \ + return VAR.s1; + +#define _CLC_ELEMENT_CASES4(VAR) \ + _CLC_ELEMENT_CASES2(VAR) \ + case 2: \ + return VAR.s2; \ + case 3: \ + return VAR.s3; + +#define _CLC_ELEMENT_CASES8(VAR) \ + _CLC_ELEMENT_CASES4(VAR) \ + case 4: \ + return VAR.s4; \ + case 5: \ + return VAR.s5; \ + case 6: \ + return VAR.s6; \ + case 7: \ + return VAR.s7; + +#define _CLC_ELEMENT_CASES16(VAR) \ + _CLC_ELEMENT_CASES8(VAR) \ + case 8: \ + return VAR.s8; \ + case 9: \ + return VAR.s9; \ + case 10: \ + return VAR.sA; \ + case 11: \ + return VAR.sB; \ + case 12: \ + return VAR.sC; \ + case 13: \ + return VAR.sD; \ + case 14: \ + return VAR.sE; \ + case 15: \ + return VAR.sF; + +#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \ + inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE( \ + ARGTYPE##ARGSIZE x, IDXTYPE idx) { \ + switch (idx) { _CLC_ELEMENT_CASES##ARGSIZE(x) default : return 0; } \ + } + +#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \ + ret_val.s##INDEX = \ + __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX); + +#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0); \ + ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1); + +#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2); \ + ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3); + +#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s4 = 
__clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4); \ + ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5); \ + ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6); \ + ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7); + +#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \ + ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \ + ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \ + ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \ + ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \ + ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \ + ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \ + ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF); + +#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_DEF _CLC_OVERLOAD ARGTYPE##2 __clc_shuffle(ARGTYPE##ARGSIZE x, \ + MASKTYPE##2 mask) { \ + ARGTYPE##2 ret_val; \ + mask &= (MASKTYPE##2)(ARGSIZE - 1); \ + _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_DEF _CLC_OVERLOAD ARGTYPE##4 __clc_shuffle(ARGTYPE##ARGSIZE x, \ + MASKTYPE##4 mask) { \ + ARGTYPE##4 ret_val; \ + mask &= (MASKTYPE##4)(ARGSIZE - 1); \ + _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_DEF _CLC_OVERLOAD ARGTYPE##8 __clc_shuffle(ARGTYPE##ARGSIZE x, \ + MASKTYPE##8 mask) { \ + ARGTYPE##8 ret_val; \ + mask &= (MASKTYPE##8)(ARGSIZE - 1); \ + _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \ + 
_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 __clc_shuffle(ARGTYPE##ARGSIZE x, \ + MASKTYPE##16 mask) { \ + ARGTYPE##16 ret_val; \ + mask &= (MASKTYPE##16)(ARGSIZE - 1); \ + _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) + +#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) + +_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar) +_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort) +_CLC_VECTOR_SHUFFLE_INSIZE(int, uint) +_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong) +_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar) +_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort) +_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint) +_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong) +_CLC_VECTOR_SHUFFLE_INSIZE(float, uint) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong) +#endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort) +#endif + +#undef _CLC_ELEMENT_CASES2 +#undef _CLC_ELEMENT_CASES4 +#undef _CLC_ELEMENT_CASES8 +#undef _CLC_ELEMENT_CASES16 +#undef _CLC_GET_ELEMENT_DEFINE +#undef _CLC_SHUFFLE_SET_ONE_ELEMENT +#undef _CLC_SHUFFLE_SET_2_ELEMENTS +#undef _CLC_SHUFFLE_SET_4_ELEMENTS +#undef _CLC_SHUFFLE_SET_8_ELEMENTS +#undef _CLC_SHUFFLE_SET_16_ELEMENTS +#undef _CLC_SHUFFLE_DEFINE2 +#undef _CLC_SHUFFLE_DEFINE4 +#undef _CLC_SHUFFLE_DEFINE8 +#undef _CLC_SHUFFLE_DEFINE16 +#undef _CLC_VECTOR_SHUFFLE_MASKSIZE +#undef _CLC_VECTOR_SHUFFLE_INSIZE diff --git 
a/libclc/clc/lib/generic/misc/clc_shuffle2.cl b/libclc/clc/lib/generic/misc/clc_shuffle2.cl new file mode 100644 index 0000000000000..3626e67423193 --- /dev/null +++ b/libclc/clc/lib/generic/misc/clc_shuffle2.cl @@ -0,0 +1,174 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/internal/clc.h> + +#define _CLC_ELEMENT_CASES2(VAR) \ + case 0: \ + return VAR.s0; \ + case 1: \ + return VAR.s1; + +#define _CLC_ELEMENT_CASES4(VAR) \ + _CLC_ELEMENT_CASES2(VAR) \ + case 2: \ + return VAR.s2; \ + case 3: \ + return VAR.s3; + +#define _CLC_ELEMENT_CASES8(VAR) \ + _CLC_ELEMENT_CASES4(VAR) \ + case 4: \ + return VAR.s4; \ + case 5: \ + return VAR.s5; \ + case 6: \ + return VAR.s6; \ + case 7: \ + return VAR.s7; + +#define _CLC_ELEMENT_CASES16(VAR) \ + _CLC_ELEMENT_CASES8(VAR) \ + case 8: \ + return VAR.s8; \ + case 9: \ + return VAR.s9; \ + case 10: \ + return VAR.sA; \ + case 11: \ + return VAR.sB; \ + case 12: \ + return VAR.sC; \ + case 13: \ + return VAR.sD; \ + case 14: \ + return VAR.sE; \ + case 15: \ + return VAR.sF; + +#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \ + __attribute__((always_inline)) ARGTYPE \ + __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE( \ + ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, IDXTYPE idx) { \ + if (idx < ARGSIZE) \ + switch (idx) { _CLC_ELEMENT_CASES##ARGSIZE(x) default : return 0; } \ + else \ + switch (idx - ARGSIZE) { \ + _CLC_ELEMENT_CASES##ARGSIZE(y) default : return 0; \ + } \ + } + +#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0); \ + ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, 
mask.s1); + +#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2); \ + ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3); + +#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4); \ + ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5); \ + ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6); \ + ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7); + +#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8); \ + ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9); \ + ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA); \ + ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB); \ + ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC); \ + ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD); \ + ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE); \ + ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF); + +#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_DEF _CLC_OVERLOAD ARGTYPE##2 __clc_shuffle2( \ + ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##2 mask) { \ + ARGTYPE##2 ret_val; \ + mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1); \ + _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_DEF _CLC_OVERLOAD ARGTYPE##4 __clc_shuffle2( \ + ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##4 mask) { \ + 
ARGTYPE##4 ret_val; \ + mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1); \ + _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_DEF _CLC_OVERLOAD ARGTYPE##8 __clc_shuffle2( \ + ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##8 mask) { \ + ARGTYPE##8 ret_val; \ + mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1); \ + _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \ + _CLC_DEF _CLC_OVERLOAD ARGTYPE##16 __clc_shuffle2( \ + ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##16 mask) { \ + ARGTYPE##16 ret_val; \ + mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1); \ + _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \ + return ret_val; \ + } + +#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \ + _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) + +#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \ + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) + +_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar) +_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort) +_CLC_VECTOR_SHUFFLE_INSIZE(int, uint) +_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong) +_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar) +_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort) +_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint) +_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong) +_CLC_VECTOR_SHUFFLE_INSIZE(float, uint) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong) +#endif +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable 
+_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort) +#endif + +#undef _CLC_ELEMENT_CASES2 +#undef _CLC_ELEMENT_CASES4 +#undef _CLC_ELEMENT_CASES8 +#undef _CLC_ELEMENT_CASES16 +#undef _CLC_GET_ELEMENT_DEFINE +#undef _CLC_SHUFFLE_SET_2_ELEMENTS +#undef _CLC_SHUFFLE_SET_4_ELEMENTS +#undef _CLC_SHUFFLE_SET_8_ELEMENTS +#undef _CLC_SHUFFLE_SET_16_ELEMENTS +#undef _CLC_SHUFFLE_DEFINE2 +#undef _CLC_SHUFFLE_DEFINE4 +#undef _CLC_SHUFFLE_DEFINE8 +#undef _CLC_SHUFFLE_DEFINE16 +#undef _CLC_VECTOR_SHUFFLE_MASKSIZE +#undef _CLC_VECTOR_SHUFFLE_INSIZE diff --git a/libclc/generic/include/clc/misc/shuffle.h b/libclc/generic/include/clc/misc/shuffle.h index 74a7a528ba0e5..f9d84c9633d37 100644 --- a/libclc/generic/include/clc/misc/shuffle.h +++ b/libclc/generic/include/clc/misc/shuffle.h @@ -6,41 +6,14 @@ // //===----------------------------------------------------------------------===// -#define _CLC_SHUFFLE_DECL(TYPE, MASKTYPE, RETTYPE) \ - _CLC_OVERLOAD _CLC_DECL RETTYPE shuffle(TYPE x, MASKTYPE mask); +#define __CLC_FUNCTION shuffle -//Return type is same base type as the input type, with the same vector size as the mask. -//Elements in the mask must be the same size (number of bits) as the input value. -//E.g. 
char8 ret = shuffle(char2 x, uchar8 mask);
+// Integer-type decls
+#define __CLC_BODY <clc/misc/shuffle_decl.inc>
+#include <clc/integer/gentype.inc>
-#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INBASE, INTYPE, MASKTYPE) \
- _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
- _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
- _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
- _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##16, INBASE##16) \
+// Floating-point decls
+#define __CLC_BODY <clc/misc/shuffle_decl.inc>
+#include <clc/math/gentype.inc>
-#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##16, MASKTYPE) \
-
-_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
-#ifdef cl_khr_fp64
-_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
-#endif
-#ifdef cl_khr_fp16
-_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
-#endif
-
-#undef _CLC_SHUFFLE_DECL
-#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
-#undef _CLC_VECTOR_SHUFFLE_INSIZE
+#undef __CLC_FUNCTION
diff --git a/libclc/generic/lib/misc/shuffle.cl b/libclc/generic/lib/misc/shuffle.cl
index 989e307efa579..44bcb36bde0e2 100644
--- a/libclc/generic/lib/misc/shuffle.cl
+++ b/libclc/generic/lib/misc/shuffle.cl
@@ -7,150 +7,12 @@
 //===----------------------------------------------------------------------===//
 #include <clc/clc.h>
+#include <clc/misc/clc_shuffle.h>
-#define _CLC_ELEMENT_CASES2(VAR) \
- case 0: return VAR.s0; \
- case 1: return VAR.s1;
+#define FUNCTION shuffle
-#define _CLC_ELEMENT_CASES4(VAR) \
-
_CLC_ELEMENT_CASES2(VAR) \
- case 2: return VAR.s2; \
- case 3: return VAR.s3;
+#define __CLC_BODY <clc/misc/shuffle_def.inc>
+#include <clc/integer/gentype.inc>
-#define _CLC_ELEMENT_CASES8(VAR) \
- _CLC_ELEMENT_CASES4(VAR) \
- case 4: return VAR.s4; \
- case 5: return VAR.s5; \
- case 6: return VAR.s6; \
- case 7: return VAR.s7;
-
-#define _CLC_ELEMENT_CASES16(VAR) \
- _CLC_ELEMENT_CASES8(VAR) \
- case 8: return VAR.s8; \
- case 9: return VAR.s9; \
- case 10: return VAR.sA; \
- case 11: return VAR.sB; \
- case 12: return VAR.sC; \
- case 13: return VAR.sD; \
- case 14: return VAR.sE; \
- case 15: return VAR.sF;
-
-#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
- inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, IDXTYPE idx) {\
- switch (idx){ \
- _CLC_ELEMENT_CASES##ARGSIZE(x) \
- default: return 0; \
- } \
- } \
-
-#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
- ret_val.s##INDEX = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX); \
-
-#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0); \
- ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
-
-#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2); \
- ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
-
-#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4); \
- ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5); \
- ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6); \
- ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
-
-#define
_CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \
- ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \
- ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \
- ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \
- ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \
- ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \
- ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \
- ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF); \
-
-#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##2 mask){ \
- ARGTYPE##2 ret_val; \
- mask &= (MASKTYPE##2)(ARGSIZE-1); \
- _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##4 mask){ \
- ARGTYPE##4 ret_val; \
- mask &= (MASKTYPE##4)(ARGSIZE-1); \
- _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##8 mask){ \
- ARGTYPE##8 ret_val; \
- mask &= (MASKTYPE##8)(ARGSIZE-1); \
- _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##16 mask){ \
- ARGTYPE##16 ret_val; \
- mask &= (MASKTYPE##16)(ARGSIZE-1); \
- _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_GET_ELEMENT_DEFINE(INTYPE,
ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
-
-#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
-
-
-
-_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
-#endif
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
-#endif
-
-#undef _CLC_ELEMENT_CASES2
-#undef _CLC_ELEMENT_CASES4
-#undef _CLC_ELEMENT_CASES8
-#undef _CLC_ELEMENT_CASES16
-#undef _CLC_GET_ELEMENT_DEFINE
-#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
-#undef _CLC_SHUFFLE_SET_2_ELEMENTS
-#undef _CLC_SHUFFLE_SET_4_ELEMENTS
-#undef _CLC_SHUFFLE_SET_8_ELEMENTS
-#undef _CLC_SHUFFLE_SET_16_ELEMENTS
-#undef _CLC_SHUFFLE_DEFINE2
-#undef _CLC_SHUFFLE_DEFINE4
-#undef _CLC_SHUFFLE_DEFINE8
-#undef _CLC_SHUFFLE_DEFINE16
-#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
-#undef _CLC_VECTOR_SHUFFLE_INSIZE
+#define __CLC_BODY <clc/misc/shuffle_def.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/misc/shuffle2.cl b/libclc/generic/lib/misc/shuffle2.cl
index a65e40eece415..718f5580b7323 100644
--- a/libclc/generic/lib/misc/shuffle2.cl
+++ b/libclc/generic/lib/misc/shuffle2.cl
@@ -7,153 +7,12 @@
//===----------------------------------------------------------------------===//
 #include <clc/clc.h>
+#include <clc/misc/clc_shuffle2.h>
-#define _CLC_ELEMENT_CASES2(VAR) \
- case 0: return VAR.s0; \
- case 1: return VAR.s1;
+#define FUNCTION shuffle2
-#define _CLC_ELEMENT_CASES4(VAR) \
- _CLC_ELEMENT_CASES2(VAR) \
- case 2: return VAR.s2; \
- case 3: return VAR.s3;
+#define __CLC_BODY <clc/misc/shuffle2_def.inc>
+#include <clc/integer/gentype.inc>
-#define _CLC_ELEMENT_CASES8(VAR) \
- _CLC_ELEMENT_CASES4(VAR) \
- case 4: return VAR.s4; \
- case 5: return VAR.s5; \
- case 6: return VAR.s6; \
- case 7: return VAR.s7;
-
-#define _CLC_ELEMENT_CASES16(VAR) \
- _CLC_ELEMENT_CASES8(VAR) \
- case 8: return VAR.s8; \
- case 9: return VAR.s9; \
- case 10: return VAR.sA; \
- case 11: return VAR.sB; \
- case 12: return VAR.sC; \
- case 13: return VAR.sD; \
- case 14: return VAR.sE; \
- case 15: return VAR.sF;
-
-#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
- __attribute__((always_inline)) \
- ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, IDXTYPE idx) {\
- if (idx < ARGSIZE) \
- switch (idx){ \
- _CLC_ELEMENT_CASES##ARGSIZE(x) \
- default: return 0; \
- } \
- else \
- switch (idx - ARGSIZE){ \
- _CLC_ELEMENT_CASES##ARGSIZE(y) \
- default: return 0; \
- } \
- } \
-
-#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0); \
- ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s1);
-
-#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2); \
- ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3);
-
-#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s4 =
__clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4); \
- ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5); \
- ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6); \
- ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7);
-
-#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8); \
- ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9); \
- ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA); \
- ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB); \
- ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC); \
- ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD); \
- ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE); \
- ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF); \
-
-#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##2 mask){ \
- ARGTYPE##2 ret_val; \
- mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1); \
- _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##4 mask){ \
- ARGTYPE##4 ret_val; \
- mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1); \
- _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##8 mask){ \
- ARGTYPE##8 ret_val; \
- mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1); \
- _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##16 mask){ \
- ARGTYPE##16 ret_val; \
- mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1); \
- _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
- return ret_val; \
-}
-
-#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
- _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
-
-#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
- _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
-
-
-
-_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
-#endif
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
-#endif
-
-#undef _CLC_ELEMENT_CASES2
-#undef _CLC_ELEMENT_CASES4
-#undef _CLC_ELEMENT_CASES8
-#undef _CLC_ELEMENT_CASES16
-#undef _CLC_GET_ELEMENT_DEFINE
-#undef _CLC_SHUFFLE_SET_2_ELEMENTS
-#undef _CLC_SHUFFLE_SET_4_ELEMENTS
-#undef _CLC_SHUFFLE_SET_8_ELEMENTS
-#undef _CLC_SHUFFLE_SET_16_ELEMENTS
-#undef _CLC_SHUFFLE_DEFINE2
-#undef _CLC_SHUFFLE_DEFINE4
-#undef _CLC_SHUFFLE_DEFINE8
-#undef _CLC_SHUFFLE_DEFINE16
-#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
-#undef
_CLC_VECTOR_SHUFFLE_INSIZE
+#define __CLC_BODY <clc/misc/shuffle2_def.inc>
+#include <clc/math/gentype.inc>
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits