Author: Fraser Cormack
Date: 2025-04-09T15:52:25+01:00
New Revision: b0338c3d6c6d9787232e07ceff56ef161c357602

URL: 
https://github.com/llvm/llvm-project/commit/b0338c3d6c6d9787232e07ceff56ef161c357602
DIFF: 
https://github.com/llvm/llvm-project/commit/b0338c3d6c6d9787232e07ceff56ef161c357602.diff

LOG: [libclc] Move shuffle/shuffle2 to the CLC library (#135000)

This commit moves the shuffle and shuffle2 builtins to the CLC library.
In so doing it makes the headers simpler and re-usable for other builtin
layers to hook into the CLC functions, if they wish.

An additional gentype utility macro, __CLC_VECSIZE_OR_1, has been made
available. It expands to the vector size for vector types and to 1 for
scalar types.

For scalar types the existing __CLC_VECSIZE is defined but empty, which
is useful in certain applications, such as concatenating it with a type
name to form a correctly sized scalar or vector type. However, it cannot
be used in preprocessor conditionals that check for specific vector
sizes: e.g., '__CLC_VECSIZE == 2' expands to '== 2', which is invalid.
In local testing the new macro is also useful for the geometric builtins,
which are only available for scalar types and vector types of 2, 3, or 4
elements.

No codegen changes are observed, except the internal shuffle/shuffle2
utility functions are no longer made publicly available.

Added: 
    libclc/clc/include/clc/misc/clc_shuffle.h
    libclc/clc/include/clc/misc/clc_shuffle2.h
    libclc/clc/include/clc/misc/shuffle2_decl.inc
    libclc/clc/include/clc/misc/shuffle2_def.inc
    libclc/clc/include/clc/misc/shuffle_decl.inc
    libclc/clc/include/clc/misc/shuffle_def.inc
    libclc/clc/lib/generic/misc/clc_shuffle.cl
    libclc/clc/lib/generic/misc/clc_shuffle2.cl

Modified: 
    libclc/clc/include/clc/integer/gentype.inc
    libclc/clc/include/clc/math/gentype.inc
    libclc/clc/lib/generic/SOURCES
    libclc/generic/include/clc/misc/shuffle.h
    libclc/generic/lib/misc/shuffle.cl
    libclc/generic/lib/misc/shuffle2.cl

Removed: 
    


################################################################################
diff  --git a/libclc/clc/include/clc/integer/gentype.inc 
b/libclc/clc/include/clc/integer/gentype.inc
index 000e75cede6f7..b2b603e272573 100644
--- a/libclc/clc/include/clc/integer/gentype.inc
+++ b/libclc/clc/include/clc/integer/gentype.inc
@@ -27,15 +27,19 @@
 #define __CLC_GENTYPE char
 #define __CLC_U_GENTYPE uchar
 #define __CLC_S_GENTYPE char
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE char2
 #define __CLC_U_GENTYPE uchar2
 #define __CLC_S_GENTYPE char2
@@ -85,6 +89,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_SCALAR_GENTYPE
 #define __CLC_SCALAR_GENTYPE uchar
@@ -92,15 +97,19 @@
 #define __CLC_GENTYPE uchar
 #define __CLC_U_GENTYPE uchar
 #define __CLC_S_GENTYPE char
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE uchar2
 #define __CLC_U_GENTYPE uchar2
 #define __CLC_S_GENTYPE char2
@@ -150,6 +159,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_GENSIZE
 #define __CLC_GENSIZE 16
@@ -159,15 +169,19 @@
 #define __CLC_GENTYPE short
 #define __CLC_U_GENTYPE ushort
 #define __CLC_S_GENTYPE short
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE short2
 #define __CLC_U_GENTYPE ushort2
 #define __CLC_S_GENTYPE short2
@@ -217,6 +231,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_SCALAR_GENTYPE
 #define __CLC_SCALAR_GENTYPE ushort
@@ -224,15 +239,19 @@
 #define __CLC_GENTYPE ushort
 #define __CLC_U_GENTYPE ushort
 #define __CLC_S_GENTYPE short
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE ushort2
 #define __CLC_U_GENTYPE ushort2
 #define __CLC_S_GENTYPE short2
@@ -282,6 +301,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_GENSIZE
 #define __CLC_GENSIZE 32
@@ -291,15 +311,19 @@
 #define __CLC_GENTYPE int
 #define __CLC_U_GENTYPE uint
 #define __CLC_S_GENTYPE int
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE int2
 #define __CLC_U_GENTYPE uint2
 #define __CLC_S_GENTYPE int2
@@ -349,6 +373,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_SCALAR_GENTYPE
 #define __CLC_SCALAR_GENTYPE uint
@@ -356,15 +381,19 @@
 #define __CLC_GENTYPE uint
 #define __CLC_U_GENTYPE uint
 #define __CLC_S_GENTYPE int
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE uint2
 #define __CLC_U_GENTYPE uint2
 #define __CLC_S_GENTYPE int2
@@ -414,6 +443,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_GENSIZE
 #define __CLC_GENSIZE 64
@@ -423,15 +453,19 @@
 #define __CLC_GENTYPE long
 #define __CLC_U_GENTYPE ulong
 #define __CLC_S_GENTYPE long
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE long2
 #define __CLC_U_GENTYPE ulong2
 #define __CLC_S_GENTYPE long2
@@ -481,6 +515,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_SCALAR_GENTYPE
 #define __CLC_SCALAR_GENTYPE ulong
@@ -488,15 +523,20 @@
 #define __CLC_GENTYPE ulong
 #define __CLC_U_GENTYPE ulong
 #define __CLC_S_GENTYPE long
-#define __CLC_SCALAR 1
+#define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE ulong2
 #define __CLC_U_GENTYPE ulong2
 #define __CLC_S_GENTYPE long2
@@ -546,6 +586,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
+#undef __CLC_VECSIZE_OR_1
 
 #undef __CLC_GENSIZE
 #undef __CLC_SCALAR_GENTYPE

diff  --git a/libclc/clc/include/clc/math/gentype.inc 
b/libclc/clc/include/clc/math/gentype.inc
index f65ccc2d42f71..0c7dd5b220b9c 100644
--- a/libclc/clc/include/clc/math/gentype.inc
+++ b/libclc/clc/include/clc/math/gentype.inc
@@ -81,12 +81,16 @@
 #define __CLC_BIT_INTN int
 #define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_GENTYPE
 #undef __CLC_BIT_INTN
 #undef __CLC_SCALAR
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE float2
 #define __CLC_BIT_INTN int2
 #define __CLC_VECSIZE 2
@@ -127,6 +131,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_BIT_INTN
 
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 #undef __CLC_GENTYPE_NAN
@@ -150,14 +155,18 @@
 
 #define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #define __CLC_GENTYPE double
 #define __CLC_BIT_INTN long
 #include __CLC_BODY
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_GENTYPE
 #undef __CLC_BIT_INTN
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE double2
 #define __CLC_BIT_INTN long2
 #define __CLC_VECSIZE 2
@@ -198,6 +207,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_BIT_INTN
 
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 #undef __CLC_GENTYPE_NAN
@@ -221,14 +231,18 @@
 
 #define __CLC_SCALAR
 #define __CLC_VECSIZE
+#define __CLC_VECSIZE_OR_1 1
 #define __CLC_GENTYPE half
 #define __CLC_BIT_INTN short
 #include __CLC_BODY
 #undef __CLC_GENTYPE
 #undef __CLC_BIT_INTN
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_VECSIZE
 #undef __CLC_SCALAR
 
+#define __CLC_VECSIZE_OR_1 __CLC_VECSIZE
+
 #define __CLC_GENTYPE half2
 #define __CLC_BIT_INTN short2
 #define __CLC_VECSIZE 2
@@ -269,6 +283,7 @@
 #undef __CLC_GENTYPE
 #undef __CLC_BIT_INTN
 
+#undef __CLC_VECSIZE_OR_1
 #undef __CLC_U_GENTYPE
 #undef __CLC_S_GENTYPE
 #undef __CLC_GENTYPE_NAN

diff  --git a/libclc/clc/include/clc/misc/clc_shuffle.h 
b/libclc/clc/include/clc/misc/clc_shuffle.h
new file mode 100644
index 0000000000000..ba3d7a2bddd62
--- /dev/null
+++ b/libclc/clc/include/clc/misc/clc_shuffle.h
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MISC_CLC_SHUFFLE_H__
+#define __CLC_MISC_CLC_SHUFFLE_H__
+
+#define __CLC_FUNCTION __clc_shuffle
+
+// Integer-type decls
+#define __CLC_BODY <clc/misc/shuffle_decl.inc>
+#include <clc/integer/gentype.inc>
+
+// Floating-point decls
+#define __CLC_BODY <clc/misc/shuffle_decl.inc>
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MISC_CLC_SHUFFLE_H__

diff  --git a/libclc/clc/include/clc/misc/clc_shuffle2.h 
b/libclc/clc/include/clc/misc/clc_shuffle2.h
new file mode 100644
index 0000000000000..84bafba12b211
--- /dev/null
+++ b/libclc/clc/include/clc/misc/clc_shuffle2.h
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MISC_CLC_SHUFFLE2_H__
+#define __CLC_MISC_CLC_SHUFFLE2_H__
+
+#define __CLC_FUNCTION __clc_shuffle2
+
+// Integer-type decls
+#define __CLC_BODY <clc/misc/shuffle2_decl.inc>
+#include <clc/integer/gentype.inc>
+
+// Floating-point decls
+#define __CLC_BODY <clc/misc/shuffle2_decl.inc>
+#include <clc/math/gentype.inc>
+
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MISC_CLC_SHUFFLE2_H__

diff  --git a/libclc/clc/include/clc/misc/shuffle2_decl.inc 
b/libclc/clc/include/clc/misc/shuffle2_decl.inc
new file mode 100644
index 0000000000000..2fc992ef2c177
--- /dev/null
+++ b/libclc/clc/include/clc/misc/shuffle2_decl.inc
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 ||                     
\
+     __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
+
+// The return type is same base type as the input type, with the same vector
+// size as the mask. Elements in the mask must be the same size (number of 
bits)
+// as the input value, e.g. char8 ret = shuffle2(char2 x, char2 y, uchar8 mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x,
+               __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x,
+               __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x,
+               __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x,
+               __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE 
mask);
+
+#endif

diff  --git a/libclc/clc/include/clc/misc/shuffle2_def.inc 
b/libclc/clc/include/clc/misc/shuffle2_def.inc
new file mode 100644
index 0000000000000..099a175d665dd
--- /dev/null
+++ b/libclc/clc/include/clc/misc/shuffle2_def.inc
@@ -0,0 +1,42 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 ||                     
\
+     __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
+
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+// The return type is same base type as the input type, with the same vector
+// size as the mask. Elements in the mask must be the same size (number of 
bits)
+// as the input value, e.g. char8 ret = shuffle2(char2 x, char2 y, uchar8 mask);
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x,
+         __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) y, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, y, mask);
+}
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x,
+         __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) y, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, y, mask);
+}
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x,
+         __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) y, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, y, mask);
+}
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x,
+         __CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) y, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, y, mask);
+}
+
+#endif

diff  --git a/libclc/clc/include/clc/misc/shuffle_decl.inc 
b/libclc/clc/include/clc/misc/shuffle_decl.inc
new file mode 100644
index 0000000000000..5e7e5b24c2873
--- /dev/null
+++ b/libclc/clc/include/clc/misc/shuffle_decl.inc
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 ||                     
\
+     __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
+
+// The return type is same base type as the input type, with the same vector
+// size as the mask. Elements in the mask must be the same size (number of 
bits)
+// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask);
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE
+__CLC_FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE 
mask);
+
+#endif

diff  --git a/libclc/clc/include/clc/misc/shuffle_def.inc 
b/libclc/clc/include/clc/misc/shuffle_def.inc
new file mode 100644
index 0000000000000..84c873e6458ec
--- /dev/null
+++ b/libclc/clc/include/clc/misc/shuffle_def.inc
@@ -0,0 +1,38 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 4 ||                     
\
+     __CLC_VECSIZE_OR_1 == 8 || __CLC_VECSIZE_OR_1 == 16)
+
+#include <clc/utils.h>
+
+#ifndef __CLC_FUNCTION
+#define __CLC_FUNCTION(x) __CLC_CONCAT(__clc_, x)
+#endif
+
+// The return type is same base type as the input type, with the same vector
+// size as the mask. Elements in the mask must be the same size (number of 
bits)
+// as the input value, e.g. char8 ret = shuffle(char2 x, uchar8 mask);
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 2) x, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, mask);
+}
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 4) x, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, mask);
+}
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 8) x, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, mask);
+}
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
+FUNCTION(__CLC_XCONCAT(__CLC_SCALAR_GENTYPE, 16) x, __CLC_U_GENTYPE mask) {
+  return __CLC_FUNCTION(FUNCTION)(x, mask);
+}
+
+#endif

diff  --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index 2e9878e6eaa4e..4503a20ad9848 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -89,6 +89,8 @@ math/clc_tanh.cl
 math/clc_tanpi.cl
 math/clc_tgamma.cl
 math/clc_trunc.cl
+misc/clc_shuffle.cl
+misc/clc_shuffle2.cl
 relational/clc_all.cl
 relational/clc_any.cl
 relational/clc_bitselect.cl

diff  --git a/libclc/clc/lib/generic/misc/clc_shuffle.cl 
b/libclc/clc/lib/generic/misc/clc_shuffle.cl
new file mode 100644
index 0000000000000..626a94df08131
--- /dev/null
+++ b/libclc/clc/lib/generic/misc/clc_shuffle.cl
@@ -0,0 +1,173 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+
+#define _CLC_ELEMENT_CASES2(VAR)                                               
\
+  case 0:                                                                      
\
+    return VAR.s0;                                                             
\
+  case 1:                                                                      
\
+    return VAR.s1;
+
+#define _CLC_ELEMENT_CASES4(VAR)                                               
\
+  _CLC_ELEMENT_CASES2(VAR)                                                     
\
+  case 2:                                                                      
\
+    return VAR.s2;                                                             
\
+  case 3:                                                                      
\
+    return VAR.s3;
+
+#define _CLC_ELEMENT_CASES8(VAR)                                               
\
+  _CLC_ELEMENT_CASES4(VAR)                                                     
\
+  case 4:                                                                      
\
+    return VAR.s4;                                                             
\
+  case 5:                                                                      
\
+    return VAR.s5;                                                             
\
+  case 6:                                                                      
\
+    return VAR.s6;                                                             
\
+  case 7:                                                                      
\
+    return VAR.s7;
+
+#define _CLC_ELEMENT_CASES16(VAR)                                              
\
+  _CLC_ELEMENT_CASES8(VAR)                                                     
\
+  case 8:                                                                      
\
+    return VAR.s8;                                                             
\
+  case 9:                                                                      
\
+    return VAR.s9;                                                             
\
+  case 10:                                                                     
\
+    return VAR.sA;                                                             
\
+  case 11:                                                                     
\
+    return VAR.sB;                                                             
\
+  case 12:                                                                     
\
+    return VAR.sC;                                                             
\
+  case 13:                                                                     
\
+    return VAR.sD;                                                             
\
+  case 14:                                                                     
\
+    return VAR.sE;                                                             
\
+  case 15:                                                                     
\
+    return VAR.sF;
+
+#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE)                     
\
+  inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(                  
\
+      ARGTYPE##ARGSIZE x, IDXTYPE idx) {                                       
\
+    switch (idx) { _CLC_ELEMENT_CASES##ARGSIZE(x) default : return 0; }        
\
+  }
+
+#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE)        
\
+  ret_val.s##INDEX =                                                           
\
+      __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX);
+
+#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                
\
+  ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0);       
\
+  ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
+
+#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                
\
+  _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2);       
\
+  ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
+
+#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                
\
+  _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4);       
\
+  ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5);       
\
+  ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6);       
\
+  ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
+
+#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)               
\
+  _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8);       
\
+  ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9);       
\
+  ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA);       
\
+  ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB);       
\
+  ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC);       
\
+  ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD);       
\
+  ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE);       
\
+  ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF);
+
+#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE)                       
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##2 __clc_shuffle(ARGTYPE##ARGSIZE x,          
\
+                                                  MASKTYPE##2 mask) {          
\
+    ARGTYPE##2 ret_val;                                                        
\
+    mask &= (MASKTYPE##2)(ARGSIZE - 1);                                        
\
+    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                    
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE)                       
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##4 __clc_shuffle(ARGTYPE##ARGSIZE x,          
\
+                                                  MASKTYPE##4 mask) {          
\
+    ARGTYPE##4 ret_val;                                                        
\
+    mask &= (MASKTYPE##4)(ARGSIZE - 1);                                        
\
+    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                    
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE)                       
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##8 __clc_shuffle(ARGTYPE##ARGSIZE x,          
\
+                                                  MASKTYPE##8 mask) {          
\
+    ARGTYPE##8 ret_val;                                                        
\
+    mask &= (MASKTYPE##8)(ARGSIZE - 1);                                        
\
+    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                    
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##16 __clc_shuffle(ARGTYPE##ARGSIZE x,         
\
+                                                   MASKTYPE##16 mask) {        
\
+    ARGTYPE##16 ret_val;                                                       
\
+    mask &= (MASKTYPE##16)(ARGSIZE - 1);                                       
\
+    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                   
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE)                
\
+  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE)                           
\
+  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE)                              
\
+  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE)                              
\
+  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE)                              
\
+  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE)
+
+#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE)                             
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE)                              
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE)                              
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE)                              
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)
+
+_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
+_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
+_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
+_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
+_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
+_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
+_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
+_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
+_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
+#endif
+
+#undef _CLC_ELEMENT_CASES2
+#undef _CLC_ELEMENT_CASES4
+#undef _CLC_ELEMENT_CASES8
+#undef _CLC_ELEMENT_CASES16
+#undef _CLC_GET_ELEMENT_DEFINE
+#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
+#undef _CLC_SHUFFLE_SET_2_ELEMENTS
+#undef _CLC_SHUFFLE_SET_4_ELEMENTS
+#undef _CLC_SHUFFLE_SET_8_ELEMENTS
+#undef _CLC_SHUFFLE_SET_16_ELEMENTS
+#undef _CLC_SHUFFLE_DEFINE2
+#undef _CLC_SHUFFLE_DEFINE4
+#undef _CLC_SHUFFLE_DEFINE8
+#undef _CLC_SHUFFLE_DEFINE16
+#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
+#undef _CLC_VECTOR_SHUFFLE_INSIZE

diff  --git a/libclc/clc/lib/generic/misc/clc_shuffle2.cl 
b/libclc/clc/lib/generic/misc/clc_shuffle2.cl
new file mode 100644
index 0000000000000..3626e67423193
--- /dev/null
+++ b/libclc/clc/lib/generic/misc/clc_shuffle2.cl
@@ -0,0 +1,174 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+
+#define _CLC_ELEMENT_CASES2(VAR)                                               
\
+  case 0:                                                                      
\
+    return VAR.s0;                                                             
\
+  case 1:                                                                      
\
+    return VAR.s1;
+
+#define _CLC_ELEMENT_CASES4(VAR)                                               
\
+  _CLC_ELEMENT_CASES2(VAR)                                                     
\
+  case 2:                                                                      
\
+    return VAR.s2;                                                             
\
+  case 3:                                                                      
\
+    return VAR.s3;
+
+#define _CLC_ELEMENT_CASES8(VAR)                                               
\
+  _CLC_ELEMENT_CASES4(VAR)                                                     
\
+  case 4:                                                                      
\
+    return VAR.s4;                                                             
\
+  case 5:                                                                      
\
+    return VAR.s5;                                                             
\
+  case 6:                                                                      
\
+    return VAR.s6;                                                             
\
+  case 7:                                                                      
\
+    return VAR.s7;
+
+#define _CLC_ELEMENT_CASES16(VAR)                                              
\
+  _CLC_ELEMENT_CASES8(VAR)                                                     
\
+  case 8:                                                                      
\
+    return VAR.s8;                                                             
\
+  case 9:                                                                      
\
+    return VAR.s9;                                                             
\
+  case 10:                                                                     
\
+    return VAR.sA;                                                             
\
+  case 11:                                                                     
\
+    return VAR.sB;                                                             
\
+  case 12:                                                                     
\
+    return VAR.sC;                                                             
\
+  case 13:                                                                     
\
+    return VAR.sD;                                                             
\
+  case 14:                                                                     
\
+    return VAR.sE;                                                             
\
+  case 15:                                                                     
\
+    return VAR.sF;
+
+#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE)                     
\
+  __attribute__((always_inline)) ARGTYPE                                       
\
+      __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(                             
\
+          ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, IDXTYPE idx) {               
\
+    if (idx < ARGSIZE)                                                         
\
+      switch (idx) { _CLC_ELEMENT_CASES##ARGSIZE(x) default : return 0; }      
\
+    else                                                                       
\
+      switch (idx - ARGSIZE) {                                                 
\
+        _CLC_ELEMENT_CASES##ARGSIZE(y) default : return 0;                     
\
+      }                                                                        
\
+  }
+
+#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                
\
+  ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0);    
\
+  ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s1);
+
+#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                
\
+  _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2);    
\
+  ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3);
+
+#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                
\
+  _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4);    
\
+  ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5);    
\
+  ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6);    
\
+  ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7);
+
+#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)               
\
+  _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8);    
\
+  ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9);    
\
+  ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA);    
\
+  ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB);    
\
+  ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC);    
\
+  ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD);    
\
+  ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE);    
\
+  ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF);
+
+#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE)                       
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##2 __clc_shuffle2(                            
\
+      ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##2 mask) {              
\
+    ARGTYPE##2 ret_val;                                                        
\
+    mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1);                                    
\
+    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                    
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE)                       
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##4 __clc_shuffle2(                            
\
+      ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##4 mask) {              
\
+    ARGTYPE##4 ret_val;                                                        
\
+    mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1);                                    
\
+    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                    
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE)                       
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##8 __clc_shuffle2(                            
\
+      ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##8 mask) {              
\
+    ARGTYPE##8 ret_val;                                                        
\
+    mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1);                                    
\
+    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                    
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE)                      
\
+  _CLC_DEF _CLC_OVERLOAD ARGTYPE##16 __clc_shuffle2(                           
\
+      ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##16 mask) {             
\
+    ARGTYPE##16 ret_val;                                                       
\
+    mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1);                                   
\
+    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE)                   
\
+    return ret_val;                                                            
\
+  }
+
+#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE)                
\
+  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE)                           
\
+  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE)                              
\
+  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE)                              
\
+  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE)                              
\
+  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE)
+
+#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE)                             
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE)                              
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE)                              
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE)                              
\
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE)
+
+_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
+_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
+_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
+_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
+_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
+_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
+_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
+_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
+_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
+#endif
+
+#undef _CLC_ELEMENT_CASES2
+#undef _CLC_ELEMENT_CASES4
+#undef _CLC_ELEMENT_CASES8
+#undef _CLC_ELEMENT_CASES16
+#undef _CLC_GET_ELEMENT_DEFINE
+#undef _CLC_SHUFFLE_SET_2_ELEMENTS
+#undef _CLC_SHUFFLE_SET_4_ELEMENTS
+#undef _CLC_SHUFFLE_SET_8_ELEMENTS
+#undef _CLC_SHUFFLE_SET_16_ELEMENTS
+#undef _CLC_SHUFFLE_DEFINE2
+#undef _CLC_SHUFFLE_DEFINE4
+#undef _CLC_SHUFFLE_DEFINE8
+#undef _CLC_SHUFFLE_DEFINE16
+#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
+#undef _CLC_VECTOR_SHUFFLE_INSIZE

diff  --git a/libclc/generic/include/clc/misc/shuffle.h b/libclc/generic/include/clc/misc/shuffle.h
index 74a7a528ba0e5..f9d84c9633d37 100644
--- a/libclc/generic/include/clc/misc/shuffle.h
+++ b/libclc/generic/include/clc/misc/shuffle.h
@@ -6,41 +6,14 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#define _CLC_SHUFFLE_DECL(TYPE, MASKTYPE, RETTYPE) \
-  _CLC_OVERLOAD _CLC_DECL RETTYPE shuffle(TYPE x, MASKTYPE mask);
+#define __CLC_FUNCTION shuffle
 
-//Return type is same base type as the input type, with the same vector size as the mask.
-//Elements in the mask must be the same size (number of bits) as the input value.
-//E.g. char8 ret = shuffle(char2 x, uchar8 mask);
+// Integer-type decls
+#define __CLC_BODY <clc/misc/shuffle_decl.inc>
+#include <clc/integer/gentype.inc>
 
-#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INBASE, INTYPE, MASKTYPE) \
-  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
-  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
-  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
-  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##16, INBASE##16) \
+// Floating-point decls
+#define __CLC_BODY <clc/misc/shuffle_decl.inc>
+#include <clc/math/gentype.inc>
 
-#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##16, MASKTYPE) \
-
-_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
-#ifdef cl_khr_fp64
-_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
-#endif
-#ifdef cl_khr_fp16
-_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
-#endif
-
-#undef _CLC_SHUFFLE_DECL
-#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
-#undef _CLC_VECTOR_SHUFFLE_INSIZE
+#undef __CLC_FUNCTION

diff  --git a/libclc/generic/lib/misc/shuffle.cl b/libclc/generic/lib/misc/shuffle.cl
index 989e307efa579..44bcb36bde0e2 100644
--- a/libclc/generic/lib/misc/shuffle.cl
+++ b/libclc/generic/lib/misc/shuffle.cl
@@ -7,150 +7,12 @@
 
//===----------------------------------------------------------------------===//
 
 #include <clc/clc.h>
+#include <clc/misc/clc_shuffle.h>
 
-#define _CLC_ELEMENT_CASES2(VAR) \
-    case 0: return VAR.s0; \
-    case 1: return VAR.s1;
+#define FUNCTION shuffle
 
-#define _CLC_ELEMENT_CASES4(VAR) \
-    _CLC_ELEMENT_CASES2(VAR) \
-    case 2: return VAR.s2; \
-    case 3: return VAR.s3;
+#define __CLC_BODY <clc/misc/shuffle_def.inc>
+#include <clc/integer/gentype.inc>
 
-#define _CLC_ELEMENT_CASES8(VAR) \
-    _CLC_ELEMENT_CASES4(VAR) \
-    case 4: return VAR.s4; \
-    case 5: return VAR.s5; \
-    case 6: return VAR.s6; \
-    case 7: return VAR.s7;
-
-#define _CLC_ELEMENT_CASES16(VAR) \
-    _CLC_ELEMENT_CASES8(VAR) \
-    case 8: return VAR.s8; \
-    case 9: return VAR.s9; \
-    case 10: return VAR.sA; \
-    case 11: return VAR.sB; \
-    case 12: return VAR.sC; \
-    case 13: return VAR.sD; \
-    case 14: return VAR.sE; \
-    case 15: return VAR.sF;
-
-#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
-    inline ARGTYPE 
__clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, IDXTYPE idx) {\
-        switch (idx){ \
-            _CLC_ELEMENT_CASES##ARGSIZE(x) \
-            default: return 0; \
-        } \
-    } \
-
-#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
-    ret_val.s##INDEX = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, 
mask.s##INDEX); \
-
-#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0); \
-    ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
-
-#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2); \
-    ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
-
-#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4); \
-    ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5); \
-    ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6); \
-    ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
-
-#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \
-    ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \
-    ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \
-    ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \
-    ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \
-    ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \
-    ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \
-    ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF); \
-
-#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##2 
mask){ \
-    ARGTYPE##2 ret_val; \
-    mask &= (MASKTYPE##2)(ARGSIZE-1); \
-    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##4 
mask){ \
-    ARGTYPE##4 ret_val; \
-    mask &= (MASKTYPE##4)(ARGSIZE-1); \
-    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##8 
mask){ \
-    ARGTYPE##8 ret_val; \
-    mask &= (MASKTYPE##8)(ARGSIZE-1); \
-    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##16 
mask){ \
-    ARGTYPE##16 ret_val; \
-    mask &= (MASKTYPE##16)(ARGSIZE-1); \
-    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
-
-#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
-
-
-
-_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
-#endif
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
-#endif
-
-#undef _CLC_ELEMENT_CASES2
-#undef _CLC_ELEMENT_CASES4
-#undef _CLC_ELEMENT_CASES8
-#undef _CLC_ELEMENT_CASES16
-#undef _CLC_GET_ELEMENT_DEFINE
-#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
-#undef _CLC_SHUFFLE_SET_2_ELEMENTS
-#undef _CLC_SHUFFLE_SET_4_ELEMENTS
-#undef _CLC_SHUFFLE_SET_8_ELEMENTS
-#undef _CLC_SHUFFLE_SET_16_ELEMENTS
-#undef _CLC_SHUFFLE_DEFINE2
-#undef _CLC_SHUFFLE_DEFINE4
-#undef _CLC_SHUFFLE_DEFINE8
-#undef _CLC_SHUFFLE_DEFINE16
-#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
-#undef _CLC_VECTOR_SHUFFLE_INSIZE
+#define __CLC_BODY <clc/misc/shuffle_def.inc>
+#include <clc/math/gentype.inc>

diff  --git a/libclc/generic/lib/misc/shuffle2.cl b/libclc/generic/lib/misc/shuffle2.cl
index a65e40eece415..718f5580b7323 100644
--- a/libclc/generic/lib/misc/shuffle2.cl
+++ b/libclc/generic/lib/misc/shuffle2.cl
@@ -7,153 +7,12 @@
 
//===----------------------------------------------------------------------===//
 
 #include <clc/clc.h>
+#include <clc/misc/clc_shuffle2.h>
 
-#define _CLC_ELEMENT_CASES2(VAR) \
-    case 0: return VAR.s0; \
-    case 1: return VAR.s1;
+#define FUNCTION shuffle2
 
-#define _CLC_ELEMENT_CASES4(VAR) \
-    _CLC_ELEMENT_CASES2(VAR) \
-    case 2: return VAR.s2; \
-    case 3: return VAR.s3;
+#define __CLC_BODY <clc/misc/shuffle2_def.inc>
+#include <clc/integer/gentype.inc>
 
-#define _CLC_ELEMENT_CASES8(VAR) \
-    _CLC_ELEMENT_CASES4(VAR) \
-    case 4: return VAR.s4; \
-    case 5: return VAR.s5; \
-    case 6: return VAR.s6; \
-    case 7: return VAR.s7;
-
-#define _CLC_ELEMENT_CASES16(VAR) \
-    _CLC_ELEMENT_CASES8(VAR) \
-    case 8: return VAR.s8; \
-    case 9: return VAR.s9; \
-    case 10: return VAR.sA; \
-    case 11: return VAR.sB; \
-    case 12: return VAR.sC; \
-    case 13: return VAR.sD; \
-    case 14: return VAR.sE; \
-    case 15: return VAR.sF;
-
-#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
-     __attribute__((always_inline)) \
-     ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, 
ARGTYPE##ARGSIZE y, IDXTYPE idx) {\
-        if (idx < ARGSIZE) \
-            switch (idx){ \
-                _CLC_ELEMENT_CASES##ARGSIZE(x) \
-                default: return 0; \
-            } \
-        else \
-            switch (idx - ARGSIZE){ \
-                _CLC_ELEMENT_CASES##ARGSIZE(y) \
-                default: return 0; \
-            } \
-    } \
-
-#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0); \
-    ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s1);
-
-#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2); \
-    ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3);
-
-#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4); \
-    ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5); \
-    ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6); \
-    ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7);
-
-#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8); \
-    ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9); \
-    ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA); \
-    ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB); \
-    ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC); \
-    ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD); \
-    ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE); \
-    ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF); \
-
-#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle2(ARGTYPE##ARGSIZE x, 
ARGTYPE##ARGSIZE y, MASKTYPE##2 mask){ \
-    ARGTYPE##2 ret_val; \
-    mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1); \
-    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle2(ARGTYPE##ARGSIZE x, 
ARGTYPE##ARGSIZE y, MASKTYPE##4 mask){ \
-    ARGTYPE##4 ret_val; \
-    mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1); \
-    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle2(ARGTYPE##ARGSIZE x, 
ARGTYPE##ARGSIZE y, MASKTYPE##8 mask){ \
-    ARGTYPE##8 ret_val; \
-    mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1); \
-    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
-_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle2(ARGTYPE##ARGSIZE x, 
ARGTYPE##ARGSIZE y, MASKTYPE##16 mask){ \
-    ARGTYPE##16 ret_val; \
-    mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1); \
-    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
-    return ret_val; \
-}
-
-#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
-  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
-
-#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
-  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
-
-
-
-_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
-_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
-_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
-_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
-_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
-#endif
-#ifdef cl_khr_fp16
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
-#endif
-
-#undef _CLC_ELEMENT_CASES2
-#undef _CLC_ELEMENT_CASES4
-#undef _CLC_ELEMENT_CASES8
-#undef _CLC_ELEMENT_CASES16
-#undef _CLC_GET_ELEMENT_DEFINE
-#undef _CLC_SHUFFLE_SET_2_ELEMENTS
-#undef _CLC_SHUFFLE_SET_4_ELEMENTS
-#undef _CLC_SHUFFLE_SET_8_ELEMENTS
-#undef _CLC_SHUFFLE_SET_16_ELEMENTS
-#undef _CLC_SHUFFLE_DEFINE2
-#undef _CLC_SHUFFLE_DEFINE4
-#undef _CLC_SHUFFLE_DEFINE8
-#undef _CLC_SHUFFLE_DEFINE16
-#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
-#undef _CLC_VECTOR_SHUFFLE_INSIZE
+#define __CLC_BODY <clc/misc/shuffle2_def.inc>
+#include <clc/math/gentype.inc>


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to