https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/157002
>From 8390286ffa32ce98ba39cfbe313d9396ce0572fc Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Fri, 5 Sep 2025 04:47:56 +0200 Subject: [PATCH 1/2] Revert "[NFC][libclc] Move _CLC_V_V_VP_VECTORIZE macro into clc_lgamma_r.cl and delete clcmacro.h (#156280)" This partially reverts commit d50f2ef437aeb1784f7556fd63639487f245ffaa because _CLC_V_V_VP_VECTORIZE is also used in our downstream code: https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl#L30 https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl#L31 --- libclc/clc/include/clc/clcmacro.h | 69 +++++++++++++++++++++ libclc/clc/lib/generic/math/clc_lgamma_r.cl | 55 +--------------- 2 files changed, 70 insertions(+), 54 deletions(-) create mode 100644 libclc/clc/include/clc/clcmacro.h diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h new file mode 100644 index 0000000000000..9fa11489b1457 --- /dev/null +++ b/libclc/clc/include/clc/clcmacro.h @@ -0,0 +1,69 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_CLCMACRO_H__ +#define __CLC_CLCMACRO_H__ + +#include <clc/internal/clc.h> +#include <clc/utils.h> + +#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \ + ADDR_SPACE, ARG2_TYPE) \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ + __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ + return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \ + __CLC_FUNCTION(x.s1, ptr + 1)); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ + __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ + return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \ + __CLC_FUNCTION(x.s1, ptr + 1), \ + __CLC_FUNCTION(x.s2, ptr + 2)); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ + __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ + return (__CLC_XCONCAT(RET_TYPE, 4))( \ + __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ + __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ + __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ + return (__CLC_XCONCAT(RET_TYPE, 8))( \ + __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ + __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ + __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ + __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \ + } \ + \ + DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ + __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ + ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ + ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ + return (__CLC_XCONCAT(RET_TYPE, 16))( \ + __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ + __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ + __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ + __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \ + __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \ + __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \ + __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \ + __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \ + } + +#endif // __CLC_CLCMACRO_H__ diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl index 662b34a69bb72..20aa80a63dd37 100644 --- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl +++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> +#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> @@ -16,60 +17,6 @@ #include <clc/math/clc_sinpi.h> #include <clc/math/math.h> -#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \ - ADDR_SPACE, ARG2_TYPE) \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \ - __CLC_FUNCTION(x.s1, ptr + 1)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \ - __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 4))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 8))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ - __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ - __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 16))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ - __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ - __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \ - __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \ - __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \ - __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \ - __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \ - } - // ==================================================== // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. // >From 2eb8acf587d8d97705120b0a126ebf9b01aba1ba Mon Sep 17 00:00:00 2001 From: Wenju He <wenju...@intel.com> Date: Fri, 5 Sep 2025 06:05:14 +0200 Subject: [PATCH 2/2] replace _CLC_V_V_VP_VECTORIZE with use of unary_def_with_ptr_scalarize.inc --- libclc/clc/include/clc/clcmacro.h | 69 ------------ .../shared/unary_def_with_ptr_scalarize.inc | 106 ++++++++++++++++++ libclc/clc/lib/generic/math/clc_lgamma_r.cl | 27 +++-- libclc/clc/lib/generic/math/clc_lgamma_r.inc | 4 + 4 files changed, 126 insertions(+), 80 deletions(-) delete mode 100644 libclc/clc/include/clc/clcmacro.h create mode 100644 libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h deleted file mode 100644 index 9fa11489b1457..0000000000000 --- a/libclc/clc/include/clc/clcmacro.h +++ /dev/null @@ -1,69 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef __CLC_CLCMACRO_H__ -#define __CLC_CLCMACRO_H__ - -#include <clc/internal/clc.h> -#include <clc/utils.h> - -#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \ - ADDR_SPACE, ARG2_TYPE) \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \ - __CLC_FUNCTION(x.s1, ptr + 1)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \ - __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 4))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 8))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ - __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ - __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \ - } \ - \ - DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \ - __CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \ - ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \ - ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \ - return (__CLC_XCONCAT(RET_TYPE, 16))( \ - __CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \ - __CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \ - __CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \ - __CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \ - __CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \ - __CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \ - __CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \ - __CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \ - } - -#endif // __CLC_CLCMACRO_H__ diff --git a/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc new file mode 100644 index 0000000000000..fff91d36e626d --- /dev/null +++ b/libclc/clc/include/clc/shared/unary_def_with_ptr_scalarize.inc @@ -0,0 +1,106 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/utils.h> + +#ifdef __CLC_SCALAR + +#ifndef __CLC_IMPL_FUNCTION +#define __CLC_IMPL_FUNCTION __CLC_FUNCTION +#endif + +#ifndef __CLC_RET_TYPE +#define __CLC_RET_TYPE __CLC_GENTYPE +#endif + +#ifndef __CLC_ARG1_TYPE +#define __CLC_ARG1_TYPE __CLC_GENTYPE +#endif + +#ifndef __CLC_ARG2_TYPE +#define __CLC_ARG2_TYPE __CLC_GENTYPE +#endif + +#define __CLC_RET_VECTYPE __CLC_XCONCAT(__CLC_RET_TYPE, __CLC_VECTOR_SIZE) +#define __CLC_ARG1_VECTYPE __CLC_XCONCAT(__CLC_ARG1_TYPE, __CLC_VECTOR_SIZE) +#define __CLC_ARG2_VECTYPE __CLC_XCONCAT(__CLC_ARG2_TYPE, __CLC_VECTOR_SIZE) + +#define __CLC_VECTOR_SIZE 2 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 3 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 4 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2), + __CLC_IMPL_FUNCTION(x.s3, p + 3)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 8 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2), + __CLC_IMPL_FUNCTION(x.s3, p + 3), + __CLC_IMPL_FUNCTION(x.s4, p + 4), + __CLC_IMPL_FUNCTION(x.s5, p + 5), + __CLC_IMPL_FUNCTION(x.s6, p + 6), + __CLC_IMPL_FUNCTION(x.s7, p + 7)); +} +#undef __CLC_VECTOR_SIZE + +#define __CLC_VECTOR_SIZE 16 +_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE +__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) { + __CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr; + return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p), + __CLC_IMPL_FUNCTION(x.s1, p + 1), + __CLC_IMPL_FUNCTION(x.s2, p + 2), + __CLC_IMPL_FUNCTION(x.s3, p + 3), + __CLC_IMPL_FUNCTION(x.s4, p + 4), + __CLC_IMPL_FUNCTION(x.s5, p + 5), + __CLC_IMPL_FUNCTION(x.s6, p + 6), + __CLC_IMPL_FUNCTION(x.s7, p + 7), + __CLC_IMPL_FUNCTION(x.s8, p + 8), + __CLC_IMPL_FUNCTION(x.s9, p + 9), + __CLC_IMPL_FUNCTION(x.sa, p + 10), + __CLC_IMPL_FUNCTION(x.sb, p + 11), + __CLC_IMPL_FUNCTION(x.sc, p + 12), + __CLC_IMPL_FUNCTION(x.sd, p + 13), + __CLC_IMPL_FUNCTION(x.se, p + 14), + __CLC_IMPL_FUNCTION(x.sf, p + 15)); +} +#undef __CLC_VECTOR_SIZE + +#undef __CLC_RET_VECTYPE +#undef __CLC_ARG1_VECTYPE +#undef __CLC_ARG2_VECTYPE + +#endif // __CLC_SCALAR diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.cl b/libclc/clc/lib/generic/math/clc_lgamma_r.cl index 20aa80a63dd37..5c9f673eef489 100644 --- a/libclc/clc/lib/generic/math/clc_lgamma_r.cl +++ b/libclc/clc/lib/generic/math/clc_lgamma_r.cl @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include <clc/clc_convert.h> -#include <clc/clcmacro.h> #include <clc/float/definitions.h> #include <clc/internal/clc.h> #include <clc/math/clc_fabs.h> @@ -280,9 +279,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_lgamma_r(float x, private int *signp) { return r; } -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float, - private, int) - #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable // ==================================================== @@ -586,9 +582,7 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) { return r; } -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_lgamma_r, double, - private, int) -#endif +#endif // cl_khr_fp64 #ifdef cl_khr_fp16 @@ -598,24 +592,35 @@ _CLC_OVERLOAD _CLC_DEF half __clc_lgamma_r(half x, private int *iptr) { return (half)__clc_lgamma_r((float)x, iptr); } -_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_lgamma_r, half, - private, int); +#endif // cl_khr_fp16 + +#define __CLC_FUNCTION __clc_lgamma_r +#define __CLC_ARG2_TYPE int -#endif +#define __CLC_ADDRSPACE private +#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc> +#include <clc/math/gentype.inc> +#undef __CLC_ADDRSPACE #define __CLC_ADDRSPACE global #define __CLC_BODY <clc_lgamma_r.inc> #include <clc/math/gentype.inc> +#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc> +#include <clc/math/gentype.inc> #undef __CLC_ADDRSPACE #define __CLC_ADDRSPACE local #define __CLC_BODY <clc_lgamma_r.inc> #include <clc/math/gentype.inc> +#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc> +#include <clc/math/gentype.inc> #undef __CLC_ADDRSPACE #if _CLC_DISTINCT_GENERIC_AS_SUPPORTED #define __CLC_ADDRSPACE generic #define __CLC_BODY <clc_lgamma_r.inc> #include <clc/math/gentype.inc> +#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc> +#include <clc/math/gentype.inc> #undef __CLC_ADDRSPACE -#endif +#endif // _CLC_DISTINCT_GENERIC_AS_SUPPORTED diff --git a/libclc/clc/lib/generic/math/clc_lgamma_r.inc b/libclc/clc/lib/generic/math/clc_lgamma_r.inc index 87891efd44755..931fa089ff3d1 100644 --- a/libclc/clc/lib/generic/math/clc_lgamma_r.inc +++ b/libclc/clc/lib/generic/math/clc_lgamma_r.inc @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#ifdef __CLC_SCALAR + _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { __CLC_INTN private_iptr; @@ -13,3 +15,5 @@ __clc_lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) { *iptr = private_iptr; return ret; } + +#endif // __CLC_SCALAR _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits