https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/139759
>From c55c1f218d31d415872a7fee9b5b62fa66e23011 Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Thu, 27 Feb 2025 15:02:32 +0000 Subject: [PATCH 1/2] [libclc] Add (fast) normalize to CLC; add half overloads For simplicity the half overloads just call into the float versions of the builtin. Note that in the move some floating-point constants were combined. The vector2 versions of normalize used slightly different constants to the vector3 and vector4 versions of the same builtin. For float it was 0x1.0p-65 vs 0x1.0p-66 and for double 0x1.0p-513 vs 0x1.0p-514. I wasn't sure if this was necessary so this commit replaces the vector3/vector4 versions of the constants with the vector2 ones (0x1.0p-65 for float and 0x1.0p-513 for double). The OpenCL-CTS seems okay with it. If this is incorrect then it's not very difficult to split them back out again. --- .../clc/geometric/clc_fast_normalize.h | 22 +++ .../clc/include/clc/geometric/clc_normalize.h | 20 +++ libclc/clc/lib/generic/SOURCES | 2 + .../generic/geometric/clc_fast_normalize.cl | 15 ++ .../generic/geometric/clc_fast_normalize.inc | 23 +++ .../lib/generic/geometric/clc_normalize.cl | 22 +++ .../lib/generic/geometric/clc_normalize.inc | 68 +++++++++ .../generic/lib/geometric/fast_normalize.cl | 11 +- .../generic/lib/geometric/fast_normalize.inc | 19 --- libclc/generic/lib/geometric/normalize.cl | 137 +----------------- 10 files changed, 182 insertions(+), 157 deletions(-) create mode 100644 libclc/clc/include/clc/geometric/clc_fast_normalize.h create mode 100644 libclc/clc/include/clc/geometric/clc_normalize.h create mode 100644 libclc/clc/lib/generic/geometric/clc_fast_normalize.cl create mode 100644 libclc/clc/lib/generic/geometric/clc_fast_normalize.inc create mode 100644 libclc/clc/lib/generic/geometric/clc_normalize.cl create mode 100644 libclc/clc/lib/generic/geometric/clc_normalize.inc delete mode 100644 libclc/generic/lib/geometric/fast_normalize.inc diff --git a/libclc/clc/include/clc/geometric/clc_fast_normalize.h 
b/libclc/clc/include/clc/geometric/clc_fast_normalize.h new file mode 100644 index 0000000000000..66eed8b83ab18 --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_fast_normalize.h @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__ +#define __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__ + +#define __FLOAT_ONLY +#define __CLC_GEOMETRIC_RET_GENTYPE +#define __CLC_FUNCTION __clc_fast_normalize +#define __CLC_BODY <clc/geometric/unary_decl.inc> +#include <clc/math/gentype.inc> + +#undef __CLC_FUNCTION +#undef __CLC_GEOMETRIC_RET_GENTYPE +#undef __FLOAT_ONLY + +#endif // __CLC_GEOMETRIC_CLC_FAST_NORMALIZE_H__ diff --git a/libclc/clc/include/clc/geometric/clc_normalize.h b/libclc/clc/include/clc/geometric/clc_normalize.h new file mode 100644 index 0000000000000..3058a72b2bbbe --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_normalize.h @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __CLC_GEOMETRIC_CLC_NORMALIZE_H__ +#define __CLC_GEOMETRIC_CLC_NORMALIZE_H__ + +#define __CLC_GEOMETRIC_RET_GENTYPE +#define __CLC_FUNCTION __clc_normalize +#define __CLC_BODY <clc/geometric/unary_decl.inc> +#include <clc/math/gentype.inc> + +#undef __CLC_FUNCTION +#undef __CLC_GEOMETRIC_RET_GENTYPE + +#endif // __CLC_GEOMETRIC_CLC_NORMALIZE_H__ diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 9fbd8d9a77150..cccefbff1087e 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -7,7 +7,9 @@ geometric/clc_distance.cl geometric/clc_dot.cl geometric/clc_fast_distance.cl geometric/clc_fast_length.cl +geometric/clc_fast_normalize.cl geometric/clc_length.cl +geometric/clc_normalize.cl integer/clc_abs.cl integer/clc_abs_diff.cl integer/clc_add_sat.cl diff --git a/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl b/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl new file mode 100644 index 0000000000000..c34418c5fe9f7 --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/geometric/clc_normalize.h> +#include <clc/geometric/clc_dot.h> +#include <clc/math/clc_half_rsqrt.h> + +#define __FLOAT_ONLY +#define __CLC_BODY <clc_fast_normalize.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/geometric/clc_fast_normalize.inc b/libclc/clc/lib/generic/geometric/clc_fast_normalize.inc new file mode 100644 index 0000000000000..e4c3ab2c5a657 --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_fast_normalize.inc @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __CLC_VECSIZE_OR_1 == 1 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) { + return __clc_normalize(p); +} + +#elif (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \ + __CLC_VECSIZE_OR_1 == 4) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_fast_normalize(__CLC_GENTYPE p) { + __CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p); + return l2 == 0.0f ? p : p * __clc_half_rsqrt(l2); +} + +#endif diff --git a/libclc/clc/lib/generic/geometric/clc_normalize.cl b/libclc/clc/lib/generic/geometric/clc_normalize.cl new file mode 100644 index 0000000000000..ec4a67647c13e --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_normalize.cl @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <clc/clc_convert.h> +#include <clc/common/clc_sign.h> +#include <clc/float/definitions.h> +#include <clc/geometric/clc_dot.h> +#include <clc/geometric/clc_normalize.h> +#include <clc/internal/clc.h> +#include <clc/math/clc_copysign.h> +#include <clc/math/clc_rsqrt.h> +#include <clc/relational/clc_all.h> +#include <clc/relational/clc_isinf.h> +#include <clc/relational/clc_select.h> + +#define __CLC_BODY <clc_normalize.inc> +#include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/geometric/clc_normalize.inc b/libclc/clc/lib/generic/geometric/clc_normalize.inc new file mode 100644 index 0000000000000..f0fc164de3f21 --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_normalize.inc @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \ + __CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4) + +// Until we have a native FP16 implementation, go via FP32 +#if __CLC_FPSIZE == 16 + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) { + return __CLC_CONVERT_GENTYPE(__clc_normalize(__CLC_CONVERT_FLOATN(p))); +} + +// Scalar normalize +#elif defined(__CLC_SCALAR) + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) { + return __clc_sign(p); +} + +// Vector normalize +#else + +#if __CLC_FPSIZE == 32 +#define MAX_SQRT 0x1.0p+86F +#define MIN_SQRT 0x1.0p-65F +#elif __CLC_FPSIZE == 64 +#define MAX_SQRT 0x1.0p+563 +#define MIN_SQRT 0x1.0p-513 +#else +#error "Invalid FP size" +#endif + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) { + if (__clc_all(p == __CLC_FP_LIT(0.0))) { + return p; + } + + __CLC_SCALAR_GENTYPE l2 = __clc_dot(p, p); + + if (l2 < FLT_MIN) { + p *= MAX_SQRT; + l2 = __clc_dot(p, p); + } else if (l2 == INFINITY) { + p *= MIN_SQRT; + l2 = __clc_dot(p, p); + if (l2 == INFINITY) { + p = __clc_copysign(__clc_select((__CLC_GENTYPE)__CLC_FP_LIT(0.0), + (__CLC_GENTYPE)__CLC_FP_LIT(1.0), + __clc_isinf(p)), + p); + l2 = __clc_dot(p, p); + } + } + return p * __clc_rsqrt(l2); +} + +#undef MIN_SQRT +#undef MAX_SQRT + +#endif + +#endif diff --git a/libclc/generic/lib/geometric/fast_normalize.cl b/libclc/generic/lib/geometric/fast_normalize.cl index 585400d2200fd..8d84b451f0128 100644 --- a/libclc/generic/lib/geometric/fast_normalize.cl +++ b/libclc/generic/lib/geometric/fast_normalize.cl @@ -7,12 +7,11 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> +#include <clc/geometric/clc_fast_normalize.h> -_CLC_OVERLOAD _CLC_DEF float fast_normalize(float p) { - return normalize(p); -} - 
-#define __CLC_BODY <fast_normalize.inc> +#define FUNCTION fast_normalize #define __FLOAT_ONLY +#define __CLC_GEOMETRIC_RET_GENTYPE +#define __CLC_BODY <clc/geometric/unary_def.inc> + #include <clc/math/gentype.inc> -#undef __FLOAT_ONLY diff --git a/libclc/generic/lib/geometric/fast_normalize.inc b/libclc/generic/lib/geometric/fast_normalize.inc deleted file mode 100644 index ec1be94efc279..0000000000000 --- a/libclc/generic/lib/geometric/fast_normalize.inc +++ /dev/null @@ -1,19 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// Geometric functions are only defined for scalar, vec2, vec3 and vec4 -// Only handle vector implementations -#if (__CLC_VECSIZE_OR_1 == 2 || __CLC_VECSIZE_OR_1 == 3 || \ - __CLC_VECSIZE_OR_1 == 4) - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fast_normalize(__CLC_GENTYPE p) { - __CLC_SCALAR_GENTYPE l2 = dot(p, p); - return l2 == 0.0f ? 
p : p * half_rsqrt(l2); -} - -#endif diff --git a/libclc/generic/lib/geometric/normalize.cl b/libclc/generic/lib/geometric/normalize.cl index 36c1f64687536..10194a87997f4 100644 --- a/libclc/generic/lib/geometric/normalize.cl +++ b/libclc/generic/lib/geometric/normalize.cl @@ -7,137 +7,10 @@ //===----------------------------------------------------------------------===// #include <clc/clc.h> +#include <clc/geometric/clc_normalize.h> -_CLC_OVERLOAD _CLC_DEF float normalize(float p) { - return sign(p); -} +#define FUNCTION normalize +#define __CLC_GEOMETRIC_RET_GENTYPE +#define __CLC_BODY <clc/geometric/unary_def.inc> -_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) { - if (all(p == (float2)0.0F)) - return p; - - float l2 = dot(p, p); - - if (l2 < FLT_MIN) { - p *= 0x1.0p+86F; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-65f; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) { - if (all(p == (float3)0.0F)) - return p; - - float l2 = dot(p, p); - - if (l2 < FLT_MIN) { - p *= 0x1.0p+86F; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-66f; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) { - if (all(p == (float4)0.0F)) - return p; - - float l2 = dot(p, p); - - if (l2 < FLT_MIN) { - p *= 0x1.0p+86F; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-66f; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -#ifdef cl_khr_fp64 - -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF double normalize(double p) { - return sign(p); -} - -_CLC_OVERLOAD _CLC_DEF double2 
normalize(double2 p) { - if (all(p == (double2)0.0)) - return p; - - double l2 = dot(p, p); - - if (l2 < DBL_MIN) { - p *= 0x1.0p+563; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-513; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) { - if (all(p == (double3)0.0)) - return p; - - double l2 = dot(p, p); - - if (l2 < DBL_MIN) { - p *= 0x1.0p+563; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-514; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) { - if (all(p == (double4)0.0)) - return p; - - double l2 = dot(p, p); - - if (l2 < DBL_MIN) { - p *= 0x1.0p+563; - l2 = dot(p, p); - } else if (l2 == INFINITY) { - p *= 0x1.0p-514; - l2 = dot(p, p); - if (l2 == INFINITY) { - p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p); - l2 = dot(p, p); - } - } - return p * rsqrt(l2); -} - -#endif +#include <clc/math/gentype.inc> >From 939d417fa519de9de57b262859824a599a30f42c Mon Sep 17 00:00:00 2001 From: Fraser Cormack <fra...@codeplay.com> Date: Tue, 13 May 2025 17:12:38 +0100 Subject: [PATCH 2/2] fix-formatting --- libclc/clc/lib/generic/geometric/clc_fast_normalize.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl b/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl index c34418c5fe9f7..85684d0f49bc1 100644 --- a/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl +++ b/libclc/clc/lib/generic/geometric/clc_fast_normalize.cl @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include <clc/geometric/clc_normalize.h> #include <clc/geometric/clc_dot.h> +#include 
<clc/geometric/clc_normalize.h> #include <clc/math/clc_half_rsqrt.h> #define __FLOAT_ONLY _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits