https://github.com/wenju-he created https://github.com/llvm/llvm-project/pull/150165
This is ported from
https://github.com/intel/llvm/blob/sycl/libclc/libspirv/lib/generic/geometric/normalize.cl
and can pass a closed-source OpenCL CTS "test_geometrics geom_normalize --half
CL_DEVICE_TYPE_GPU" on intel GPU.
llvm-diff amdgcn--amdhsa.bc shows fpext/fptrunc insts are now removed from
normalize function.

From dc8517b82e812e4238656e4a4ada0057a0487fda Mon Sep 17 00:00:00 2001
From: Wenju He <wenju...@intel.com>
Date: Wed, 23 Jul 2025 05:42:04 +0200
Subject: [PATCH] [libclc] Add native half implementation of __clc_normalize

This is ported from
https://github.com/intel/llvm/blob/sycl/libclc/libspirv/lib/generic/geometric/normalize.cl
and can pass a closed-source OpenCL CTS "test_geometrics geom_normalize --half
CL_DEVICE_TYPE_GPU" on intel GPU.
llvm-diff amdgcn--amdhsa.bc shows fpext/fptrunc insts are now removed from
normalize function.
---
 .../clc/lib/generic/geometric/clc_normalize.inc | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/libclc/clc/lib/generic/geometric/clc_normalize.inc b/libclc/clc/lib/generic/geometric/clc_normalize.inc
index 8a47c6d4826e9..9b2cbc8f19dd0 100644
--- a/libclc/clc/lib/generic/geometric/clc_normalize.inc
+++ b/libclc/clc/lib/generic/geometric/clc_normalize.inc
@@ -10,15 +10,8 @@
 #if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 || \
      __CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
 
-// Until we have a native FP16 implementation, go via FP32
-#if __CLC_FPSIZE == 16
-
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
-  return __CLC_CONVERT_GENTYPE(__clc_normalize(__CLC_CONVERT_FLOATN(p)));
-}
-
 // Scalar normalize
-#elif defined(__CLC_SCALAR)
+#if defined(__CLC_SCALAR)
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
   return __clc_sign(p);
@@ -27,7 +20,13 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_normalize(__CLC_GENTYPE p) {
 // Vector normalize
 #else
 
-#if __CLC_FPSIZE == 32
+#if __CLC_FPSIZE == 16
+
+#define MIN_VAL HALF_MIN
+#define MAX_SQRT 0x1.0p+8h
+#define MIN_SQRT 0x1.0p-8h
+
+#elif __CLC_FPSIZE == 32
 
 #define MIN_VAL FLT_MIN
 #define MAX_SQRT 0x1.0p+86F

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits