https://github.com/frasercrmck updated https://github.com/llvm/llvm-project/pull/128506
>From 5c367b8c87bf07f851778f391e5130a0fd0b193b Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 24 Feb 2025 12:25:22 +0000
Subject: [PATCH] [libclc] Optimize generic CLC fmin/fmax

The CLC fmin/fmax builtins now use clang's __builtin_elementwise_(min|max)
which helps us generate llvm.(min|max)num intrinsics directly. These
intrinsics select the non-NAN input over the NAN input, which adheres to
the OpenCL specification. Note that the OpenCL specification doesn't
require support for sNAN, so returning qNAN over sNAN is acceptable. Note
also that the intrinsics don't differentiate between -0.0 and +0.0; this
does not appear to be required - going by the OpenCL CTS, at least.

These intrinsics maintain the vector types, as opposed to scalarizing,
which was previously happening. This commit therefore helps to optimize
codegen for those targets.
---
 libclc/clc/lib/generic/math/clc_fmax.cl | 29 ++++---------------------
 libclc/clc/lib/generic/math/clc_fmin.cl | 29 ++++---------------------
 2 files changed, 8 insertions(+), 50 deletions(-)

diff --git a/libclc/clc/lib/generic/math/clc_fmax.cl b/libclc/clc/lib/generic/math/clc_fmax.cl
index 8ee369f57d38b..c41a27974edf9 100644
--- a/libclc/clc/lib/generic/math/clc_fmax.cl
+++ b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -6,31 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, __clc_fmax, __builtin_fmaxf, float, float);
+#define FUNCTION __clc_fmax
+#define __CLC_FUNCTION(x) __builtin_elementwise_max
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, __clc_fmax, __builtin_fmax, double, double);
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (x < y) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmax, half, half)
-
-#endif
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_fmin.cl b/libclc/clc/lib/generic/math/clc_fmin.cl
index 2f307274b9be5..7b2efafcf90e0 100644
--- a/libclc/clc/lib/generic/math/clc_fmin.cl
+++ b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -6,31 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, __clc_fmin, __builtin_fminf, float, float);
+#define FUNCTION __clc_fmin
+#define __CLC_FUNCTION(x) __builtin_elementwise_min
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, __clc_fmin, __builtin_fmin, double, double);
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmin, half, half)
-
-#endif
+#include <clc/math/gentype.inc>

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits