https://github.com/frasercrmck updated 
https://github.com/llvm/llvm-project/pull/128506

>From 5c367b8c87bf07f851778f391e5130a0fd0b193b Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fra...@codeplay.com>
Date: Mon, 24 Feb 2025 12:25:22 +0000
Subject: [PATCH] [libclc] Optimize generic CLC fmin/fmax

The CLC fmin/fmax builtins now use clang's
__builtin_elementwise_(min|max) which helps us generate
llvm.(min|max)num intrinsics directly. These intrinsics select the
non-NAN input over the NAN input, which adheres to the OpenCL
specification. Note that the OpenCL specification doesn't require
support for sNAN, so returning qNAN over sNAN is acceptable. Note also
that the intrinsics don't differentiate between -0.0 and +0.0; doing so
does not appear to be required - going by the OpenCL CTS, at least.

These intrinsics maintain the vector types, as opposed to scalarizing,
which was previously happening. This commit therefore helps to optimize
codegen for targets that use these generic builtins.
---
 libclc/clc/lib/generic/math/clc_fmax.cl | 29 ++++---------------------
 libclc/clc/lib/generic/math/clc_fmin.cl | 29 ++++---------------------
 2 files changed, 8 insertions(+), 50 deletions(-)

diff --git a/libclc/clc/lib/generic/math/clc_fmax.cl b/libclc/clc/lib/generic/math/clc_fmax.cl
index 8ee369f57d38b..c41a27974edf9 100644
--- a/libclc/clc/lib/generic/math/clc_fmax.cl
+++ b/libclc/clc/lib/generic/math/clc_fmax.cl
@@ -6,31 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, __clc_fmax, __builtin_fmaxf, float, float);
+#define FUNCTION __clc_fmax
+#define __CLC_FUNCTION(x) __builtin_elementwise_max
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, __clc_fmax, __builtin_fmax, double, double);
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmax(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (x < y) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmax, half, half)
-
-#endif
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_fmin.cl b/libclc/clc/lib/generic/math/clc_fmin.cl
index 2f307274b9be5..7b2efafcf90e0 100644
--- a/libclc/clc/lib/generic/math/clc_fmin.cl
+++ b/libclc/clc/lib/generic/math/clc_fmin.cl
@@ -6,31 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/clcmacro.h>
 #include <clc/internal/clc.h>
-#include <clc/relational/clc_isnan.h>
 
-_CLC_DEFINE_BINARY_BUILTIN(float, __clc_fmin, __builtin_fminf, float, float);
+#define FUNCTION __clc_fmin
+#define __CLC_FUNCTION(x) __builtin_elementwise_min
+#define __CLC_BODY <clc/shared/binary_def.inc>
 
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_DEFINE_BINARY_BUILTIN(double, __clc_fmin, __builtin_fmin, double, double);
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEF _CLC_OVERLOAD half __clc_fmin(half x, half y) {
-  if (__clc_isnan(x))
-    return y;
-  if (__clc_isnan(y))
-    return x;
-  return (y < x) ? y : x;
-}
-_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_fmin, half, half)
-
-#endif
+#include <clc/math/gentype.inc>

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to