Author: Wenju He Date: 2025-12-19T14:36:03+08:00 New Revision: d5326411fe866e010aadd3af3155b656a5aeaae3
URL: https://github.com/llvm/llvm-project/commit/d5326411fe866e010aadd3af3155b656a5aeaae3 DIFF: https://github.com/llvm/llvm-project/commit/d5326411fe866e010aadd3af3155b656a5aeaae3.diff LOG: [libclc] Improve __clc_min/max/clamp implementation (#172599) Replace __clc_max/min with __clc_fmax/fmin in __clc_clamp. FP __clc_min/max/clamp now lower to @llvm.minimumnum/@llvm.maximumnum, and integer clamp lowers to @llvm.umin/@llvm.umax. This reduces fcmp+select chains and improves codegen. Example change to amdgcn--amdhsa.bc: ``` in function _Z5clamphhh: > %4 = icmp ugt i8 %0, %2 %4 = tail call noundef i8 @llvm.umax.i8(i8 %0, i8 %1) > %6 = select i1 %4, i8 %2, i8 %5 > ret i8 %6 < %5 = tail call noundef i8 @llvm.umin.i8(i8 %2, i8 %4) < ret i8 %5 in function _Z5clampddd: in block %3 / %3: > %4 = fcmp ogt double %0, %2 > %5 = fcmp olt double %0, %1 > %6 = select i1 %5, double %1, double %0 > %7 = select i1 %4, double %2, double %6 > ret double %7 < %4 = tail call noundef double @llvm.maximumnum.f64(double %0, double %1) < %5 = tail call noundef double @llvm.minimumnum.f64(double %4, double %2) < ret double %5 ``` Added: Modified: libclc/clc/lib/generic/shared/clc_clamp.cl libclc/clc/lib/generic/shared/clc_clamp.inc libclc/clc/lib/generic/shared/clc_max.cl libclc/clc/lib/generic/shared/clc_max.inc libclc/clc/lib/generic/shared/clc_min.cl libclc/clc/lib/generic/shared/clc_min.inc Removed: ################################################################################ diff --git a/libclc/clc/lib/generic/shared/clc_clamp.cl b/libclc/clc/lib/generic/shared/clc_clamp.cl index 4d83a08e44c3b..1e81405f74197 100644 --- a/libclc/clc/lib/generic/shared/clc_clamp.cl +++ b/libclc/clc/lib/generic/shared/clc_clamp.cl @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include <clc/internal/clc.h> +#include <clc/shared/clc_max.h> +#include <clc/shared/clc_min.h> #define __CLC_BODY <clc_clamp.inc> #include <clc/integer/gentype.inc> diff --git 
a/libclc/clc/lib/generic/shared/clc_clamp.inc b/libclc/clc/lib/generic/shared/clc_clamp.inc index 571f6470e5703..bb72e8ac5bd3c 100644 --- a/libclc/clc/lib/generic/shared/clc_clamp.inc +++ b/libclc/clc/lib/generic/shared/clc_clamp.inc @@ -9,14 +9,13 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z) { - return (x > z ? z : (x < y ? y : x)); + return __clc_min(__clc_max(x, y), z); } #ifndef __CLC_SCALAR _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_clamp(__CLC_GENTYPE x, __CLC_SCALAR_GENTYPE y, __CLC_SCALAR_GENTYPE z) { - return (x > (__CLC_GENTYPE)z ? (__CLC_GENTYPE)z - : (x < (__CLC_GENTYPE)y ? (__CLC_GENTYPE)y : x)); + return __clc_min(__clc_max(x, (__CLC_GENTYPE)y), (__CLC_GENTYPE)z); } #endif diff --git a/libclc/clc/lib/generic/shared/clc_max.cl b/libclc/clc/lib/generic/shared/clc_max.cl index 092ce33b38331..a6e831d1eac0c 100644 --- a/libclc/clc/lib/generic/shared/clc_max.cl +++ b/libclc/clc/lib/generic/shared/clc_max.cl @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include <clc/internal/clc.h> +#include <clc/math/clc_fmax.h> #define __CLC_BODY <clc_max.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/shared/clc_max.inc b/libclc/clc/lib/generic/shared/clc_max.inc index 882f29ce30d94..98e2a50cb2857 100644 --- a/libclc/clc/lib/generic/shared/clc_max.inc +++ b/libclc/clc/lib/generic/shared/clc_max.inc @@ -6,6 +6,22 @@ // //===----------------------------------------------------------------------===// +#ifdef __CLC_FPSIZE + +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_GENTYPE b) { + return __clc_fmax(a, b); +} + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { + return __clc_fmax(a, b); +} +#endif // __CLC_SCALAR + +#else // __CLC_FPSIZE + _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, __CLC_GENTYPE b) { return (a > b ? 
a : b); @@ -16,4 +32,6 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_max(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { return (a > (__CLC_GENTYPE)b ? a : (__CLC_GENTYPE)b); } -#endif +#endif // __CLC_SCALAR + +#endif // __CLC_FPSIZE diff --git a/libclc/clc/lib/generic/shared/clc_min.cl b/libclc/clc/lib/generic/shared/clc_min.cl index d34a606839f21..f345e55c33be7 100644 --- a/libclc/clc/lib/generic/shared/clc_min.cl +++ b/libclc/clc/lib/generic/shared/clc_min.cl @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include <clc/internal/clc.h> +#include <clc/math/clc_fmin.h> #define __CLC_BODY <clc_min.inc> #include <clc/integer/gentype.inc> diff --git a/libclc/clc/lib/generic/shared/clc_min.inc b/libclc/clc/lib/generic/shared/clc_min.inc index d8a51291dbc27..90877d1ab9b17 100644 --- a/libclc/clc/lib/generic/shared/clc_min.inc +++ b/libclc/clc/lib/generic/shared/clc_min.inc @@ -6,6 +6,21 @@ // //===----------------------------------------------------------------------===// +#ifdef __CLC_FPSIZE +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_GENTYPE b) { + return __clc_fmin(a, b); +} + +#ifndef __CLC_SCALAR +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, + __CLC_SCALAR_GENTYPE b) { + return __clc_fmin(a, b); +} +#endif // __CLC_SCALAR + +#else // __CLC_FPSIZE + _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, __CLC_GENTYPE b) { return (b < a ? b : a); @@ -16,4 +31,6 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_min(__CLC_GENTYPE a, __CLC_SCALAR_GENTYPE b) { return (b < (__CLC_GENTYPE)a ? (__CLC_GENTYPE)b : a); } -#endif +#endif // __CLC_SCALAR + +#endif // __CLC_FPSIZE _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
