Author: Fraser Cormack Date: 2024-12-13T08:47:13Z New Revision: 06789ccb1695214f787cd471a300522973d33375
URL: https://github.com/llvm/llvm-project/commit/06789ccb1695214f787cd471a300522973d33375 DIFF: https://github.com/llvm/llvm-project/commit/06789ccb1695214f787cd471a300522973d33375.diff LOG: [libclc] Optimize ceil/fabs/floor/rint/trunc (#119596) These functions all map to the corresponding LLVM intrinsics, but the vector intrinsics weren't being generated. The intrinsic mapping from CLC vector function to vector intrinsic was working correctly, but the mapping from OpenCL builtin to CLC function was suboptimal, recursively splitting vectors into halves. For example, with this change, `ceil(float16)` calls `llvm.ceil.v16f32` directly once optimizations are applied. In addition, instead of generating LLVM intrinsics through `__asm`, we now call clang elementwise builtins for each CLC builtin. This should be a more standard way of achieving the same result. The CLC versions of each of these builtins are also now built and enabled for SPIR-V targets. The LLVM -> SPIR-V translator maps the intrinsics to the appropriate OpExtInst, so there should be no difference in semantics, despite the newly introduced indirection from OpenCL builtin through the CLC builtin to the intrinsic. The AMDGPU targets make use of the same `_CLC_DEFINE_UNARY_BUILTIN` macro to override `sqrt`, so those functions also appear more optimal with this change, calling the vector `llvm.sqrt.vXf32` intrinsics directly. 
Added: libclc/clc/include/clc/math/unary_builtin.inc libclc/clc/lib/generic/math/clc_ceil.cl libclc/clc/lib/generic/math/clc_fabs.cl libclc/clc/lib/generic/math/clc_floor.cl libclc/clc/lib/generic/math/clc_rint.cl libclc/clc/lib/generic/math/clc_trunc.cl Modified: libclc/clc/include/clc/clcmacro.h libclc/clc/include/clc/math/clc_ceil.h libclc/clc/include/clc/math/clc_fabs.h libclc/clc/include/clc/math/clc_floor.h libclc/clc/include/clc/math/clc_rint.h libclc/clc/include/clc/math/clc_trunc.h libclc/clc/lib/clspv/SOURCES libclc/clc/lib/generic/SOURCES libclc/clc/lib/spirv/SOURCES libclc/clc/lib/spirv64/SOURCES libclc/generic/lib/math/ceil.cl libclc/generic/lib/math/fabs.cl libclc/generic/lib/math/floor.cl libclc/generic/lib/math/rint.cl libclc/generic/lib/math/round.cl libclc/generic/lib/math/sqrt.cl libclc/generic/lib/math/trunc.cl Removed: libclc/clc/lib/clspv/dummy.cl libclc/generic/lib/math/unary_builtin.inc ################################################################################ diff --git a/libclc/clc/include/clc/clcmacro.h b/libclc/clc/include/clc/clcmacro.h index 244239284ecabc..c6583749eca661 100644 --- a/libclc/clc/include/clc/clcmacro.h +++ b/libclc/clc/include/clc/clcmacro.h @@ -191,7 +191,21 @@ #define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE) \ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { return BUILTIN(x); } \ - _CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION, ARG1_TYPE) + _CLC_DEF _CLC_OVERLOAD RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \ + return BUILTIN(x); \ + } \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \ + return BUILTIN(x); \ + } \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x) { \ + return BUILTIN(x); \ + } \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x) { \ + return BUILTIN(x); \ + } \ + _CLC_DEF _CLC_OVERLOAD RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x) { \ + return BUILTIN(x); \ + } #ifdef cl_khr_fp16 diff --git 
a/libclc/clc/include/clc/math/clc_ceil.h b/libclc/clc/include/clc/math/clc_ceil.h index 66590687c34220..20adc6d81d863f 100644 --- a/libclc/clc/include/clc/math/clc_ceil.h +++ b/libclc/clc/include/clc/math/clc_ceil.h @@ -1,19 +1,12 @@ #ifndef __CLC_MATH_CLC_CEIL_H__ #define __CLC_MATH_CLC_CEIL_H__ -#if defined(CLC_CLSPV) || defined(CLC_SPIRV) -// clspv and spir-v targets provide their own OpenCL-compatible ceil -#define __clc_ceil ceil -#else - -// Map the function to an LLVM intrinsic +#define __CLC_BODY <clc/math/unary_decl.inc> #define __CLC_FUNCTION __clc_ceil -#define __CLC_INTRINSIC "llvm.ceil" -#include <clc/math/unary_intrin.inc> -#undef __CLC_INTRINSIC -#undef __CLC_FUNCTION +#include <clc/math/gentype.inc> -#endif +#undef __CLC_BODY +#undef __CLC_FUNCTION #endif // __CLC_MATH_CLC_CEIL_H__ diff --git a/libclc/clc/include/clc/math/clc_fabs.h b/libclc/clc/include/clc/math/clc_fabs.h index 93367b57313713..911d34f78c7d25 100644 --- a/libclc/clc/include/clc/math/clc_fabs.h +++ b/libclc/clc/include/clc/math/clc_fabs.h @@ -1,19 +1,12 @@ #ifndef __CLC_MATH_CLC_FABS_H__ #define __CLC_MATH_CLC_FABS_H__ -#if defined(CLC_CLSPV) || defined(CLC_SPIRV) -// clspv and spir-v targets provide their own OpenCL-compatible fabs -#define __clc_fabs fabs -#else - -// Map the function to an LLVM intrinsic +#define __CLC_BODY <clc/math/unary_decl.inc> #define __CLC_FUNCTION __clc_fabs -#define __CLC_INTRINSIC "llvm.fabs" -#include <clc/math/unary_intrin.inc> -#undef __CLC_INTRINSIC -#undef __CLC_FUNCTION +#include <clc/math/gentype.inc> -#endif +#undef __CLC_BODY +#undef __CLC_FUNCTION #endif // __CLC_MATH_CLC_FABS_H__ diff --git a/libclc/clc/include/clc/math/clc_floor.h b/libclc/clc/include/clc/math/clc_floor.h index 9919872ec633c6..c311cc0edae151 100644 --- a/libclc/clc/include/clc/math/clc_floor.h +++ b/libclc/clc/include/clc/math/clc_floor.h @@ -1,19 +1,12 @@ #ifndef __CLC_MATH_CLC_FLOOR_H__ #define __CLC_MATH_CLC_FLOOR_H__ -#if defined(CLC_CLSPV) || defined(CLC_SPIRV) -// clspv 
and spir-v targets provide their own OpenCL-compatible floor -#define __clc_floor floor -#else - -// Map the function to an LLVM intrinsic +#define __CLC_BODY <clc/math/unary_decl.inc> #define __CLC_FUNCTION __clc_floor -#define __CLC_INTRINSIC "llvm.floor" -#include <clc/math/unary_intrin.inc> -#undef __CLC_INTRINSIC -#undef __CLC_FUNCTION +#include <clc/math/gentype.inc> -#endif +#undef __CLC_BODY +#undef __CLC_FUNCTION #endif // __CLC_MATH_CLC_FLOOR_H__ diff --git a/libclc/clc/include/clc/math/clc_rint.h b/libclc/clc/include/clc/math/clc_rint.h index 3761407ad326d7..6faeed0b5696e5 100644 --- a/libclc/clc/include/clc/math/clc_rint.h +++ b/libclc/clc/include/clc/math/clc_rint.h @@ -1,19 +1,12 @@ #ifndef __CLC_MATH_CLC_RINT_H__ #define __CLC_MATH_CLC_RINT_H__ -#if defined(CLC_CLSPV) || defined(CLC_SPIRV) -// clspv and spir-v targets provide their own OpenCL-compatible rint -#define __clc_rint rint -#else - -// Map the function to an LLVM intrinsic +#define __CLC_BODY <clc/math/unary_decl.inc> #define __CLC_FUNCTION __clc_rint -#define __CLC_INTRINSIC "llvm.rint" -#include <clc/math/unary_intrin.inc> -#undef __CLC_INTRINSIC -#undef __CLC_FUNCTION +#include <clc/math/gentype.inc> -#endif +#undef __CLC_BODY +#undef __CLC_FUNCTION #endif // __CLC_MATH_CLC_RINT_H__ diff --git a/libclc/clc/include/clc/math/clc_trunc.h b/libclc/clc/include/clc/math/clc_trunc.h index c78c8899d85238..acfc9d5db48117 100644 --- a/libclc/clc/include/clc/math/clc_trunc.h +++ b/libclc/clc/include/clc/math/clc_trunc.h @@ -1,19 +1,12 @@ #ifndef __CLC_MATH_CLC_TRUNC_H__ #define __CLC_MATH_CLC_TRUNC_H__ -#if defined(CLC_CLSPV) || defined(CLC_SPIRV) -// clspv and spir-v targets provide their own OpenCL-compatible trunc -#define __clc_trunc trunc -#else - -// Map the function to an LLVM intrinsic +#define __CLC_BODY <clc/math/unary_decl.inc> #define __CLC_FUNCTION __clc_trunc -#define __CLC_INTRINSIC "llvm.trunc" -#include <clc/math/unary_intrin.inc> -#undef __CLC_INTRINSIC -#undef __CLC_FUNCTION 
+#include <clc/math/gentype.inc> -#endif +#undef __CLC_BODY +#undef __CLC_FUNCTION #endif // __CLC_MATH_CLC_TRUNC_H__ diff --git a/libclc/generic/lib/math/unary_builtin.inc b/libclc/clc/include/clc/math/unary_builtin.inc similarity index 100% rename from libclc/generic/lib/math/unary_builtin.inc rename to libclc/clc/include/clc/math/unary_builtin.inc diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES index 75a3130357c345..393e8d773cda0e 100644 --- a/libclc/clc/lib/clspv/SOURCES +++ b/libclc/clc/lib/clspv/SOURCES @@ -1 +1,5 @@ -dummy.cl +../generic/math/clc_ceil.cl +../generic/math/clc_fabs.cl +../generic/math/clc_floor.cl +../generic/math/clc_rint.cl +../generic/math/clc_trunc.cl diff --git a/libclc/clc/lib/clspv/dummy.cl b/libclc/clc/lib/clspv/dummy.cl deleted file mode 100644 index fab17ac780e375..00000000000000 --- a/libclc/clc/lib/clspv/dummy.cl +++ /dev/null @@ -1 +0,0 @@ -// Empty file diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index d7ffaaf6dc3f42..3916ea15f5c458 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -1,6 +1,11 @@ geometric/clc_dot.cl integer/clc_abs.cl integer/clc_abs_diff.cl +math/clc_ceil.cl +math/clc_fabs.cl +math/clc_floor.cl +math/clc_rint.cl +math/clc_trunc.cl relational/clc_all.cl relational/clc_any.cl relational/clc_bitselect.cl diff --git a/libclc/clc/lib/generic/math/clc_ceil.cl b/libclc/clc/lib/generic/math/clc_ceil.cl new file mode 100644 index 00000000000000..c712e5fd024d90 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_ceil.cl @@ -0,0 +1,6 @@ +#include <clc/internal/clc.h> + +#undef __CLC_FUNCTION +#define __CLC_FUNCTION __clc_ceil +#define __CLC_BUILTIN __builtin_elementwise_ceil +#include <clc/math/unary_builtin.inc> diff --git a/libclc/clc/lib/generic/math/clc_fabs.cl b/libclc/clc/lib/generic/math/clc_fabs.cl new file mode 100644 index 00000000000000..23ff3a7a187e1d --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_fabs.cl @@ -0,0 
+1,6 @@ +#include <clc/internal/clc.h> + +#undef __CLC_FUNCTION +#define __CLC_FUNCTION __clc_fabs +#define __CLC_BUILTIN __builtin_elementwise_abs +#include <clc/math/unary_builtin.inc> diff --git a/libclc/clc/lib/generic/math/clc_floor.cl b/libclc/clc/lib/generic/math/clc_floor.cl new file mode 100644 index 00000000000000..98345c768f2271 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_floor.cl @@ -0,0 +1,6 @@ +#include <clc/internal/clc.h> + +#undef __CLC_FUNCTION +#define __CLC_FUNCTION __clc_floor +#define __CLC_BUILTIN __builtin_elementwise_floor +#include <clc/math/unary_builtin.inc> diff --git a/libclc/clc/lib/generic/math/clc_rint.cl b/libclc/clc/lib/generic/math/clc_rint.cl new file mode 100644 index 00000000000000..28ad321a7b4f6d --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_rint.cl @@ -0,0 +1,6 @@ +#include <clc/internal/clc.h> + +#undef __CLC_FUNCTION +#define __CLC_FUNCTION __clc_rint +#define __CLC_BUILTIN __builtin_elementwise_rint +#include <clc/math/unary_builtin.inc> diff --git a/libclc/clc/lib/generic/math/clc_trunc.cl b/libclc/clc/lib/generic/math/clc_trunc.cl new file mode 100644 index 00000000000000..e62ae062e05020 --- /dev/null +++ b/libclc/clc/lib/generic/math/clc_trunc.cl @@ -0,0 +1,6 @@ +#include <clc/internal/clc.h> + +#undef __CLC_FUNCTION +#define __CLC_FUNCTION __clc_trunc +#define __CLC_BUILTIN __builtin_elementwise_trunc +#include <clc/math/unary_builtin.inc> diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES index d8effd19613c8b..3b29fa0a916243 100644 --- a/libclc/clc/lib/spirv/SOURCES +++ b/libclc/clc/lib/spirv/SOURCES @@ -1,2 +1,6 @@ ../generic/geometric/clc_dot.cl - +../generic/math/clc_ceil.cl +../generic/math/clc_fabs.cl +../generic/math/clc_floor.cl +../generic/math/clc_rint.cl +../generic/math/clc_trunc.cl diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES index 9200810ace38e7..3b29fa0a916243 100644 --- a/libclc/clc/lib/spirv64/SOURCES +++ 
b/libclc/clc/lib/spirv64/SOURCES @@ -1 +1,6 @@ ../generic/geometric/clc_dot.cl +../generic/math/clc_ceil.cl +../generic/math/clc_fabs.cl +../generic/math/clc_floor.cl +../generic/math/clc_rint.cl +../generic/math/clc_trunc.cl diff --git a/libclc/generic/lib/math/ceil.cl b/libclc/generic/lib/math/ceil.cl index e02789e694e06e..8df864a06314d8 100644 --- a/libclc/generic/lib/math/ceil.cl +++ b/libclc/generic/lib/math/ceil.cl @@ -4,4 +4,4 @@ #undef __CLC_FUNCTION #define __CLC_FUNCTION ceil -#include "unary_builtin.inc" +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/fabs.cl b/libclc/generic/lib/math/fabs.cl index 9644369d4a0953..55701cb36a9512 100644 --- a/libclc/generic/lib/math/fabs.cl +++ b/libclc/generic/lib/math/fabs.cl @@ -4,4 +4,4 @@ #undef __CLC_FUNCTION #define __CLC_FUNCTION fabs -#include "unary_builtin.inc" +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/floor.cl b/libclc/generic/lib/math/floor.cl index f5c36b73862a46..0854fa7efc4580 100644 --- a/libclc/generic/lib/math/floor.cl +++ b/libclc/generic/lib/math/floor.cl @@ -4,4 +4,4 @@ #undef __CLC_FUNCTION #define __CLC_FUNCTION floor -#include "unary_builtin.inc" +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/rint.cl b/libclc/generic/lib/math/rint.cl index 185bbbbf8c91d2..ecf7d5c1e6dde8 100644 --- a/libclc/generic/lib/math/rint.cl +++ b/libclc/generic/lib/math/rint.cl @@ -3,4 +3,4 @@ #undef __CLC_FUNCTION #define __CLC_FUNCTION rint -#include "unary_builtin.inc" +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/round.cl b/libclc/generic/lib/math/round.cl index 285328aaa5d563..6344051820c798 100644 --- a/libclc/generic/lib/math/round.cl +++ b/libclc/generic/lib/math/round.cl @@ -7,4 +7,4 @@ #undef __CLC_FUNCTION #define __CLC_FUNCTION round -#include "unary_builtin.inc" +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl index 
8df25dd45adb67..a9192a9493d172 100644 --- a/libclc/generic/lib/math/sqrt.cl +++ b/libclc/generic/lib/math/sqrt.cl @@ -24,4 +24,4 @@ #include "math/clc_sqrt.h" #define __CLC_FUNCTION sqrt -#include "unary_builtin.inc" +#include <clc/math/unary_builtin.inc> diff --git a/libclc/generic/lib/math/trunc.cl b/libclc/generic/lib/math/trunc.cl index 00c2a4a80015fe..1d5f04a3230541 100644 --- a/libclc/generic/lib/math/trunc.cl +++ b/libclc/generic/lib/math/trunc.cl @@ -3,4 +3,4 @@ #undef __CLC_FUNCTION #define __CLC_FUNCTION trunc -#include "unary_builtin.inc" +#include <clc/math/unary_builtin.inc> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits