Author: Wenju He
Date: 2025-06-06T08:09:53+08:00
New Revision: de3a9ea510fecd501b4dd57534cf300d1c9622c9
URL: https://github.com/llvm/llvm-project/commit/de3a9ea510fecd501b4dd57534cf300d1c9622c9
DIFF: https://github.com/llvm/llvm-project/commit/de3a9ea510fecd501b4dd57534cf300d1c9622c9.diff

LOG: [NFC][libclc] Simplify clc_dot and dot implementation (#142922)

llvm-diff shows no change to amdgcn--amdhsa.bc

Added:
    libclc/clc/lib/generic/geometric/clc_dot.inc

Modified:
    libclc/clc/lib/generic/geometric/clc_dot.cl
    libclc/opencl/lib/generic/geometric/dot.cl

Removed:


################################################################################
diff --git a/libclc/clc/lib/generic/geometric/clc_dot.cl b/libclc/clc/lib/generic/geometric/clc_dot.cl
index 2c82798d71a50..a30f9bc6e301a 100644
--- a/libclc/clc/lib/generic/geometric/clc_dot.cl
+++ b/libclc/clc/lib/generic/geometric/clc_dot.cl
@@ -7,59 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include <clc/internal/clc.h>
+#include <clc/math/clc_fma.h>
 
-_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; }
-
-_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) {
-  return p0.x * p1.x + p0.y * p1.y;
-}
-
-_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
-}
-
-_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
-}
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) {
-  return p0 * p1;
-}
-
-_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) {
-  return p0.x * p1.x + p0.y * p1.y;
-}
-
-_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
-}
-
-_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
-}
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; }
-
-_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) {
-  return p0.x * p1.x + p0.y * p1.y;
-}
-
-_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
-}
-
-_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) {
-  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
-}
-
-#endif
+#define __CLC_BODY <clc_dot.inc>
+#include <clc/math/gentype.inc>

diff --git a/libclc/clc/lib/generic/geometric/clc_dot.inc b/libclc/clc/lib/generic/geometric/clc_dot.inc
new file mode 100644
index 0000000000000..0c89493bf7332
--- /dev/null
+++ b/libclc/clc/lib/generic/geometric/clc_dot.inc
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if (__CLC_VECSIZE_OR_1 == 1 || __CLC_VECSIZE_OR_1 == 2 ||                     \
+     __CLC_VECSIZE_OR_1 == 3 || __CLC_VECSIZE_OR_1 == 4)
+
+_CLC_OVERLOAD _CLC_DEF __CLC_SCALAR_GENTYPE __clc_dot(__CLC_GENTYPE x,
+                                                      __CLC_GENTYPE y) {
+#if __CLC_VECSIZE_OR_1 == 1
+  return x * y;
+#elif __CLC_VECSIZE_OR_1 == 2
+  return x.s0 * y.s0 + x.s1 * y.s1;
+#elif __CLC_VECSIZE_OR_1 == 3
+  return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2;
+#else
+  return x.s0 * y.s0 + x.s1 * y.s1 + x.s2 * y.s2 + x.s3 * y.s3;
+#endif
+}
+
+#endif

diff --git a/libclc/opencl/lib/generic/geometric/dot.cl b/libclc/opencl/lib/generic/geometric/dot.cl
index 72d30ea53f8cf..785767f27aacf 100644
--- a/libclc/opencl/lib/generic/geometric/dot.cl
+++ b/libclc/opencl/lib/generic/geometric/dot.cl
@@ -9,60 +9,6 @@
 #include <clc/geometric/clc_dot.h>
 #include <clc/opencl/clc.h>
 
-_CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
-  return __clc_dot(p0, p1);
-}
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
-  return __clc_dot(p0, p1);
-}
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); }
-
-_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
-  return __clc_dot(p0, p1);
-}
-
-_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
-  return __clc_dot(p0, p1);
-}
-
-#endif
+#define FUNCTION dot
+#define __CLC_BODY <clc/geometric/binary_def.inc>
+#include <clc/math/gentype.inc>



_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits