Author: Wenju He Date: 2025-09-05T19:58:07+08:00 New Revision: 28d9255aa7c05738c7fd88711006d71d4dfc952a
URL: https://github.com/llvm/llvm-project/commit/28d9255aa7c05738c7fd88711006d71d4dfc952a DIFF: https://github.com/llvm/llvm-project/commit/28d9255aa7c05738c7fd88711006d71d4dfc952a.diff LOG: [libclc] Override generic symbol using llvm-link --override flag instead of using weak linkage (#156778) Before this PR, weak linkage was applied to a few CLC generic functions to allow a target-specific implementation to override the generic one. However, adding weak linkage has the side effect of preventing inter-procedural optimization, such as PostOrderFunctionAttrsPass, because a weak function doesn't have an exact definition (as determined by hasExactDefinition in the pass). This PR resolves the issue by adding the --override flag for every non-generic bitcode file in the llvm-link run. This approach eliminates the need for weak linkage while still allowing a target-specific implementation to override the generic one. llvm-diff shows improved attribute deduction for some functions in amdgcn--amdhsa.bc, e.g. %23 = tail call half @llvm.sqrt.f16(half %22) => %23 = tail call noundef half @llvm.sqrt.f16(half %22) Added: Modified: libclc/clc/lib/generic/math/clc_ldexp.cl libclc/clc/lib/generic/math/clc_rsqrt.inc libclc/clc/lib/generic/math/clc_sqrt.inc libclc/cmake/modules/AddLibclc.cmake Removed: ################################################################################ diff --git a/libclc/clc/lib/generic/math/clc_ldexp.cl b/libclc/clc/lib/generic/math/clc_ldexp.cl index 8b41751e40282..f9252a75ab4bf 100644 --- a/libclc/clc/lib/generic/math/clc_ldexp.cl +++ b/libclc/clc/lib/generic/math/clc_ldexp.cl @@ -14,9 +14,7 @@ #include <clc/relational/clc_isnan.h> #include <clc/shared/clc_clamp.h> -#define _CLC_DEF_ldexp _CLC_DEF __attribute__((weak)) - -_CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) { +_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) { if (!__clc_fp32_subnormals_supported()) { // This treats subnormals as zeros @@ -89,7 +87,7 @@ _CLC_DEF_ldexp _CLC_OVERLOAD float 
__clc_ldexp(float x, int n) { #pragma OPENCL EXTENSION cl_khr_fp64 : enable -_CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) { +_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) { long l = __clc_as_ulong(x); int e = (l >> 52) & 0x7ff; long s = l & 0x8000000000000000; @@ -124,14 +122,13 @@ _CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) { #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_OVERLOAD _CLC_DEF_ldexp half __clc_ldexp(half x, int n) { +_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) { return (half)__clc_ldexp((float)x, n); } #endif #define __CLC_FUNCTION __clc_ldexp -#define __CLC_DEF_SPEC _CLC_DEF_ldexp #define __CLC_ARG2_TYPE int #define __CLC_BODY <clc/shared/binary_def_scalarize.inc> #include <clc/math/gentype.inc> diff --git a/libclc/clc/lib/generic/math/clc_rsqrt.inc b/libclc/clc/lib/generic/math/clc_rsqrt.inc index 4c04155a932c7..07aad16f91916 100644 --- a/libclc/clc/lib/generic/math/clc_rsqrt.inc +++ b/libclc/clc/lib/generic/math/clc_rsqrt.inc @@ -6,8 +6,7 @@ // //===----------------------------------------------------------------------===// -__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__clc_rsqrt(__CLC_GENTYPE val) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rsqrt(__CLC_GENTYPE val) { #pragma clang fp contract(fast) return __CLC_FP_LIT(1.0) / __builtin_elementwise_sqrt(val); } diff --git a/libclc/clc/lib/generic/math/clc_sqrt.inc b/libclc/clc/lib/generic/math/clc_sqrt.inc index 61e341993f5c8..e15dcf75ac3f2 100644 --- a/libclc/clc/lib/generic/math/clc_sqrt.inc +++ b/libclc/clc/lib/generic/math/clc_sqrt.inc @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE -__clc_sqrt(__CLC_GENTYPE val) { +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) { return __builtin_elementwise_sqrt(val); } diff --git a/libclc/cmake/modules/AddLibclc.cmake 
b/libclc/cmake/modules/AddLibclc.cmake index 5cc202ddbaa8c..aa8dd9859cd22 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -92,19 +92,35 @@ function(link_bc) ${ARGN} ) - set( LINK_INPUT_ARG ${ARG_INPUTS} ) + if( ARG_INTERNALIZE ) + set( inputs_with_flag ${ARG_INPUTS} ) + else() + # Add the --override flag for non-generic bitcode files so that their + # symbols can override definitions in generic bitcode files. + set( inputs_with_flag ) + foreach( file IN LISTS ARG_INPUTS ) + string( FIND ${file} "/generic/" is_generic ) + if( is_generic LESS 0 ) + list( APPEND inputs_with_flag "--override" ) + endif() + list( APPEND inputs_with_flag ${file} ) + endforeach() + endif() + if( WIN32 OR CYGWIN ) # Create a response file in case the number of inputs exceeds command-line # character limits on certain platforms. file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE ) # Turn it into a space-separate list of input files - list( JOIN ARG_INPUTS " " RSP_INPUT ) + list( JOIN inputs_with_flag " " RSP_INPUT ) file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} ) # Ensure that if this file is removed, we re-run CMake set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${RSP_FILE} ) set( LINK_INPUT_ARG "@${RSP_FILE}" ) + else() + set( LINK_INPUT_ARG ${inputs_with_flag} ) endif() if( ARG_INTERNALIZE ) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits