Author: Wenju He
Date: 2025-09-05T19:58:07+08:00
New Revision: 28d9255aa7c05738c7fd88711006d71d4dfc952a

URL: 
https://github.com/llvm/llvm-project/commit/28d9255aa7c05738c7fd88711006d71d4dfc952a
DIFF: 
https://github.com/llvm/llvm-project/commit/28d9255aa7c05738c7fd88711006d71d4dfc952a.diff

LOG: [libclc] Override generic symbol using llvm-link --override flag instead 
of using weak linkage (#156778)

Before this PR, weak linkage was applied to a few CLC generic functions
to allow a target-specific implementation to override the generic one.
However, adding weak linkage has the side effect of preventing
inter-procedural optimization, such as PostOrderFunctionAttrsPass,
because a weak function doesn't have an exact definition (as determined
by hasExactDefinition in the pass).

This PR resolves the issue by adding the --override flag for every
non-generic bitcode file in the llvm-link run. This approach eliminates
the need for weak linkage while still allowing a target-specific
implementation to override the generic one.
llvm-diff shows improved attribute deduction for some functions in
amdgcn--amdhsa.bc, e.g.
  %23 = tail call half @llvm.sqrt.f16(half %22)
=>
  %23 = tail call noundef half @llvm.sqrt.f16(half %22)

Added: 
    

Modified: 
    libclc/clc/lib/generic/math/clc_ldexp.cl
    libclc/clc/lib/generic/math/clc_rsqrt.inc
    libclc/clc/lib/generic/math/clc_sqrt.inc
    libclc/cmake/modules/AddLibclc.cmake

Removed: 
    


################################################################################
diff  --git a/libclc/clc/lib/generic/math/clc_ldexp.cl 
b/libclc/clc/lib/generic/math/clc_ldexp.cl
index 8b41751e40282..f9252a75ab4bf 100644
--- a/libclc/clc/lib/generic/math/clc_ldexp.cl
+++ b/libclc/clc/lib/generic/math/clc_ldexp.cl
@@ -14,9 +14,7 @@
 #include <clc/relational/clc_isnan.h>
 #include <clc/shared/clc_clamp.h>
 
-#define _CLC_DEF_ldexp _CLC_DEF __attribute__((weak))
-
-_CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
+_CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float x, int n) {
 
   if (!__clc_fp32_subnormals_supported()) {
     // This treats subnormals as zeros
@@ -89,7 +87,7 @@ _CLC_DEF_ldexp _CLC_OVERLOAD float __clc_ldexp(float x, int 
n) {
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
-_CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
+_CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double x, int n) {
   long l = __clc_as_ulong(x);
   int e = (l >> 52) & 0x7ff;
   long s = l & 0x8000000000000000;
@@ -124,14 +122,13 @@ _CLC_DEF_ldexp _CLC_OVERLOAD double __clc_ldexp(double x, 
int n) {
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
-_CLC_OVERLOAD _CLC_DEF_ldexp half __clc_ldexp(half x, int n) {
+_CLC_OVERLOAD _CLC_DEF half __clc_ldexp(half x, int n) {
   return (half)__clc_ldexp((float)x, n);
 }
 
 #endif
 
 #define __CLC_FUNCTION __clc_ldexp
-#define __CLC_DEF_SPEC _CLC_DEF_ldexp
 #define __CLC_ARG2_TYPE int
 #define __CLC_BODY <clc/shared/binary_def_scalarize.inc>
 #include <clc/math/gentype.inc>

diff  --git a/libclc/clc/lib/generic/math/clc_rsqrt.inc 
b/libclc/clc/lib/generic/math/clc_rsqrt.inc
index 4c04155a932c7..07aad16f91916 100644
--- a/libclc/clc/lib/generic/math/clc_rsqrt.inc
+++ b/libclc/clc/lib/generic/math/clc_rsqrt.inc
@@ -6,8 +6,7 @@
 //
 
//===----------------------------------------------------------------------===//
 
-__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
-__clc_rsqrt(__CLC_GENTYPE val) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_rsqrt(__CLC_GENTYPE val) {
 #pragma clang fp contract(fast)
   return __CLC_FP_LIT(1.0) / __builtin_elementwise_sqrt(val);
 }

diff  --git a/libclc/clc/lib/generic/math/clc_sqrt.inc 
b/libclc/clc/lib/generic/math/clc_sqrt.inc
index 61e341993f5c8..e15dcf75ac3f2 100644
--- a/libclc/clc/lib/generic/math/clc_sqrt.inc
+++ b/libclc/clc/lib/generic/math/clc_sqrt.inc
@@ -6,7 +6,6 @@
 //
 
//===----------------------------------------------------------------------===//
 
-__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE
-__clc_sqrt(__CLC_GENTYPE val) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) {
   return __builtin_elementwise_sqrt(val);
 }

diff  --git a/libclc/cmake/modules/AddLibclc.cmake 
b/libclc/cmake/modules/AddLibclc.cmake
index 5cc202ddbaa8c..aa8dd9859cd22 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -92,19 +92,35 @@ function(link_bc)
     ${ARGN}
   )
 
-  set( LINK_INPUT_ARG ${ARG_INPUTS} )
+  if( ARG_INTERNALIZE )
+    set( inputs_with_flag ${ARG_INPUTS} )
+  else()
+    # Add the --override flag for non-generic bitcode files so that their
+    # symbols can override definitions in generic bitcode files.
+    set( inputs_with_flag )
+    foreach( file IN LISTS ARG_INPUTS )
+      string( FIND ${file} "/generic/" is_generic )
+      if( is_generic LESS 0 )
+        list( APPEND inputs_with_flag "--override" )
+      endif()
+      list( APPEND inputs_with_flag ${file} )
+    endforeach()
+  endif()
+
   if( WIN32 OR CYGWIN )
     # Create a response file in case the number of inputs exceeds command-line
     # character limits on certain platforms.
     file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE )
     # Turn it into a space-separate list of input files
-    list( JOIN ARG_INPUTS " " RSP_INPUT )
+    list( JOIN inputs_with_flag " " RSP_INPUT )
     file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} )
     # Ensure that if this file is removed, we re-run CMake
     set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
       ${RSP_FILE}
     )
     set( LINK_INPUT_ARG "@${RSP_FILE}" )
+  else()
+    set( LINK_INPUT_ARG ${inputs_with_flag} )
   endif()
 
   if( ARG_INTERNALIZE )


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to