Author: Matt Arsenault
Date: 2026-03-17T09:57:08+01:00
New Revision: 19460ff859766e9eb08e2dae6ab70ac0c648b340
URL: https://github.com/llvm/llvm-project/commit/19460ff859766e9eb08e2dae6ab70ac0c648b340
DIFF: https://github.com/llvm/llvm-project/commit/19460ff859766e9eb08e2dae6ab70ac0c648b340.diff

LOG: libclc: Use fshr builtin in sincos helpers (#186427)

Added:

Modified:
    libclc/clc/lib/generic/math/clc_sincos_helpers.cl
    libclc/clc/lib/generic/math/clc_sincos_helpers.inc

Removed:

################################################################################
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
index 19705c42f6f07..aeba3c14dd9b9 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
@@ -16,7 +16,7 @@
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
 
-#define bitalign(hi, lo, shift) ((hi) << (32 - (shift))) | ((lo) >> (shift));
+#define bitalign(hi, lo, shift) __builtin_elementwise_fshr(hi, lo, shift)
 
 #define __CLC_FULL_MUL(A, B, HI, LO) \
   LO = A * B; \
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index f8c2b03054374..8b126a64f49ac 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -277,9 +277,9 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS(
   __CLC_INTN i = __CLC_AS_INTN(p7 >> 29U);
 
   // Scoot up 2 more bits so only fraction remains
-  p7 = bitalign(p7, p6, 30);
-  p6 = bitalign(p6, p5, 30);
-  p5 = bitalign(p5, p4, 30);
+  p7 = bitalign(p7, p6, (__CLC_UINTN)30u);
+  p6 = bitalign(p6, p5, (__CLC_UINTN)30u);
+  p5 = bitalign(p5, p4, (__CLC_UINTN)30u);
 
   // Subtract 1 if msb of fraction is 1, i.e. fraction >= 0.5
   __CLC_UINTN flip = (i & 1) != 0 ? 0xFFFFFFFFU : 0U;
@@ -299,12 +299,12 @@ _CLC_DEF _CLC_OVERLOAD __CLC_INTN __clc_argReductionLargeS(
       __CLC_AS_FLOATN(sign | ((127U - __CLC_AS_UINTN(xe)) << 23U) | p7 >> 9);
 
   // Shift out bits we captured on q1
-  p7 = bitalign(p7, p6, 32 - 23);
+  p7 = bitalign(p7, p6, (__CLC_UINTN)(32u - 23u));
 
   // Get 24 more bits of fraction in another float, there are not long strings
   // of zeroes here
   __CLC_INTN xxe = __CLC_AS_INTN(__clc_clz(p7)) + 1;
-  p7 = bitalign(p7, p6, 32 - xxe);
+  p7 = bitalign(p7, p6, __CLC_CONVERT_UINTN(32 - xxe));
   __CLC_FLOATN q0 = __CLC_AS_FLOATN(
       sign | ((127U - __CLC_AS_UINTN(xe + 23 + xxe)) << 23U) | p7 >> 9);

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
