On 2024-11-19 15:38, Adrian Bunk wrote: > On Mon, Oct 28, 2024 at 06:38:47PM +0100, Aurelien Jarno wrote: > > control: tag 1086108 + patch > > > > Hi, > >... > > Please find the patch attached, > >... > > The patch was not attached.
Oops... Here it is. -- Aurelien Jarno GPG: 4096R/1DDD8C9B aurel...@aurel32.net http://aurel32.net
--- a/scipy/linalg/_matfuncs_expm.pyx.in +++ b/scipy/linalg/_matfuncs_expm.pyx.in @@ -11,7 +11,7 @@ cimport numpy as cnp import numpy as np from libc.stdlib cimport malloc, free -from libc.math cimport fabs, ceil, log2, pow +from libc.math cimport fabs, ceil, log2, pow, isfinite from scipy.linalg.cython_lapack cimport (sgetrf, sgetrs, dgetrf, dgetrs, cgetrf, cgetrs, zgetrf, zgetrs) from scipy.linalg.cython_blas cimport (sgemm, saxpy, sscal, scopy, sgemv, @@ -106,6 +106,7 @@ double temp double [5] theta double [5] coeff + double d numpy_lapack_t [:, :, ::1] Amv = Am dims[0] = n @@ -150,7 +151,9 @@ np.matmul(absA, work_arr, out=work_arr) temp = np.max(work_arr) - lm = max(<int>ceil(log2(temp/normA/coeff[0])/6), 0) + # emulate the x86 behaviour for non-finite numbers + d = ceil(log2(temp/normA/coeff[0])/6) + lm = max(<int>d, 0) if isfinite(d) else 0 if eta0 < theta[0] and lm == 0: return 3, s @@ -161,7 +164,9 @@ np.matmul(absA, work_arr, out=work_arr) temp = np.max(work_arr) - lm = max(<int>ceil(log2(temp/normA/coeff[1])/10), 0) + # emulate the x86 behaviour for non-finite numbers + d = ceil(log2(temp/normA/coeff[1])/10) + lm = max(<int>d, 0) if isfinite(d) else 0 if eta1 < theta[1] and lm == 0: return 5, s @@ -180,7 +185,9 @@ np.matmul(absA, work_arr, out=work_arr) temp = np.max(work_arr) - lm = max(<int>ceil(log2(temp/normA/coeff[2])/14), 0) + # emulate the x86 behaviour for non-finite numbers + d = <int>ceil(log2(temp/normA/coeff[2])/14) + lm = max(<int>d, 0) if isfinite(d) else 0 if eta2 < theta[2] and lm == 0: return 7, s @@ -191,7 +198,9 @@ np.matmul(absA, work_arr, out=work_arr) temp = np.max(work_arr) - lm = max(<int>ceil(log2(temp/normA/coeff[3])/18), 0) + # emulate the x86 behaviour for non-finite numbers + d = ceil(log2(temp/normA/coeff[3])/18) + lm = max(<int>d, 0) if isfinite(d) else 0 if eta2 < theta[3] and lm == 0: return 9, s @@ -206,7 +215,9 @@ eta3 = max(d8, d10) eta4 = min(eta2, eta3) - s = max(<int>ceil(log2(eta4/theta[4])), 0) + # emulate the x86 behaviour for non-finite numbers + d = ceil(log2(eta4/theta[4])) + s = max(<int>d, 0) if isfinite(d) else 0 if s != 0: two_pow_s = 2.** (-s) absA *= two_pow_s @@ -221,7 +232,9 @@ np.matmul(absA, work_arr, out=work_arr) temp = np.max(work_arr) - s += max(<int>ceil(log2(temp/normA/coeff[4])/26), 0) + # emulate the x86 behaviour for non-finite numbers + d = ceil(log2(temp/normA/coeff[4])/26) + s += max(<int>d, 0) if isfinite(d) else 0 return 13, s # ============================================================================