https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118949
--- Comment #5 from Li Pan <pan2.li at intel dot com> ---
Thanks Vineet. Here is another case, this time with an explicit conversion. It is unrelated to the global_reg change.

#define T float

void func(const T * restrict a, const T * restrict b,
          T * restrict c)
{
  for (long i = 0; i < 1024; ++i) {
    double a_d = (double)a[i];
    double b_d = (double)b[i];

    long a_l = __builtin_lround(a_d);
    long b_l = __builtin_lround(b_d);

    c[i] = (T)(a_l + b_l);
  }
}

The difference mostly shows up after the vect pass.

from:
  vect__4.9_36 = .MASK_LEN_LOAD (vectp_a.7_38, 32B, { -1, ... }, _11, 0);
  vect__6.12_32 = .MASK_LEN_LOAD (vectp_b.10_34, 32B, { -1, ... }, _11, 0);
  vect_a_l_15.13_31 = .LROUND (vect__4.9_36);
  vect_b_l_16.14_30 = .LROUND (vect__6.12_32);
  vect__7.15_29 = vect_a_l_15.13_31 + vect_b_l_16.14_30;
  vect__9.16_28 = (vector([2,2]) float) vect__7.15_29;
  .MASK_LEN_STORE (vectp_c.17_26, 32B, { -1, ... }, _11, 0, vect__9.16_28);

to:
  vect__4.9_43 = .MASK_LEN_LOAD (vectp_a.7_46, 32B, { -1, ... }, _44(D), _23, 0);
  vect_a_d_14.10_42 = (vector([2,2]) double) vect__4.9_43; // Only in GCC-15
  vect_a_l_17.11_41 = .LROUND (vect_a_d_14.10_42);
  vect__6.14_36 = .MASK_LEN_LOAD (vectp_b.12_39, 32B, { -1, ... }, _37(D), _23, 0);
  vect_b_d_16.15_35 = (vector([2,2]) double) vect__6.14_36; // Only in GCC-15
  vect_b_l_18.16_34 = .LROUND (vect_b_d_16.15_35);
  vect__7.17_33 = vect_a_l_17.11_41 + vect_b_l_18.16_34;
  vect__9.18_32 = (vector([2,2]) float) vect__7.17_33;
  .MASK_LEN_STORE (vectp_c.19_30, 32B, { -1, ... }, _23, 0, vect__9.18_32);

It looks like GCC-15 emits additional float-to-double conversions after the loads...