https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118949

--- Comment #5 from Li Pan <pan2.li at intel dot com> ---
Thanks Vineet. Here is another test case, this one with explicit conversions.
It is unrelated to the global_reg change.

   1   │ #define T float
   2   │
   3   │ void func(const T * restrict a, const T * restrict  b,
   4   │         T * restrict c)
   5   │ {
   6   │     for (long i = 0; i < 1024; ++i) {
   7   │         double a_d = (double)a[i];
   8   │         double b_d = (double)b[i];
   9   │
  10   │         long a_l = __builtin_lround(a_d);
  11   │         long b_l = __builtin_lround(b_d);
  12   │
  13   │         c[i] = (T)(a_l + b_l);
  14   │     }
  15   │ }

The difference mostly shows up after the vect pass.

from:

vect__4.9_36 = .MASK_LEN_LOAD (vectp_a.7_38, 32B, { -1, ... }, _11, 0);
vect__6.12_32 = .MASK_LEN_LOAD (vectp_b.10_34, 32B, { -1, ... }, _11, 0);

vect_a_l_15.13_31 = .LROUND (vect__4.9_36);
vect_b_l_16.14_30 = .LROUND (vect__6.12_32);
vect__7.15_29 = vect_a_l_15.13_31 + vect_b_l_16.14_30;
vect__9.16_28 = (vector([2,2]) float) vect__7.15_29;
.MASK_LEN_STORE (vectp_c.17_26, 32B, { -1, ... }, _11, 0, vect__9.16_28);

to:

vect__4.9_43 = .MASK_LEN_LOAD (vectp_a.7_46, 32B, { -1, ... }, _44(D), _23, 0);
vect_a_d_14.10_42 = (vector([2,2]) double) vect__4.9_43; // Only in GCC-15
vect_a_l_17.11_41 = .LROUND (vect_a_d_14.10_42);

vect__6.14_36 = .MASK_LEN_LOAD (vectp_b.12_39, 32B, { -1, ... }, _37(D), _23, 0);
vect_b_d_16.15_35 = (vector([2,2]) double) vect__6.14_36; // Only in GCC-15
vect_b_l_18.16_34 = .LROUND (vect_b_d_16.15_35);

vect__7.17_33 = vect_a_l_17.11_41 + vect_b_l_18.16_34;
vect__9.18_32 = (vector([2,2]) float) vect__7.17_33;

.MASK_LEN_STORE (vectp_c.19_30, 32B, { -1, ... }, _23, 0, vect__9.18_32);

It looks like GCC-15 emits an extra conversion (float to double) after each load.

Reply via email to