Hello world, the attached patch reduces the stack usage of the blocked version of matmul in cases where the full buffer is not needed: the temporary buffer t1 becomes a VLA of the required size, capped at the previous 65536 elements. This should improve stack usage.
Regression-tested. I also added a stress test (around 3 secs of CPU time on my system; it will only run once due to the "dg-do run" hack). OK for trunk? Thomas 2017-05-05 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/80602 * m4/matmul_internal.m4: 'matmul_name`: Change t1 to a VLA of the required size. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. 2017-05-05 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/80602 * gfortran.dg/matmul_15.f90: New test case.
Index: generated/matmul_c10.c =================================================================== --- generated/matmul_c10.c (Revision 247566) +++ generated/matmul_c10.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_c10 (gfc_array_c10 * const restrict retarra i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_c10 (gfc_array_c10 * const restrict retarra if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_c16.c =================================================================== --- generated/matmul_c16.c (Revision 247566) +++ generated/matmul_c16.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_c16 (gfc_array_c16 * const restrict retarra i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_c16 (gfc_array_c16 * const restrict retarra if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_c4.c =================================================================== --- generated/matmul_c4.c (Revision 247566) +++ generated/matmul_c4.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_c8.c =================================================================== --- generated/matmul_c8.c (Revision 247566) +++ generated/matmul_c8.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_COMPLEX_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_COMPLEX_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_COMPLEX_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_i1.c =================================================================== --- generated/matmul_i1.c (Revision 247566) +++ generated/matmul_i1.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_1 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_1 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_1 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_i16.c =================================================================== --- generated/matmul_i16.c (Revision 247566) +++ generated/matmul_i16.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_i16 (gfc_array_i16 * const restrict retarra i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_i16 (gfc_array_i16 * const restrict retarra if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_i2.c =================================================================== --- generated/matmul_i2.c (Revision 247566) +++ generated/matmul_i2.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_2 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_2 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_2 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_i4.c =================================================================== --- generated/matmul_i4.c (Revision 247566) +++ generated/matmul_i4.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_i8.c =================================================================== --- generated/matmul_i8.c (Revision 247566) +++ generated/matmul_i8.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_INTEGER_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_INTEGER_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_INTEGER_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_r10.c =================================================================== --- generated/matmul_r10.c (Revision 247566) +++ generated/matmul_r10.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_r10 (gfc_array_r10 * const restrict retarra i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_10 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_10 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_r10 (gfc_array_r10 * const restrict retarra if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_10 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_r16.c =================================================================== --- generated/matmul_r16.c (Revision 247566) +++ generated/matmul_r16.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_r16 (gfc_array_r16 * const restrict retarra i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_16 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_16 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_r16 (gfc_array_r16 * const restrict retarra if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_16 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_r4.c =================================================================== --- generated/matmul_r4.c (Revision 247566) +++ generated/matmul_r4.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_4 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_4 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_4 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: generated/matmul_r8.c =================================================================== --- generated/matmul_r8.c (Revision 247566) +++ generated/matmul_r8.c (Arbeitskopie) @@ -286,8 +286,7 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -311,6 +310,17 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -829,8 +839,7 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -854,6 +863,17 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1372,8 +1392,7 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1397,6 +1416,17 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -1911,8 +1941,7 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -1936,6 +1965,17 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. 
*/ for (j=1; j<=n; j++) for (i=1; i<=m; i++) @@ -2508,8 +2548,7 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, i1, i2, i3, i4, i5, i6; /* Local variables */ - GFC_REAL_8 t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + GFC_REAL_8 f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -2533,6 +2572,17 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + GFC_REAL_8 t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++) Index: m4/matmul_internal.m4 =================================================================== --- m4/matmul_internal.m4 (Revision 247566) +++ m4/matmul_internal.m4 (Arbeitskopie) @@ -202,8 +202,7 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl i1, i2, i3, i4, i5, i6; /* Local variables */ - 'rtype_name` t1[65536], /* was [256][256] */ - f11, f12, f21, f22, f31, f32, f41, f42, + 'rtype_name` f11, f12, f21, f22, f31, f32, f41, f42, f13, f14, f23, f24, f33, f34, f43, f44; index_type i, j, l, ii, jj, ll; index_type isec, jsec, lsec, uisec, ujsec, ulsec; @@ -227,6 +226,17 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl if (m == 0 || n == 0 || k == 0) return; + /* Adjust size of t1 to what is needed. */ + index_type t1_dim; + t1_dim = (a_dim1-1) * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvla" + 'rtype_name` t1[t1_dim]; /* was [256][256] */ +#pragma GCC diagnostic pop + /* Empty c first. */ for (j=1; j<=n; j++) for (i=1; i<=m; i++)
! { dg-do  run }
! { dg-options "-finline-matmul-limit=0" }
! Stress-test the matmul blocking code with sizes close to or
! equal to powers of two.
!
! NOTE(review): the doubled blank in "dg-do  run" above is presumably the
! deliberate hack that makes the testsuite run this file only once rather
! than once per optimization level -- confirm before normalizing it.

program main
  implicit none

  ! Extents to exercise: the tiny sizes 2 and 3, then every power of two
  ! from 4 to 256 together with its two immediate neighbours.
  integer, dimension(*), parameter :: nn = &
       [2, 3, 4, 5,  7, 8, 9,  15, 16, 17,  31, 32, 33,  63, 64, 65, &
        127, 128, 129,  255, 256, 257]
  integer, parameter :: s = size (nn)

  real, dimension(:,:), allocatable :: a, b, c
  integer :: i1, i2, i3
  integer :: nx, ny, nz      ! a is (nx,ny), b is (ny,nz), c is (nx,nz)
  real :: sm                 ! running total of all product elements

  sm = 0.0

  ! Try every combination of the three extents taken from nn.
  do i1 = 1, s
     nx = nn(i1)
     do i2 = 1, s
        ny = nn(i2)
        do i3 = 1, s
           nz = nn(i3)

           allocate (a(nx,ny), b(ny,nz), c(nx,nz))
           call random_number (a)
           call random_number (b)

           c = matmul (a, b)

           ! random_number delivers values in [0,1), so each element of c
           ! is a sum of ny terms from [0,1).  Anything outside [0,ny]
           ! (or a NaN, which fails both comparisons) means the product
           ! is wrong, e.g. because a scratch buffer was too small.
           if (.not. all (c >= 0.0 .and. c <= real (ny))) &
                error stop "matmul_15: product element outside [0, ny]"

           sm = sm + sum (c)
           deallocate (a, b, c)
        end do
     end do
  end do

  ! Use the accumulated sum so the products cannot be discarded as dead
  ! code, and reject a total that is NaN or not positive.
  if (.not. (sm > 0.0)) error stop "matmul_15: implausible checksum"
end program main