Hello world, the attached patch fixes a regression where the calculated size of the buffer for matmul was too small for one special case.
Regression-tested. OK for trunk? Regards Thomas 2018-08-25 Thomas Koenig <tkoe...@gcc.gnu.org> PR libfortran/86704 * m4/matmul_internal.m4: Correct calculation of needed buffer size for arrays of shape (1,n). * generated/matmul_c10.c: Regenerated * generated/matmul_c16.c: Regenerated * generated/matmul_c4.c: Regenerated * generated/matmul_c8.c: Regenerated * generated/matmul_i1.c: Regenerated * generated/matmul_i16.c: Regenerated * generated/matmul_i2.c: Regenerated * generated/matmul_i4.c: Regenerated * generated/matmul_i8.c: Regenerated * generated/matmul_r10.c: Regenerated * generated/matmul_r16.c: Regenerated * generated/matmul_r4.c: Regenerated * generated/matmul_r8.c: Regenerated * generated/matmulavx128_c10.c: Regenerated * generated/matmulavx128_c16.c: Regenerated * generated/matmulavx128_c4.c: Regenerated * generated/matmulavx128_c8.c: Regenerated * generated/matmulavx128_i1.c: Regenerated * generated/matmulavx128_i16.c: Regenerated * generated/matmulavx128_i2.c: Regenerated * generated/matmulavx128_i4.c: Regenerated * generated/matmulavx128_i8.c: Regenerated * generated/matmulavx128_r10.c: Regenerated * generated/matmulavx128_r16.c: Regenerated * generated/matmulavx128_r4.c: Regenerated * generated/matmulavx128_r8.c: Regenerated 2018-08-25 Thomas Koenig <tkoe...@gcc.gnu.org> PR libfortran/86704 * gfortran.dg/matmul_19.f90: New test.
Index: m4/matmul_internal.m4 =================================================================== --- m4/matmul_internal.m4 (Revision 263752) +++ m4/matmul_internal.m4 (Arbeitskopie) @@ -233,8 +233,13 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c10.c =================================================================== --- generated/matmul_c10.c (Revision 263752) +++ generated/matmul_c10.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_c10 (gfc_array_c10 * const restrict retarra return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c16.c =================================================================== --- generated/matmul_c16.c (Revision 263752) +++ generated/matmul_c16.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_c16 (gfc_array_c16 * const restrict retarra return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c4.c =================================================================== --- generated/matmul_c4.c (Revision 263752) +++ generated/matmul_c4.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_c4_avx (gfc_array_c4 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_c4 (gfc_array_c4 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_c8.c =================================================================== --- generated/matmul_c8.c (Revision 263752) +++ generated/matmul_c8.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_c8_avx (gfc_array_c8 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_c8 (gfc_array_c8 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i1.c =================================================================== --- generated/matmul_i1.c (Revision 263752) +++ generated/matmul_i1.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_i1_avx (gfc_array_i1 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_i1 (gfc_array_i1 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i16.c =================================================================== --- generated/matmul_i16.c (Revision 263752) +++ generated/matmul_i16.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_i16_avx (gfc_array_i16 * const restrict ret return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict re return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_i16 (gfc_array_i16 * const restrict retarra return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i2.c =================================================================== --- generated/matmul_i2.c (Revision 263752) +++ generated/matmul_i2.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_i2_avx (gfc_array_i2 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_i2 (gfc_array_i2 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i4.c =================================================================== --- generated/matmul_i4.c (Revision 263752) +++ generated/matmul_i4.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_i4_avx (gfc_array_i4 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_i4 (gfc_array_i4 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_i8.c =================================================================== --- generated/matmul_i8.c (Revision 263752) +++ generated/matmul_i8.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_i8_avx (gfc_array_i8 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_i8 (gfc_array_i8 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r10.c =================================================================== --- generated/matmul_r10.c (Revision 263752) +++ generated/matmul_r10.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_r10_avx (gfc_array_r10 * const restrict ret return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict re return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_r10 (gfc_array_r10 * const restrict retarra return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r16.c =================================================================== --- generated/matmul_r16.c (Revision 263752) +++ generated/matmul_r16.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_r16_avx (gfc_array_r16 * const restrict ret return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict re return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_r16 (gfc_array_r16 * const restrict retarra return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r4.c =================================================================== --- generated/matmul_r4.c (Revision 263752) +++ generated/matmul_r4.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_r4_avx (gfc_array_r4 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_r4 (gfc_array_r4 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmul_r8.c =================================================================== --- generated/matmul_r8.c (Revision 263752) +++ generated/matmul_r8.c (Arbeitskopie) @@ -317,8 +317,13 @@ matmul_r8_avx (gfc_array_r8 * const restrict retar return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -869,8 +874,13 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict reta return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1421,8 +1431,13 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict r return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -1987,8 +2002,13 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict r return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -2613,8 +2633,13 @@ matmul_r8 (gfc_array_r8 * const restrict retarray, return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c10.c =================================================================== --- generated/matmulavx128_c10.c (Revision 263752) +++ generated/matmulavx128_c10.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const rest return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const rest return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c16.c =================================================================== --- generated/matmulavx128_c16.c (Revision 263752) +++ generated/matmulavx128_c16.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const rest return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const rest return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c4.c =================================================================== --- generated/matmulavx128_c4.c (Revision 263752) +++ generated/matmulavx128_c4.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_c8.c =================================================================== --- generated/matmulavx128_c8.c (Revision 263752) +++ generated/matmulavx128_c8.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restri return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i1.c =================================================================== --- generated/matmulavx128_i1.c (Revision 263752) +++ generated/matmulavx128_i1.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i16.c =================================================================== --- generated/matmulavx128_i16.c (Revision 263752) +++ generated/matmulavx128_i16.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const rest return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const rest return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i2.c =================================================================== --- generated/matmulavx128_i2.c (Revision 263752) +++ generated/matmulavx128_i2.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i4.c =================================================================== --- generated/matmulavx128_i4.c (Revision 263752) +++ generated/matmulavx128_i4.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restri return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_i8.c =================================================================== --- generated/matmulavx128_i8.c (Revision 263752) +++ generated/matmulavx128_i8.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r10.c =================================================================== --- generated/matmulavx128_r10.c (Revision 263752) +++ generated/matmulavx128_r10.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const rest return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const rest return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r16.c =================================================================== --- generated/matmulavx128_r16.c (Revision 263752) +++ generated/matmulavx128_r16.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const rest return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const rest return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r4.c =================================================================== --- generated/matmulavx128_r4.c (Revision 263752) +++ generated/matmulavx128_r4.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restri return; /* Adjust size of t1 to what is needed. 
*/ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; Index: generated/matmulavx128_r8.c =================================================================== --- generated/matmulavx128_r8.c (Revision 263752) +++ generated/matmulavx128_r8.c (Arbeitskopie) @@ -282,8 +282,13 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536; @@ -835,8 +840,13 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restri return; /* Adjust size of t1 to what is needed. */ - index_type t1_dim; - t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1; + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; if (t1_dim > 65536) t1_dim = 65536;
! { dg-do run }
! { dg-options "-finline-matmul-limit=0" }
! PR 86704 - this used to segfault.
! Test case by Stanislav Paláček.
!
! Regression test for the library MATMUL work-buffer size computation.
! The first MATMUL operand is transpose(A) with A of shape (1,120); the
! test passes if the program simply runs to completion without crashing.
! -finline-matmul-limit=0 is given so that MATMUL is not expanded inline
! by the front end (presumably so the library routine is the one called).
program testmaticovenasobeni
  implicit none

  character(len=10) :: line

  ! Internal write: evaluates the function without producing any output.
  write (unit=line,fmt=*) testmatmul(120,1,3)

contains

  ! Multiply transpose(A) by B for A(n,m) and B(n,o) filled with random
  ! numbers.  The product itself is not checked; the function always
  ! returns .true. once MATMUL has completed.
  function testmatmul(m,n,o)
    integer, intent(in) :: m,n,o
    ! Automatic arrays sized from the dummy arguments.
    real :: A(n,m),B(n,o),C(m,o)
    logical :: testmatmul

    call random_number(A)
    call random_number(B)

    ! transpose(A) has shape (m,n), so the result has shape (m,o).
    C=matmul(transpose(A),B)
    testmatmul=.true.
  end function testmatmul

end program testmaticovenasobeni