Hello world, this patch generates direct calls to *GEMM when -fexternal-blas is specified. This allows to handle arguments to conjugate and transposed elements, which is quite a common use case.
While looking at the code, I found that the inline limit checks were not correctly handled for cases except for A2B2. This is also fixed. In order to check all cases at runtime, I simply copied the reference BLAS routines to the test cases, why they are *.f instead of *.f90. Regarding the bounds checking: I added three new test cases, but as for checking everything, that would be a bit too much. The code is clear enough that I think the other cases should be OK. OK for trunk? Regards Thomas 2018-09-18 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/29550 * gfortran.h (gfc_expr): Add external_blas flag. * frontend-passes.c (matrix_case): Add case A2TB2T. (optimize_namespace): Handle flag_external_blas by calling call_external_blas. (get_array_inq_function): Add argument okind. If it is nonzero, use it as the kind of argument to be used. (inline_limit_check): Remove m_case argument, add limit argument instead. Remove assert about m_case. Set the limit for inlining from the limit argument. (matmul_lhs_realloc): Handle case A2TB2T. (inline_matmul_assign): Handle inline limit for other cases with two rank-two matrices. Remove no-op calls to inline_limit_check. (call_external_blas): New function. * trans-intrinsic.c (gfc_conv_intrinsic_funcall): Do not add argument to external BLAS if external_blas is already set. 2018-09-18 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/29550 * gfortran.dg/inline_matmul_13.f90: Adjust count for _gfortran_matmul. * gfortran.dg/inline_matmul_16.f90: Likewise. * gfortran.dg/promotion_2.f90: Add -fblas-matmul-limit=1. Scan for dgemm instead of dgemm_. Add call to random_number to make standard conforming. * gfortran.dg/matmul_blas_1.f90: New test. * gfortran.dg/matmul_bounds_14.f: New test. * gfortran.dg/matmul_bounds_15.f: New test. * gfortran.dg/matmul_bounds_16.f: New test.
Index: fortran/frontend-passes.c =================================================================== --- fortran/frontend-passes.c (Revision 264349) +++ fortran/frontend-passes.c (Arbeitskopie) @@ -53,6 +53,7 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_ char *vname=NULL); static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *, bool *); +static int call_external_blas (gfc_code **, int *, void *); static bool has_dimen_vector_ref (gfc_expr *); static int matmul_temp_args (gfc_code **, int *,void *data); static int index_interchange (gfc_code **, int*, void *); @@ -131,7 +132,7 @@ static int var_num = 1; /* What sort of matrix we are dealing with when inlining MATMUL. */ -enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2 }; +enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2, A2TB2T }; /* Keep track of the number of expressions we have inserted so far using create_var. */ @@ -1428,7 +1429,7 @@ optimize_namespace (gfc_namespace *ns) gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL); gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL); gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL); - if (flag_inline_matmul_limit != 0) + if (flag_inline_matmul_limit != 0 || flag_external_blas) { bool found; do @@ -1441,9 +1442,15 @@ optimize_namespace (gfc_namespace *ns) gfc_code_walker (&ns->code, matmul_temp_args, dummy_expr_callback, NULL); - gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback, - NULL); } + + if (flag_external_blas) + gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback, + NULL); + + if (flag_inline_matmul_limit != 0) + gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback, + NULL); } if (flag_frontend_loop_interchange) @@ -2938,7 +2945,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees dim is zero-based. */ static gfc_expr * -get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim) +get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim, int okind = 0) { gfc_expr *fcn; gfc_expr *dim_arg, *kind; @@ -2964,8 +2971,12 @@ static gfc_expr * } dim_arg = gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim); - kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where, - gfc_index_integer_kind); + if (okind != 0) + kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where, + okind); + else + kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where, + gfc_index_integer_kind); ec = gfc_copy_expr (e); @@ -3026,7 +3037,7 @@ get_operand (gfc_intrinsic_op op, gfc_expr *e1, gf removed by DCE. Only called for rank-two matrices A and B. */ static gfc_code * -inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case) +inline_limit_check (gfc_expr *a, gfc_expr *b, int limit) { gfc_expr *inline_limit; gfc_code *if_1, *if_2, *else_2; @@ -3034,14 +3045,11 @@ static gfc_code * gfc_typespec ts; gfc_expr *cond; - gcc_assert (m_case == A2B2 || m_case == A2B2T || m_case == A2TB2); - /* Calculation is done in real to avoid integer overflow. */ inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind, &a->where); - mpfr_set_si (inline_limit->value.real, flag_inline_matmul_limit, - GFC_RND_MODE); + mpfr_set_si (inline_limit->value.real, limit, GFC_RND_MODE); mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3, GFC_RND_MODE); @@ -3235,6 +3243,22 @@ matmul_lhs_realloc (gfc_expr *c, gfc_expr *a, gfc_ get_array_inq_function (GFC_ISYM_SIZE, b, 2)); break; + case A2TB2T: + /* This can only happen for BLAS, we do not handle that case in + inline mamtul. */ + ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 2); + ar->start[1] = get_array_inq_function (GFC_ISYM_SIZE, b, 1); + + ne1 = build_logical_expr (INTRINSIC_NE, + get_array_inq_function (GFC_ISYM_SIZE, c, 1), + get_array_inq_function (GFC_ISYM_SIZE, a, 2)); + ne2 = build_logical_expr (INTRINSIC_NE, + get_array_inq_function (GFC_ISYM_SIZE, c, 2), + get_array_inq_function (GFC_ISYM_SIZE, b, 1)); + + cond = build_logical_expr (INTRINSIC_OR, ne1, ne2); + break; + default: gcc_unreachable(); @@ -3946,9 +3970,11 @@ inline_matmul_assign (gfc_code **c, int *walk_subt /* Take care of the inline flag. If the limit check evaluates to a constant, dead code elimination will eliminate the unneeded branch. */ - if (m_case == A2B2 && flag_inline_matmul_limit > 0) + if (flag_inline_matmul_limit > 0 && matrix_a->rank == 2 + && matrix_b->rank == 2) { - if_limit = inline_limit_check (matrix_a, matrix_b, m_case); + if_limit = inline_limit_check (matrix_a, matrix_b, + flag_inline_matmul_limit); /* Insert the original statement into the else branch. */ if_limit->block->block->next = co; @@ -4118,7 +4144,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subt switch (m_case) { case A2B2: - inline_limit_check (matrix_a, matrix_b, m_case); u1 = get_size_m1 (matrix_b, 2); u2 = get_size_m1 (matrix_a, 2); @@ -4151,7 +4176,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subt break; case A2B2T: - inline_limit_check (matrix_a, matrix_b, m_case); u1 = get_size_m1 (matrix_b, 1); u2 = get_size_m1 (matrix_a, 2); @@ -4184,7 +4208,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subt break; case A2TB2: - inline_limit_check (matrix_a, matrix_b, m_case); u1 = get_size_m1 (matrix_a, 2); u2 = get_size_m1 (matrix_b, 2); @@ -4311,7 +4334,406 @@ inline_matmul_assign (gfc_code **c, int *walk_subt return 0; } +/* Change matmul function calls in the form of + c = matmul(a,b) + + to the corresponding call to a BLAS routine, if applicable. */ + +static int +call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + gfc_code *co, *co_next; + gfc_expr *expr1, *expr2; + gfc_expr *matrix_a, *matrix_b; + gfc_code *if_limit = NULL; + gfc_actual_arglist *a, *b; + bool conjg_a, conjg_b, transpose_a, transpose_b; + gfc_code *call; + const char *blas_name; + const char *transa, *transb; + gfc_expr *c1, *c2, *b1; + gfc_actual_arglist *actual, *next; + bt type; + int kind; + enum matrix_case m_case; + bool realloc_c; + gfc_code **next_code_point; + + /* Many of the tests for inline matmul also apply here. */ + + co = *c; + + if (co->op != EXEC_ASSIGN) + return 0; + + if (in_where || in_assoc_list) + return 0; + + /* The BLOCKS generated for the temporary variables and FORALL don't + mix. */ + if (forall_level > 0) + return 0; + + /* For now don't do anything in OpenMP workshare, it confuses + its translation, which expects only the allowed statements in there. */ + + if (in_omp_workshare) + return 0; + + expr1 = co->expr1; + expr2 = co->expr2; + if (expr2->expr_type != EXPR_FUNCTION + || expr2->value.function.isym == NULL + || expr2->value.function.isym->id != GFC_ISYM_MATMUL) + return 0; + + type = expr2->ts.type; + kind = expr2->ts.kind; + + /* Guard against recursion. */ + + if (expr2->external_blas) + return 0; + + if (type != expr1->ts.type || kind != expr1->ts.kind) + return 0; + + if (type == BT_REAL) + { + if (kind == 4) + blas_name = "sgemm"; + else if (kind == 8) + blas_name = "dgemm"; + else + return 0; + } + else if (type == BT_COMPLEX) + { + if (kind == 4) + blas_name = "cgemm"; + else if (kind == 8) + blas_name = "zgemm"; + else + return 0; + } + else + return 0; + + a = expr2->value.function.actual; + if (a->expr->rank != 2) + return 0; + + b = a->next; + if (b->expr->rank != 2) + return 0; + + matrix_a = check_conjg_transpose_variable (a->expr, &conjg_a, &transpose_a); + if (matrix_a == NULL) + return 0; + + if (transpose_a) + { + if (conjg_a) + transa = "C"; + else + transa = "T"; + } + else + transa = "N"; + + matrix_b = check_conjg_transpose_variable (b->expr, &conjg_b, &transpose_b); + if (matrix_b == NULL) + return 0; + + if (transpose_b) + { + if (conjg_b) + transb = "C"; + else + transb = "T"; + } + else + transb = "N"; + + if (transpose_a) + { + if (transpose_b) + m_case = A2TB2T; + else + m_case = A2TB2; + } + else + { + if (transpose_b) + m_case = A2B2T; + else + m_case = A2B2; + } + + current_code = c; + inserted_block = NULL; + changed_statement = NULL; + + expr2->external_blas = 1; + + /* We do not handle data dependencies yet. */ + if (gfc_check_dependency (expr1, matrix_a, true) + || gfc_check_dependency (expr1, matrix_b, true)) + return 0; + + /* Generate the if statement and hang it into the tree. */ + if_limit = inline_limit_check (matrix_a, matrix_b, flag_blas_matmul_limit); + co_next = co->next; + (*current_code) = if_limit; + co->next = NULL; + if_limit->block->next = co; + + call = XCNEW (gfc_code); + call->loc = co->loc; + + /* Bounds checking - a bit simpler than for inlining since we only + have to take care of two-dimensional arrays here. */ + + realloc_c = flag_realloc_lhs && gfc_is_reallocatable_lhs (expr1); + next_code_point = &(if_limit->block->block->next); + + if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS) + { + gfc_code *test; + // gfc_expr *a2, *b1, *c1, *c2, *a1, *b2; + gfc_expr *c1, *a1, *c2, *b2, *a2; + switch (m_case) + { + case A2B2: + b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); + a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); + test = runtime_error_ne (b1, a2, B_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + if (!realloc_c) + { + c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); + a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); + test = runtime_error_ne (c1, a1, C_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); + b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); + test = runtime_error_ne (c2, b2, C_ERROR(2)); + *next_code_point = test; + next_code_point = &test->next; + } + break; + + case A2B2T: + + b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); + a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); + /* matrix_b is transposed, hence dimension 1 for the error message. */ + test = runtime_error_ne (b2, a2, B_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + if (!realloc_c) + { + c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); + a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); + test = runtime_error_ne (c1, a1, C_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); + b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); + test = runtime_error_ne (c2, b1, C_ERROR(2)); + *next_code_point = test; + next_code_point = &test->next; + } + break; + + case A2TB2: + + b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); + a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); + test = runtime_error_ne (b1, a1, B_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + if (!realloc_c) + { + c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); + a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); + test = runtime_error_ne (c1, a2, C_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); + b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); + test = runtime_error_ne (c2, b2, C_ERROR(2)); + *next_code_point = test; + next_code_point = &test->next; + } + break; + + case A2TB2T: + b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2); + a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1); + test = runtime_error_ne (b2, a1, B_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + if (!realloc_c) + { + c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1); + a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2); + test = runtime_error_ne (c1, a2, C_ERROR(1)); + *next_code_point = test; + next_code_point = &test->next; + + c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2); + b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1); + test = runtime_error_ne (c2, b1, C_ERROR(2)); + *next_code_point = test; + next_code_point = &test->next; + } + break; + + default: + gcc_unreachable (); + } + } + + /* Handle the reallocation, if needed. */ + + if (realloc_c) + { + gfc_code *lhs_alloc; + + lhs_alloc = matmul_lhs_realloc (expr1, matrix_a, matrix_b, m_case); + *next_code_point = lhs_alloc; + next_code_point = &lhs_alloc->next; + } + + *next_code_point = call; + if_limit->next = co_next; + + /* Set up the BLAS call. */ + + call->op = EXEC_CALL; + + gfc_get_sym_tree (blas_name, current_ns, &(call->symtree), true); + call->symtree->n.sym->attr.subroutine = 1; + call->symtree->n.sym->attr.procedure = 1; + call->symtree->n.sym->attr.flavor = FL_PROCEDURE; + call->resolved_sym = call->symtree->n.sym; + + /* Argument TRANSA. */ + next = gfc_get_actual_arglist (); + next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc, + transa, 1); + + call->ext.actual = next; + + /* Argument TRANSB. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc, + transb, 1); + actual->next = next; + + c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1, + gfc_integer_4_kind); + c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2, + gfc_integer_4_kind); + + b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1, + gfc_integer_4_kind); + + /* Argument M. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = c1; + actual->next = next; + + /* Argument N. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = c2; + actual->next = next; + + /* Argument K. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = b1; + actual->next = next; + + /* Argument ALPHA - set to one. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = gfc_get_constant_expr (type, kind, &co->loc); + if (type == BT_REAL) + mpfr_set_ui (next->expr->value.real, 1, GFC_RND_MODE); + else + mpc_set_ui (next->expr->value.complex, 1, GFC_MPC_RND_MODE); + actual->next = next; + + /* Argument A. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = gfc_copy_expr (matrix_a); + actual->next = next; + + /* Argument LDA. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a), + 1, gfc_integer_4_kind); + actual->next = next; + + /* Argument B. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = gfc_copy_expr (matrix_b); + actual->next = next; + + /* Argument LDB. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b), + 1, gfc_integer_4_kind); + actual->next = next; + + /* Argument BETA - set to zero. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = gfc_get_constant_expr (type, kind, &co->loc); + if (type == BT_REAL) + mpfr_set_ui (next->expr->value.real, 0, GFC_RND_MODE); + else + mpc_set_ui (next->expr->value.complex, 0, GFC_MPC_RND_MODE); + actual->next = next; + + /* Argument C. */ + + actual = next; + next = gfc_get_actual_arglist (); + next->expr = gfc_copy_expr (expr1); + actual->next = next; + + /* Argument LDC. */ + actual = next; + next = gfc_get_actual_arglist (); + next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1), + 1, gfc_integer_4_kind); + actual->next = next; + + return 0; +} + + /* Code for index interchange for loops which are grouped together in DO CONCURRENT or FORALL statements. This is currently only applied if the iterations are grouped together in a single statement. Index: fortran/gfortran.h =================================================================== --- fortran/gfortran.h (Revision 263752) +++ fortran/gfortran.h (Arbeitskopie) @@ -2143,6 +2143,11 @@ typedef struct gfc_expr unsigned int no_bounds_check : 1; + /* Set this if a matmul expression has already been evaluated for conversion + to a BLAS call. */ + + unsigned int external_blas : 1; + /* If an expression comes from a Hollerith constant or compile-time evaluation of a transfer statement, it may have a prescribed target- memory representation, and these cannot always be backformed from Index: fortran/trans-intrinsic.c =================================================================== --- fortran/trans-intrinsic.c (Revision 263752) +++ fortran/trans-intrinsic.c (Arbeitskopie) @@ -4055,6 +4055,7 @@ gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr to be able to call the BLAS ?gemm functions if required and possible. */ append_args = NULL; if (expr->value.function.isym->id == GFC_ISYM_MATMUL + && !expr->external_blas && sym->ts.type != BT_LOGICAL) { tree cint = gfc_get_int_type (gfc_c_int_kind); Index: testsuite/gfortran.dg/inline_matmul_13.f90 =================================================================== --- testsuite/gfortran.dg/inline_matmul_13.f90 (Revision 263752) +++ testsuite/gfortran.dg/inline_matmul_13.f90 (Arbeitskopie) @@ -44,4 +44,4 @@ program main deallocate(calloc) end program main -! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } +! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "original" } } Index: testsuite/gfortran.dg/inline_matmul_16.f90 =================================================================== --- testsuite/gfortran.dg/inline_matmul_16.f90 (Revision 263752) +++ testsuite/gfortran.dg/inline_matmul_16.f90 (Arbeitskopie) @@ -58,4 +58,4 @@ program main end do end do end program main -! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } +! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "optimized" } } Index: testsuite/gfortran.dg/promotion_2.f90 =================================================================== --- testsuite/gfortran.dg/promotion_2.f90 (Revision 263752) +++ testsuite/gfortran.dg/promotion_2.f90 (Arbeitskopie) @@ -1,5 +1,5 @@ ! { dg-do compile } -! { dg-options "-fdefault-real-8 -fexternal-blas -fdump-tree-original -finline-matmul-limit=0" } +! { dg-options "-fdefault-real-8 -fexternal-blas -fblas-matmul-limit=1 -fdump-tree-original -finline-matmul-limit=0" } ! ! PR fortran/54463 ! @@ -8,8 +8,9 @@ program test implicit none real, dimension(3,3) :: A + call random_number(a) A = matmul(A,A) end program test -! { dg-final { scan-tree-dump-times "sgemm_" 0 "original" } } -! { dg-final { scan-tree-dump-times "dgemm_" 1 "original" } } +! { dg-final { scan-tree-dump-times "sgemm" 0 "original" } } +! { dg-final { scan-tree-dump-times "dgemm" 1 "original" } }
C { dg-do run } C { dg-options "-fcheck=bounds -fdump-tree-optimized -fblas-matmul-limit=1 -O -fexternal-blas" } C Test calling of BLAS routines program main call sub_s call sub_d call sub_c call sub_z end subroutine sub_d implicit none real(8), dimension(3,2) :: a real(8), dimension(2,3) :: at real(8), dimension(2,4) :: b real(8), dimension(4,2) :: bt real(8), dimension(3,4) :: c real(8), dimension(3,4) :: cres real(8), dimension(:,:), allocatable :: c_alloc data a / 2., -3., 5., -7., 11., -13./ data b /17., -23., 29., -31., 37., -39., 41., -47./ data cres /195., -304., 384., 275., -428., 548., 347., -540., & 692., 411., -640., 816./ c = matmul(a,b) if (any (c /= cres)) stop 31 at = transpose(a) c = (1.2,-2.2) c = matmul(transpose(at), b) if (any (c /= cres)) stop 32 bt = transpose(b) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 33 c_alloc = matmul(a,b) if (any (c /= cres)) stop 34 at = transpose(a) deallocate (c_alloc) c = matmul(transpose(at), b) if (any (c /= cres)) stop 35 bt = transpose(b) allocate (c_alloc(20,20)) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 36 end subroutine sub_s implicit none real, dimension(3,2) :: a real, dimension(2,3) :: at real, dimension(2,4) :: b real, dimension(4,2) :: bt real, dimension(3,4) :: c real, dimension(3,4) :: cres real, dimension(:,:), allocatable :: c_alloc data a / 2., -3., 5., -7., 11., -13./ data b /17., -23., 29., -31., 37., -39., 41., -47./ data cres /195., -304., 384., 275., -428., 548., 347., -540., & 692., 411., -640., 816./ c = matmul(a,b) if (any (c /= cres)) stop 21 at = transpose(a) c = (1.2,-2.2) c = matmul(transpose(at), b) if (any (c /= cres)) stop 22 bt = transpose(b) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 23 c_alloc = matmul(a,b) if (any (c /= cres)) stop 24 at = transpose(a) deallocate (c_alloc) c = matmul(transpose(at), b) if (any (c /= cres)) stop 25 bt = transpose(b) allocate (c_alloc(20,20)) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 26 end subroutine sub_c implicit none complex, dimension(3,2) :: a complex, dimension(2,3) :: at, ah complex, dimension(2,4) :: b complex, dimension(4,2) :: bt, bh complex, dimension(3,4) :: c complex, dimension(3,4) :: cres complex, dimension(:,:), allocatable :: c_alloc data a / (2.,-3.), (-5.,7.), (11.,-13.), (17.,19), (-23., -29), & (-31., 37.)/ data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71), & ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/ data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.), & (-2789.,-11.), & (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.), & (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) / c = matmul(a,b) if (any (c /= cres)) stop 1 at = transpose(a) c = (1.2,-2.2) c = matmul(transpose(at), b) if (any (c /= cres)) stop 2 bt = transpose(b) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 3 ah = transpose(conjg(a)) c = (1.2,-2.2) c = matmul(conjg(transpose(ah)), b) if (any (c /= cres)) stop 4 bh = transpose(conjg(b)) c = (1.2,-2.2) c = matmul(a, transpose(conjg(bh))) if (any (c /= cres)) stop 5 c_alloc = matmul(a,b) if (any (c /= cres)) stop 6 at = transpose(a) deallocate (c_alloc) c = matmul(transpose(at), b) if (any (c /= cres)) stop 7 bt = transpose(b) allocate (c_alloc(20,20)) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 8 ah = transpose(conjg(a)) c = (1.2,-2.2) c = matmul(conjg(transpose(ah)), b) if (any (c /= cres)) stop 9 deallocate (c_alloc) allocate (c_alloc(0,0)) bh = transpose(conjg(b)) c = (1.2,-2.2) c = matmul(a, transpose(conjg(bh))) if (any (c /= cres)) stop 10 end subroutine sub_z implicit none complex(8), dimension(3,2) :: a complex(8), dimension(2,3) :: at, ah complex(8), dimension(2,4) :: b complex(8), dimension(4,2) :: bt, bh complex(8), dimension(3,4) :: c complex(8), dimension(3,4) :: cres complex(8), dimension(:,:), allocatable :: c_alloc data a / (2.,-3.), (-5._8,7.), (11.,-13.), (17.,19), & (-23., -29), (-31., 37.)/ data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71), & ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/ data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.), & (-2789.,-11.), & (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.), & (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) / c = matmul(a,b) if (any (c /= cres)) stop 11 at = transpose(a) c = (1.2,-2.2) c = matmul(transpose(at), b) if (any (c /= cres)) stop 12 bt = transpose(b) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 13 ah = transpose(conjg(a)) c = (1.2,-2.2) c = matmul(conjg(transpose(ah)), b) if (any (c /= cres)) stop 14 bh = transpose(conjg(b)) c = (1.2,-2.2) c = matmul(a, transpose(conjg(bh))) if (any (c /= cres)) stop 15 c_alloc = matmul(a,b) if (any (c /= cres)) stop 16 at = transpose(a) deallocate (c_alloc) c = matmul(transpose(at), b) if (any (c /= cres)) stop 17 bt = transpose(b) allocate (c_alloc(20,20)) c = (1.2,-2.1) c = matmul(a, transpose(bt)) if (any (c /= cres)) stop 18 ah = transpose(conjg(a)) c = (1.2,-2.2) c = matmul(conjg(transpose(ah)), b) if (any (c /= cres)) stop 19 deallocate (c_alloc) allocate (c_alloc(0,0)) bh = transpose(conjg(b)) c = (1.2,-2.2) c = matmul(a, transpose(conjg(bh))) if (any (c /= cres)) stop 20 end *> \brief \b CGEMM * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * .. Scalar Arguments .. * COMPLEX ALPHA,BETA * INTEGER K,LDA,LDB,LDC,M,N * CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. * COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> CGEMM performs one of the matrix-matrix operations *> *> C := alpha*op( A )*op( B ) + beta*C, *> *> where op( X ) is one of *> *> op( X ) = X or op( X ) = X**T or op( X ) = X**H, *> *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. *> \endverbatim * * Arguments: * ========== * *> \param[in] TRANSA *> \verbatim *> TRANSA is CHARACTER*1 *> On entry, TRANSA specifies the form of op( A ) to be used in *> the matrix multiplication as follows: *> *> TRANSA = 'N' or 'n', op( A ) = A. *> *> TRANSA = 'T' or 't', op( A ) = A**T. *> *> TRANSA = 'C' or 'c', op( A ) = A**H. *> \endverbatim *> *> \param[in] TRANSB *> \verbatim *> TRANSB is CHARACTER*1 *> On entry, TRANSB specifies the form of op( B ) to be used in *> the matrix multiplication as follows: *> *> TRANSB = 'N' or 'n', op( B ) = B. *> *> TRANSB = 'T' or 't', op( B ) = B**T. *> *> TRANSB = 'C' or 'c', op( B ) = B**H. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER *> On entry, M specifies the number of rows of the matrix *> op( A ) and of the matrix C. M must be at least zero. *> \endverbatim *> *> \param[in] N *> \verbatim *> N is INTEGER *> On entry, N specifies the number of columns of the matrix *> op( B ) and the number of columns of the matrix C. N must be *> at least zero. *> \endverbatim *> *> \param[in] K *> \verbatim *> K is INTEGER *> On entry, K specifies the number of columns of the matrix *> op( A ) and the number of rows of the matrix op( B ). K must *> be at least zero. *> \endverbatim *> *> \param[in] ALPHA *> \verbatim *> ALPHA is COMPLEX *> On entry, ALPHA specifies the scalar alpha. *> \endverbatim *> *> \param[in] A *> \verbatim *> A is COMPLEX array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise *> the leading k by m part of the array A must contain the *> matrix A. *> \endverbatim *> *> \param[in] LDA *> \verbatim *> LDA is INTEGER *> On entry, LDA specifies the first dimension of A as declared *> in the calling (sub) program. When TRANSA = 'N' or 'n' then *> LDA must be at least max( 1, m ), otherwise LDA must be at *> least max( 1, k ). *> \endverbatim *> *> \param[in] B *> \verbatim *> B is COMPLEX array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise *> the leading n by k part of the array B must contain the *> matrix B. *> \endverbatim *> *> \param[in] LDB *> \verbatim *> LDB is INTEGER *> On entry, LDB specifies the first dimension of B as declared *> in the calling (sub) program. When TRANSB = 'N' or 'n' then *> LDB must be at least max( 1, k ), otherwise LDB must be at *> least max( 1, n ). *> \endverbatim *> *> \param[in] BETA *> \verbatim *> BETA is COMPLEX *> On entry, BETA specifies the scalar beta. When BETA is *> supplied as zero then C need not be set on input. *> \endverbatim *> *> \param[in,out] C *> \verbatim *> C is COMPLEX array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. *> On exit, the array C is overwritten by the m by n matrix *> ( alpha*op( A )*op( B ) + beta*C ). *> \endverbatim *> *> \param[in] LDC *> \verbatim *> LDC is INTEGER *> On entry, LDC specifies the first dimension of C as declared *> in the calling (sub) program. LDC must be at least *> max( 1, m ). *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup complex_blas_level3 * *> \par Further Details: * ===================== *> *> \verbatim *> *> Level 3 Blas routine. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> \endverbatim *> * ===================================================================== SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * -- Reference BLAS level3 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. COMPLEX ALPHA,BETA INTEGER K,LDA,LDB,LDC,M,N CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. COMPLEX A(LDA,*),B(LDB,*),C(LDC,*) * .. * * ===================================================================== * * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC CONJG,MAX * .. * .. Local Scalars .. COMPLEX TEMP INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB LOGICAL CONJA,CONJB,NOTA,NOTB * .. * .. Parameters .. COMPLEX ONE PARAMETER (ONE= (1.0E+0,0.0E+0)) COMPLEX ZERO PARAMETER (ZERO= (0.0E+0,0.0E+0)) * .. * * Set NOTA and NOTB as true if A and B respectively are not * conjugated or transposed, set CONJA and CONJB as true if A and * B respectively are to be transposed but not conjugated and set * NROWA, NCOLA and NROWB as the number of rows and columns of A * and the number of rows of B respectively. * NOTA = LSAME(TRANSA,'N') NOTB = LSAME(TRANSB,'N') CONJA = LSAME(TRANSA,'C') CONJB = LSAME(TRANSB,'C') IF (NOTA) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF (NOTB) THEN NROWB = K ELSE NROWB = N END IF * * Test the input parameters. * INFO = 0 IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND. + (.NOT.LSAME(TRANSA,'T'))) THEN INFO = 1 ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND. + (.NOT.LSAME(TRANSB,'T'))) THEN INFO = 2 ELSE IF (M.LT.0) THEN INFO = 3 ELSE IF (N.LT.0) THEN INFO = 4 ELSE IF (K.LT.0) THEN INFO = 5 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN INFO = 8 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN INFO = 10 ELSE IF (LDC.LT.MAX(1,M)) THEN INFO = 13 END IF IF (INFO.NE.0) THEN CALL XERBLA('CGEMM ',INFO) RETURN END IF * * Quick return if possible. * IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And when alpha.eq.zero. * IF (ALPHA.EQ.ZERO) THEN IF (BETA.EQ.ZERO) THEN DO 20 J = 1,N DO 10 I = 1,M C(I,J) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1,N DO 30 I = 1,M C(I,J) = BETA*C(I,J) 30 CONTINUE 40 CONTINUE END IF RETURN END IF * * Start the operations. * IF (NOTB) THEN IF (NOTA) THEN * * Form C := alpha*A*B + beta*C. * DO 90 J = 1,N IF (BETA.EQ.ZERO) THEN DO 50 I = 1,M C(I,J) = ZERO 50 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 60 I = 1,M C(I,J) = BETA*C(I,J) 60 CONTINUE END IF DO 80 L = 1,K TEMP = ALPHA*B(L,J) DO 70 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 70 CONTINUE 80 CONTINUE 90 CONTINUE ELSE IF (CONJA) THEN * * Form C := alpha*A**H*B + beta*C. * DO 120 J = 1,N DO 110 I = 1,M TEMP = ZERO DO 100 L = 1,K TEMP = TEMP + CONJG(A(L,I))*B(L,J) 100 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 110 CONTINUE 120 CONTINUE ELSE * * Form C := alpha*A**T*B + beta*C * DO 150 J = 1,N DO 140 I = 1,M TEMP = ZERO DO 130 L = 1,K TEMP = TEMP + A(L,I)*B(L,J) 130 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 140 CONTINUE 150 CONTINUE END IF ELSE IF (NOTA) THEN IF (CONJB) THEN * * Form C := alpha*A*B**H + beta*C. * DO 200 J = 1,N IF (BETA.EQ.ZERO) THEN DO 160 I = 1,M C(I,J) = ZERO 160 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 170 I = 1,M C(I,J) = BETA*C(I,J) 170 CONTINUE END IF DO 190 L = 1,K TEMP = ALPHA*CONJG(B(J,L)) DO 180 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 180 CONTINUE 190 CONTINUE 200 CONTINUE ELSE * * Form C := alpha*A*B**T + beta*C * DO 250 J = 1,N IF (BETA.EQ.ZERO) THEN DO 210 I = 1,M C(I,J) = ZERO 210 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 220 I = 1,M C(I,J) = BETA*C(I,J) 220 CONTINUE END IF DO 240 L = 1,K TEMP = ALPHA*B(J,L) DO 230 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 230 CONTINUE 240 CONTINUE 250 CONTINUE END IF ELSE IF (CONJA) THEN IF (CONJB) THEN * * Form C := alpha*A**H*B**H + beta*C. * DO 280 J = 1,N DO 270 I = 1,M TEMP = ZERO DO 260 L = 1,K TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L)) 260 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 270 CONTINUE 280 CONTINUE ELSE * * Form C := alpha*A**H*B**T + beta*C * DO 310 J = 1,N DO 300 I = 1,M TEMP = ZERO DO 290 L = 1,K TEMP = TEMP + CONJG(A(L,I))*B(J,L) 290 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 300 CONTINUE 310 CONTINUE END IF ELSE IF (CONJB) THEN * * Form C := alpha*A**T*B**H + beta*C * DO 340 J = 1,N DO 330 I = 1,M TEMP = ZERO DO 320 L = 1,K TEMP = TEMP + A(L,I)*CONJG(B(J,L)) 320 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 330 CONTINUE 340 CONTINUE ELSE * * Form C := alpha*A**T*B**T + beta*C * DO 370 J = 1,N DO 360 I = 1,M TEMP = ZERO DO 350 L = 1,K TEMP = TEMP + A(L,I)*B(J,L) 350 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 360 CONTINUE 370 CONTINUE END IF END IF * RETURN * * End of CGEMM . * END *> \brief \b LSAME * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * LOGICAL FUNCTION LSAME(CA,CB) * * .. Scalar Arguments .. * CHARACTER CA,CB * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> LSAME returns .TRUE. if CA is the same letter as CB regardless of *> case. *> \endverbatim * * Arguments: * ========== * *> \param[in] CA *> \verbatim *> CA is CHARACTER*1 *> \endverbatim *> *> \param[in] CB *> \verbatim *> CB is CHARACTER*1 *> CA and CB specify the single characters to be compared. *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup aux_blas * * ===================================================================== LOGICAL FUNCTION LSAME(CA,CB) * * -- Reference BLAS level1 routine (version 3.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. CHARACTER CA,CB * .. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ICHAR * .. * .. Local Scalars .. INTEGER INTA,INTB,ZCODE * .. * * Test if the characters are equal * LSAME = CA .EQ. CB IF (LSAME) RETURN * * Now test for equivalence if both characters are alphabetic. * ZCODE = ICHAR('Z') * * Use 'Z' rather than 'A' so that ASCII can be detected on Prime * machines, on which ICHAR returns a value with bit 8 set. * ICHAR('A') on Prime machines returns 193 which is the same as * ICHAR('A') on an EBCDIC machine. * INTA = ICHAR(CA) INTB = ICHAR(CB) * IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN * * ASCII is assumed - ZCODE is the ASCII code of either lower or * upper case 'Z'. * IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32 IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32 * ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN * * EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or * upper case 'Z'. * IF (INTA.GE.129 .AND. INTA.LE.137 .OR. + INTA.GE.145 .AND. INTA.LE.153 .OR. + INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64 IF (INTB.GE.129 .AND. INTB.LE.137 .OR. + INTB.GE.145 .AND. INTB.LE.153 .OR. + INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64 * ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN * * ASCII is assumed, on Prime machines - ZCODE is the ASCII code * plus 128 of either lower or upper case 'Z'. * IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32 IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32 END IF LSAME = INTA .EQ. INTB * * RETURN * * End of LSAME * END *> \brief \b XERBLA * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE XERBLA( SRNAME, INFO ) * * .. Scalar Arguments .. * CHARACTER*(*) SRNAME * INTEGER INFO * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> XERBLA is an error handler for the LAPACK routines. *> It is called by an LAPACK routine if an input parameter has an *> invalid value. A message is printed and execution stops. *> *> Installers may consider modifying the STOP statement in order to *> call system-specific exception-handling facilities. *> \endverbatim * * Arguments: * ========== * *> \param[in] SRNAME *> \verbatim *> SRNAME is CHARACTER*(*) *> The name of the routine which called XERBLA. *> \endverbatim *> *> \param[in] INFO *> \verbatim *> INFO is INTEGER *> The position of the invalid parameter in the parameter list *> of the calling routine. *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup aux_blas * * ===================================================================== SUBROUTINE XERBLA( SRNAME, INFO ) * * -- Reference BLAS level1 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. CHARACTER*(*) SRNAME INTEGER INFO * .. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC LEN_TRIM * .. * .. Executable Statements .. * WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO * STOP * 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ', $ 'an illegal value' ) * * End of XERBLA * END *> \brief \b SGEMM * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * .. Scalar Arguments .. * REAL ALPHA,BETA * INTEGER K,LDA,LDB,LDC,M,N * CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. * REAL A(LDA,*),B(LDB,*),C(LDC,*) * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> SGEMM performs one of the matrix-matrix operations *> *> C := alpha*op( A )*op( B ) + beta*C, *> *> where op( X ) is one of *> *> op( X ) = X or op( X ) = X**T, *> *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. *> \endverbatim * * Arguments: * ========== * *> \param[in] TRANSA *> \verbatim *> TRANSA is CHARACTER*1 *> On entry, TRANSA specifies the form of op( A ) to be used in *> the matrix multiplication as follows: *> *> TRANSA = 'N' or 'n', op( A ) = A. *> *> TRANSA = 'T' or 't', op( A ) = A**T. *> *> TRANSA = 'C' or 'c', op( A ) = A**T. *> \endverbatim *> *> \param[in] TRANSB *> \verbatim *> TRANSB is CHARACTER*1 *> On entry, TRANSB specifies the form of op( B ) to be used in *> the matrix multiplication as follows: *> *> TRANSB = 'N' or 'n', op( B ) = B. *> *> TRANSB = 'T' or 't', op( B ) = B**T. *> *> TRANSB = 'C' or 'c', op( B ) = B**T. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER *> On entry, M specifies the number of rows of the matrix *> op( A ) and of the matrix C. M must be at least zero. *> \endverbatim *> *> \param[in] N *> \verbatim *> N is INTEGER *> On entry, N specifies the number of columns of the matrix *> op( B ) and the number of columns of the matrix C. N must be *> at least zero. *> \endverbatim *> *> \param[in] K *> \verbatim *> K is INTEGER *> On entry, K specifies the number of columns of the matrix *> op( A ) and the number of rows of the matrix op( B ). K must *> be at least zero. *> \endverbatim *> *> \param[in] ALPHA *> \verbatim *> ALPHA is REAL *> On entry, ALPHA specifies the scalar alpha. *> \endverbatim *> *> \param[in] A *> \verbatim *> A is REAL array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise *> the leading k by m part of the array A must contain the *> matrix A. *> \endverbatim *> *> \param[in] LDA *> \verbatim *> LDA is INTEGER *> On entry, LDA specifies the first dimension of A as declared *> in the calling (sub) program. When TRANSA = 'N' or 'n' then *> LDA must be at least max( 1, m ), otherwise LDA must be at *> least max( 1, k ). *> \endverbatim *> *> \param[in] B *> \verbatim *> B is REAL array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise *> the leading n by k part of the array B must contain the *> matrix B. *> \endverbatim *> *> \param[in] LDB *> \verbatim *> LDB is INTEGER *> On entry, LDB specifies the first dimension of B as declared *> in the calling (sub) program. When TRANSB = 'N' or 'n' then *> LDB must be at least max( 1, k ), otherwise LDB must be at *> least max( 1, n ). *> \endverbatim *> *> \param[in] BETA *> \verbatim *> BETA is REAL *> On entry, BETA specifies the scalar beta. When BETA is *> supplied as zero then C need not be set on input. *> \endverbatim *> *> \param[in,out] C *> \verbatim *> C is REAL array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. *> On exit, the array C is overwritten by the m by n matrix *> ( alpha*op( A )*op( B ) + beta*C ). *> \endverbatim *> *> \param[in] LDC *> \verbatim *> LDC is INTEGER *> On entry, LDC specifies the first dimension of C as declared *> in the calling (sub) program. LDC must be at least *> max( 1, m ). *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup single_blas_level3 * *> \par Further Details: * ===================== *> *> \verbatim *> *> Level 3 Blas routine. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> \endverbatim *> * ===================================================================== SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * -- Reference BLAS level3 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. REAL ALPHA,BETA INTEGER K,LDA,LDB,LDC,M,N CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. REAL A(LDA,*),B(LDB,*),C(LDC,*) * .. * * ===================================================================== * * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Local Scalars .. REAL TEMP INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB LOGICAL NOTA,NOTB * .. * .. Parameters .. REAL ONE,ZERO PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) * .. * * Set NOTA and NOTB as true if A and B respectively are not * transposed and set NROWA, NCOLA and NROWB as the number of rows * and columns of A and the number of rows of B respectively. * NOTA = LSAME(TRANSA,'N') NOTB = LSAME(TRANSB,'N') IF (NOTA) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF (NOTB) THEN NROWB = K ELSE NROWB = N END IF * * Test the input parameters. * INFO = 0 IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + (.NOT.LSAME(TRANSA,'T'))) THEN INFO = 1 ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. + (.NOT.LSAME(TRANSB,'T'))) THEN INFO = 2 ELSE IF (M.LT.0) THEN INFO = 3 ELSE IF (N.LT.0) THEN INFO = 4 ELSE IF (K.LT.0) THEN INFO = 5 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN INFO = 8 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN INFO = 10 ELSE IF (LDC.LT.MAX(1,M)) THEN INFO = 13 END IF IF (INFO.NE.0) THEN CALL XERBLA('SGEMM ',INFO) RETURN END IF * * Quick return if possible. * IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And if alpha.eq.zero. * IF (ALPHA.EQ.ZERO) THEN IF (BETA.EQ.ZERO) THEN DO 20 J = 1,N DO 10 I = 1,M C(I,J) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1,N DO 30 I = 1,M C(I,J) = BETA*C(I,J) 30 CONTINUE 40 CONTINUE END IF RETURN END IF * * Start the operations. * IF (NOTB) THEN IF (NOTA) THEN * * Form C := alpha*A*B + beta*C. * DO 90 J = 1,N IF (BETA.EQ.ZERO) THEN DO 50 I = 1,M C(I,J) = ZERO 50 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 60 I = 1,M C(I,J) = BETA*C(I,J) 60 CONTINUE END IF DO 80 L = 1,K TEMP = ALPHA*B(L,J) DO 70 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 70 CONTINUE 80 CONTINUE 90 CONTINUE ELSE * * Form C := alpha*A**T*B + beta*C * DO 120 J = 1,N DO 110 I = 1,M TEMP = ZERO DO 100 L = 1,K TEMP = TEMP + A(L,I)*B(L,J) 100 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 110 CONTINUE 120 CONTINUE END IF ELSE IF (NOTA) THEN * * Form C := alpha*A*B**T + beta*C * DO 170 J = 1,N IF (BETA.EQ.ZERO) THEN DO 130 I = 1,M C(I,J) = ZERO 130 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 140 I = 1,M C(I,J) = BETA*C(I,J) 140 CONTINUE END IF DO 160 L = 1,K TEMP = ALPHA*B(J,L) DO 150 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 150 CONTINUE 160 CONTINUE 170 CONTINUE ELSE * * Form C := alpha*A**T*B**T + beta*C * DO 200 J = 1,N DO 190 I = 1,M TEMP = ZERO DO 180 L = 1,K TEMP = TEMP + A(L,I)*B(J,L) 180 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 190 CONTINUE 200 CONTINUE END IF END IF * RETURN * * End of SGEMM . * END *> \brief \b DGEMM * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * .. Scalar Arguments .. * DOUBLE PRECISION ALPHA,BETA * INTEGER K,LDA,LDB,LDC,M,N * CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. * DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> DGEMM performs one of the matrix-matrix operations *> *> C := alpha*op( A )*op( B ) + beta*C, *> *> where op( X ) is one of *> *> op( X ) = X or op( X ) = X**T, *> *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. *> \endverbatim * * Arguments: * ========== * *> \param[in] TRANSA *> \verbatim *> TRANSA is CHARACTER*1 *> On entry, TRANSA specifies the form of op( A ) to be used in *> the matrix multiplication as follows: *> *> TRANSA = 'N' or 'n', op( A ) = A. *> *> TRANSA = 'T' or 't', op( A ) = A**T. *> *> TRANSA = 'C' or 'c', op( A ) = A**T. *> \endverbatim *> *> \param[in] TRANSB *> \verbatim *> TRANSB is CHARACTER*1 *> On entry, TRANSB specifies the form of op( B ) to be used in *> the matrix multiplication as follows: *> *> TRANSB = 'N' or 'n', op( B ) = B. *> *> TRANSB = 'T' or 't', op( B ) = B**T. *> *> TRANSB = 'C' or 'c', op( B ) = B**T. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER *> On entry, M specifies the number of rows of the matrix *> op( A ) and of the matrix C. M must be at least zero. *> \endverbatim *> *> \param[in] N *> \verbatim *> N is INTEGER *> On entry, N specifies the number of columns of the matrix *> op( B ) and the number of columns of the matrix C. N must be *> at least zero. *> \endverbatim *> *> \param[in] K *> \verbatim *> K is INTEGER *> On entry, K specifies the number of columns of the matrix *> op( A ) and the number of rows of the matrix op( B ). K must *> be at least zero. *> \endverbatim *> *> \param[in] ALPHA *> \verbatim *> ALPHA is DOUBLE PRECISION. *> On entry, ALPHA specifies the scalar alpha. *> \endverbatim *> *> \param[in] A *> \verbatim *> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise *> the leading k by m part of the array A must contain the *> matrix A. *> \endverbatim *> *> \param[in] LDA *> \verbatim *> LDA is INTEGER *> On entry, LDA specifies the first dimension of A as declared *> in the calling (sub) program. When TRANSA = 'N' or 'n' then *> LDA must be at least max( 1, m ), otherwise LDA must be at *> least max( 1, k ). *> \endverbatim *> *> \param[in] B *> \verbatim *> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise *> the leading n by k part of the array B must contain the *> matrix B. *> \endverbatim *> *> \param[in] LDB *> \verbatim *> LDB is INTEGER *> On entry, LDB specifies the first dimension of B as declared *> in the calling (sub) program. When TRANSB = 'N' or 'n' then *> LDB must be at least max( 1, k ), otherwise LDB must be at *> least max( 1, n ). *> \endverbatim *> *> \param[in] BETA *> \verbatim *> BETA is DOUBLE PRECISION. *> On entry, BETA specifies the scalar beta. When BETA is *> supplied as zero then C need not be set on input. *> \endverbatim *> *> \param[in,out] C *> \verbatim *> C is DOUBLE PRECISION array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. *> On exit, the array C is overwritten by the m by n matrix *> ( alpha*op( A )*op( B ) + beta*C ). *> \endverbatim *> *> \param[in] LDC *> \verbatim *> LDC is INTEGER *> On entry, LDC specifies the first dimension of C as declared *> in the calling (sub) program. LDC must be at least *> max( 1, m ). *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup double_blas_level3 * *> \par Further Details: * ===================== *> *> \verbatim *> *> Level 3 Blas routine. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> \endverbatim *> * ===================================================================== SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * -- Reference BLAS level3 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. DOUBLE PRECISION ALPHA,BETA INTEGER K,LDA,LDB,LDC,M,N CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*) * .. * * ===================================================================== * * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Local Scalars .. DOUBLE PRECISION TEMP INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB LOGICAL NOTA,NOTB * .. * .. Parameters .. DOUBLE PRECISION ONE,ZERO PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) * .. * * Set NOTA and NOTB as true if A and B respectively are not * transposed and set NROWA, NCOLA and NROWB as the number of rows * and columns of A and the number of rows of B respectively. * NOTA = LSAME(TRANSA,'N') NOTB = LSAME(TRANSB,'N') IF (NOTA) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF (NOTB) THEN NROWB = K ELSE NROWB = N END IF * * Test the input parameters. * INFO = 0 IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + (.NOT.LSAME(TRANSA,'T'))) THEN INFO = 1 ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. + (.NOT.LSAME(TRANSB,'T'))) THEN INFO = 2 ELSE IF (M.LT.0) THEN INFO = 3 ELSE IF (N.LT.0) THEN INFO = 4 ELSE IF (K.LT.0) THEN INFO = 5 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN INFO = 8 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN INFO = 10 ELSE IF (LDC.LT.MAX(1,M)) THEN INFO = 13 END IF IF (INFO.NE.0) THEN CALL XERBLA('DGEMM ',INFO) RETURN END IF * * Quick return if possible. * IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And if alpha.eq.zero. * IF (ALPHA.EQ.ZERO) THEN IF (BETA.EQ.ZERO) THEN DO 20 J = 1,N DO 10 I = 1,M C(I,J) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1,N DO 30 I = 1,M C(I,J) = BETA*C(I,J) 30 CONTINUE 40 CONTINUE END IF RETURN END IF * * Start the operations. * IF (NOTB) THEN IF (NOTA) THEN * * Form C := alpha*A*B + beta*C. * DO 90 J = 1,N IF (BETA.EQ.ZERO) THEN DO 50 I = 1,M C(I,J) = ZERO 50 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 60 I = 1,M C(I,J) = BETA*C(I,J) 60 CONTINUE END IF DO 80 L = 1,K TEMP = ALPHA*B(L,J) DO 70 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 70 CONTINUE 80 CONTINUE 90 CONTINUE ELSE * * Form C := alpha*A**T*B + beta*C * DO 120 J = 1,N DO 110 I = 1,M TEMP = ZERO DO 100 L = 1,K TEMP = TEMP + A(L,I)*B(L,J) 100 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 110 CONTINUE 120 CONTINUE END IF ELSE IF (NOTA) THEN * * Form C := alpha*A*B**T + beta*C * DO 170 J = 1,N IF (BETA.EQ.ZERO) THEN DO 130 I = 1,M C(I,J) = ZERO 130 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 140 I = 1,M C(I,J) = BETA*C(I,J) 140 CONTINUE END IF DO 160 L = 1,K TEMP = ALPHA*B(J,L) DO 150 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 150 CONTINUE 160 CONTINUE 170 CONTINUE ELSE * * Form C := alpha*A**T*B**T + beta*C * DO 200 J = 1,N DO 190 I = 1,M TEMP = ZERO DO 180 L = 1,K TEMP = TEMP + A(L,I)*B(J,L) 180 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 190 CONTINUE 200 CONTINUE END IF END IF * RETURN * * End of DGEMM . * END *> \brief \b ZGEMM * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * .. Scalar Arguments .. * COMPLEX*16 ALPHA,BETA * INTEGER K,LDA,LDB,LDC,M,N * CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. * COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> ZGEMM performs one of the matrix-matrix operations *> *> C := alpha*op( A )*op( B ) + beta*C, *> *> where op( X ) is one of *> *> op( X ) = X or op( X ) = X**T or op( X ) = X**H, *> *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. *> \endverbatim * * Arguments: * ========== * *> \param[in] TRANSA *> \verbatim *> TRANSA is CHARACTER*1 *> On entry, TRANSA specifies the form of op( A ) to be used in *> the matrix multiplication as follows: *> *> TRANSA = 'N' or 'n', op( A ) = A. *> *> TRANSA = 'T' or 't', op( A ) = A**T. *> *> TRANSA = 'C' or 'c', op( A ) = A**H. *> \endverbatim *> *> \param[in] TRANSB *> \verbatim *> TRANSB is CHARACTER*1 *> On entry, TRANSB specifies the form of op( B ) to be used in *> the matrix multiplication as follows: *> *> TRANSB = 'N' or 'n', op( B ) = B. *> *> TRANSB = 'T' or 't', op( B ) = B**T. *> *> TRANSB = 'C' or 'c', op( B ) = B**H. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER *> On entry, M specifies the number of rows of the matrix *> op( A ) and of the matrix C. M must be at least zero. *> \endverbatim *> *> \param[in] N *> \verbatim *> N is INTEGER *> On entry, N specifies the number of columns of the matrix *> op( B ) and the number of columns of the matrix C. N must be *> at least zero. *> \endverbatim *> *> \param[in] K *> \verbatim *> K is INTEGER *> On entry, K specifies the number of columns of the matrix *> op( A ) and the number of rows of the matrix op( B ). K must *> be at least zero. *> \endverbatim *> *> \param[in] ALPHA *> \verbatim *> ALPHA is COMPLEX*16 *> On entry, ALPHA specifies the scalar alpha. *> \endverbatim *> *> \param[in] A *> \verbatim *> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise *> the leading k by m part of the array A must contain the *> matrix A. *> \endverbatim *> *> \param[in] LDA *> \verbatim *> LDA is INTEGER *> On entry, LDA specifies the first dimension of A as declared *> in the calling (sub) program. When TRANSA = 'N' or 'n' then *> LDA must be at least max( 1, m ), otherwise LDA must be at *> least max( 1, k ). *> \endverbatim *> *> \param[in] B *> \verbatim *> B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise *> the leading n by k part of the array B must contain the *> matrix B. *> \endverbatim *> *> \param[in] LDB *> \verbatim *> LDB is INTEGER *> On entry, LDB specifies the first dimension of B as declared *> in the calling (sub) program. When TRANSB = 'N' or 'n' then *> LDB must be at least max( 1, k ), otherwise LDB must be at *> least max( 1, n ). *> \endverbatim *> *> \param[in] BETA *> \verbatim *> BETA is COMPLEX*16 *> On entry, BETA specifies the scalar beta. When BETA is *> supplied as zero then C need not be set on input. *> \endverbatim *> *> \param[in,out] C *> \verbatim *> C is COMPLEX*16 array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. *> On exit, the array C is overwritten by the m by n matrix *> ( alpha*op( A )*op( B ) + beta*C ). *> \endverbatim *> *> \param[in] LDC *> \verbatim *> LDC is INTEGER *> On entry, LDC specifies the first dimension of C as declared *> in the calling (sub) program. LDC must be at least *> max( 1, m ). *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup complex16_blas_level3 * *> \par Further Details: * ===================== *> *> \verbatim *> *> Level 3 Blas routine. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> \endverbatim *> * ===================================================================== SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * -- Reference BLAS level3 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. COMPLEX*16 ALPHA,BETA INTEGER K,LDA,LDB,LDC,M,N CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) * .. * * ===================================================================== * * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC DCONJG,MAX * .. * .. Local Scalars .. COMPLEX*16 TEMP INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB LOGICAL CONJA,CONJB,NOTA,NOTB * .. * .. Parameters .. COMPLEX*16 ONE PARAMETER (ONE= (1.0D+0,0.0D+0)) COMPLEX*16 ZERO PARAMETER (ZERO= (0.0D+0,0.0D+0)) * .. * * Set NOTA and NOTB as true if A and B respectively are not * conjugated or transposed, set CONJA and CONJB as true if A and * B respectively are to be transposed but not conjugated and set * NROWA, NCOLA and NROWB as the number of rows and columns of A * and the number of rows of B respectively. * NOTA = LSAME(TRANSA,'N') NOTB = LSAME(TRANSB,'N') CONJA = LSAME(TRANSA,'C') CONJB = LSAME(TRANSB,'C') IF (NOTA) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF (NOTB) THEN NROWB = K ELSE NROWB = N END IF * * Test the input parameters. * INFO = 0 IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND. + (.NOT.LSAME(TRANSA,'T'))) THEN INFO = 1 ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND. + (.NOT.LSAME(TRANSB,'T'))) THEN INFO = 2 ELSE IF (M.LT.0) THEN INFO = 3 ELSE IF (N.LT.0) THEN INFO = 4 ELSE IF (K.LT.0) THEN INFO = 5 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN INFO = 8 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN INFO = 10 ELSE IF (LDC.LT.MAX(1,M)) THEN INFO = 13 END IF IF (INFO.NE.0) THEN CALL XERBLA('ZGEMM ',INFO) RETURN END IF * * Quick return if possible. * IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And when alpha.eq.zero. * IF (ALPHA.EQ.ZERO) THEN IF (BETA.EQ.ZERO) THEN DO 20 J = 1,N DO 10 I = 1,M C(I,J) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1,N DO 30 I = 1,M C(I,J) = BETA*C(I,J) 30 CONTINUE 40 CONTINUE END IF RETURN END IF * * Start the operations. * IF (NOTB) THEN IF (NOTA) THEN * * Form C := alpha*A*B + beta*C. * DO 90 J = 1,N IF (BETA.EQ.ZERO) THEN DO 50 I = 1,M C(I,J) = ZERO 50 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 60 I = 1,M C(I,J) = BETA*C(I,J) 60 CONTINUE END IF DO 80 L = 1,K TEMP = ALPHA*B(L,J) DO 70 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 70 CONTINUE 80 CONTINUE 90 CONTINUE ELSE IF (CONJA) THEN * * Form C := alpha*A**H*B + beta*C. * DO 120 J = 1,N DO 110 I = 1,M TEMP = ZERO DO 100 L = 1,K TEMP = TEMP + DCONJG(A(L,I))*B(L,J) 100 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 110 CONTINUE 120 CONTINUE ELSE * * Form C := alpha*A**T*B + beta*C * DO 150 J = 1,N DO 140 I = 1,M TEMP = ZERO DO 130 L = 1,K TEMP = TEMP + A(L,I)*B(L,J) 130 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 140 CONTINUE 150 CONTINUE END IF ELSE IF (NOTA) THEN IF (CONJB) THEN * * Form C := alpha*A*B**H + beta*C. * DO 200 J = 1,N IF (BETA.EQ.ZERO) THEN DO 160 I = 1,M C(I,J) = ZERO 160 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 170 I = 1,M C(I,J) = BETA*C(I,J) 170 CONTINUE END IF DO 190 L = 1,K TEMP = ALPHA*DCONJG(B(J,L)) DO 180 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 180 CONTINUE 190 CONTINUE 200 CONTINUE ELSE * * Form C := alpha*A*B**T + beta*C * DO 250 J = 1,N IF (BETA.EQ.ZERO) THEN DO 210 I = 1,M C(I,J) = ZERO 210 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 220 I = 1,M C(I,J) = BETA*C(I,J) 220 CONTINUE END IF DO 240 L = 1,K TEMP = ALPHA*B(J,L) DO 230 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 230 CONTINUE 240 CONTINUE 250 CONTINUE END IF ELSE IF (CONJA) THEN IF (CONJB) THEN * * Form C := alpha*A**H*B**H + beta*C. * DO 280 J = 1,N DO 270 I = 1,M TEMP = ZERO DO 260 L = 1,K TEMP = TEMP + DCONJG(A(L,I))*DCONJG(B(J,L)) 260 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 270 CONTINUE 280 CONTINUE ELSE * * Form C := alpha*A**H*B**T + beta*C * DO 310 J = 1,N DO 300 I = 1,M TEMP = ZERO DO 290 L = 1,K TEMP = TEMP + DCONJG(A(L,I))*B(J,L) 290 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 300 CONTINUE 310 CONTINUE END IF ELSE IF (CONJB) THEN * * Form C := alpha*A**T*B**H + beta*C * DO 340 J = 1,N DO 330 I = 1,M TEMP = ZERO DO 320 L = 1,K TEMP = TEMP + A(L,I)*DCONJG(B(J,L)) 320 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 330 CONTINUE 340 CONTINUE ELSE * * Form C := alpha*A**T*B**T + beta*C * DO 370 J = 1,N DO 360 I = 1,M TEMP = ZERO DO 350 L = 1,K TEMP = TEMP + A(L,I)*B(J,L) 350 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 360 CONTINUE 370 CONTINUE END IF END IF * RETURN * * End of ZGEMM . * END ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run } C { dg-options "-fno-realloc-lhs -fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" } C { dg-shouldfail "Fortran runtime error: Array bound mismatch for dimension 2 of array." } program main real, dimension(3,2) :: a real, dimension(2,3) :: b real, dimension(:,:), allocatable :: ret a = 1.0 b = 2.3 allocate(ret(3,2)) ret = matmul(a,b) ! This should throw an error. end ! { dg-output "Fortran runtime error: Array bound mismatch for dimension 2 of array.*" } *> \brief \b SGEMM * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * .. Scalar Arguments .. * REAL ALPHA,BETA * INTEGER K,LDA,LDB,LDC,M,N * CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. * REAL A(LDA,*),B(LDB,*),C(LDC,*) * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> SGEMM performs one of the matrix-matrix operations *> *> C := alpha*op( A )*op( B ) + beta*C, *> *> where op( X ) is one of *> *> op( X ) = X or op( X ) = X**T, *> *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. *> \endverbatim * * Arguments: * ========== * *> \param[in] TRANSA *> \verbatim *> TRANSA is CHARACTER*1 *> On entry, TRANSA specifies the form of op( A ) to be used in *> the matrix multiplication as follows: *> *> TRANSA = 'N' or 'n', op( A ) = A. *> *> TRANSA = 'T' or 't', op( A ) = A**T. *> *> TRANSA = 'C' or 'c', op( A ) = A**T. *> \endverbatim *> *> \param[in] TRANSB *> \verbatim *> TRANSB is CHARACTER*1 *> On entry, TRANSB specifies the form of op( B ) to be used in *> the matrix multiplication as follows: *> *> TRANSB = 'N' or 'n', op( B ) = B. *> *> TRANSB = 'T' or 't', op( B ) = B**T. *> *> TRANSB = 'C' or 'c', op( B ) = B**T. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER *> On entry, M specifies the number of rows of the matrix *> op( A ) and of the matrix C. M must be at least zero. *> \endverbatim *> *> \param[in] N *> \verbatim *> N is INTEGER *> On entry, N specifies the number of columns of the matrix *> op( B ) and the number of columns of the matrix C. N must be *> at least zero. *> \endverbatim *> *> \param[in] K *> \verbatim *> K is INTEGER *> On entry, K specifies the number of columns of the matrix *> op( A ) and the number of rows of the matrix op( B ). K must *> be at least zero. *> \endverbatim *> *> \param[in] ALPHA *> \verbatim *> ALPHA is REAL *> On entry, ALPHA specifies the scalar alpha. *> \endverbatim *> *> \param[in] A *> \verbatim *> A is REAL array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise *> the leading k by m part of the array A must contain the *> matrix A. *> \endverbatim *> *> \param[in] LDA *> \verbatim *> LDA is INTEGER *> On entry, LDA specifies the first dimension of A as declared *> in the calling (sub) program. When TRANSA = 'N' or 'n' then *> LDA must be at least max( 1, m ), otherwise LDA must be at *> least max( 1, k ). *> \endverbatim *> *> \param[in] B *> \verbatim *> B is REAL array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise *> the leading n by k part of the array B must contain the *> matrix B. *> \endverbatim *> *> \param[in] LDB *> \verbatim *> LDB is INTEGER *> On entry, LDB specifies the first dimension of B as declared *> in the calling (sub) program. When TRANSB = 'N' or 'n' then *> LDB must be at least max( 1, k ), otherwise LDB must be at *> least max( 1, n ). *> \endverbatim *> *> \param[in] BETA *> \verbatim *> BETA is REAL *> On entry, BETA specifies the scalar beta. When BETA is *> supplied as zero then C need not be set on input. *> \endverbatim *> *> \param[in,out] C *> \verbatim *> C is REAL array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. *> On exit, the array C is overwritten by the m by n matrix *> ( alpha*op( A )*op( B ) + beta*C ). *> \endverbatim *> *> \param[in] LDC *> \verbatim *> LDC is INTEGER *> On entry, LDC specifies the first dimension of C as declared *> in the calling (sub) program. LDC must be at least *> max( 1, m ). *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup single_blas_level3 * *> \par Further Details: * ===================== *> *> \verbatim *> *> Level 3 Blas routine. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> \endverbatim *> * ===================================================================== SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * -- Reference BLAS level3 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. REAL ALPHA,BETA INTEGER K,LDA,LDB,LDC,M,N CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. REAL A(LDA,*),B(LDB,*),C(LDC,*) * .. * * ===================================================================== * * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Local Scalars .. REAL TEMP INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB LOGICAL NOTA,NOTB * .. * .. Parameters .. REAL ONE,ZERO PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) * .. * * Set NOTA and NOTB as true if A and B respectively are not * transposed and set NROWA, NCOLA and NROWB as the number of rows * and columns of A and the number of rows of B respectively. * NOTA = LSAME(TRANSA,'N') NOTB = LSAME(TRANSB,'N') IF (NOTA) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF (NOTB) THEN NROWB = K ELSE NROWB = N END IF * * Test the input parameters. * INFO = 0 IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + (.NOT.LSAME(TRANSA,'T'))) THEN INFO = 1 ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. + (.NOT.LSAME(TRANSB,'T'))) THEN INFO = 2 ELSE IF (M.LT.0) THEN INFO = 3 ELSE IF (N.LT.0) THEN INFO = 4 ELSE IF (K.LT.0) THEN INFO = 5 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN INFO = 8 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN INFO = 10 ELSE IF (LDC.LT.MAX(1,M)) THEN INFO = 13 END IF IF (INFO.NE.0) THEN CALL XERBLA('SGEMM ',INFO) RETURN END IF * * Quick return if possible. * IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And if alpha.eq.zero. * IF (ALPHA.EQ.ZERO) THEN IF (BETA.EQ.ZERO) THEN DO 20 J = 1,N DO 10 I = 1,M C(I,J) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1,N DO 30 I = 1,M C(I,J) = BETA*C(I,J) 30 CONTINUE 40 CONTINUE END IF RETURN END IF * * Start the operations. * IF (NOTB) THEN IF (NOTA) THEN * * Form C := alpha*A*B + beta*C. * DO 90 J = 1,N IF (BETA.EQ.ZERO) THEN DO 50 I = 1,M C(I,J) = ZERO 50 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 60 I = 1,M C(I,J) = BETA*C(I,J) 60 CONTINUE END IF DO 80 L = 1,K TEMP = ALPHA*B(L,J) DO 70 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 70 CONTINUE 80 CONTINUE 90 CONTINUE ELSE * * Form C := alpha*A**T*B + beta*C * DO 120 J = 1,N DO 110 I = 1,M TEMP = ZERO DO 100 L = 1,K TEMP = TEMP + A(L,I)*B(L,J) 100 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 110 CONTINUE 120 CONTINUE END IF ELSE IF (NOTA) THEN * * Form C := alpha*A*B**T + beta*C * DO 170 J = 1,N IF (BETA.EQ.ZERO) THEN DO 130 I = 1,M C(I,J) = ZERO 130 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 140 I = 1,M C(I,J) = BETA*C(I,J) 140 CONTINUE END IF DO 160 L = 1,K TEMP = ALPHA*B(J,L) DO 150 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 150 CONTINUE 160 CONTINUE 170 CONTINUE ELSE * * Form C := alpha*A**T*B**T + beta*C * DO 200 J = 1,N DO 190 I = 1,M TEMP = ZERO DO 180 L = 1,K TEMP = TEMP + A(L,I)*B(J,L) 180 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 190 CONTINUE 200 CONTINUE END IF END IF * RETURN * * End of SGEMM . * END *> \brief \b LSAME * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * LOGICAL FUNCTION LSAME(CA,CB) * * .. Scalar Arguments .. * CHARACTER CA,CB * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> LSAME returns .TRUE. if CA is the same letter as CB regardless of *> case. *> \endverbatim * * Arguments: * ========== * *> \param[in] CA *> \verbatim *> CA is CHARACTER*1 *> \endverbatim *> *> \param[in] CB *> \verbatim *> CB is CHARACTER*1 *> CA and CB specify the single characters to be compared. *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup aux_blas * * ===================================================================== LOGICAL FUNCTION LSAME(CA,CB) * * -- Reference BLAS level1 routine (version 3.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. CHARACTER CA,CB * .. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ICHAR * .. * .. Local Scalars .. INTEGER INTA,INTB,ZCODE * .. * * Test if the characters are equal * LSAME = CA .EQ. CB IF (LSAME) RETURN * * Now test for equivalence if both characters are alphabetic. * ZCODE = ICHAR('Z') * * Use 'Z' rather than 'A' so that ASCII can be detected on Prime * machines, on which ICHAR returns a value with bit 8 set. * ICHAR('A') on Prime machines returns 193 which is the same as * ICHAR('A') on an EBCDIC machine. * INTA = ICHAR(CA) INTB = ICHAR(CB) * IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN * * ASCII is assumed - ZCODE is the ASCII code of either lower or * upper case 'Z'. * IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32 IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32 * ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN * * EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or * upper case 'Z'. * IF (INTA.GE.129 .AND. INTA.LE.137 .OR. + INTA.GE.145 .AND. INTA.LE.153 .OR. + INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64 IF (INTB.GE.129 .AND. INTB.LE.137 .OR. + INTB.GE.145 .AND. INTB.LE.153 .OR. + INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64 * ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN * * ASCII is assumed, on Prime machines - ZCODE is the ASCII code * plus 128 of either lower or upper case 'Z'. * IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32 IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32 END IF LSAME = INTA .EQ. INTB * * RETURN * * End of LSAME * END *> \brief \b XERBLA * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE XERBLA( SRNAME, INFO ) * * .. Scalar Arguments .. * CHARACTER*(*) SRNAME * INTEGER INFO * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> XERBLA is an error handler for the LAPACK routines. *> It is called by an LAPACK routine if an input parameter has an *> invalid value. A message is printed and execution stops. *> *> Installers may consider modifying the STOP statement in order to *> call system-specific exception-handling facilities. *> \endverbatim * * Arguments: * ========== * *> \param[in] SRNAME *> \verbatim *> SRNAME is CHARACTER*(*) *> The name of the routine which called XERBLA. *> \endverbatim *> *> \param[in] INFO *> \verbatim *> INFO is INTEGER *> The position of the invalid parameter in the parameter list *> of the calling routine. *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup aux_blas * * ===================================================================== SUBROUTINE XERBLA( SRNAME, INFO ) * * -- Reference BLAS level1 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. CHARACTER*(*) SRNAME INTEGER INFO * .. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC LEN_TRIM * .. * .. Executable Statements .. * WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO * STOP * 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ', $ 'an illegal value' ) * * End of XERBLA * END ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run } C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" } C { dg-shouldfail "Fortran runtime error: Array bound mismatch for dimension 2 of array." } program main character(len=20) :: line integer :: n, m real, dimension(3,2) :: a real, dimension(:,:), allocatable :: b real, dimension(:,:), allocatable :: ret a = 1.0 line = '3 3' read (unit=line,fmt=*) n, m allocate (b(n,m)) b = 2.3 ret = matmul(a,b) ! This should throw an error. end ! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" } *> \brief \b SGEMM * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * .. Scalar Arguments .. * REAL ALPHA,BETA * INTEGER K,LDA,LDB,LDC,M,N * CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. * REAL A(LDA,*),B(LDB,*),C(LDC,*) * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> SGEMM performs one of the matrix-matrix operations *> *> C := alpha*op( A )*op( B ) + beta*C, *> *> where op( X ) is one of *> *> op( X ) = X or op( X ) = X**T, *> *> alpha and beta are scalars, and A, B and C are matrices, with op( A ) *> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. *> \endverbatim * * Arguments: * ========== * *> \param[in] TRANSA *> \verbatim *> TRANSA is CHARACTER*1 *> On entry, TRANSA specifies the form of op( A ) to be used in *> the matrix multiplication as follows: *> *> TRANSA = 'N' or 'n', op( A ) = A. *> *> TRANSA = 'T' or 't', op( A ) = A**T. *> *> TRANSA = 'C' or 'c', op( A ) = A**T. *> \endverbatim *> *> \param[in] TRANSB *> \verbatim *> TRANSB is CHARACTER*1 *> On entry, TRANSB specifies the form of op( B ) to be used in *> the matrix multiplication as follows: *> *> TRANSB = 'N' or 'n', op( B ) = B. *> *> TRANSB = 'T' or 't', op( B ) = B**T. *> *> TRANSB = 'C' or 'c', op( B ) = B**T. *> \endverbatim *> *> \param[in] M *> \verbatim *> M is INTEGER *> On entry, M specifies the number of rows of the matrix *> op( A ) and of the matrix C. M must be at least zero. *> \endverbatim *> *> \param[in] N *> \verbatim *> N is INTEGER *> On entry, N specifies the number of columns of the matrix *> op( B ) and the number of columns of the matrix C. N must be *> at least zero. *> \endverbatim *> *> \param[in] K *> \verbatim *> K is INTEGER *> On entry, K specifies the number of columns of the matrix *> op( A ) and the number of rows of the matrix op( B ). K must *> be at least zero. *> \endverbatim *> *> \param[in] ALPHA *> \verbatim *> ALPHA is REAL *> On entry, ALPHA specifies the scalar alpha. *> \endverbatim *> *> \param[in] A *> \verbatim *> A is REAL array, dimension ( LDA, ka ), where ka is *> k when TRANSA = 'N' or 'n', and is m otherwise. *> Before entry with TRANSA = 'N' or 'n', the leading m by k *> part of the array A must contain the matrix A, otherwise *> the leading k by m part of the array A must contain the *> matrix A. *> \endverbatim *> *> \param[in] LDA *> \verbatim *> LDA is INTEGER *> On entry, LDA specifies the first dimension of A as declared *> in the calling (sub) program. When TRANSA = 'N' or 'n' then *> LDA must be at least max( 1, m ), otherwise LDA must be at *> least max( 1, k ). *> \endverbatim *> *> \param[in] B *> \verbatim *> B is REAL array, dimension ( LDB, kb ), where kb is *> n when TRANSB = 'N' or 'n', and is k otherwise. *> Before entry with TRANSB = 'N' or 'n', the leading k by n *> part of the array B must contain the matrix B, otherwise *> the leading n by k part of the array B must contain the *> matrix B. *> \endverbatim *> *> \param[in] LDB *> \verbatim *> LDB is INTEGER *> On entry, LDB specifies the first dimension of B as declared *> in the calling (sub) program. When TRANSB = 'N' or 'n' then *> LDB must be at least max( 1, k ), otherwise LDB must be at *> least max( 1, n ). *> \endverbatim *> *> \param[in] BETA *> \verbatim *> BETA is REAL *> On entry, BETA specifies the scalar beta. When BETA is *> supplied as zero then C need not be set on input. *> \endverbatim *> *> \param[in,out] C *> \verbatim *> C is REAL array, dimension ( LDC, N ) *> Before entry, the leading m by n part of the array C must *> contain the matrix C, except when beta is zero, in which *> case C need not be set on entry. *> On exit, the array C is overwritten by the m by n matrix *> ( alpha*op( A )*op( B ) + beta*C ). *> \endverbatim *> *> \param[in] LDC *> \verbatim *> LDC is INTEGER *> On entry, LDC specifies the first dimension of C as declared *> in the calling (sub) program. LDC must be at least *> max( 1, m ). *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup single_blas_level3 * *> \par Further Details: * ===================== *> *> \verbatim *> *> Level 3 Blas routine. *> *> -- Written on 8-February-1989. *> Jack Dongarra, Argonne National Laboratory. *> Iain Duff, AERE Harwell. *> Jeremy Du Croz, Numerical Algorithms Group Ltd. *> Sven Hammarling, Numerical Algorithms Group Ltd. *> \endverbatim *> * ===================================================================== SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) * * -- Reference BLAS level3 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. REAL ALPHA,BETA INTEGER K,LDA,LDB,LDC,M,N CHARACTER TRANSA,TRANSB * .. * .. Array Arguments .. REAL A(LDA,*),B(LDB,*),C(LDC,*) * .. * * ===================================================================== * * .. External Functions .. LOGICAL LSAME EXTERNAL LSAME * .. * .. External Subroutines .. EXTERNAL XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX * .. * .. Local Scalars .. REAL TEMP INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB LOGICAL NOTA,NOTB * .. * .. Parameters .. REAL ONE,ZERO PARAMETER (ONE=1.0E+0,ZERO=0.0E+0) * .. * * Set NOTA and NOTB as true if A and B respectively are not * transposed and set NROWA, NCOLA and NROWB as the number of rows * and columns of A and the number of rows of B respectively. * NOTA = LSAME(TRANSA,'N') NOTB = LSAME(TRANSB,'N') IF (NOTA) THEN NROWA = M NCOLA = K ELSE NROWA = K NCOLA = M END IF IF (NOTB) THEN NROWB = K ELSE NROWB = N END IF * * Test the input parameters. * INFO = 0 IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND. + (.NOT.LSAME(TRANSA,'T'))) THEN INFO = 1 ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND. + (.NOT.LSAME(TRANSB,'T'))) THEN INFO = 2 ELSE IF (M.LT.0) THEN INFO = 3 ELSE IF (N.LT.0) THEN INFO = 4 ELSE IF (K.LT.0) THEN INFO = 5 ELSE IF (LDA.LT.MAX(1,NROWA)) THEN INFO = 8 ELSE IF (LDB.LT.MAX(1,NROWB)) THEN INFO = 10 ELSE IF (LDC.LT.MAX(1,M)) THEN INFO = 13 END IF IF (INFO.NE.0) THEN CALL XERBLA('SGEMM ',INFO) RETURN END IF * * Quick return if possible. * IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN * * And if alpha.eq.zero. * IF (ALPHA.EQ.ZERO) THEN IF (BETA.EQ.ZERO) THEN DO 20 J = 1,N DO 10 I = 1,M C(I,J) = ZERO 10 CONTINUE 20 CONTINUE ELSE DO 40 J = 1,N DO 30 I = 1,M C(I,J) = BETA*C(I,J) 30 CONTINUE 40 CONTINUE END IF RETURN END IF * * Start the operations. * IF (NOTB) THEN IF (NOTA) THEN * * Form C := alpha*A*B + beta*C. * DO 90 J = 1,N IF (BETA.EQ.ZERO) THEN DO 50 I = 1,M C(I,J) = ZERO 50 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 60 I = 1,M C(I,J) = BETA*C(I,J) 60 CONTINUE END IF DO 80 L = 1,K TEMP = ALPHA*B(L,J) DO 70 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 70 CONTINUE 80 CONTINUE 90 CONTINUE ELSE * * Form C := alpha*A**T*B + beta*C * DO 120 J = 1,N DO 110 I = 1,M TEMP = ZERO DO 100 L = 1,K TEMP = TEMP + A(L,I)*B(L,J) 100 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 110 CONTINUE 120 CONTINUE END IF ELSE IF (NOTA) THEN * * Form C := alpha*A*B**T + beta*C * DO 170 J = 1,N IF (BETA.EQ.ZERO) THEN DO 130 I = 1,M C(I,J) = ZERO 130 CONTINUE ELSE IF (BETA.NE.ONE) THEN DO 140 I = 1,M C(I,J) = BETA*C(I,J) 140 CONTINUE END IF DO 160 L = 1,K TEMP = ALPHA*B(J,L) DO 150 I = 1,M C(I,J) = C(I,J) + TEMP*A(I,L) 150 CONTINUE 160 CONTINUE 170 CONTINUE ELSE * * Form C := alpha*A**T*B**T + beta*C * DO 200 J = 1,N DO 190 I = 1,M TEMP = ZERO DO 180 L = 1,K TEMP = TEMP + A(L,I)*B(J,L) 180 CONTINUE IF (BETA.EQ.ZERO) THEN C(I,J) = ALPHA*TEMP ELSE C(I,J) = ALPHA*TEMP + BETA*C(I,J) END IF 190 CONTINUE 200 CONTINUE END IF END IF * RETURN * * End of SGEMM . * END *> \brief \b LSAME * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * LOGICAL FUNCTION LSAME(CA,CB) * * .. Scalar Arguments .. * CHARACTER CA,CB * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> LSAME returns .TRUE. if CA is the same letter as CB regardless of *> case. *> \endverbatim * * Arguments: * ========== * *> \param[in] CA *> \verbatim *> CA is CHARACTER*1 *> \endverbatim *> *> \param[in] CB *> \verbatim *> CB is CHARACTER*1 *> CA and CB specify the single characters to be compared. *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup aux_blas * * ===================================================================== LOGICAL FUNCTION LSAME(CA,CB) * * -- Reference BLAS level1 routine (version 3.1) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. CHARACTER CA,CB * .. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC ICHAR * .. * .. Local Scalars .. INTEGER INTA,INTB,ZCODE * .. * * Test if the characters are equal * LSAME = CA .EQ. CB IF (LSAME) RETURN * * Now test for equivalence if both characters are alphabetic. * ZCODE = ICHAR('Z') * * Use 'Z' rather than 'A' so that ASCII can be detected on Prime * machines, on which ICHAR returns a value with bit 8 set. * ICHAR('A') on Prime machines returns 193 which is the same as * ICHAR('A') on an EBCDIC machine. * INTA = ICHAR(CA) INTB = ICHAR(CB) * IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN * * ASCII is assumed - ZCODE is the ASCII code of either lower or * upper case 'Z'. * IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32 IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32 * ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN * * EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or * upper case 'Z'. * IF (INTA.GE.129 .AND. INTA.LE.137 .OR. + INTA.GE.145 .AND. INTA.LE.153 .OR. + INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64 IF (INTB.GE.129 .AND. INTB.LE.137 .OR. + INTB.GE.145 .AND. INTB.LE.153 .OR. + INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64 * ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN * * ASCII is assumed, on Prime machines - ZCODE is the ASCII code * plus 128 of either lower or upper case 'Z'. * IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32 IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32 END IF LSAME = INTA .EQ. INTB * * RETURN * * End of LSAME * END *> \brief \b XERBLA * * =========== DOCUMENTATION =========== * * Online html documentation available at * http://www.netlib.org/lapack/explore-html/ * * Definition: * =========== * * SUBROUTINE XERBLA( SRNAME, INFO ) * * .. Scalar Arguments .. * CHARACTER*(*) SRNAME * INTEGER INFO * .. * * *> \par Purpose: * ============= *> *> \verbatim *> *> XERBLA is an error handler for the LAPACK routines. *> It is called by an LAPACK routine if an input parameter has an *> invalid value. A message is printed and execution stops. *> *> Installers may consider modifying the STOP statement in order to *> call system-specific exception-handling facilities. *> \endverbatim * * Arguments: * ========== * *> \param[in] SRNAME *> \verbatim *> SRNAME is CHARACTER*(*) *> The name of the routine which called XERBLA. *> \endverbatim *> *> \param[in] INFO *> \verbatim *> INFO is INTEGER *> The position of the invalid parameter in the parameter list *> of the calling routine. *> \endverbatim * * Authors: * ======== * *> \author Univ. of Tennessee *> \author Univ. of California Berkeley *> \author Univ. of Colorado Denver *> \author NAG Ltd. * *> \date December 2016 * *> \ingroup aux_blas * * ===================================================================== SUBROUTINE XERBLA( SRNAME, INFO ) * * -- Reference BLAS level1 routine (version 3.7.0) -- * -- Reference BLAS is a software package provided by Univ. of Tennessee, -- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- * December 2016 * * .. Scalar Arguments .. CHARACTER*(*) SRNAME INTEGER INFO * .. * * ===================================================================== * * .. Intrinsic Functions .. INTRINSIC LEN_TRIM * .. * .. Executable Statements .. * WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO * STOP * 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ', $ 'an illegal value' ) * * End of XERBLA * END ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-do run } ! { dg-options "-ffrontend-optimize -fdump-tree-optimized -Wrealloc-lhs -finline-matmul-limit=1000 -O" } ! PR 66094: Check functionality for MATMUL(TRANSPOSE(A),B)) for two-dimensional arrays program main implicit none integer, parameter :: n = 3, m=4, cnt=2 real, dimension(cnt,n) :: a real, dimension(cnt,m) :: b real, dimension(n,m) :: c, cres real, dimension(:,:), allocatable :: calloc integer :: in, im, icnt data a / 2., -3., 5., -7., 11., -13./ data b /17., -23., 29., -31., 37., -39., 41., -47./ data cres /103., 246., 486., 151., 362., 722., & 191., 458., 914., 223., 534., 1062./ c = matmul(transpose(a),b) if (sum(c-cres)>1e-4) STOP 1 if (sum(c-cres)>1e-4) STOP 2 ! Unallocated calloc = matmul(transpose(a),b) ! { dg-warning "Code for reallocating the allocatable array" } if (any(shape(c) /= shape(calloc))) STOP 3 if (sum(calloc-cres)>1e-4) STOP 4 deallocate(calloc) ! Allocated to wrong shape allocate (calloc(10,10)) calloc = matmul(transpose(a),b) ! { dg-warning "Code for reallocating the allocatable array" } if (any(shape(c) /= shape(calloc))) STOP 5 if (sum(calloc-cres)>1e-4) STOP 6 deallocate(calloc) ! cycle through a few test cases... do in=2,10 do im = 2,10 do icnt = 2,10 block real, dimension(icnt,in) :: a2 real, dimension(icnt,im) :: b2 real, dimension(in,im) :: c2,cr integer :: i,j,k call random_number(a2) call random_number(b2) c2 = 0 do i=1,size(a2,2) do j=1, size(b2,2) do k=1, size(a2,1) c2(i,j) = c2(i,j) + a2(k,i) * b2(k,j) end do end do end do cr = matmul(transpose(a2), b2) if (any(abs(c2-cr) > 1e-4)) STOP 7 end block end do end do end do end program main ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "optimized" } }