This patch introduces a new -finform-parallelism flag to report any parallelism detected by the compiler. Initially, it is used to report how the oaccdevlow pass partitions OpenACC loops. Currently, if you want to extract this information, you need to compile the program with -fdump-tree-oaccdevlow, then scan the tree dump for lines marked "Loop" and decode the decimal bitmask that represents the parallelism. This patch makes this process more user-friendly by using inform messages to highlight the directives inside the source code and clearly print out the associated parallelism. E.g., given
!$acc parallel loop
do i = ...
  !$acc loop
  do j = ...

-finform-parallelism reports

inform-parallelism.f90: In function ‘MAIN__._omp_fn.0’:
inform-parallelism.f90:10:0: note: ACC LOOP GANG
  !$acc parallel loop
inform-parallelism.f90:12:0: note: ACC LOOP WORKER VECTOR
  !$acc loop

Unfortunately, because the oaccdevlow pass runs so late, the offloaded function name doesn't match the one specified by the user.

While working on this, I noticed that the Fortran FE wasn't recording the location of combined loop directives properly, so I fixed that bug. I also removed an unused variable inside trans-openmp.c. This patch still isn't complete because I found a similar bug in the C++ FE.

Thomas, before I fix that bug, do you think this patch is worth pursuing for gomp-4_0-branch or maybe even trunk in general? Ideally, we can extend these diagnostics to report any detected loops inside kernels regions.

Cesar
2017-02-20 Cesar Philippidis <ce...@codesourcery.com> gcc/ * common.opt (finform-parallelism): New option. * omp-low.c (inform_oacc_loop): New function. (execute_oacc_device_lower): Use it to report how ACC LOOPs are assigned parallelism. gcc/doc/ * invoke.texi: Document -finform-parallelism. gcc/fortran/ * trans-openmp.c (gfc_trans_omp_clauses_1): Delete unused orig_decl. (gfc_trans_oacc_combined_directive): Set the location of combined acc loops. gcc/testsuite/ * c-c++-common/goacc/inform-parallelism.c: New test. * gfortran.dg/goacc/inform-parallelism.f90: New test. diff --git a/gcc/common.opt b/gcc/common.opt index 42c0b2f..a7e5494 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1451,6 +1451,10 @@ fif-conversion2 Common Report Var(flag_if_conversion2) Optimization Perform conversion of conditional jumps to conditional execution. +finform-parallelism +Common Var(flag_inform_parallelism) Init(0) +Report all parallelism detected inside offloaded regions. + fstack-reuse= Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization -fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fd8ba42..9cc8a8d 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -351,7 +351,7 @@ Objective-C and Objective-C++ Dialects}. -fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol -fgcse-sm -fhoist-adjacent-loads -fif-conversion @gol --fif-conversion2 -findirect-inlining @gol +-fif-conversion2 -findirect-inlining -finform-parallelism @gol -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol -finline-small-functions -fipa-cp -fipa-cp-clone -fipa-cp-alignment @gol -fipa-pta -fipa-profile -fipa-pure-const -fipa-reference -fipa-icf @gol @@ -6428,6 +6428,13 @@ or @option{-finline-small-functions} options. Enabled at level @option{-O2}.
+@item -finform-parallelism +@opindex finform-parallelism +Report any parallelism detected by the compiler. Inside OpenACC +offloaded regions, this includes the gang, worker and vector level +parallelism associated with any @code{ACC LOOP}s. This option is disabled +by default. + @item -finline-functions @opindex finline-functions Consider all functions for inlining, even if they are not declared inline. diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c index 295f172..8688425 100644 --- a/gcc/fortran/trans-openmp.c +++ b/gcc/fortran/trans-openmp.c @@ -1947,7 +1947,6 @@ gfc_trans_omp_clauses_1 (stmtblock_t *block, gfc_omp_clauses *clauses, && n->expr->ref->next->u.ar.type == AR_FULL))) { gfc_ref *ref = n->expr->ref; - tree orig_decl = decl; gfc_component *c = ref->u.c.component; tree field; tree context; @@ -3819,6 +3818,7 @@ gfc_trans_oacc_combined_directive (gfc_code *code) enum tree_code construct_code; bool scan_nodesc_arrays = false; hash_set<gfc_symbol *> *array_set = NULL; + location_t loc = input_location; switch (code->op) { @@ -3850,6 +3850,9 @@ gfc_trans_oacc_combined_directive (gfc_code *code) pushlevel (); stmt = gfc_trans_omp_do (code, EXEC_OACC_LOOP, pblock, loop_clauses, NULL); + if (CAN_HAVE_LOCATION_P (stmt)) + SET_EXPR_LOCATION (stmt, loc); + if (array_set && array_set->elements ()) gfc_add_expr_to_block (&inner, stmt); @@ -3865,8 +3868,7 @@ gfc_trans_oacc_combined_directive (gfc_code *code) delete array_set; } - stmt = build2_loc (input_location, construct_code, void_type_node, stmt, - oacc_clauses); + stmt = build2_loc (loc, construct_code, void_type_node, stmt, oacc_clauses); gfc_add_expr_to_block (&block, stmt); gfc_free_omp_clauses (loop_clauses); diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 0f79533..6ea8738 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -20399,6 +20399,28 @@ debug_oacc_loop (oacc_loop *loop) dump_oacc_loop (stderr, loop, 0); } +/* Provide diagnostics on OpenACC loops LOOP, its siblings and its + 
children. */ + +static void +inform_oacc_loop (oacc_loop *loop) +{ + const char *seq = loop->mask == 0 ? " SEQ" : ""; + const char *gang = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) + ? " GANG" : ""; + const char *worker = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) + ? " WORKER" : ""; + const char *vector = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) + ? " VECTOR" : ""; + + inform (loop->loc, "ACC LOOP%s%s%s%s", seq, gang, worker, vector); + + if (loop->child) + inform_oacc_loop (loop->child); + if (loop->sibling) + inform_oacc_loop (loop->sibling); +} + /* DFS walk of basic blocks BB onwards, creating OpenACC loop structures as we go. By construction these loops are properly nested. */ @@ -21069,6 +21091,8 @@ execute_oacc_device_lower () dump_oacc_loop (dump_file, loops, 0); fprintf (dump_file, "\n"); } + if (flag_inform_parallelism && loops->child) + inform_oacc_loop (loops->child); /* Offloaded targets may introduce new basic blocks, which require dominance information to update SSA. */ diff --git a/gcc/testsuite/c-c++-common/goacc/inform-parallelism.c b/gcc/testsuite/c-c++-common/goacc/inform-parallelism.c new file mode 100644 index 0000000..b892bf0 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/inform-parallelism.c @@ -0,0 +1,61 @@ +/* Test the output of -finform-parallelism. 
*/ + +/* { dg-additional-options "-finform-parallelism" } */ + +int +main () +{ + int x, y, z; + +#pragma acc parallel loop seq /* { dg-message "ACC LOOP SEQ" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop gang /* { dg-message "ACC LOOP GANG" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop worker /* { dg-message "ACC LOOP WORKER" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop vector /* { dg-message "ACC LOOP VECTOR" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop gang vector /* { dg-message "ACC LOOP GANG VECTOR" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop gang worker /* { dg-message "ACC LOOP GANG WORKER" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop worker vector /* { dg-message "ACC LOOP WORKER VECTOR" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop gang worker vector /* { dg-message "ACC LOOP GANG WORKER VECTOR" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop /* { dg-message "ACC LOOP GANG VECTOR" } */ + for (x = 0; x < 10; x++) + ; + +#pragma acc parallel loop /* { dg-message "ACC LOOP GANG WORKER" } */ + for (x = 0; x < 10; x++) +#pragma acc loop /* { dg-message "ACC LOOP VECTOR" } */ + for (y = 0; y < 10; y++) + ; + +#pragma acc parallel loop gang /* { dg-message "ACC LOOP GANG" } */ + for (x = 0; x < 10; x++) +#pragma acc loop worker /* { dg-message "ACC LOOP WORKER" } */ + for (y = 0; y < 10; y++) +#pragma acc loop vector /* { dg-message "ACC LOOP VECTOR" } */ + for (z = 0; z < 10; z++) + ; + + return 0; +} diff --git a/gcc/testsuite/gfortran.dg/goacc/inform-parallelism.f90 b/gcc/testsuite/gfortran.dg/goacc/inform-parallelism.f90 new file mode 100644 index 0000000..6e11331 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/inform-parallelism.f90 @@ -0,0 +1,62 @@ +! Test the output of -finform-parallelism. + +!
{ dg-additional-options "-finform-parallelism" } + +program test + implicit none + + integer x, y, z + + !$acc parallel loop seq ! { dg-message "ACC LOOP SEQ" } + do x = 1, 10 + end do + + !$acc parallel loop gang ! { dg-message "ACC LOOP GANG" } + do x = 1, 10 + end do + + !$acc parallel loop worker ! { dg-message "ACC LOOP WORKER" } + do x = 1, 10 + end do + + !$acc parallel loop vector ! { dg-message "ACC LOOP VECTOR" } + do x = 1, 10 + end do + + !$acc parallel loop gang vector ! { dg-message "ACC LOOP GANG VECTOR" } + do x = 1, 10 + end do + + !$acc parallel loop gang worker ! { dg-message "ACC LOOP GANG WORKER" } + do x = 1, 10 + end do + + !$acc parallel loop worker vector ! { dg-message "ACC LOOP WORKER VECTOR" } + do x = 1, 10 + end do + + !$acc parallel loop gang worker vector ! { dg-message "ACC LOOP GANG WORKER VECTOR" } + do x = 1, 10 + end do + + !$acc parallel loop ! { dg-message "ACC LOOP GANG VECTOR" } + do x = 1, 10 + end do + + !$acc parallel loop ! { dg-message "ACC LOOP GANG WORKER" } + do x = 1, 10 + !$acc loop ! { dg-message "ACC LOOP VECTOR" } + do y = 1, 10 + end do + end do + + !$acc parallel loop gang ! { dg-message "ACC LOOP GANG" } + do x = 1, 10 + !$acc loop worker ! { dg-message "ACC LOOP WORKER" } + do y = 1, 10 + !$acc loop vector ! { dg-message "ACC LOOP VECTOR" } + do z = 1, 10 + end do + end do + end do +end program test